Skip to content

Commit

Permalink
create: implement --paths-from-stdin and --paths-from-command (#5538)
Browse files Browse the repository at this point in the history
create: implement --paths-from-stdin and --paths-from-command, see #5492

These switches read the paths to archive from stdin (or from the output of
the given command). The delimiter can be specified by --paths-delimiter=DELIM.
Paths read will be added honoring every option except the exclusion options
and --one-file-system. Directories aren't recursed into.
  • Loading branch information
Lapin0t authored Dec 6, 2020
1 parent 810d823 commit e1af909
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 3 deletions.
48 changes: 45 additions & 3 deletions src/borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from binascii import unhexlify
from contextlib import contextmanager
from datetime import datetime, timedelta
from io import TextIOWrapper

from .logger import create_logger, setup_logging

Expand All @@ -51,7 +52,7 @@
from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode
from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes
from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
from .helpers import timestamp
from .helpers import get_cache_dir, os_stat
Expand All @@ -73,6 +74,7 @@
from .helpers import flags_root, flags_dir, flags_special_follow, flags_special
from .helpers import msgpack
from .helpers import sig_int
from .helpers import iter_separated
from .nanorst import rst_to_terminal
from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
from .patterns import PatternMatcher
Expand Down Expand Up @@ -533,6 +535,37 @@ def create_inner(archive, cache, fso):
else:
status = '-'
self.print_file_status(status, path)
elif args.paths_from_command or args.paths_from_stdin:
paths_sep = eval_escapes(args.paths_delimiter) if args.paths_delimiter is not None else '\n'
if args.paths_from_command:
try:
proc = subprocess.Popen(args.paths, stdout=subprocess.PIPE)
except (FileNotFoundError, PermissionError) as e:
self.print_error('Failed to execute command: %s', e)
return self.exit_code
pipe_bin = proc.stdout
else: # args.paths_from_stdin == True
pipe_bin = sys.stdin.buffer
pipe = TextIOWrapper(pipe_bin, errors='surrogateescape')
for path in iter_separated(pipe, paths_sep):
try:
with backup_io('stat'):
st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
status = self._process_any(path=path, parent_fd=None, name=None, st=st, fso=fso,
cache=cache, read_special=args.read_special, dry_run=dry_run)
except (BackupOSError, BackupError) as e:
self.print_warning('%s: %s', path, e)
status = 'E'
if status == 'C':
self.print_warning('%s: file changed while we backed it up', path)
if status is None:
status = '?'
self.print_file_status(status, path)
if args.paths_from_command:
rc = proc.wait()
if rc != 0:
self.print_error('Command %r exited with status %d', args.paths[0], rc)
return self.exit_code
else:
for path in args.paths:
if path == '-': # stdin
Expand Down Expand Up @@ -3277,6 +3310,13 @@ def define_borg_mount(parser):
subparser.add_argument('--content-from-command', action='store_true',
help='interpret PATH as command and store its stdout. See also section Reading from'
' stdin below.')
subparser.add_argument('--paths-from-stdin', action='store_true',
help='read DELIM-separated list of paths to backup from stdin. Will not '
'recurse into directories.')
subparser.add_argument('--paths-from-command', action='store_true',
help='interpret PATH as command and treat its output as ``--paths-from-stdin``')
subparser.add_argument('--paths-delimiter', metavar='DELIM',
help='set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: \\n) ')

exclude_group = define_exclusion_group(subparser, tag_files=True)
exclude_group.add_argument('--exclude-nodump', dest='exclude_nodump', action='store_true',
Expand Down Expand Up @@ -4522,10 +4562,12 @@ def parse_args(self, args=None):
args = parser.parse_args(args or ['-h'])
parser.common_options.resolve(args)
func = get_func(args)
if func == self.do_create and args.paths and args.paths_from_stdin:
parser.error('Must not pass PATH with ``--paths-from-stdin``.')
if func == self.do_create and not args.paths:
if args.content_from_command:
if args.content_from_command or args.paths_from_command:
parser.error('No command given.')
else:
elif not args.paths_from_stdin:
# need at least 1 path but args.paths may also be populated from patterns
parser.error('Need at least one PATH argument.')
if not getattr(args, 'lock', True): # Option --bypass-lock sets args.lock = False
Expand Down
17 changes: 17 additions & 0 deletions src/borg/helpers/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,20 @@ def write(self, s):
except OSError:
pass
return len(s)


def iter_separated(fd, sep=None, read_size=4096):
    """Iter over chunks of open file ``fd`` delimited by ``sep``. Doesn't trim.

    The stream is consumed through repeated ``fd.read(read_size)`` calls and
    the items are yielded lazily, without the separator.

    :param fd: object whose ``read(size)`` returns ``str`` or ``bytes``
    :param sep: separator, of the same type as the data read from ``fd``;
                a falsy value selects a newline of the matching type
    :param read_size: size argument passed to every ``fd.read()`` call
    :return: generator over the items; empty items between two adjacent
             separators are yielded, an empty item at the very end is not
    """
    buf = fd.read(read_size)
    is_str = isinstance(buf, str)
    part = '' if is_str else b''
    sep = sep or ('\n' if is_str else b'\n')
    while len(buf) > 0:
        # Split the carried-over remainder together with the fresh data (not
        # the fresh data alone): a multi-character separator may straddle the
        # boundary between two reads and would otherwise never be matched.
        *full, part = (part + buf).split(sep)
        yield from full
        buf = fd.read(read_size)
    # won't yield an empty part if stream ended with `sep`
    # or if there was no data before EOF
    if len(part) > 0:
        yield part
5 changes: 5 additions & 0 deletions src/borg/helpers/parseformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ def remove_surrogates(s, errors='replace'):
return s.encode('utf-8', errors).decode('utf-8')


def eval_escapes(s):
    """Return ``s`` with its literal backslash escape sequences evaluated.

    For example, the two characters backslash + ``n`` become a single newline
    character. Non-ASCII characters are first rewritten as escape sequences
    and then decoded again together with the escapes that were already there.
    """
    ascii_escaped = s.encode('ascii', 'backslashreplace')
    return ascii_escaped.decode('unicode-escape')


def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
for key in keys:
if isinstance(d.get(key), bytes):
Expand Down
41 changes: 41 additions & 0 deletions src/borg/testsuite/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,6 +1065,47 @@ def test_create_content_from_command_missing_command(self):
output = self.cmd('create', '--content-from-command', self.repository_location + '::test', exit_code=2)
assert output.endswith('No command given.\n')

def test_create_paths_from_stdin(self):
    """--paths-from-stdin archives exactly the NUL-separated paths it is given."""
    archive = self.repository_location + '::test'
    self.cmd('init', '--encryption=repokey', self.repository_location)
    for name in ("file1", "dir1/file2", "dir1/file3", "file4"):
        self.create_regular_file(name, size=1024 * 80)

    # the delimiter is passed as the literal escape sequence backslash-zero;
    # the data itself has no trailing delimiter
    stdin_data = b'input/file1\0input/dir1\0input/file4'
    self.cmd('create', '--paths-from-stdin', '--paths-delimiter', '\\0',
             archive, input=stdin_data)
    listing = self.cmd('list', '--json-lines', archive)
    archived = [json.loads(entry)['path'] for entry in listing.split('\n') if entry]
    # only the three given paths are expected, nothing below dir1
    assert archived == ['input/file1', 'input/dir1', 'input/file4']

def test_create_paths_from_command(self):
    """--paths-from-command archives the paths printed by the given command."""
    archive = self.repository_location + '::test'
    self.cmd('init', '--encryption=repokey', self.repository_location)
    for name in ("file1", "file2", "file3", "file4"):
        self.create_regular_file(name, size=1024 * 80)

    # echo prints the newline-separated list; file4 is deliberately left out
    echoed = 'input/file1\ninput/file2\ninput/file3'
    self.cmd('create', '--paths-from-command',
             archive, '--', 'echo', echoed)
    listing = self.cmd('list', '--json-lines', archive)
    archived = [json.loads(entry)['path'] for entry in listing.split('\n') if entry]
    assert archived == ['input/file1', 'input/file2', 'input/file3']

def test_create_paths_from_command_with_failed_command(self):
    """A path command exiting non-zero yields exit code 2 and no archive."""
    self.cmd('init', '--encryption=repokey', self.repository_location)
    failing_command = ('sh', '-c', 'exit 73;')
    message = self.cmd('create', '--paths-from-command', self.repository_location + '::test',
                       '--', *failing_command, exit_code=2)
    assert message.endswith("Command 'sh' exited with status 73\n")
    # the repository must not contain any archive afterwards
    repo_info = json.loads(self.cmd('list', '--json', self.repository_location))
    assert repo_info['archives'] == []

def test_create_paths_from_command_missing_command(self):
    """--paths-from-command without a command is rejected with exit code 2."""
    self.cmd('init', '--encryption=repokey', self.repository_location)
    archive = self.repository_location + '::test'
    message = self.cmd('create', '--paths-from-command', archive, exit_code=2)
    assert message.endswith('No command given.\n')

def test_create_without_root(self):
"""test create without a root"""
self.cmd('init', '--encryption=repokey', self.repository_location)
Expand Down
27 changes: 27 additions & 0 deletions src/borg/testsuite/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sys
from argparse import ArgumentTypeError
from datetime import datetime, timezone, timedelta
from io import StringIO, BytesIO
from time import sleep

import pytest
Expand All @@ -27,6 +28,8 @@
from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS
from ..helpers import popen_with_error_handling
from ..helpers import dash_open
from ..helpers import iter_separated
from ..helpers import eval_escapes

from . import BaseTestCase, FakeInputs

Expand Down Expand Up @@ -1022,3 +1025,27 @@ def test_dash_open():
assert dash_open('-', 'w') is sys.stdout
assert dash_open('-', 'rb') is sys.stdin.buffer
assert dash_open('-', 'wb') is sys.stdout.buffer


def test_iter_separated():
    """Check iter_separated against text and binary streams and several separators."""
    cases = [
        # (stream, keyword arguments for iter_separated, expected items)
        # newline and utf-8
        (StringIO('foo\nbar/baz\nαáčő'), {}, ['foo', 'bar/baz', 'αáčő']),
        # null and bogus ending
        (StringIO('foo/bar\0baz\0spam\0'), {'sep': '\0'}, ['foo/bar', 'baz', 'spam']),
        # multichar
        (StringIO('foo/barSEPbazSEPspam'), {'sep': 'SEP'}, ['foo/bar', 'baz', 'spam']),
        # bytes
        (BytesIO(b'foo\nblop\t\ngr\xe4ezi'), {}, [b'foo', b'blop\t', b'gr\xe4ezi']),
    ]
    for stream, kwargs, expected in cases:
        assert list(iter_separated(stream, **kwargs)) == expected


def test_eval_escapes():
    """Check eval_escapes on pure-ASCII escapes and on input with non-ASCII characters."""
    expectations = {
        '\\n\\0\\x23': '\n\0#',
        'äç\\n': 'äç\n',
    }
    for raw, evaluated in expectations.items():
        assert eval_escapes(raw) == evaluated

0 comments on commit e1af909

Please sign in to comment.