diff --git a/borg/archiver.py b/borg/archiver.py
index 1ac16318c..ce70e831d 100644
--- a/borg/archiver.py
+++ b/borg/archiver.py
@@ -19,9 +19,9 @@ import collections
 from . import __version__
 from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \
-    parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, bin_to_hex, \
-    get_cache_dir, prune_within, prune_split, \
-    Manifest, NoManifestError, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
+    parse_pattern, parse_exclude_pattern, ArgparsePatternAction, ArgparsePatternFileAction, ArgparseExcludeFileAction, \
+    PathPrefixPattern, to_localtime, timestamp, safe_timestamp, bin_to_hex, get_cache_dir, prune_within, prune_split, \
+    Manifest, NoManifestError, remove_surrogates, format_archive, check_extension_modules, Statistics, \
     dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, PrefixSpec, is_slow_msgpack, yes, sysinfo, \
     EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ErrorIgnoringTextIOWrapper
 from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
@@ -122,6 +122,18 @@ class Archiver:
         if self.output_list and (self.output_filter is None or status in self.output_filter):
             logger.info("%1s %s", status, remove_surrogates(path))
 
+    @staticmethod
+    def build_matcher(inclexcl_patterns, paths):
+        matcher = PatternMatcher()
+        if inclexcl_patterns:
+            matcher.add_inclexcl(inclexcl_patterns)
+        include_patterns = []
+        if paths:
+            include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in paths)
+            matcher.add(include_patterns, True)
+        matcher.fallback = not include_patterns
+        return matcher, include_patterns
+
     def do_serve(self, args):
         """Start in server mode. This command is usually not used manually.
         """
@@ -243,8 +255,7 @@ class Archiver:
     def do_create(self, args, repository, manifest=None, key=None):
         """Create new archive"""
         matcher = PatternMatcher(fallback=True)
-        if args.excludes:
-            matcher.add(args.excludes, False)
+        matcher.add_inclexcl(args.patterns)
 
         def create_inner(archive, cache):
             # Add cache dir to inode_skip list
@@ -434,17 +445,7 @@ class Archiver:
             if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )):
                 logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8')
 
-        matcher = PatternMatcher()
-        if args.excludes:
-            matcher.add(args.excludes, False)
-
-        include_patterns = []
-
-        if args.paths:
-            include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths)
-            matcher.add(include_patterns, True)
-
-        matcher.fallback = not include_patterns
+        matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
 
         output_list = args.output_list
         dry_run = args.dry_run
@@ -907,8 +908,9 @@ class Archiver:
         helptext = collections.OrderedDict()
         helptext['patterns'] = textwrap.dedent('''
-        Exclusion patterns support four separate styles, fnmatch, shell, regular
-        expressions and path prefixes. By default, fnmatch is used. If followed
+        File patterns support four separate styles: fnmatch, shell, regular
+        expressions and path prefixes. By default, fnmatch is used for
+        `--exclude` patterns and shell-style is used for `--pattern`. If followed
         by a colon (':') the first two characters of a pattern are used as a
         style selector.
         Explicit style selection is necessary when a non-default style is
         desired or when the desired pattern starts with
@@ -916,12 +918,12 @@ class Archiver:
         `Fnmatch `_, selector `fm:`
 
-            This is the default style. These patterns use a variant of shell
-            pattern syntax, with '*' matching any number of characters, '?'
-            matching any single character, '[...]' matching any single
-            character specified, including ranges, and '[!...]' matching any
-            character not specified. For the purpose of these patterns, the
-            path separator ('\\' for Windows and '/' on other systems) is not
+            This is the default style for --exclude and --exclude-from.
+            These patterns use a variant of shell pattern syntax, with '*' matching
+            any number of characters, '?' matching any single character, '[...]'
+            matching any single character specified, including ranges, and '[!...]'
+            matching any character not specified. For the purpose of these patterns,
+            the path separator ('\\' for Windows and '/' on other systems) is not
             treated specially. Wrap meta-characters in brackets for a literal
             match (i.e. `[?]` to match the literal character `?`). For a path
             to match a pattern, it must completely match from start to end, or
@@ -932,6 +934,7 @@ class Archiver:
 
         Shell-style patterns, selector `sh:`
 
+            This is the default style for --pattern and --patterns-from.
             Like fnmatch patterns these are similar to shell patterns. The difference
             is that the pattern may include `**/` for matching zero or more directory
             levels, `*` for matching zero or more arbitrary characters with the
@@ -992,7 +995,39 @@ class Archiver:
             re:^/home/[^/]\.tmp/
             sh:/home/*/.thumbnails
             EOF
-            $ borg create --exclude-from exclude.txt backup /\n\n''')
+            $ borg create --exclude-from exclude.txt backup /
+
+
+        A more general and easier-to-use way to define filename matching patterns
+        exists with the `--pattern` and `--patterns-from` options. Using these, you
+        may specify the backup roots (starting points) and patterns for
+        inclusion/exclusion. A root path starts with the prefix `R`, followed by a
+        path (a plain path, not a file pattern). An include rule starts with the
+        prefix +, an exclude rule starts with the prefix -, both followed by a
+        pattern.
+        Inclusion patterns are useful to include paths that are contained in an
+        excluded path. The first matching pattern is used, so if an include pattern
+        matches before an exclude pattern, the file is backed up.
+
+        Note that the default pattern style for `--pattern` and `--patterns-from` is
+        shell style (`sh:`), so those patterns behave similarly to rsync
+        include/exclude patterns.
+
+        Patterns (`--pattern`) and excludes (`--exclude`) from the command line are
+        considered first (in the order of appearance). Then patterns from
+        `--patterns-from` are added. Exclusion patterns from `--exclude-from` files
+        are appended last.
+
+        An example `--patterns-from` file could look like this::
+
+            R /
+            # can be rebuilt
+            - /home/*/.cache
+            # they're downloads for a reason
+            - /home/*/Downloads
+            # susan is a nice person
+            # include susan's home
+            + /home/susan
+            # don't back up the other home directories
+            - /home/*\n\n''')
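To make the first-match-wins rule concrete, here is a minimal sketch using the helpers this patch adds to borg/helpers.py (parse_inclexcl_pattern and PatternMatcher.add_inclexcl); the paths are invented and a borg tree with this patch applied is assumed to be importable:

    from borg.helpers import PatternMatcher, parse_inclexcl_pattern

    matcher = PatternMatcher(fallback=True)      # paths matching no pattern stay included
    matcher.add_inclexcl([parse_inclexcl_pattern(p) for p in (
        '+ /home/susan',    # listed first, so susan's home is kept ...
        '- /home/*',        # ... although this broader exclude also matches it
    )])

    print(matcher.match('/home/susan'))   # True  -> backed up
    print(matcher.match('/home/other'))   # False -> excluded
    print(matcher.match('/etc/fstab'))    # True  -> no pattern matched, fallback applies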
 
         helptext['placeholders'] = textwrap.dedent('''
         Repository (or Archive) URLs, --prefix and --remote-path values support these placeholders:
@@ -1109,6 +1144,9 @@ class Archiver:
                                help='show version number and exit')
         subparsers = parser.add_subparsers(title='required arguments', metavar='')
 
+        # some empty defaults for all subparsers
+        common_parser.set_defaults(paths=[], patterns=[])
+
         serve_epilog = textwrap.dedent("""
         This command starts a repository server process. This command is usually not used manually.
         """)
@@ -1359,11 +1397,10 @@ class Archiver:
                                help='output verbose list of items (files, dirs, ...)')
         subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS',
                                help='only display items with the given status characters')
-        subparser.add_argument('-e', '--exclude', dest='excludes',
-                               type=parse_pattern, action='append',
+        subparser.add_argument('-e', '--exclude', dest='patterns',
+                               type=parse_exclude_pattern, action='append',
                                metavar="PATTERN", help='exclude paths matching PATTERN')
-        subparser.add_argument('--exclude-from', dest='exclude_files',
-                               type=argparse.FileType('r'), action='append',
+        subparser.add_argument('--exclude-from', action=ArgparseExcludeFileAction,
                                metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
         subparser.add_argument('--exclude-caches', dest='exclude_caches',
                                action='store_true', default=False,
@@ -1374,6 +1411,10 @@ class Archiver:
         subparser.add_argument('--keep-tag-files', dest='keep_tag_files',
                                action='store_true', default=False,
                                help='keep tag files of excluded caches/directories')
+        subparser.add_argument('--pattern', action=ArgparsePatternAction,
+                               metavar="PATTERN", help='include/exclude paths matching PATTERN')
+        subparser.add_argument('--patterns-from', action=ArgparsePatternFileAction,
+                               metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
         subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
                                type=int, default=300, metavar='SECONDS',
                                help='write checkpoint every SECONDS seconds (Default: 300)')
@@ -1420,7 +1461,7 @@ class Archiver:
         subparser.add_argument('location', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                help='name of archive to create (must be also a valid directory name)')
-        subparser.add_argument('paths', metavar='PATH', nargs='+', type=str,
+        subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
                                help='paths to archive')
 
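The new `--pattern` option works by having a custom argparse action append into the pre-seeded `paths`/`patterns` defaults instead of its own `dest` (which is why `common_parser.set_defaults(paths=[], patterns=[])` is added above, and why the `paths` positional can become optional). A simplified, self-contained sketch of that mechanism, using illustrative names rather than borg's actual classes:

    import argparse

    class PatternAction(argparse.Action):
        # Route one option into two pre-seeded destinations instead of its own dest.
        def __call__(self, parser, args, values, option_string=None):
            value = values[0]
            if value.startswith('R '):
                args.paths.append(value[2:])   # root -> treated like a PATH argument
            else:
                args.patterns.append(value)    # +/- rule -> include/exclude pattern

    parser = argparse.ArgumentParser()
    parser.set_defaults(paths=[], patterns=[])   # defaults exist on the namespace before any action runs
    parser.add_argument('--pattern', action=PatternAction, nargs=1, metavar='PATTERN')

    args = parser.parse_args(['--pattern', 'R /home', '--pattern', '- /home/*/.cache'])
    print(args.paths, args.patterns)             # ['/home'] ['- /home/*/.cache']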
         extract_epilog = textwrap.dedent("""
@@ -1443,12 +1484,15 @@ class Archiver:
         subparser.add_argument('-n', '--dry-run', dest='dry_run',
                                default=False, action='store_true',
                                help='do not actually change any files')
-        subparser.add_argument('-e', '--exclude', dest='excludes',
-                               type=parse_pattern, action='append',
+        subparser.add_argument('-e', '--exclude', dest='patterns',
+                               type=parse_exclude_pattern, action='append',
                                metavar="PATTERN", help='exclude paths matching PATTERN')
-        subparser.add_argument('--exclude-from', dest='exclude_files',
-                               type=argparse.FileType('r'), action='append',
+        subparser.add_argument('--exclude-from', action=ArgparseExcludeFileAction,
                                metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
+        subparser.add_argument('--pattern', action=ArgparsePatternAction,
+                               metavar="PATTERN", help='include/exclude paths matching PATTERN')
+        subparser.add_argument('--patterns-from', action=ArgparsePatternFileAction,
+                               metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
         subparser.add_argument('--numeric-owner', dest='numeric_owner',
                                action='store_true', default=False,
                                help='only obey numeric user and group identifiers')
@@ -2010,7 +2054,10 @@ class Archiver:
         args = self.preprocess_args(args)
         parser = self.build_parser(args)
         args = parser.parse_args(args or ['-h'])
-        update_excludes(args)
+        if args.func == self.do_create:
+            # need at least 1 path but args.paths may also be populated from patterns
+            if not args.paths:
+                parser.error('Need at least one PATH argument.')
         return args
 
     def run(self, args):
diff --git a/borg/helpers.py b/borg/helpers.py
index 80eed5000..35d96cdc3 100644
--- a/borg/helpers.py
+++ b/borg/helpers.py
@@ -320,22 +320,52 @@ def parse_timestamp(timestamp):
     return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
 
 
-def load_excludes(fh):
-    """Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on
-    both line ends are ignored.
-    """
-    patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#'))
-    return [parse_pattern(pattern) for pattern in patterns if pattern]
+def parse_add_pattern(patternstr, roots, patterns):
+    """Parse a pattern string and add it to roots or patterns depending on the pattern type."""
+    pattern = parse_inclexcl_pattern(patternstr)
+    if pattern.ptype is RootPath:
+        roots.append(pattern.pattern)
+    else:
+        patterns.append(pattern)
 
 
-def update_excludes(args):
-    """Merge exclude patterns from files with those on command line."""
-    if hasattr(args, 'exclude_files') and args.exclude_files:
-        if not hasattr(args, 'excludes') or args.excludes is None:
-            args.excludes = []
-        for file in args.exclude_files:
-            args.excludes += load_excludes(file)
-            file.close()
+def load_pattern_file(fileobj, roots, patterns):
+    for patternstr in clean_lines(fileobj):
+        parse_add_pattern(patternstr, roots, patterns)
+
+
+def load_exclude_file(fileobj, patterns):
+    for patternstr in clean_lines(fileobj):
+        patterns.append(parse_exclude_pattern(patternstr))
+
+
+class ArgparsePatternAction(argparse.Action):
+    def __init__(self, nargs=1, **kw):
+        super().__init__(nargs=nargs, **kw)
+
+    def __call__(self, parser, args, values, option_string=None):
+        parse_add_pattern(values[0], args.paths, args.patterns)
+
+
+class ArgparsePatternFileAction(argparse.Action):
+    def __init__(self, nargs=1, **kw):
+        super().__init__(nargs=nargs, **kw)
+
+    def __call__(self, parser, args, values, option_string=None):
+        """Load and parse patterns from a file.
+        Lines empty or starting with '#' after stripping whitespace on both line ends are ignored.
+        """
+        filename = values[0]
+        with open(filename) as f:
+            self.parse(f, args)
+
+    def parse(self, fobj, args):
+        load_pattern_file(fobj, args.paths, args.patterns)
+
+
+class ArgparseExcludeFileAction(ArgparsePatternFileAction):
+    def parse(self, fobj, args):
+        load_exclude_file(fobj, args.patterns)
+ """ + self._items.extend(patterns) + def match(self, path): for (pattern, value) in self._items: if pattern.match(path): @@ -502,6 +538,9 @@ _PATTERN_STYLES = set([ _PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES) +InclExclPattern = namedtuple('InclExclPattern', 'pattern ptype') +RootPath = object() + def parse_pattern(pattern, fallback=FnmatchPattern): """Read pattern from string and return an instance of the appropriate implementation class. @@ -519,6 +558,35 @@ def parse_pattern(pattern, fallback=FnmatchPattern): return cls(pattern) +def parse_exclude_pattern(pattern, fallback=FnmatchPattern): + """Read pattern from string and return an instance of the appropriate implementation class. + """ + epattern = parse_pattern(pattern, fallback) + return InclExclPattern(epattern, False) + + +def parse_inclexcl_pattern(pattern, fallback=ShellPattern): + """Read pattern from string and return a InclExclPattern object.""" + type_prefix_map = { + '-': False, + '+': True, + 'R': RootPath, + 'r': RootPath, + } + try: + ptype = type_prefix_map[pattern[0]] + pattern = pattern[1:].lstrip() + if not pattern: + raise ValueError("Missing pattern!") + except (IndexError, KeyError, ValueError): + raise argparse.ArgumentTypeError("Unable to parse pattern: {}".format(pattern)) + if ptype is RootPath: + pobj = pattern + else: + pobj = parse_pattern(pattern, fallback) + return InclExclPattern(pobj, ptype) + + def timestamp(s): """Convert a --timestamp=s argument to a datetime object""" try: @@ -1304,6 +1372,30 @@ def signal_handler(sig, handler): signal.signal(sig, orig_handler) +def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True): + """ + clean lines (usually read from a config file): + 1. strip whitespace (left and right), 2. remove empty lines, 3. remove comments. + note: only "pure comment lines" are supported, no support for "trailing comments". + :param lines: input line iterator (e.g. 
diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py
index 6968ec33b..bf1429ba1 100644
--- a/borg/testsuite/archiver.py
+++ b/borg/testsuite/archiver.py
@@ -652,6 +652,50 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd("extract", self.repository_location + "::test", "fm:input/file1", "fm:*file33*", "input/file2")
         self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file333"])
 
+    def test_create_without_root(self):
+        """test create without a root"""
+        self.cmd('init', self.repository_location)
+        args = ['create', self.repository_location + '::test']
+        if self.FORK_DEFAULT:
+            self.cmd(*args, exit_code=2)
+        else:
+            self.assert_raises(SystemExit, lambda: self.cmd(*args))
+
+    def test_create_pattern_root(self):
+        """test create with only a root pattern"""
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('file2', size=1024 * 80)
+        output = self.cmd('create', '-v', '--list', '--pattern=R input', self.repository_location + '::test')
+        self.assert_in("A input/file1", output)
+        self.assert_in("A input/file2", output)
+
+    def test_create_pattern(self):
+        """test file patterns during create"""
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('file2', size=1024 * 80)
+        self.create_regular_file('file_important', size=1024 * 80)
+        output = self.cmd('create', '-v', '--list',
+                          '--pattern=+input/file_important', '--pattern=-input/file*',
+                          self.repository_location + '::test', 'input')
+        self.assert_in("A input/file_important", output)
+        self.assert_in("A input/file_important", output)
+        self.assert_not_in('file1', output)
+        self.assert_not_in('file2', output)
+
+    def test_extract_pattern_opt(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('file2', size=1024 * 80)
+        self.create_regular_file('file_important', size=1024 * 80)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.cmd('extract',
+                     '--pattern=+input/file_important', '--pattern=-input/file*',
+                     self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file_important'])
+
     def test_exclude_caches(self):
         self.cmd('init', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)
diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py
index dcf1859ec..67f141f19 100644
--- a/borg/testsuite/helpers.py
+++ b/borg/testsuite/helpers.py
@@ -9,12 +9,13 @@ import sys
 import msgpack
 import msgpack.fallback
 import time
+import argparse
 
 from ..helpers import Location, format_file_size, format_timedelta, format_line, PlaceholderError, make_path_safe, \
     prune_within, prune_split, get_cache_dir, get_keys_dir, get_security_dir, Statistics, is_slow_msgpack, \
     yes, TRUISH, FALSISH, DEFAULTISH, \
     StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
-    ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
+    ProgressIndicatorPercent, ProgressIndicatorEndless, parse_pattern, load_exclude_file, load_pattern_file, \
     PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, \
     Buffer
 from . import BaseTestCase, FakeInputs
@@ -428,8 +429,13 @@ def test_invalid_unicode_pattern(pattern):
     (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]),
     (["pp:aaabbb"], None),
     (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
+    (["/nomatch", "/more/*"],
+     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
+    # the order of exclude patterns shouldn't matter
+    (["/more/*", "/nomatch"],
+     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
 ])
-def test_patterns_from_file(tmpdir, lines, expected):
+def test_exclude_patterns_from_file(tmpdir, lines, expected):
     files = [
         '/data/something00.txt', '/more/data', '/home', ' #/wsfoobar',
@@ -438,8 +444,10 @@ def test_patterns_from_file(tmpdir, lines, expected):
         '\tstart/whitespace', '/whitespace/end\t',
     ]
 
     def evaluate(filename):
+        patterns = []
+        load_exclude_file(open(filename, "rt"), patterns)
         matcher = PatternMatcher(fallback=True)
-        matcher.add(load_excludes(open(filename, "rt")), False)
+        matcher.add_inclexcl(patterns)
         return [path for path in files if matcher.match(path)]
 
     exclfile = tmpdir.join("exclude.txt")
@@ -450,6 +458,104 @@ def test_patterns_from_file(tmpdir, lines, expected):
     assert evaluate(str(exclfile)) == (files if expected is None else expected)
 
 
+@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
+    # "None" means all files, i.e. none excluded
+    ([], [], 0),
+    (["# Comment only"], [], 0),
+    (["- *"], [], 1),
+    (["+fm:*/something00.txt",
+      "-/data"], [], 2),
+    (["R /"], ["/"], 0),
+    (["R /",
+      "# comment"], ["/"], 0),
+    (["# comment",
+      "- /data",
+      "R /home"], ["/home"], 1),
+])
+def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
+    def evaluate(filename):
+        roots = []
+        inclexclpatterns = []
+        load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
+        return roots, len(inclexclpatterns)
+    patternfile = tmpdir.join("patterns.txt")
+
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+
+    roots, numpatterns = evaluate(str(patternfile))
+    assert roots == expected_roots
+    assert numpatterns == expected_numpatterns
+
+
+@pytest.mark.parametrize("lines", [
+    (["X /data"]),  # illegal pattern type prefix
+    (["/data"]),  # need a pattern type prefix
+])
+def test_load_invalid_patterns_from_file(tmpdir, lines):
+    patternfile = tmpdir.join("patterns.txt")
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+    filename = str(patternfile)
+    with pytest.raises(argparse.ArgumentTypeError):
+        roots = []
+        inclexclpatterns = []
+        load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
+
+
+@pytest.mark.parametrize("lines, expected", [
+    # "None" means all files, i.e. none excluded
+    ([], None),
+    (["# Comment only"], None),
+    (["- *"], []),
+    # default match type is sh: for patterns -> * doesn't match a /
+    (["-*/something0?.txt"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
+      '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["-fm:*/something00.txt"],
+     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["-fm:*/something0?.txt"],
+     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["+/*/something0?.txt",
+      "-/data"],
+     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["+fm:*/something00.txt",
+      "-/data"],
+     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    # include /home/leo and exclude the rest of /home:
+    (["+/home/leo",
+      "-/home/*"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
+    # wrong order, /home/leo is already excluded by -/home/*:
+    (["-/home/*",
+      "+/home/leo"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']),
+    (["+fm:/home/leo",
+      "-/home/"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
+])
+def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
+    files = [
+        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
+        '/home', '/home/leo', '/home/leo/t', '/home/other'
+    ]
+
+    def evaluate(filename):
+        matcher = PatternMatcher(fallback=True)
+        roots = []
+        inclexclpatterns = []
+        load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
+        matcher.add_inclexcl(inclexclpatterns)
+        return [path for path in files if matcher.match(path)]
+
+    patternfile = tmpdir.join("patterns.txt")
+
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+
+    assert evaluate(str(patternfile)) == (files if expected is None else expected)
+
+
 @pytest.mark.parametrize("pattern, cls", [
     ("", FnmatchPattern),