From 30a5c5e44b185f0d9955d93913f38d6554c5fbed Mon Sep 17 00:00:00 2001 From: Alexander 'Leo' Bergolth Date: Tue, 2 Aug 2016 16:02:02 +0200 Subject: [PATCH] add two new options --pattern and --patterns-from as discussed in #1406 # Conflicts: # src/borg/archiver.py # src/borg/helpers.py # src/borg/testsuite/helpers.py Original-Commit: 876b670d --- src/borg/archiver.py | 190 ++++++++++++++++++++++++--------- src/borg/helpers.py | 95 ++++++++++++++--- src/borg/testsuite/archiver.py | 47 ++++++++ src/borg/testsuite/helpers.py | 114 +++++++++++++++++++- 4 files changed, 378 insertions(+), 68 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index bca7a04c5..f86fa7260 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -44,7 +44,8 @@ from .helpers import to_localtime, timestamp from .helpers import get_cache_dir from .helpers import Manifest from .helpers import StableDict -from .helpers import update_excludes, check_extension_modules +from .helpers import check_extension_modules +from .helpers import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo from .helpers import log_multi from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern @@ -128,7 +129,7 @@ class Archiver: def __init__(self, lock_wait=None, prog=None): self.exit_code = EXIT_SUCCESS self.lock_wait = lock_wait - self.parser = self.build_parser(prog) + self.prog = prog def print_error(self, msg, *args): msg = args and msg % args or msg @@ -172,10 +173,10 @@ class Archiver: bi += slicelen @staticmethod - def build_matcher(excludes, paths): + def build_matcher(inclexcl_patterns, paths): matcher = PatternMatcher() - if excludes: - matcher.add(excludes, False) + if inclexcl_patterns: + matcher.add_inclexcl(inclexcl_patterns) include_patterns = [] if paths: include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in paths) @@ -316,8 +317,7 @@ class 
Archiver: def do_create(self, args, repository, manifest=None, key=None): """Create new archive""" matcher = PatternMatcher(fallback=True) - if args.excludes: - matcher.add(args.excludes, False) + matcher.add_inclexcl(args.patterns) def create_inner(archive, cache): # Add cache dir to inode_skip list @@ -523,7 +523,7 @@ class Archiver: if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )): logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8') - matcher, include_patterns = self.build_matcher(args.excludes, args.paths) + matcher, include_patterns = self.build_matcher(args.patterns, args.paths) progress = args.progress output_list = args.output_list @@ -793,7 +793,7 @@ class Archiver: 'If you know for certain that they are the same, pass --same-chunker-params ' 'to override this check.') - matcher, include_patterns = self.build_matcher(args.excludes, args.paths) + matcher, include_patterns = self.build_matcher(args.patterns, args.paths) compare_archives(archive1, archive2, matcher) @@ -927,7 +927,7 @@ class Archiver: return self._list_repository(args, manifest, write) def _list_archive(self, args, repository, manifest, key, write): - matcher, _ = self.build_matcher(args.excludes, args.paths) + matcher, _ = self.build_matcher(args.patterns, args.paths) with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache: archive = Archive(repository, key, manifest, args.location.archive, cache=cache, consider_part_files=args.consider_part_files) @@ -1157,7 +1157,7 @@ class Archiver: env_var_override='BORG_RECREATE_I_KNOW_WHAT_I_AM_DOING'): return EXIT_ERROR - matcher, include_patterns = self.build_matcher(args.excludes, args.paths) + matcher, include_patterns = self.build_matcher(args.patterns, args.paths) self.output_list = args.output_list self.output_filter = args.output_filter @@ -1401,8 +1401,9 @@ class Archiver: helptext = collections.OrderedDict() helptext['patterns'] = 
textwrap.dedent(''' - Exclusion patterns support four separate styles, fnmatch, shell, regular - expressions and path prefixes. By default, fnmatch is used. If followed + File patterns support four separate styles: fnmatch, shell, regular + expressions and path prefixes. By default, fnmatch is used for + `--exclude` patterns and shell-style is used for `--pattern`. If followed by a colon (':') the first two characters of a pattern are used as a style selector. Explicit style selection is necessary when a non-default style is desired or when the desired pattern starts with @@ -1410,12 +1411,12 @@ class Archiver: `Fnmatch `_, selector `fm:` - This is the default style. These patterns use a variant of shell - pattern syntax, with '*' matching any number of characters, '?' - matching any single character, '[...]' matching any single - character specified, including ranges, and '[!...]' matching any - character not specified. For the purpose of these patterns, the - path separator ('\\' for Windows and '/' on other systems) is not + This is the default style for --exclude and --exclude-from. + These patterns use a variant of shell pattern syntax, with '*' matching + any number of characters, '?' matching any single character, '[...]' + matching any single character specified, including ranges, and '[!...]' + matching any character not specified. For the purpose of these patterns, + the path separator ('\\' for Windows and '/' on other systems) is not treated specially. Wrap meta-characters in brackets for a literal match (i.e. `[?]` to match the literal character `?`). For a path to match a pattern, it must completely match from start to end, or @@ -1426,6 +1427,7 @@ class Archiver: Shell-style patterns, selector `sh:` + This is the default style for --pattern and --patterns-from. Like fnmatch patterns these are similar to shell patterns. 
The difference is that the pattern may include `**/` for matching zero or more directory levels, `*` for matching zero or more arbitrary characters with the @@ -1486,7 +1488,39 @@ class Archiver: re:^/home/[^/]\.tmp/ sh:/home/*/.thumbnails EOF - $ borg create --exclude-from exclude.txt backup /\n\n''') + $ borg create --exclude-from exclude.txt backup / + + + A more general and easier to use way to define filename matching patterns exists + with the `--pattern` and `--patterns-from` options. Using these, you may specify + the backup roots (starting points) and patterns for inclusion/exclusion. A + root path starts with the prefix `R`, followed by a path (a plain path, not a + file pattern). An include rule starts with the prefix +, an exclude rule starts + with the prefix -, both followed by a pattern. + Inclusion patterns are useful to include pathes that are contained in an excluded + path. The first matching pattern is used so if an include pattern matches before + an exclude pattern, the file is backed up. + + Note that the default pattern style for `--pattern` and `--patterns-from` is + shell style (`sh:`), so those patterns behave similar to rsync include/exclude + patterns. + + Patterns (`--pattern`) and excludes (`--exclude`) from the command line are + considered first (in the order of appearance). Then patterns from `--patterns-from` + are added. Exclusion patterns from `--exclude-from` files are appended last. 
+ + An example `--patterns-from` file could look like that:: + + R / + # can be rebuild + - /home/*/.cache + # they're downloads for a reason + - /home/*/Downloads + # susan is a nice person + # include susans home + + /home/susan + # don't backup the other home directories + - /home/*\n\n''') helptext['placeholders'] = textwrap.dedent(''' Repository (or Archive) URLs, --prefix and --remote-path values support these placeholders: @@ -1717,6 +1751,9 @@ class Archiver: help='show version number and exit') subparsers = parser.add_subparsers(title='required arguments', metavar='') + # some empty defaults for all subparsers + common_parser.set_defaults(paths=[], patterns=[]) + serve_epilog = process_epilog(""" This command starts a repository server process. This command is usually not used manually. """) @@ -2114,11 +2151,10 @@ class Archiver: help='only display items with the given status characters') exclude_group = subparser.add_argument_group('Exclusion options') - exclude_group.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', + exclude_group.add_argument('-e', '--exclude', dest='patterns', + type=parse_exclude_pattern, action='append', metavar="PATTERN", help='exclude paths matching PATTERN') - exclude_group.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', + exclude_group.add_argument('--exclude-from', action=ArgparseExcludeFileAction, metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') exclude_group.add_argument('--exclude-caches', dest='exclude_caches', action='store_true', default=False, @@ -2132,6 +2168,11 @@ class Archiver: action='store_true', default=False, help='keep tag objects (i.e.: arguments to --exclude-if-present) in otherwise ' 'excluded caches/directories') + exclude_group.add_argument('--pattern', + action=ArgparsePatternAction, + metavar="PATTERN", help='include/exclude paths matching PATTERN') + 
exclude_group.add_argument('--patterns-from', action=ArgparsePatternFileAction, + metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line') fs_group = subparser.add_argument_group('Filesystem options') fs_group.add_argument('-x', '--one-file-system', dest='one_file_system', @@ -2183,7 +2224,7 @@ class Archiver: subparser.add_argument('location', metavar='ARCHIVE', type=location_validator(archive=True), help='name of archive to create (must be also a valid directory name)') - subparser.add_argument('paths', metavar='PATH', nargs='+', type=str, + subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, help='paths to archive') extract_epilog = process_epilog(""" @@ -2213,12 +2254,15 @@ class Archiver: subparser.add_argument('-n', '--dry-run', dest='dry_run', default=False, action='store_true', help='do not actually change any files') - subparser.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', + subparser.add_argument('-e', '--exclude', dest='patterns', + type=parse_exclude_pattern, action='append', metavar="PATTERN", help='exclude paths matching PATTERN') - subparser.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', + subparser.add_argument('--exclude-from', action=ArgparseExcludeFileAction, metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') + subparser.add_argument('--pattern', action=ArgparsePatternAction, + metavar="PATTERN", help='include/exclude paths matching PATTERN') + subparser.add_argument('--patterns-from', action=ArgparsePatternFileAction, + metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line') subparser.add_argument('--numeric-owner', dest='numeric_owner', action='store_true', default=False, help='only obey numeric user and group identifiers') @@ -2261,12 +2305,6 @@ class Archiver: formatter_class=argparse.RawDescriptionHelpFormatter, help='find 
differences in archive contents') subparser.set_defaults(func=self.do_diff) - subparser.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', - metavar="PATTERN", help='exclude paths matching PATTERN') - subparser.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', - metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') subparser.add_argument('--numeric-owner', dest='numeric_owner', action='store_true', default=False, help='only consider numeric user and group identifiers') @@ -2285,6 +2323,30 @@ class Archiver: subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, help='paths of items inside the archives to compare; patterns are supported') + exclude_group = subparser.add_argument_group('Exclusion options') + exclude_group.add_argument('-e', '--exclude', dest='patterns', + type=parse_exclude_pattern, action='append', + metavar="PATTERN", help='exclude paths matching PATTERN') + exclude_group.add_argument('--exclude-from', action=ArgparseExcludeFileAction, + metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') + exclude_group.add_argument('--exclude-caches', dest='exclude_caches', + action='store_true', default=False, + help='exclude directories that contain a CACHEDIR.TAG file (' + 'http://www.brynosaurus.com/cachedir/spec.html)') + exclude_group.add_argument('--exclude-if-present', dest='exclude_if_present', + metavar='NAME', action='append', type=str, + help='exclude directories that are tagged by containing a filesystem object with ' + 'the given NAME') + exclude_group.add_argument('--keep-exclude-tags', '--keep-tag-files', dest='keep_exclude_tags', + action='store_true', default=False, + help='keep tag objects (i.e.: arguments to --exclude-if-present) in otherwise ' + 'excluded caches/directories') + exclude_group.add_argument('--pattern', + action=ArgparsePatternAction, + metavar="PATTERN", 
help='include/exclude paths matching PATTERN') + exclude_group.add_argument('--patterns-from', action=ArgparsePatternFileAction, + metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line') + rename_epilog = process_epilog(""" This command renames an archive in the repository. @@ -2365,12 +2427,6 @@ class Archiver: subparser.add_argument('--format', '--list-format', dest='format', type=str, help="""specify format for file listing (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""") - subparser.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', - metavar="PATTERN", help='exclude paths matching PATTERN') - subparser.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', - metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', type=location_validator(), help='repository/archive to list contents of') @@ -2378,6 +2434,30 @@ class Archiver: help='paths to list; patterns are supported') self.add_archives_filters_args(subparser) + exclude_group = subparser.add_argument_group('Exclusion options') + exclude_group.add_argument('-e', '--exclude', dest='patterns', + type=parse_exclude_pattern, action='append', + metavar="PATTERN", help='exclude paths matching PATTERN') + exclude_group.add_argument('--exclude-from', action=ArgparseExcludeFileAction, + metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') + exclude_group.add_argument('--exclude-caches', dest='exclude_caches', + action='store_true', default=False, + help='exclude directories that contain a CACHEDIR.TAG file (' + 'http://www.brynosaurus.com/cachedir/spec.html)') + exclude_group.add_argument('--exclude-if-present', dest='exclude_if_present', + metavar='NAME', action='append', type=str, + help='exclude directories that are 
tagged by containing a filesystem object with ' + 'the given NAME') + exclude_group.add_argument('--keep-exclude-tags', '--keep-tag-files', dest='keep_exclude_tags', + action='store_true', default=False, + help='keep tag objects (i.e.: arguments to --exclude-if-present) in otherwise ' + 'excluded caches/directories') + exclude_group.add_argument('--pattern', + action=ArgparsePatternAction, + metavar="PATTERN", help='include/exclude paths matching PATTERN') + exclude_group.add_argument('--patterns-from', action=ArgparsePatternFileAction, + metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line') + mount_epilog = process_epilog(""" This command mounts an archive as a FUSE filesystem. This can be useful for browsing an archive or restoring individual files. Unless the ``--foreground`` @@ -2718,11 +2798,10 @@ class Archiver: help='print statistics at end') exclude_group = subparser.add_argument_group('Exclusion options') - exclude_group.add_argument('-e', '--exclude', dest='excludes', - type=parse_pattern, action='append', + exclude_group.add_argument('-e', '--exclude', dest='patterns', + type=parse_exclude_pattern, action='append', metavar="PATTERN", help='exclude paths matching PATTERN') - exclude_group.add_argument('--exclude-from', dest='exclude_files', - type=argparse.FileType('r'), action='append', + exclude_group.add_argument('--exclude-from', action=ArgparseExcludeFileAction, metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line') exclude_group.add_argument('--exclude-caches', dest='exclude_caches', action='store_true', default=False, @@ -2730,12 +2809,17 @@ class Archiver: 'http://www.brynosaurus.com/cachedir/spec.html)') exclude_group.add_argument('--exclude-if-present', dest='exclude_if_present', metavar='NAME', action='append', type=str, - help='exclude directories that are tagged by containing a filesystem object with \ - the given NAME') + help='exclude directories that are tagged by 
containing a filesystem object with ' + 'the given NAME') exclude_group.add_argument('--keep-exclude-tags', '--keep-tag-files', dest='keep_exclude_tags', action='store_true', default=False, - help='keep tag objects (i.e.: arguments to --exclude-if-present) in otherwise \ - excluded caches/directories') + help='keep tag objects (i.e.: arguments to --exclude-if-present) in otherwise ' + 'excluded caches/directories') + exclude_group.add_argument('--pattern', + action=ArgparsePatternAction, + metavar="PATTERN", help='include/exclude paths matching PATTERN') + exclude_group.add_argument('--patterns-from', action=ArgparsePatternFileAction, + metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line') archive_group = subparser.add_argument_group('Archive options') archive_group.add_argument('--target', dest='target', metavar='TARGET', default=None, @@ -2998,8 +3082,12 @@ class Archiver: # We can't use argparse for "serve" since we don't want it to show up in "Available commands" if args: args = self.preprocess_args(args) - args = self.parser.parse_args(args or ['-h']) - update_excludes(args) + parser = self.build_parser(self.prog) + args = parser.parse_args(args or ['-h']) + if args.func == self.do_create: + # need at least 1 path but args.paths may also be populated from patterns + if not args.paths: + parser.error('Need at least one PATH argument.') return args def prerun_checks(self, logger): diff --git a/src/borg/helpers.py b/src/borg/helpers.py index 20b0d1341..cf0af1e0e 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -362,21 +362,52 @@ def parse_timestamp(timestamp): return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc) -def load_excludes(fh): - """Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on - both line ends are ignored. 
- """ - return [parse_pattern(pattern) for pattern in clean_lines(fh)] +def parse_add_pattern(patternstr, roots, patterns): + """Parse a pattern string and add it to roots or patterns depending on the pattern type.""" + pattern = parse_inclexcl_pattern(patternstr) + if pattern.ptype is RootPath: + roots.append(pattern.pattern) + else: + patterns.append(pattern) -def update_excludes(args): - """Merge exclude patterns from files with those on command line.""" - if hasattr(args, 'exclude_files') and args.exclude_files: - if not hasattr(args, 'excludes') or args.excludes is None: - args.excludes = [] - for file in args.exclude_files: - args.excludes += load_excludes(file) - file.close() +def load_pattern_file(fileobj, roots, patterns): + for patternstr in clean_lines(fileobj): + parse_add_pattern(patternstr, roots, patterns) + + +def load_exclude_file(fileobj, patterns): + for patternstr in clean_lines(fileobj): + patterns.append(parse_exclude_pattern(patternstr)) + + +class ArgparsePatternAction(argparse.Action): + def __init__(self, nargs=1, **kw): + super().__init__(nargs=nargs, **kw) + + def __call__(self, parser, args, values, option_string=None): + parse_add_pattern(values[0], args.paths, args.patterns) + + +class ArgparsePatternFileAction(argparse.Action): + def __init__(self, nargs=1, **kw): + super().__init__(nargs=nargs, **kw) + + def __call__(self, parser, args, values, option_string=None): + """Load and parse patterns from a file. + Lines empty or starting with '#' after stripping whitespace on both line ends are ignored. 
+ """ + filename = values[0] + with open(filename) as f: + self.parse(f, args) + + def parse(self, fobj, args): + load_pattern_file(fobj, args.paths, args.patterns) + + +class ArgparseExcludeFileAction(ArgparsePatternFileAction): + def parse(self, fobj, args): + load_exclude_file(fobj, args.patterns) class PatternMatcher: @@ -395,6 +426,12 @@ class PatternMatcher: """ self._items.extend((i, value) for i in patterns) + def add_inclexcl(self, patterns): + """Add list of patterns (of type InclExclPattern) to internal list. The patterns ptype member is returned from + the match function when one of the given patterns matches. + """ + self._items.extend(patterns) + def match(self, path): for (pattern, value) in self._items: if pattern.match(path): @@ -546,6 +583,9 @@ _PATTERN_STYLES = set([ _PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES) +InclExclPattern = namedtuple('InclExclPattern', 'pattern ptype') +RootPath = object() + def parse_pattern(pattern, fallback=FnmatchPattern): """Read pattern from string and return an instance of the appropriate implementation class. @@ -563,6 +603,35 @@ def parse_pattern(pattern, fallback=FnmatchPattern): return cls(pattern) +def parse_exclude_pattern(pattern, fallback=FnmatchPattern): + """Read pattern from string and return an instance of the appropriate implementation class.
+ """ + epattern = parse_pattern(pattern, fallback) + return InclExclPattern(epattern, False) + + +def parse_inclexcl_pattern(pattern, fallback=ShellPattern): + """Read pattern from string and return a InclExclPattern object.""" + type_prefix_map = { + '-': False, + '+': True, + 'R': RootPath, + 'r': RootPath, + } + try: + ptype = type_prefix_map[pattern[0]] + pattern = pattern[1:].lstrip() + if not pattern: + raise ValueError("Missing pattern!") + except (IndexError, KeyError, ValueError): + raise argparse.ArgumentTypeError("Unable to parse pattern: {}".format(pattern)) + if ptype is RootPath: + pobj = pattern + else: + pobj = parse_pattern(pattern, fallback) + return InclExclPattern(pobj, ptype) + + def timestamp(s): """Convert a --timestamp=s argument to a datetime object""" try: diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 5bfaca120..2b0709be4 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -877,6 +877,53 @@ class ArchiverTestCase(ArchiverTestCaseBase): os.mkdir('input/cache3') os.link('input/cache1/%s' % CACHE_TAG_NAME, 'input/cache3/%s' % CACHE_TAG_NAME) + def test_create_without_root(self): + """test create without a root""" + self.cmd('init', self.repository_location) + args = ['create', self.repository_location + '::test'] + if self.FORK_DEFAULT: + self.cmd(*args, exit_code=2) + else: + self.assert_raises(SystemExit, lambda: self.cmd(*args)) + + def test_create_pattern_root(self): + """test create with only a root pattern""" + self.cmd('init', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('file2', size=1024 * 80) + output = self.cmd('create', '-v', '--list', '--pattern=R input', self.repository_location + '::test') + self.assert_in("A input/file1", output) + self.assert_in("A input/file2", output) + + def test_create_pattern(self): + """test file patterns during create""" + self.cmd('init', self.repository_location) + 
self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('file2', size=1024 * 80) + self.create_regular_file('file_important', size=1024 * 80) + output = self.cmd('create', '-v', '--list', + '--pattern=+input/file_important', '--pattern=-input/file*', + self.repository_location + '::test', 'input') + self.assert_in("A input/file_important", output) + self.assert_in("A input/file_important", output) + self.assert_in('x input/file1', output) + self.assert_in('x input/file2', output) + + def test_extract_pattern_opt(self): + self.cmd('init', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('file2', size=1024 * 80) + self.create_regular_file('file_important', size=1024 * 80) + self.cmd('create', self.repository_location + '::test', 'input') + with changedir('output'): + self.cmd('extract', + '--pattern=+input/file_important', '--pattern=-input/file*', + self.repository_location + '::test') + self.assert_equal(sorted(os.listdir('output/input')), ['file_important']) + + def test_exclude_caches(self): + self.cmd('init', self.repository_location) + def _assert_test_caches(self): with changedir('output'): self.cmd('extract', self.repository_location + '::test') diff --git a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py index 49f32dfd4..8dca6a392 100644 --- a/src/borg/testsuite/helpers.py +++ b/src/borg/testsuite/helpers.py @@ -1,11 +1,12 @@ +import argparse import hashlib -import logging import os import sys from datetime import datetime, timezone, timedelta from time import mktime, strptime, sleep import pytest + import msgpack import msgpack.fallback @@ -21,7 +22,7 @@ from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH from ..helpers import StableDict, bin_to_hex from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams, Chunk from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless -from ..helpers import load_excludes +from ..helpers import 
load_exclude_file, load_pattern_file from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2 from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern from ..helpers import swidth_slice @@ -431,8 +432,13 @@ def test_invalid_unicode_pattern(pattern): (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]), (["pp:aaabbb"], None), (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]), + (["/nomatch", "/more/*"], + ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']), + # the order of exclude patterns shouldn't matter + (["/more/*", "/nomatch"], + ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']), ]) -def test_patterns_from_file(tmpdir, lines, expected): +def test_exclude_patterns_from_file(tmpdir, lines, expected): files = [ '/data/something00.txt', '/more/data', '/home', ' #/wsfoobar', @@ -441,8 +447,10 @@ def test_patterns_from_file(tmpdir, lines, expected): ] def evaluate(filename): + patterns = [] + load_exclude_file(open(filename, "rt"), patterns) matcher = PatternMatcher(fallback=True) - matcher.add(load_excludes(open(filename, "rt")), False) + matcher.add_inclexcl(patterns) return [path for path in files if matcher.match(path)] exclfile = tmpdir.join("exclude.txt") @@ -453,6 +461,104 @@ def test_patterns_from_file(tmpdir, lines, expected): assert evaluate(str(exclfile)) == (files if expected is None else expected) +@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [ + # "None" means all files, i.e. 
none excluded + ([], [], 0), + (["# Comment only"], [], 0), + (["- *"], [], 1), + (["+fm:*/something00.txt", + "-/data"], [], 2), + (["R /"], ["/"], 0), + (["R /", + "# comment"], ["/"], 0), + (["# comment", + "- /data", + "R /home"], ["/home"], 1), +]) +def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns): + def evaluate(filename): + roots = [] + inclexclpatterns = [] + load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) + return roots, len(inclexclpatterns) + patternfile = tmpdir.join("patterns.txt") + + with patternfile.open("wt") as fh: + fh.write("\n".join(lines)) + + roots, numpatterns = evaluate(str(patternfile)) + assert roots == expected_roots + assert numpatterns == expected_numpatterns + + +@pytest.mark.parametrize("lines", [ + (["X /data"]), # illegal pattern type prefix + (["/data"]), # need a pattern type prefix +]) +def test_load_invalid_patterns_from_file(tmpdir, lines): + patternfile = tmpdir.join("patterns.txt") + with patternfile.open("wt") as fh: + fh.write("\n".join(lines)) + filename = str(patternfile) + with pytest.raises(argparse.ArgumentTypeError): + roots = [] + inclexclpatterns = [] + load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) + + +@pytest.mark.parametrize("lines, expected", [ + # "None" means all files, i.e. 
none excluded + ([], None), + (["# Comment only"], None), + (["- *"], []), + # default match type is sh: for patterns -> * doesn't match a / + (["-*/something0?.txt"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', + '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["-fm:*/something00.txt"], + ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["-fm:*/something0?.txt"], + ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["+/*/something0?.txt", + "-/data"], + ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']), + (["+fm:*/something00.txt", + "-/data"], + ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']), + # include /home/leo and exclude the rest of /home: + (["+/home/leo", + "-/home/*"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']), + # wrong order, /home/leo is already excluded by -/home/*: + (["-/home/*", + "+/home/leo"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']), + (["+fm:/home/leo", + "-/home/"], + ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']), +]) +def test_inclexcl_patterns_from_file(tmpdir, lines, expected): + files = [ + '/data', '/data/something00.txt', '/data/subdir/something01.txt', + '/home', '/home/leo', '/home/leo/t', '/home/other' + ] + + def evaluate(filename): + matcher = PatternMatcher(fallback=True) + roots = [] + inclexclpatterns = [] + load_pattern_file(open(filename, "rt"), roots, inclexclpatterns) + matcher.add_inclexcl(inclexclpatterns) + return [path for path in files if matcher.match(path)] + + patternfile = tmpdir.join("patterns.txt") + + with patternfile.open("wt") as fh: + fh.write("\n".join(lines)) + + assert evaluate(str(patternfile)) == (files if expected is None else expected) + + @pytest.mark.parametrize("pattern, cls", [ ("", 
FnmatchPattern),