add two new options --pattern and --patterns-from as discussed in #1406

# Conflicts:
#	src/borg/archiver.py
#	src/borg/helpers.py
#	src/borg/testsuite/helpers.py

Original-Commit: 876b670d
Alexander 'Leo' Bergolth 2016-08-02 16:02:02 +02:00 committed by Marian Beermann
parent 8d432b01e1
commit 30a5c5e44b
4 changed files with 378 additions and 68 deletions

src/borg/archiver.py

@ -44,7 +44,8 @@ from .helpers import to_localtime, timestamp
from .helpers import get_cache_dir
from .helpers import Manifest
from .helpers import StableDict
from .helpers import update_excludes, check_extension_modules
from .helpers import check_extension_modules
from .helpers import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
from .helpers import log_multi
from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern
@ -128,7 +129,7 @@ class Archiver:
def __init__(self, lock_wait=None, prog=None):
self.exit_code = EXIT_SUCCESS
self.lock_wait = lock_wait
self.parser = self.build_parser(prog)
self.prog = prog
def print_error(self, msg, *args):
msg = args and msg % args or msg
@ -172,10 +173,10 @@ class Archiver:
bi += slicelen
@staticmethod
def build_matcher(excludes, paths):
def build_matcher(inclexcl_patterns, paths):
matcher = PatternMatcher()
if excludes:
matcher.add(excludes, False)
if inclexcl_patterns:
matcher.add_inclexcl(inclexcl_patterns)
include_patterns = []
if paths:
include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in paths)
@ -316,8 +317,7 @@ class Archiver:
def do_create(self, args, repository, manifest=None, key=None):
"""Create new archive"""
matcher = PatternMatcher(fallback=True)
if args.excludes:
matcher.add(args.excludes, False)
matcher.add_inclexcl(args.patterns)
def create_inner(archive, cache):
# Add cache dir to inode_skip list
@ -523,7 +523,7 @@ class Archiver:
if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )):
logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8')
matcher, include_patterns = self.build_matcher(args.excludes, args.paths)
matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
progress = args.progress
output_list = args.output_list
@ -793,7 +793,7 @@ class Archiver:
'If you know for certain that they are the same, pass --same-chunker-params '
'to override this check.')
matcher, include_patterns = self.build_matcher(args.excludes, args.paths)
matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
compare_archives(archive1, archive2, matcher)
@ -927,7 +927,7 @@ class Archiver:
return self._list_repository(args, manifest, write)
def _list_archive(self, args, repository, manifest, key, write):
matcher, _ = self.build_matcher(args.excludes, args.paths)
matcher, _ = self.build_matcher(args.patterns, args.paths)
with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
consider_part_files=args.consider_part_files)
@ -1157,7 +1157,7 @@ class Archiver:
env_var_override='BORG_RECREATE_I_KNOW_WHAT_I_AM_DOING'):
return EXIT_ERROR
matcher, include_patterns = self.build_matcher(args.excludes, args.paths)
matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
self.output_list = args.output_list
self.output_filter = args.output_filter
@ -1401,8 +1401,9 @@ class Archiver:
helptext = collections.OrderedDict()
helptext['patterns'] = textwrap.dedent('''
Exclusion patterns support four separate styles, fnmatch, shell, regular
expressions and path prefixes. By default, fnmatch is used. If followed
File patterns support four separate styles: fnmatch, shell, regular
expressions and path prefixes. By default, fnmatch is used for
`--exclude` patterns and shell-style is used for `--pattern`. If followed
by a colon (':') the first two characters of a pattern are used as a
style selector. Explicit style selection is necessary when a
non-default style is desired or when the desired pattern starts with
@ -1410,12 +1411,12 @@ class Archiver:
`Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:`
This is the default style. These patterns use a variant of shell
pattern syntax, with '*' matching any number of characters, '?'
matching any single character, '[...]' matching any single
character specified, including ranges, and '[!...]' matching any
character not specified. For the purpose of these patterns, the
path separator ('\\' for Windows and '/' on other systems) is not
This is the default style for --exclude and --exclude-from.
These patterns use a variant of shell pattern syntax, with '*' matching
any number of characters, '?' matching any single character, '[...]'
matching any single character specified, including ranges, and '[!...]'
matching any character not specified. For the purpose of these patterns,
the path separator ('\\' for Windows and '/' on other systems) is not
treated specially. Wrap meta-characters in brackets for a literal
match (i.e. `[?]` to match the literal character `?`). For a path
to match a pattern, it must completely match from start to end, or
@ -1426,6 +1427,7 @@ class Archiver:
Shell-style patterns, selector `sh:`
This is the default style for --pattern and --patterns-from.
Like fnmatch patterns these are similar to shell patterns. The difference
is that the pattern may include `**/` for matching zero or more directory
levels, `*` for matching zero or more arbitrary characters with the
@ -1486,7 +1488,39 @@ class Archiver:
re:^/home/[^/]+\.tmp/
sh:/home/*/.thumbnails
EOF
$ borg create --exclude-from exclude.txt backup /\n\n''')
$ borg create --exclude-from exclude.txt backup /
A more general and easier way to define filename matching patterns is provided
by the `--pattern` and `--patterns-from` options. Using these, you may specify
the backup roots (starting points) as well as patterns for inclusion and exclusion.
A root path starts with the prefix `R`, followed by a path (a plain path, not a
file pattern). An include rule starts with the prefix `+`, an exclude rule starts
with the prefix `-`; both are followed by a pattern.
Inclusion patterns are useful to include paths that are contained in an excluded
path. The first matching pattern is used, so if an include pattern matches before
an exclude pattern, the file is backed up.
Note that the default pattern style for `--pattern` and `--patterns-from` is
shell style (`sh:`), so those patterns behave similarly to rsync include/exclude
patterns.
Patterns (`--pattern`) and excludes (`--exclude`) from the command line are
considered first (in the order of appearance). Then patterns from `--patterns-from`
are added. Exclusion patterns from `--exclude-from` files are appended last.
An example `--patterns-from` file could look like this::
R /
# can be rebuilt
- /home/*/.cache
# they're downloads for a reason
- /home/*/Downloads
# susan is a nice person
# include susan's home
+ /home/susan
# don't back up the other home directories
- /home/*\n\n''')
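A possible invocation using such a file could look like the following (an illustrative sketch; `patterns.txt` and `repo` are placeholder names, not part of this change)::
$ borg create --patterns-from patterns.txt repo::my-backup
Because the file defines the backup root with `R /`, no PATH argument is needed on the command line; any `--pattern` or `--exclude` options given on the command line are considered before the patterns read from the file.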
helptext['placeholders'] = textwrap.dedent('''
Repository (or Archive) URLs, --prefix and --remote-path values support these
placeholders:
@ -1717,6 +1751,9 @@ class Archiver:
help='show version number and exit')
subparsers = parser.add_subparsers(title='required arguments', metavar='<command>')
# some empty defaults for all subparsers
common_parser.set_defaults(paths=[], patterns=[])
serve_epilog = process_epilog("""
This command starts a repository server process. This command is usually not used manually.
""")
@ -2114,11 +2151,10 @@ class Archiver:
help='only display items with the given status characters')
exclude_group = subparser.add_argument_group('Exclusion options')
exclude_group.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
exclude_group.add_argument('-e', '--exclude', dest='patterns',
type=parse_exclude_pattern, action='append',
metavar="PATTERN", help='exclude paths matching PATTERN')
exclude_group.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
exclude_group.add_argument('--exclude-from', action=ArgparseExcludeFileAction,
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
exclude_group.add_argument('--exclude-caches', dest='exclude_caches',
action='store_true', default=False,
@ -2132,6 +2168,11 @@ class Archiver:
action='store_true', default=False,
help='keep tag objects (i.e.: arguments to --exclude-if-present) in otherwise '
'excluded caches/directories')
exclude_group.add_argument('--pattern',
action=ArgparsePatternAction,
metavar="PATTERN", help='include/exclude paths matching PATTERN')
exclude_group.add_argument('--patterns-from', action=ArgparsePatternFileAction,
metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
fs_group = subparser.add_argument_group('Filesystem options')
fs_group.add_argument('-x', '--one-file-system', dest='one_file_system',
@ -2183,7 +2224,7 @@ class Archiver:
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
help='name of archive to create (must be also a valid directory name)')
subparser.add_argument('paths', metavar='PATH', nargs='+', type=str,
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths to archive')
extract_epilog = process_epilog("""
@ -2213,12 +2254,15 @@ class Archiver:
subparser.add_argument('-n', '--dry-run', dest='dry_run',
default=False, action='store_true',
help='do not actually change any files')
subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
subparser.add_argument('-e', '--exclude', dest='patterns',
type=parse_exclude_pattern, action='append',
metavar="PATTERN", help='exclude paths matching PATTERN')
subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
subparser.add_argument('--exclude-from', action=ArgparseExcludeFileAction,
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('--pattern', action=ArgparsePatternAction,
metavar="PATTERN", help='include/exclude paths matching PATTERN')
subparser.add_argument('--patterns-from', action=ArgparsePatternFileAction,
metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
subparser.add_argument('--numeric-owner', dest='numeric_owner',
action='store_true', default=False,
help='only obey numeric user and group identifiers')
@ -2261,12 +2305,6 @@ class Archiver:
formatter_class=argparse.RawDescriptionHelpFormatter,
help='find differences in archive contents')
subparser.set_defaults(func=self.do_diff)
subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
metavar="PATTERN", help='exclude paths matching PATTERN')
subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('--numeric-owner', dest='numeric_owner',
action='store_true', default=False,
help='only consider numeric user and group identifiers')
@ -2285,6 +2323,30 @@ class Archiver:
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths of items inside the archives to compare; patterns are supported')
exclude_group = subparser.add_argument_group('Exclusion options')
exclude_group.add_argument('-e', '--exclude', dest='patterns',
type=parse_exclude_pattern, action='append',
metavar="PATTERN", help='exclude paths matching PATTERN')
exclude_group.add_argument('--exclude-from', action=ArgparseExcludeFileAction,
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
exclude_group.add_argument('--exclude-caches', dest='exclude_caches',
action='store_true', default=False,
help='exclude directories that contain a CACHEDIR.TAG file ('
'http://www.brynosaurus.com/cachedir/spec.html)')
exclude_group.add_argument('--exclude-if-present', dest='exclude_if_present',
metavar='NAME', action='append', type=str,
help='exclude directories that are tagged by containing a filesystem object with '
'the given NAME')
exclude_group.add_argument('--keep-exclude-tags', '--keep-tag-files', dest='keep_exclude_tags',
action='store_true', default=False,
help='keep tag objects (i.e.: arguments to --exclude-if-present) in otherwise '
'excluded caches/directories')
exclude_group.add_argument('--pattern',
action=ArgparsePatternAction,
metavar="PATTERN", help='include/exclude paths matching PATTERN')
exclude_group.add_argument('--patterns-from', action=ArgparsePatternFileAction,
metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
rename_epilog = process_epilog("""
This command renames an archive in the repository.
@ -2365,12 +2427,6 @@ class Archiver:
subparser.add_argument('--format', '--list-format', dest='format', type=str,
help="""specify format for file listing
(default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""")
subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
metavar="PATTERN", help='exclude paths matching PATTERN')
subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
type=location_validator(),
help='repository/archive to list contents of')
@ -2378,6 +2434,30 @@ class Archiver:
help='paths to list; patterns are supported')
self.add_archives_filters_args(subparser)
exclude_group = subparser.add_argument_group('Exclusion options')
exclude_group.add_argument('-e', '--exclude', dest='patterns',
type=parse_exclude_pattern, action='append',
metavar="PATTERN", help='exclude paths matching PATTERN')
exclude_group.add_argument('--exclude-from', action=ArgparseExcludeFileAction,
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
exclude_group.add_argument('--exclude-caches', dest='exclude_caches',
action='store_true', default=False,
help='exclude directories that contain a CACHEDIR.TAG file ('
'http://www.brynosaurus.com/cachedir/spec.html)')
exclude_group.add_argument('--exclude-if-present', dest='exclude_if_present',
metavar='NAME', action='append', type=str,
help='exclude directories that are tagged by containing a filesystem object with '
'the given NAME')
exclude_group.add_argument('--keep-exclude-tags', '--keep-tag-files', dest='keep_exclude_tags',
action='store_true', default=False,
help='keep tag objects (i.e.: arguments to --exclude-if-present) in otherwise '
'excluded caches/directories')
exclude_group.add_argument('--pattern',
action=ArgparsePatternAction,
metavar="PATTERN", help='include/exclude paths matching PATTERN')
exclude_group.add_argument('--patterns-from', action=ArgparsePatternFileAction,
metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
mount_epilog = process_epilog("""
This command mounts an archive as a FUSE filesystem. This can be useful for
browsing an archive or restoring individual files. Unless the ``--foreground``
@ -2718,11 +2798,10 @@ class Archiver:
help='print statistics at end')
exclude_group = subparser.add_argument_group('Exclusion options')
exclude_group.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
exclude_group.add_argument('-e', '--exclude', dest='patterns',
type=parse_exclude_pattern, action='append',
metavar="PATTERN", help='exclude paths matching PATTERN')
exclude_group.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
exclude_group.add_argument('--exclude-from', action=ArgparseExcludeFileAction,
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
exclude_group.add_argument('--exclude-caches', dest='exclude_caches',
action='store_true', default=False,
@ -2730,12 +2809,17 @@ class Archiver:
'http://www.brynosaurus.com/cachedir/spec.html)')
exclude_group.add_argument('--exclude-if-present', dest='exclude_if_present',
metavar='NAME', action='append', type=str,
help='exclude directories that are tagged by containing a filesystem object with \
the given NAME')
help='exclude directories that are tagged by containing a filesystem object with '
'the given NAME')
exclude_group.add_argument('--keep-exclude-tags', '--keep-tag-files', dest='keep_exclude_tags',
action='store_true', default=False,
help='keep tag objects (i.e.: arguments to --exclude-if-present) in otherwise \
excluded caches/directories')
help='keep tag objects (i.e.: arguments to --exclude-if-present) in otherwise '
'excluded caches/directories')
exclude_group.add_argument('--pattern',
action=ArgparsePatternAction,
metavar="PATTERN", help='include/exclude paths matching PATTERN')
exclude_group.add_argument('--patterns-from', action=ArgparsePatternFileAction,
metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
archive_group = subparser.add_argument_group('Archive options')
archive_group.add_argument('--target', dest='target', metavar='TARGET', default=None,
@ -2998,8 +3082,12 @@ class Archiver:
# We can't use argparse for "serve" since we don't want it to show up in "Available commands"
if args:
args = self.preprocess_args(args)
args = self.parser.parse_args(args or ['-h'])
update_excludes(args)
parser = self.build_parser(self.prog)
args = parser.parse_args(args or ['-h'])
if args.func == self.do_create:
# need at least 1 path but args.paths may also be populated from patterns
if not args.paths:
parser.error('Need at least one PATH argument.')
return args
def prerun_checks(self, logger):

src/borg/helpers.py

@ -362,21 +362,52 @@ def parse_timestamp(timestamp):
return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
def load_excludes(fh):
"""Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on
both line ends are ignored.
"""
return [parse_pattern(pattern) for pattern in clean_lines(fh)]
def parse_add_pattern(patternstr, roots, patterns):
"""Parse a pattern string and add it to roots or patterns depending on the pattern type."""
pattern = parse_inclexcl_pattern(patternstr)
if pattern.ptype is RootPath:
roots.append(pattern.pattern)
else:
patterns.append(pattern)
def update_excludes(args):
"""Merge exclude patterns from files with those on command line."""
if hasattr(args, 'exclude_files') and args.exclude_files:
if not hasattr(args, 'excludes') or args.excludes is None:
args.excludes = []
for file in args.exclude_files:
args.excludes += load_excludes(file)
file.close()
def load_pattern_file(fileobj, roots, patterns):
for patternstr in clean_lines(fileobj):
parse_add_pattern(patternstr, roots, patterns)
def load_exclude_file(fileobj, patterns):
for patternstr in clean_lines(fileobj):
patterns.append(parse_exclude_pattern(patternstr))
class ArgparsePatternAction(argparse.Action):
def __init__(self, nargs=1, **kw):
super().__init__(nargs=nargs, **kw)
def __call__(self, parser, args, values, option_string=None):
parse_add_pattern(values[0], args.paths, args.patterns)
class ArgparsePatternFileAction(argparse.Action):
def __init__(self, nargs=1, **kw):
super().__init__(nargs=nargs, **kw)
def __call__(self, parser, args, values, option_string=None):
"""Load and parse patterns from a file.
Empty lines and lines starting with '#' (after stripping whitespace from both line ends) are ignored.
"""
filename = values[0]
with open(filename) as f:
self.parse(f, args)
def parse(self, fobj, args):
load_pattern_file(fobj, args.paths, args.patterns)
class ArgparseExcludeFileAction(ArgparsePatternFileAction):
def parse(self, fobj, args):
load_exclude_file(fobj, args.patterns)
class PatternMatcher:
@ -395,6 +426,12 @@ class PatternMatcher:
"""
self._items.extend((i, value) for i in patterns)
def add_inclexcl(self, patterns):
"""Add list of patterns (of type InclExclPattern) to internal list. The patterns ptype member is returned from
the match function when one of the given patterns matches.
"""
self._items.extend(patterns)
def match(self, path):
for (pattern, value) in self._items:
if pattern.match(path):
@ -546,6 +583,9 @@ _PATTERN_STYLES = set([
_PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES)
InclExclPattern = namedtuple('InclExclPattern', 'pattern ptype')
RootPath = object()
def parse_pattern(pattern, fallback=FnmatchPattern):
"""Read pattern from string and return an instance of the appropriate implementation class.
@ -563,6 +603,35 @@ def parse_pattern(pattern, fallback=FnmatchPattern):
return cls(pattern)
def parse_exclude_pattern(pattern, fallback=FnmatchPattern):
"""Read pattern from string and return an instance of the appropriate implementation class.
"""
epattern = parse_pattern(pattern, fallback)
return InclExclPattern(epattern, False)
def parse_inclexcl_pattern(pattern, fallback=ShellPattern):
"""Read pattern from string and return a InclExclPattern object."""
type_prefix_map = {
'-': False,
'+': True,
'R': RootPath,
'r': RootPath,
}
try:
ptype = type_prefix_map[pattern[0]]
pattern = pattern[1:].lstrip()
if not pattern:
raise ValueError("Missing pattern!")
except (IndexError, KeyError, ValueError):
raise argparse.ArgumentTypeError("Unable to parse pattern: {}".format(pattern))
if ptype is RootPath:
pobj = pattern
else:
pobj = parse_pattern(pattern, fallback)
return InclExclPattern(pobj, ptype)
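For illustration, a minimal sketch of how the new pattern parsing helpers behave (this snippet is not part of the change and assumes the module is importable as `borg.helpers`):
from borg.helpers import parse_inclexcl_pattern, parse_exclude_pattern, RootPath
root = parse_inclexcl_pattern('R /srv/data')       # 'R' prefix: root path, ptype is RootPath, pattern stays a plain string
include = parse_inclexcl_pattern('+ /home/susan')  # '+' prefix: include rule (ptype True), sh: style by default
exclude = parse_inclexcl_pattern('- /home/*')      # '-' prefix: exclude rule (ptype False)
legacy = parse_exclude_pattern('/home/*/.cache')   # --exclude style pattern: always an exclude, fm: style by default
assert root.ptype is RootPath and include.ptype is True
assert exclude.ptype is False and legacy.ptype is False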
def timestamp(s):
"""Convert a --timestamp=s argument to a datetime object"""
try:

src/borg/testsuite/archiver.py

@ -877,6 +877,53 @@ class ArchiverTestCase(ArchiverTestCaseBase):
os.mkdir('input/cache3')
os.link('input/cache1/%s' % CACHE_TAG_NAME, 'input/cache3/%s' % CACHE_TAG_NAME)
def test_create_without_root(self):
"""test create without a root"""
self.cmd('init', self.repository_location)
args = ['create', self.repository_location + '::test']
if self.FORK_DEFAULT:
self.cmd(*args, exit_code=2)
else:
self.assert_raises(SystemExit, lambda: self.cmd(*args))
def test_create_pattern_root(self):
"""test create with only a root pattern"""
self.cmd('init', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)
self.create_regular_file('file2', size=1024 * 80)
output = self.cmd('create', '-v', '--list', '--pattern=R input', self.repository_location + '::test')
self.assert_in("A input/file1", output)
self.assert_in("A input/file2", output)
def test_create_pattern(self):
"""test file patterns during create"""
self.cmd('init', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)
self.create_regular_file('file2', size=1024 * 80)
self.create_regular_file('file_important', size=1024 * 80)
output = self.cmd('create', '-v', '--list',
'--pattern=+input/file_important', '--pattern=-input/file*',
self.repository_location + '::test', 'input')
self.assert_in("A input/file_important", output)
self.assert_in("A input/file_important", output)
self.assert_in('x input/file1', output)
self.assert_in('x input/file2', output)
def test_extract_pattern_opt(self):
self.cmd('init', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)
self.create_regular_file('file2', size=1024 * 80)
self.create_regular_file('file_important', size=1024 * 80)
self.cmd('create', self.repository_location + '::test', 'input')
with changedir('output'):
self.cmd('extract',
'--pattern=+input/file_important', '--pattern=-input/file*',
self.repository_location + '::test')
self.assert_equal(sorted(os.listdir('output/input')), ['file_important'])
def test_exclude_caches(self):
self.cmd('init', self.repository_location)
def _assert_test_caches(self):
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')

src/borg/testsuite/helpers.py

@ -1,11 +1,12 @@
import argparse
import hashlib
import logging
import os
import sys
from datetime import datetime, timezone, timedelta
from time import mktime, strptime, sleep
import pytest
import msgpack
import msgpack.fallback
@ -21,7 +22,7 @@ from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH
from ..helpers import StableDict, bin_to_hex
from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams, Chunk
from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
from ..helpers import load_excludes
from ..helpers import load_exclude_file, load_pattern_file
from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2
from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
from ..helpers import swidth_slice
@ -431,8 +432,13 @@ def test_invalid_unicode_pattern(pattern):
(["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]),
(["pp:aaabbb"], None),
(["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
(["/nomatch", "/more/*"],
['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
# the order of exclude patterns shouldn't matter
(["/more/*", "/nomatch"],
['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
])
def test_patterns_from_file(tmpdir, lines, expected):
def test_exclude_patterns_from_file(tmpdir, lines, expected):
files = [
'/data/something00.txt', '/more/data', '/home',
' #/wsfoobar',
@ -441,8 +447,10 @@ def test_patterns_from_file(tmpdir, lines, expected):
]
def evaluate(filename):
patterns = []
load_exclude_file(open(filename, "rt"), patterns)
matcher = PatternMatcher(fallback=True)
matcher.add(load_excludes(open(filename, "rt")), False)
matcher.add_inclexcl(patterns)
return [path for path in files if matcher.match(path)]
exclfile = tmpdir.join("exclude.txt")
@ -453,6 +461,104 @@ def test_patterns_from_file(tmpdir, lines, expected):
assert evaluate(str(exclfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
# "None" means all files, i.e. none excluded
([], [], 0),
(["# Comment only"], [], 0),
(["- *"], [], 1),
(["+fm:*/something00.txt",
"-/data"], [], 2),
(["R /"], ["/"], 0),
(["R /",
"# comment"], ["/"], 0),
(["# comment",
"- /data",
"R /home"], ["/home"], 1),
])
def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
def evaluate(filename):
roots = []
inclexclpatterns = []
load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
return roots, len(inclexclpatterns)
patternfile = tmpdir.join("patterns.txt")
with patternfile.open("wt") as fh:
fh.write("\n".join(lines))
roots, numpatterns = evaluate(str(patternfile))
assert roots == expected_roots
assert numpatterns == expected_numpatterns
@pytest.mark.parametrize("lines", [
(["X /data"]), # illegal pattern type prefix
(["/data"]), # need a pattern type prefix
])
def test_load_invalid_patterns_from_file(tmpdir, lines):
patternfile = tmpdir.join("patterns.txt")
with patternfile.open("wt") as fh:
fh.write("\n".join(lines))
filename = str(patternfile)
with pytest.raises(argparse.ArgumentTypeError):
roots = []
inclexclpatterns = []
load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
@pytest.mark.parametrize("lines, expected", [
# "None" means all files, i.e. none excluded
([], None),
(["# Comment only"], None),
(["- *"], []),
# default match type is sh: for patterns -> * doesn't match a /
(["-*/something0?.txt"],
['/data', '/data/something00.txt', '/data/subdir/something01.txt',
'/home', '/home/leo', '/home/leo/t', '/home/other']),
(["-fm:*/something00.txt"],
['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
(["-fm:*/something0?.txt"],
["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
(["+/*/something0?.txt",
"-/data"],
["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
(["+fm:*/something00.txt",
"-/data"],
["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
# include /home/leo and exclude the rest of /home:
(["+/home/leo",
"-/home/*"],
['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
# wrong order, /home/leo is already excluded by -/home/*:
(["-/home/*",
"+/home/leo"],
['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']),
(["+fm:/home/leo",
"-/home/"],
['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
])
def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
files = [
'/data', '/data/something00.txt', '/data/subdir/something01.txt',
'/home', '/home/leo', '/home/leo/t', '/home/other'
]
def evaluate(filename):
matcher = PatternMatcher(fallback=True)
roots = []
inclexclpatterns = []
load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
matcher.add_inclexcl(inclexclpatterns)
return [path for path in files if matcher.match(path)]
patternfile = tmpdir.join("patterns.txt")
with patternfile.open("wt") as fh:
fh.write("\n".join(lines))
assert evaluate(str(patternfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("pattern, cls", [
("", FnmatchPattern),