Merge pull request #2322 from edgimar/master

allow excluding parent and including child, fixes #2314
This commit is contained in:
TW 2017-04-23 12:58:10 +02:00 committed by GitHub
commit 6f47b797f9
5 changed files with 219 additions and 100 deletions

View File

@ -36,7 +36,7 @@ from .helpers import StableDict
from .helpers import bin_to_hex
from .helpers import safe_ns
from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
from .helpers import PathPrefixPattern, FnmatchPattern
from .helpers import PathPrefixPattern, FnmatchPattern, IECommand
from .item import Item, ArchiveItem
from .key import key_factory
from .platform import acl_get, acl_set, set_flags, get_flags, swidth
@ -1721,10 +1721,10 @@ class ArchiveRecreater:
"""Add excludes to the matcher created by exclude_cache and exclude_if_present."""
def exclude(dir, tag_item):
if self.keep_exclude_tags:
tag_files.append(PathPrefixPattern(tag_item.path))
tagged_dirs.append(FnmatchPattern(dir + '/'))
tag_files.append(PathPrefixPattern(tag_item.path, recurse_dir=False))
tagged_dirs.append(FnmatchPattern(dir + '/', recurse_dir=False))
else:
tagged_dirs.append(PathPrefixPattern(dir))
tagged_dirs.append(PathPrefixPattern(dir, recurse_dir=False))
matcher = self.matcher
tag_files = []
@ -1747,8 +1747,8 @@ class ArchiveRecreater:
file = open_item(archive, cachedir_masters[item.source])
if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS):
exclude(dir, item)
matcher.add(tag_files, True)
matcher.add(tagged_dirs, False)
matcher.add(tag_files, IECommand.Include)
matcher.add(tagged_dirs, IECommand.ExcludeNoRecurse)
def create_target(self, archive, target_name=None):
"""Create target archive."""

View File

@ -54,7 +54,7 @@ from .helpers import check_extension_modules
from .helpers import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
from .helpers import log_multi
from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern
from .helpers import PatternMatcher
from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
from .helpers import ErrorIgnoringTextIOWrapper
from .helpers import ProgressIndicatorPercent
@ -190,16 +190,11 @@ class Archiver:
bi += slicelen
@staticmethod
def build_matcher(inclexcl_patterns, include_paths):
    """Create and configure the PatternMatcher for one command invocation.

    *inclexcl_patterns* is passed to ``PatternMatcher.add_inclexcl`` and
    *include_paths* to ``PatternMatcher.add_includepaths``, in that order.
    Returns the configured matcher.
    """
    pattern_matcher = PatternMatcher()
    # include/exclude commands are registered before the include paths
    pattern_matcher.add_inclexcl(inclexcl_patterns)
    pattern_matcher.add_includepaths(include_paths)
    return pattern_matcher
def do_serve(self, args):
"""Start in server mode. This command is usually not used manually."""
@ -493,13 +488,20 @@ class Archiver:
This should only raise on critical errors. Per-item errors must be handled within this method.
"""
if st is None:
with backup_io('stat'):
st = os.lstat(path)
recurse_excluded_dir = False
if not matcher.match(path):
self.print_file_status('x', path)
return
if stat.S_ISDIR(st.st_mode) and matcher.recurse_dir:
recurse_excluded_dir = True
else:
return
try:
if st is None:
with backup_io('stat'):
st = os.lstat(path)
if (st.st_ino, st.st_dev) in skip_inodes:
return
# if restrict_dev is given, we do not want to recurse into a new filesystem,
@ -527,7 +529,8 @@ class Archiver:
read_special=read_special, dry_run=dry_run)
return
if not dry_run:
status = archive.process_dir(path, st)
if not recurse_excluded_dir:
status = archive.process_dir(path, st)
if recurse:
with backup_io('scandir'):
entries = helpers.scandir_inorder(path)
@ -590,7 +593,9 @@ class Archiver:
status = '?' # need to add a status code somewhere
else:
status = '-' # dry run, item was not backed up
self.print_file_status(status, path)
if not recurse_excluded_dir:
self.print_file_status(status, path)
@staticmethod
def build_filter(matcher, peek_and_store_hardlink_masters, strip_components):
@ -616,7 +621,7 @@ class Archiver:
if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )):
logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8')
matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
matcher = self.build_matcher(args.patterns, args.paths)
progress = args.progress
output_list = args.output_list
@ -681,9 +686,8 @@ class Archiver:
archive.extract_item(dir_item)
except BackupOSError as e:
self.print_warning('%s: %s', remove_surrogates(dir_item.path), e)
for pattern in include_patterns:
if pattern.match_count == 0:
self.print_warning("Include pattern '%s' never matched.", pattern)
for pattern in matcher.get_unmatched_include_patterns():
self.print_warning("Include pattern '%s' never matched.", pattern)
if pi:
# clear progress output
pi.finish()
@ -893,13 +897,13 @@ class Archiver:
'If you know for certain that they are the same, pass --same-chunker-params '
'to override this check.')
matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
matcher = self.build_matcher(args.patterns, args.paths)
compare_archives(archive1, archive2, matcher)
for pattern in include_patterns:
if pattern.match_count == 0:
self.print_warning("Include pattern '%s' never matched.", pattern)
for pattern in matcher.get_unmatched_include_patterns():
self.print_warning("Include pattern '%s' never matched.", pattern)
return self.exit_code
@with_repository(exclusive=True, cache=True)
@ -1048,7 +1052,7 @@ class Archiver:
return self._list_repository(args, manifest, write)
def _list_archive(self, args, repository, manifest, key, write):
matcher, _ = self.build_matcher(args.patterns, args.paths)
matcher = self.build_matcher(args.patterns, args.paths)
if args.format is not None:
format = args.format
elif args.short:
@ -1330,7 +1334,7 @@ class Archiver:
env_var_override='BORG_RECREATE_I_KNOW_WHAT_I_AM_DOING'):
return EXIT_ERROR
matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
matcher = self.build_matcher(args.patterns, args.paths)
self.output_list = args.output_list
self.output_filter = args.output_filter
recompress = args.recompress != 'never'

View File

@ -23,6 +23,7 @@ import uuid
from binascii import hexlify
from collections import namedtuple, deque, abc, Counter
from datetime import datetime, timezone, timedelta
from enum import Enum
from fnmatch import translate
from functools import wraps, partial, lru_cache
from itertools import islice
@ -388,23 +389,24 @@ def parse_timestamp(timestamp):
return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
def parse_patternfile_line(line, roots, ie_commands, fallback):
    """Parse one pattern-file line and dispatch on the command it represents.

    A root-path command appends its value to *roots*; a pattern-style command
    selects a new fallback pattern class; any other command (some kind of
    include/exclude) is appended to *ie_commands*.

    Returns the fallback pattern class to use for the following lines.
    """
    parsed = parse_inclexcl_command(line, fallback=fallback)
    if parsed.cmd is IECommand.PatternStyle:
        # the value of a style command is the new default pattern class
        return parsed.val
    if parsed.cmd is IECommand.RootPath:
        roots.append(parsed.val)
    else:
        # it is some kind of include/exclude command
        ie_commands.append(parsed)
    return fallback
def load_pattern_file(fileobj, roots, ie_commands, fallback=None):
    """Feed every cleaned line of *fileobj* through parse_patternfile_line.

    *roots* and *ie_commands* are lists that get extended in place. When
    *fallback* is None, ShellPattern is used as the initial pattern class;
    each line may replace the class used for the lines after it.
    """
    # ShellPattern is defined later in this module
    current_style = ShellPattern if fallback is None else fallback
    for line in clean_lines(fileobj):
        current_style = parse_patternfile_line(line, roots, ie_commands, current_style)
def load_exclude_file(fileobj, patterns):
@ -417,7 +419,7 @@ class ArgparsePatternAction(argparse.Action):
super().__init__(nargs=nargs, **kw)
def __call__(self, parser, args, values, option_string=None):
    """Parse the first given value as a pattern-file line.

    Results are collected into ``args.paths`` (roots) and ``args.patterns``
    (include/exclude commands); ShellPattern is the fallback pattern class.
    """
    line = values[0]
    parse_patternfile_line(line, args.paths, args.patterns, ShellPattern)
class ArgparsePatternFileAction(argparse.Action):
@ -442,6 +444,11 @@ class ArgparseExcludeFileAction(ArgparsePatternFileAction):
class PatternMatcher:
"""Represents a collection of pattern objects to match paths against.
*fallback* is a boolean value that *match()* returns if no matching patterns are found.
"""
def __init__(self, fallback=None):
self._items = []
@ -451,42 +458,88 @@ class PatternMatcher:
# optimizations
self._path_full_patterns = {} # full path -> return value
# indicates whether the last match() call ended on a pattern for which
# we should recurse into any matching folder. Will be set to True or
# False when calling match().
self.recurse_dir = None
# whether to recurse into directories when no match is found
# TODO: allow modification as a config option?
self.recurse_dir_default = True
self.include_patterns = []
# TODO: move this info to parse_inclexcl_command and store in PatternBase subclass?
self.is_include_cmd = {
IECommand.Exclude: False,
IECommand.ExcludeNoRecurse: False,
IECommand.Include: True
}
def empty(self):
    """Return True when neither regular nor full-path patterns are registered."""
    return not (len(self._items) or len(self._path_full_patterns))
def _add(self, pattern, cmd):
    """Register a single *pattern* together with its command.

    *cmd* is an IECommand value. PathFullPattern instances go into the
    full-path lookup dict (keyed by their pattern string); every other
    pattern is appended to the ordered item list.
    """
    if not isinstance(pattern, PathFullPattern):
        self._items.append((pattern, cmd))
        return
    # full, normalized path -> command
    self._path_full_patterns[pattern.pattern] = cmd
def add(self, patterns, cmd):
    """Register every pattern in *patterns* under the same command.

    *cmd* indicates whether the patterns are include or exclude patterns,
    and whether recursion should be done on excluded folders.
    """
    for entry in patterns:
        self._add(entry, cmd)
def add_includepaths(self, include_paths):
    """Add inclusion paths taken from args.paths (i.e. from the commandline).

    Each path is parsed with PathPrefixPattern as the default pattern class
    and registered as an include pattern. The parsed patterns are remembered
    in ``self.include_patterns`` so that unmatched ones can be reported.

    *include_paths* may be empty or None; both mean "no paths given". In
    that case nothing is registered and ``self.fallback`` becomes True.
    With at least one path, ``self.fallback`` becomes False.
    """
    # Tolerate None like the pre-refactoring ``if paths:`` guard did.
    include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths or ()]
    if include_patterns:
        self.add(include_patterns, IECommand.Include)
    self.fallback = not include_patterns
    self.include_patterns = include_patterns
def get_unmatched_include_patterns(self):
    """Return the include patterns whose match_count is still zero.

    Note that only patterns added via *add_includepaths* are considered.
    """
    unmatched = []
    for pattern in self.include_patterns:
        if pattern.match_count == 0:
            unmatched.append(pattern)
    return unmatched
def add_inclexcl(self, patterns):
    """Add a list of (pattern, command) pairs (of type CmdTuple) to the matcher.

    *patterns* may be empty or None, in which case nothing is added.
    """
    # Tolerate None like the pre-refactoring ``if inclexcl_patterns:`` guard did.
    for pattern, cmd in patterns or ():
        self._add(pattern, cmd)
def match(self, path):
    """Return True or False depending on whether *path* is matched.

    Full-path patterns are checked first via a dict lookup, then the
    remaining patterns are tried in the order they were added; the first
    matching pattern decides. The command stored with that pattern is
    translated to a boolean through ``self.is_include_cmd``.

    If no match is found among the patterns in this matcher, then the value
    in self.fallback is returned (defaults to None).

    As a side effect, ``self.recurse_dir`` is updated to tell the caller
    whether a matching directory should still be recursed into.
    """
    path = normalize_path(path)
    # do a fast lookup for full path matches (note: we do not count such matches):
    non_existent = object()
    cmd = self._path_full_patterns.get(path, non_existent)
    if cmd is not non_existent:
        # we have a full path match!
        # Only the command is stored for full-path patterns, so derive the
        # recursion flag from it the same way parse_inclexcl_command does.
        self.recurse_dir = cmd is not IECommand.ExcludeNoRecurse
        # Translate the stored command to a boolean; returning the command
        # itself would make every full-path exclude look like an include,
        # because enum members are always truthy.
        return self.is_include_cmd[cmd]

    # this is the slow way, if we have many patterns in self._items:
    for pattern, cmd in self._items:
        if pattern.match(path, normalize=False):
            self.recurse_dir = pattern.recurse_dir
            return self.is_include_cmd[cmd]

    # by default we will recurse if there is no match
    self.recurse_dir = self.recurse_dir_default
    return self.fallback
@ -502,14 +555,15 @@ class PatternBase:
"""
PREFIX = NotImplemented
def __init__(self, pattern, recurse_dir=False):
    """Store the original pattern text and prepare the normalized form.

    *pattern* is kept unchanged in ``pattern_orig``; its normalized form
    (via normalize_path) is handed to ``_prepare``. *recurse_dir* is stored
    on the instance as given.
    """
    self.pattern_orig = pattern
    self.match_count = 0
    self._prepare(normalize_path(pattern))
    self.recurse_dir = recurse_dir
def match(self, path, normalize=True):
"""match the given path against this pattern.
"""Return a boolean indicating whether *path* is matched by this pattern.
If normalize is True (default), the path will get normalized using normalize_path(),
otherwise it is assumed that it already is normalized using that function.
@ -528,6 +582,7 @@ class PatternBase:
return self.pattern_orig
def _prepare(self, pattern):
    """Should set the value of self.pattern.

    Called from ``__init__`` with the normalized pattern string. This base
    implementation always raises NotImplementedError.
    """
    raise NotImplementedError
def _match(self, path):
@ -625,7 +680,7 @@ class RegexPattern(PatternBase):
return (self.regex.search(path) is not None)
_PATTERN_STYLES = set([
_PATTERN_CLASSES = set([
FnmatchPattern,
PathFullPattern,
PathPrefixPattern,
@ -633,65 +688,86 @@ _PATTERN_STYLES = set([
ShellPattern,
])
_PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES)
_PATTERN_CLASS_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_CLASSES)
InclExclPattern = namedtuple('InclExclPattern', 'pattern ptype')
RootPath = object()
PatternStyle = object()
CmdTuple = namedtuple('CmdTuple', 'val cmd')
def get_pattern_style(prefix):
class IECommand(Enum):
    """The kinds of command a single include/exclude pattern-file line can carry.

    The prefix characters noted below are the ones mapped to each member
    by parse_inclexcl_command.
    """

    # 'R' / 'r': the rest of the line is a root path
    RootPath = 1
    # 'P' / 'p': the rest of the line names a pattern style
    PatternStyle = 2
    # '+': include pattern
    Include = 3
    # '-': exclude pattern
    Exclude = 4
    # '!': exclude pattern, parsed with recurse_dir=False
    ExcludeNoRecurse = 5
def get_pattern_class(prefix):
    """Look up the pattern class registered for the style *prefix*.

    Raises ValueError if no class is registered for *prefix*.
    """
    try:
        pattern_cls = _PATTERN_CLASS_BY_PREFIX[prefix]
    except KeyError:
        raise ValueError("Unknown pattern style: {}".format(prefix)) from None
    else:
        return pattern_cls
def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True):
    """Build a pattern object from the string *pattern*.

    When the string starts with a two-character alphanumeric style prefix
    followed by a colon, the class for that style is used and the prefix is
    stripped; otherwise the *fallback* class gets the whole string.
    *recurse_dir* is passed through to the pattern class.
    """
    has_style_prefix = len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum()
    if not has_style_prefix:
        return fallback(pattern, recurse_dir)
    style, pattern_body = pattern[:2], pattern[3:]
    pattern_cls = get_pattern_class(style)
    return pattern_cls(pattern_body, recurse_dir)
def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern):
    """Parse *pattern_str* and wrap the resulting pattern as an exclude command.

    Returns a CmdTuple pairing the pattern object with IECommand.Exclude.
    """
    return CmdTuple(parse_pattern(pattern_str, fallback), IECommand.Exclude)
def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
    """Parse one --patterns-from command line into a CmdTuple.

    The first character selects the command; the remaining text (with
    leading whitespace stripped) is its argument. The value in the returned
    tuple is the root path string, a pattern class, or a pattern object,
    depending on the command.

    Raises argparse.ArgumentTypeError for an empty line, an unknown command
    character, a missing argument or an unknown pattern style.
    """
    cmd_prefix_map = {
        '-': IECommand.Exclude,
        '!': IECommand.ExcludeNoRecurse,
        '+': IECommand.Include,
        'R': IECommand.RootPath,
        'r': IECommand.RootPath,
        'P': IECommand.PatternStyle,
        'p': IECommand.PatternStyle,
    }
    try:
        cmd = cmd_prefix_map[cmd_line_str[0]]
        # remaining text on the line following the command character
        remainder_str = cmd_line_str[1:].lstrip()
        if not remainder_str:
            raise ValueError("Missing pattern/information!")
    except (IndexError, KeyError, ValueError):
        raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str))

    if cmd is IECommand.RootPath:
        # TODO: validate string?
        return CmdTuple(remainder_str, cmd)

    if cmd is IECommand.PatternStyle:
        # then remainder_str is something like 're' or 'sh'
        try:
            style_cls = get_pattern_class(remainder_str)
        except ValueError:
            raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str))
        return CmdTuple(style_cls, cmd)

    # include/exclude command: only ExcludeNoRecurse switches recursion off
    recurse_dir = cmd is not IECommand.ExcludeNoRecurse
    pattern_obj = parse_pattern(remainder_str, fallback, recurse_dir)
    return CmdTuple(pattern_obj, cmd)
def timestamp(s):

View File

@ -37,6 +37,7 @@ from ..helpers import PatternMatcher, parse_pattern, Location, get_security_dir
from ..helpers import Manifest
from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
from ..helpers import bin_to_hex
from ..helpers import IECommand
from ..item import Item
from ..key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
from ..keymanager import RepoIdMismatch, NotABorgKeyFile
@ -929,6 +930,40 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.assert_in('x input/file2', output)
self.assert_in('x input/otherfile', output)
def test_create_pattern_exclude_folder_but_recurse(self):
    """Patterns exclude a parent folder ('-') but include one of its children."""
    self.patterns_file_path2 = os.path.join(self.tmpdir, 'patterns2')
    with open(self.patterns_file_path2, 'wb') as fd:
        fd.write(b'+ input/x/b\n- input/x*\n')

    self.cmd('init', '--encryption=repokey', self.repository_location)
    for name in ('x/a/foo_a', 'x/b/foo_b', 'y/foo_y'):
        self.create_regular_file(name, size=1024 * 80)

    patterns_option = '--patterns-from=' + self.patterns_file_path2
    archive_location = self.repository_location + '::test'
    output = self.cmd('create', '-v', '--list', patterns_option, archive_location, 'input')

    # the excluded sibling is listed as excluded, the included child is added
    self.assert_in('x input/x/a/foo_a', output)
    self.assert_in("A input/x/b/foo_b", output)
    self.assert_in('A input/y/foo_y', output)
def test_create_pattern_exclude_folder_no_recurse(self):
    """Patterns exclude a parent folder without recursion ('!') and also name a child to include."""
    self.patterns_file_path2 = os.path.join(self.tmpdir, 'patterns2')
    with open(self.patterns_file_path2, 'wb') as fd:
        fd.write(b'+ input/x/b\n! input/x*\n')

    self.cmd('init', '--encryption=repokey', self.repository_location)
    for name in ('x/a/foo_a', 'x/b/foo_b', 'y/foo_y'):
        self.create_regular_file(name, size=1024 * 80)

    patterns_option = '--patterns-from=' + self.patterns_file_path2
    archive_location = self.repository_location + '::test'
    output = self.cmd('create', '-v', '--list', patterns_option, archive_location, 'input')

    # nothing below input/x/a may show up in the listing at all
    self.assert_not_in('input/x/a/foo_a', output)
    self.assert_not_in('input/x/a', output)
    self.assert_in('A input/y/foo_y', output)
def test_extract_pattern_opt(self):
self.cmd('init', '--encryption=repokey', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)
@ -2889,7 +2924,7 @@ class TestBuildFilter:
def test_basic(self):
matcher = PatternMatcher()
matcher.add([parse_pattern('included')], True)
matcher.add([parse_pattern('included')], IECommand.Include)
filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0)
assert filter(Item(path='included'))
assert filter(Item(path='included/file'))

View File

@ -557,12 +557,12 @@ def test_switch_patterns_style():
roots, patterns = [], []
load_pattern_file(pattern_file, roots, patterns)
assert len(patterns) == 6
assert isinstance(patterns[0].pattern, ShellPattern)
assert isinstance(patterns[1].pattern, FnmatchPattern)
assert isinstance(patterns[2].pattern, RegexPattern)
assert isinstance(patterns[3].pattern, RegexPattern)
assert isinstance(patterns[4].pattern, PathPrefixPattern)
assert isinstance(patterns[5].pattern, ShellPattern)
assert isinstance(patterns[0].val, ShellPattern)
assert isinstance(patterns[1].val, FnmatchPattern)
assert isinstance(patterns[2].val, RegexPattern)
assert isinstance(patterns[3].val, RegexPattern)
assert isinstance(patterns[4].val, PathPrefixPattern)
assert isinstance(patterns[5].val, ShellPattern)
@pytest.mark.parametrize("lines", [
@ -682,6 +682,10 @@ def test_pattern_matcher():
for i in ["", "foo", "bar"]:
assert pm.match(i) is None
# add extra entries to aid in testing
for target in ["A", "B", "Empty", "FileNotFound"]:
pm.is_include_cmd[target] = target
pm.add([RegexPattern("^a")], "A")
pm.add([RegexPattern("^b"), RegexPattern("^z")], "B")
pm.add([RegexPattern("^$")], "Empty")