Merge pull request #1971 from leo-b/1.0-maint-patterns

new --pattern and --patterns-from options
enkore 2017-02-12 19:06:35 +01:00 committed by GitHub
commit d188886269
4 changed files with 341 additions and 52 deletions


@ -19,9 +19,9 @@ import collections
from . import __version__
from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \
parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, bin_to_hex, \
get_cache_dir, prune_within, prune_split, \
Manifest, NoManifestError, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
parse_pattern, parse_exclude_pattern, ArgparsePatternAction, ArgparsePatternFileAction, ArgparseExcludeFileAction, \
PathPrefixPattern, to_localtime, timestamp, safe_timestamp, bin_to_hex, get_cache_dir, prune_within, prune_split, \
Manifest, NoManifestError, remove_surrogates, format_archive, check_extension_modules, Statistics, \
dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, PrefixSpec, is_slow_msgpack, yes, sysinfo, \
EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ErrorIgnoringTextIOWrapper
from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
@ -122,6 +122,18 @@ class Archiver:
if self.output_list and (self.output_filter is None or status in self.output_filter):
logger.info("%1s %s", status, remove_surrogates(path))
@staticmethod
def build_matcher(inclexcl_patterns, paths):
matcher = PatternMatcher()
if inclexcl_patterns:
matcher.add_inclexcl(inclexcl_patterns)
include_patterns = []
if paths:
include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in paths)
matcher.add(include_patterns, True)
matcher.fallback = not include_patterns
return matcher, include_patterns
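Taken on its own, the new helper can be exercised like this; a minimal sketch, assuming the 1.0-maint package layout (`borg.archiver`, `borg.helpers`) and reusing the patterns from the new tests further down::

    from borg.archiver import Archiver
    from borg.helpers import parse_inclexcl_pattern

    # patterns as they would arrive from --pattern options; first match wins
    patterns = [parse_inclexcl_pattern('+input/file_important'),
                parse_inclexcl_pattern('-input/file*')]

    matcher, include_patterns = Archiver.build_matcher(patterns, ['input'])

    matcher.match('input/file_important')  # True  -> include rule matches first
    matcher.match('input/file1')           # False -> excluded by 'input/file*'
    matcher.match('input/other/data')      # True  -> covered by the 'input' path prefix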
def do_serve(self, args):
"""Start in server mode. This command is usually not used manually.
"""
@ -243,8 +255,7 @@ class Archiver:
def do_create(self, args, repository, manifest=None, key=None):
"""Create new archive"""
matcher = PatternMatcher(fallback=True)
if args.excludes:
matcher.add(args.excludes, False)
matcher.add_inclexcl(args.patterns)
def create_inner(archive, cache):
# Add cache dir to inode_skip list
@ -434,17 +445,7 @@ class Archiver:
if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )):
logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8')
matcher = PatternMatcher()
if args.excludes:
matcher.add(args.excludes, False)
include_patterns = []
if args.paths:
include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths)
matcher.add(include_patterns, True)
matcher.fallback = not include_patterns
matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
output_list = args.output_list
dry_run = args.dry_run
@ -907,8 +908,9 @@ class Archiver:
helptext = collections.OrderedDict()
helptext['patterns'] = textwrap.dedent('''
Exclusion patterns support four separate styles, fnmatch, shell, regular
expressions and path prefixes. By default, fnmatch is used. If followed
File patterns support four separate styles: fnmatch, shell, regular
expressions and path prefixes. By default, fnmatch is used for
`--exclude` patterns and shell-style is used for `--pattern`. If followed
by a colon (':') the first two characters of a pattern are used as a
style selector. Explicit style selection is necessary when a
non-default style is desired or when the desired pattern starts with
@ -916,12 +918,12 @@ class Archiver:
`Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:`
This is the default style. These patterns use a variant of shell
pattern syntax, with '*' matching any number of characters, '?'
matching any single character, '[...]' matching any single
character specified, including ranges, and '[!...]' matching any
character not specified. For the purpose of these patterns, the
path separator ('\\' for Windows and '/' on other systems) is not
This is the default style for --exclude and --exclude-from.
These patterns use a variant of shell pattern syntax, with '*' matching
any number of characters, '?' matching any single character, '[...]'
matching any single character specified, including ranges, and '[!...]'
matching any character not specified. For the purpose of these patterns,
the path separator ('\\' for Windows and '/' on other systems) is not
treated specially. Wrap meta-characters in brackets for a literal
match (i.e. `[?]` to match the literal character `?`). For a path
to match a pattern, it must completely match from start to end, or
@ -932,6 +934,7 @@ class Archiver:
Shell-style patterns, selector `sh:`
This is the default style for --pattern and --patterns-from.
Like fnmatch patterns these are similar to shell patterns. The difference
is that the pattern may include `**/` for matching zero or more directory
levels, `*` for matching zero or more arbitrary characters with the
@ -992,7 +995,39 @@ class Archiver:
re:^/home/[^/]\.tmp/
sh:/home/*/.thumbnails
EOF
$ borg create --exclude-from exclude.txt backup /\n\n''')
$ borg create --exclude-from exclude.txt backup /
A more general and easier-to-use way to define filename matching patterns exists
with the `--pattern` and `--patterns-from` options. Using these, you may specify
the backup roots (starting points) and patterns for inclusion/exclusion. A
root path starts with the prefix `R`, followed by a path (a plain path, not a
file pattern). An include rule starts with the prefix `+`, an exclude rule starts
with the prefix `-`, both followed by a pattern.
Inclusion patterns are useful to include paths that are contained in an excluded
path. The first matching pattern is used, so if an include pattern matches before
an exclude pattern, the file is backed up.
Note that the default pattern style for `--pattern` and `--patterns-from` is
shell style (`sh:`), so those patterns behave similarly to rsync include/exclude
patterns.
Patterns (`--pattern`) and excludes (`--exclude`) from the command line are
considered first (in the order of appearance). Then patterns from `--patterns-from`
are added. Exclusion patterns from `--exclude-from` files are appended last.
An example `--patterns-from` file could look like this::
R /
# can be rebuilt
- /home/*/.cache
# they're downloads for a reason
- /home/*/Downloads
# susan is a nice person
# include susan's home
+ /home/susan
# don't back up the other home directories
- /home/*\n\n''')
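The same rules can be fed programmatically to the new `load_pattern_file` helper; a condensed sketch (the `borg.helpers` import path is assumed)::

    from borg.helpers import load_pattern_file, PatternMatcher

    lines = ['R /', '# can be rebuilt', '- /home/*/.cache', '+ /home/susan', '- /home/*']
    roots, patterns = [], []
    load_pattern_file(lines, roots, patterns)

    roots            # ['/'] -- 'R' lines become backup roots, comments are dropped
    len(patterns)    # 3

    matcher = PatternMatcher(fallback=True)
    matcher.add_inclexcl(patterns)
    matcher.match('/home/susan/notes.txt')  # True  -> '+ /home/susan' matches first
    matcher.match('/home/leo/stuff')        # False -> caught by '- /home/*'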
helptext['placeholders'] = textwrap.dedent('''
Repository (or Archive) URLs, --prefix and --remote-path values support these
placeholders:
@ -1109,6 +1144,9 @@ class Archiver:
help='show version number and exit')
subparsers = parser.add_subparsers(title='required arguments', metavar='<command>')
# some empty defaults for all subparsers
common_parser.set_defaults(paths=[], patterns=[])
serve_epilog = textwrap.dedent("""
This command starts a repository server process. This command is usually not used manually.
""")
@ -1359,11 +1397,10 @@ class Archiver:
help='output verbose list of items (files, dirs, ...)')
subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS',
help='only display items with the given status characters')
subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
subparser.add_argument('-e', '--exclude', dest='patterns',
type=parse_exclude_pattern, action='append',
metavar="PATTERN", help='exclude paths matching PATTERN')
subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
subparser.add_argument('--exclude-from', action=ArgparseExcludeFileAction,
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('--exclude-caches', dest='exclude_caches',
action='store_true', default=False,
@ -1374,6 +1411,10 @@ class Archiver:
subparser.add_argument('--keep-tag-files', dest='keep_tag_files',
action='store_true', default=False,
help='keep tag files of excluded caches/directories')
subparser.add_argument('--pattern', action=ArgparsePatternAction,
metavar="PATTERN", help='include/exclude paths matching PATTERN')
subparser.add_argument('--patterns-from', action=ArgparsePatternFileAction,
metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
type=int, default=300, metavar='SECONDS',
help='write checkpoint every SECONDS seconds (Default: 300)')
@ -1420,7 +1461,7 @@ class Archiver:
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
help='name of archive to create (must be also a valid directory name)')
subparser.add_argument('paths', metavar='PATH', nargs='+', type=str,
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths to archive')
extract_epilog = textwrap.dedent("""
@ -1443,12 +1484,15 @@ class Archiver:
subparser.add_argument('-n', '--dry-run', dest='dry_run',
default=False, action='store_true',
help='do not actually change any files')
subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
subparser.add_argument('-e', '--exclude', dest='patterns',
type=parse_exclude_pattern, action='append',
metavar="PATTERN", help='exclude paths matching PATTERN')
subparser.add_argument('--exclude-from', dest='exclude_files',
type=argparse.FileType('r'), action='append',
subparser.add_argument('--exclude-from', action=ArgparseExcludeFileAction,
metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
subparser.add_argument('--pattern', action=ArgparsePatternAction,
metavar="PATTERN", help='include/exclude paths matching PATTERN')
subparser.add_argument('--patterns-from', action=ArgparsePatternFileAction,
metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
subparser.add_argument('--numeric-owner', dest='numeric_owner',
action='store_true', default=False,
help='only obey numeric user and group identifiers')
@ -2010,7 +2054,10 @@ class Archiver:
args = self.preprocess_args(args)
parser = self.build_parser(args)
args = parser.parse_args(args or ['-h'])
update_excludes(args)
if args.func == self.do_create:
# need at least 1 path but args.paths may also be populated from patterns
if not args.paths:
parser.error('Need at least one PATH argument.')
return args
def run(self, args):


@ -320,22 +320,52 @@ def parse_timestamp(timestamp):
return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
def load_excludes(fh):
"""Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on
both line ends are ignored.
"""
patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#'))
return [parse_pattern(pattern) for pattern in patterns if pattern]
def parse_add_pattern(patternstr, roots, patterns):
"""Parse a pattern string and add it to roots or patterns depending on the pattern type."""
pattern = parse_inclexcl_pattern(patternstr)
if pattern.ptype is RootPath:
roots.append(pattern.pattern)
else:
patterns.append(pattern)
def update_excludes(args):
"""Merge exclude patterns from files with those on command line."""
if hasattr(args, 'exclude_files') and args.exclude_files:
if not hasattr(args, 'excludes') or args.excludes is None:
args.excludes = []
for file in args.exclude_files:
args.excludes += load_excludes(file)
file.close()
def load_pattern_file(fileobj, roots, patterns):
for patternstr in clean_lines(fileobj):
parse_add_pattern(patternstr, roots, patterns)
def load_exclude_file(fileobj, patterns):
for patternstr in clean_lines(fileobj):
patterns.append(parse_exclude_pattern(patternstr))
class ArgparsePatternAction(argparse.Action):
def __init__(self, nargs=1, **kw):
super().__init__(nargs=nargs, **kw)
def __call__(self, parser, args, values, option_string=None):
parse_add_pattern(values[0], args.paths, args.patterns)
class ArgparsePatternFileAction(argparse.Action):
def __init__(self, nargs=1, **kw):
super().__init__(nargs=nargs, **kw)
def __call__(self, parser, args, values, option_string=None):
"""Load and parse patterns from a file.
Lines empty or starting with '#' after stripping whitespace on both line ends are ignored.
"""
filename = values[0]
with open(filename) as f:
self.parse(f, args)
def parse(self, fobj, args):
load_pattern_file(fobj, args.paths, args.patterns)
class ArgparseExcludeFileAction(ArgparsePatternFileAction):
def parse(self, fobj, args):
load_exclude_file(fobj, args.patterns)
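A minimal sketch of how the new actions plug into argparse (assuming `borg.helpers` is importable; the empty `paths`/`patterns` defaults mirror the `common_parser.set_defaults(...)` call added in archiver.py above)::

    import argparse
    from borg.helpers import ArgparsePatternAction

    parser = argparse.ArgumentParser()
    parser.set_defaults(paths=[], patterns=[])  # the actions append to these lists
    parser.add_argument('--pattern', action=ArgparsePatternAction, metavar='PATTERN')

    args = parser.parse_args(['--pattern', 'R /srv', '--pattern', '- /srv/tmp'])
    args.paths          # ['/srv'] -- root patterns end up in the paths list
    len(args.patterns)  # 1        -- the exclude rule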
class PatternMatcher:
@ -351,6 +381,12 @@ class PatternMatcher:
"""
self._items.extend((i, value) for i in patterns)
def add_inclexcl(self, patterns):
"""Add list of patterns (of type InclExclPattern) to internal list. The patterns ptype member is returned from
the match function when one of the given patterns matches.
"""
self._items.extend(patterns)
def match(self, path):
for (pattern, value) in self._items:
if pattern.match(path):
@ -502,6 +538,9 @@ _PATTERN_STYLES = set([
_PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES)
InclExclPattern = namedtuple('InclExclPattern', 'pattern ptype')
RootPath = object()
def parse_pattern(pattern, fallback=FnmatchPattern):
"""Read pattern from string and return an instance of the appropriate implementation class.
@ -519,6 +558,35 @@ def parse_pattern(pattern, fallback=FnmatchPattern):
return cls(pattern)
def parse_exclude_pattern(pattern, fallback=FnmatchPattern):
"""Read pattern from string and return an instance of the appropriate implementation class.
"""
epattern = parse_pattern(pattern, fallback)
return InclExclPattern(epattern, False)
def parse_inclexcl_pattern(pattern, fallback=ShellPattern):
"""Read pattern from string and return a InclExclPattern object."""
type_prefix_map = {
'-': False,
'+': True,
'R': RootPath,
'r': RootPath,
}
try:
ptype = type_prefix_map[pattern[0]]
pattern = pattern[1:].lstrip()
if not pattern:
raise ValueError("Missing pattern!")
except (IndexError, KeyError, ValueError):
raise argparse.ArgumentTypeError("Unable to parse pattern: {}".format(pattern))
if ptype is RootPath:
pobj = pattern
else:
pobj = parse_pattern(pattern, fallback)
return InclExclPattern(pobj, ptype)
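A few concrete inputs and their results, mirroring the parametrized tests below (`borg.helpers` import assumed)::

    from borg.helpers import parse_inclexcl_pattern

    parse_inclexcl_pattern('R /home')        # ptype is RootPath, pattern == '/home' (plain path)
    parse_inclexcl_pattern('+ /home/susan')  # ptype is True, pattern is a ShellPattern
    parse_inclexcl_pattern('- /home/*')      # ptype is False (exclude)
    # parse_inclexcl_pattern('/data') raises argparse.ArgumentTypeError: missing prefix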
def timestamp(s):
"""Convert a --timestamp=s argument to a datetime object"""
try:
@ -1304,6 +1372,30 @@ def signal_handler(sig, handler):
signal.signal(sig, orig_handler)
def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True):
"""
clean lines (usually read from a config file):
1. strip whitespace (left and right), 2. remove empty lines, 3. remove comments.
note: only "pure comment lines" are supported, no support for "trailing comments".
:param lines: input line iterator (e.g. list or open text file) that gives unclean input lines
:param lstrip: lstrip call arguments or False, if lstripping is not desired
:param rstrip: rstrip call arguments or False, if rstripping is not desired
:param remove_comments: remove comment lines (lines starting with "#")
:param remove_empty: remove empty lines
:return: yields processed lines
"""
for line in lines:
if lstrip is not False:
line = line.lstrip(lstrip)
if rstrip is not False:
line = line.rstrip(rstrip)
if remove_empty and not line:
continue
if remove_comments and line.startswith('#'):
continue
yield line
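A quick illustration of the defaults (strip whitespace on both sides, drop blank lines and `#` comment lines)::

    raw = ['# header comment', '', '   R /   ', '- /home/*/.cache']
    list(clean_lines(raw))  # -> ['R /', '- /home/*/.cache']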
def raising_signal_handler(exc_cls):
def handler(sig_no, frame):
# setting SIG_IGN avoids that an incoming second signal of this


@ -652,6 +652,50 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd("extract", self.repository_location + "::test", "fm:input/file1", "fm:*file33*", "input/file2")
self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file333"])
def test_create_without_root(self):
"""test create without a root"""
self.cmd('init', self.repository_location)
args = ['create', self.repository_location + '::test']
if self.FORK_DEFAULT:
self.cmd(*args, exit_code=2)
else:
self.assert_raises(SystemExit, lambda: self.cmd(*args))
def test_create_pattern_root(self):
"""test create with only a root pattern"""
self.cmd('init', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)
self.create_regular_file('file2', size=1024 * 80)
output = self.cmd('create', '-v', '--list', '--pattern=R input', self.repository_location + '::test')
self.assert_in("A input/file1", output)
self.assert_in("A input/file2", output)
def test_create_pattern(self):
"""test file patterns during create"""
self.cmd('init', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)
self.create_regular_file('file2', size=1024 * 80)
self.create_regular_file('file_important', size=1024 * 80)
output = self.cmd('create', '-v', '--list',
'--pattern=+input/file_important', '--pattern=-input/file*',
self.repository_location + '::test', 'input')
self.assert_in("A input/file_important", output)
self.assert_in("A input/file_important", output)
self.assert_not_in('file1', output)
self.assert_not_in('file2', output)
def test_extract_pattern_opt(self):
self.cmd('init', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)
self.create_regular_file('file2', size=1024 * 80)
self.create_regular_file('file_important', size=1024 * 80)
self.cmd('create', self.repository_location + '::test', 'input')
with changedir('output'):
self.cmd('extract',
'--pattern=+input/file_important', '--pattern=-input/file*',
self.repository_location + '::test')
self.assert_equal(sorted(os.listdir('output/input')), ['file_important'])
def test_exclude_caches(self):
self.cmd('init', self.repository_location)
self.create_regular_file('file1', size=1024 * 80)


@ -9,12 +9,13 @@ import sys
import msgpack
import msgpack.fallback
import time
import argparse
from ..helpers import Location, format_file_size, format_timedelta, format_line, PlaceholderError, make_path_safe, \
prune_within, prune_split, get_cache_dir, get_keys_dir, get_security_dir, Statistics, is_slow_msgpack, \
yes, TRUISH, FALSISH, DEFAULTISH, \
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
ProgressIndicatorPercent, ProgressIndicatorEndless, parse_pattern, load_exclude_file, load_pattern_file, \
PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, \
Buffer
from . import BaseTestCase, FakeInputs
@ -428,8 +429,13 @@ def test_invalid_unicode_pattern(pattern):
(["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]),
(["pp:aaabbb"], None),
(["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
(["/nomatch", "/more/*"],
['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
# the order of exclude patterns shouldn't matter
(["/more/*", "/nomatch"],
['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
])
def test_patterns_from_file(tmpdir, lines, expected):
def test_exclude_patterns_from_file(tmpdir, lines, expected):
files = [
'/data/something00.txt', '/more/data', '/home',
' #/wsfoobar',
@ -438,8 +444,10 @@ def test_patterns_from_file(tmpdir, lines, expected):
]
def evaluate(filename):
patterns = []
load_exclude_file(open(filename, "rt"), patterns)
matcher = PatternMatcher(fallback=True)
matcher.add(load_excludes(open(filename, "rt")), False)
matcher.add_inclexcl(patterns)
return [path for path in files if matcher.match(path)]
exclfile = tmpdir.join("exclude.txt")
@ -450,6 +458,104 @@ def test_patterns_from_file(tmpdir, lines, expected):
assert evaluate(str(exclfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
# "None" means all files, i.e. none excluded
([], [], 0),
(["# Comment only"], [], 0),
(["- *"], [], 1),
(["+fm:*/something00.txt",
"-/data"], [], 2),
(["R /"], ["/"], 0),
(["R /",
"# comment"], ["/"], 0),
(["# comment",
"- /data",
"R /home"], ["/home"], 1),
])
def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
def evaluate(filename):
roots = []
inclexclpatterns = []
load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
return roots, len(inclexclpatterns)
patternfile = tmpdir.join("patterns.txt")
with patternfile.open("wt") as fh:
fh.write("\n".join(lines))
roots, numpatterns = evaluate(str(patternfile))
assert roots == expected_roots
assert numpatterns == expected_numpatterns
@pytest.mark.parametrize("lines", [
(["X /data"]), # illegal pattern type prefix
(["/data"]), # need a pattern type prefix
])
def test_load_invalid_patterns_from_file(tmpdir, lines):
patternfile = tmpdir.join("patterns.txt")
with patternfile.open("wt") as fh:
fh.write("\n".join(lines))
filename = str(patternfile)
with pytest.raises(argparse.ArgumentTypeError):
roots = []
inclexclpatterns = []
load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
@pytest.mark.parametrize("lines, expected", [
# "None" means all files, i.e. none excluded
([], None),
(["# Comment only"], None),
(["- *"], []),
# default match type is sh: for patterns -> * doesn't match a /
(["-*/something0?.txt"],
['/data', '/data/something00.txt', '/data/subdir/something01.txt',
'/home', '/home/leo', '/home/leo/t', '/home/other']),
(["-fm:*/something00.txt"],
['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
(["-fm:*/something0?.txt"],
["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
(["+/*/something0?.txt",
"-/data"],
["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
(["+fm:*/something00.txt",
"-/data"],
["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
# include /home/leo and exclude the rest of /home:
(["+/home/leo",
"-/home/*"],
['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
# wrong order, /home/leo is already excluded by -/home/*:
(["-/home/*",
"+/home/leo"],
['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']),
(["+fm:/home/leo",
"-/home/"],
['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
])
def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
files = [
'/data', '/data/something00.txt', '/data/subdir/something01.txt',
'/home', '/home/leo', '/home/leo/t', '/home/other'
]
def evaluate(filename):
matcher = PatternMatcher(fallback=True)
roots = []
inclexclpatterns = []
load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
matcher.add_inclexcl(inclexclpatterns)
return [path for path in files if matcher.match(path)]
patternfile = tmpdir.join("patterns.txt")
with patternfile.open("wt") as fh:
fh.write("\n".join(lines))
assert evaluate(str(patternfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("pattern, cls", [
("", FnmatchPattern),