mirror of
https://github.com/borgbackup/borg.git
synced 2025-02-21 21:57:36 +00:00
Merge pull request #2471 from enkore/issue/2469
Move patterns to module
This commit is contained in:
commit
7c9a57bee5
7 changed files with 865 additions and 851 deletions
|
@ -36,7 +36,7 @@
|
|||
from .helpers import bin_to_hex
|
||||
from .helpers import safe_ns
|
||||
from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
|
||||
from .helpers import PathPrefixPattern, FnmatchPattern, IECommand
|
||||
from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
|
||||
from .item import Item, ArchiveItem
|
||||
from .key import key_factory
|
||||
from .platform import acl_get, acl_set, set_flags, get_flags, swidth
|
||||
|
|
|
@ -51,15 +51,15 @@
|
|||
from .helpers import hardlinkable
|
||||
from .helpers import StableDict
|
||||
from .helpers import check_extension_modules
|
||||
from .helpers import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
|
||||
from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
|
||||
from .helpers import log_multi
|
||||
from .helpers import PatternMatcher
|
||||
from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
|
||||
from .helpers import ErrorIgnoringTextIOWrapper
|
||||
from .helpers import ProgressIndicatorPercent
|
||||
from .helpers import basic_json_data, json_print
|
||||
from .helpers import replace_placeholders
|
||||
from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
|
||||
from .patterns import PatternMatcher
|
||||
from .item import Item
|
||||
from .key import key_creator, tam_required_file, tam_required, RepoKey, PassphraseKey
|
||||
from .keymanager import KeyManager
|
||||
|
|
|
@ -18,14 +18,11 @@
|
|||
import textwrap
|
||||
import threading
|
||||
import time
|
||||
import unicodedata
|
||||
import uuid
|
||||
from binascii import hexlify
|
||||
from collections import namedtuple, deque, abc, Counter
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from enum import Enum
|
||||
from fnmatch import translate
|
||||
from functools import wraps, partial, lru_cache
|
||||
from functools import partial, lru_cache
|
||||
from itertools import islice
|
||||
from operator import attrgetter
|
||||
from string import Formatter
|
||||
|
@ -42,7 +39,6 @@
|
|||
from . import chunker
|
||||
from . import crypto
|
||||
from . import hashindex
|
||||
from . import shellpattern
|
||||
from .constants import * # NOQA
|
||||
|
||||
|
||||
|
@ -389,387 +385,6 @@ def parse_timestamp(timestamp):
|
|||
return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def parse_patternfile_line(line, roots, ie_commands, fallback):
|
||||
"""Parse a pattern-file line and act depending on which command it represents."""
|
||||
ie_command = parse_inclexcl_command(line, fallback=fallback)
|
||||
if ie_command.cmd is IECommand.RootPath:
|
||||
roots.append(ie_command.val)
|
||||
elif ie_command.cmd is IECommand.PatternStyle:
|
||||
fallback = ie_command.val
|
||||
else:
|
||||
# it is some kind of include/exclude command
|
||||
ie_commands.append(ie_command)
|
||||
return fallback
|
||||
|
||||
|
||||
def load_pattern_file(fileobj, roots, ie_commands, fallback=None):
|
||||
if fallback is None:
|
||||
fallback = ShellPattern # ShellPattern is defined later in this module
|
||||
for line in clean_lines(fileobj):
|
||||
fallback = parse_patternfile_line(line, roots, ie_commands, fallback)
|
||||
|
||||
|
||||
def load_exclude_file(fileobj, patterns):
|
||||
for patternstr in clean_lines(fileobj):
|
||||
patterns.append(parse_exclude_pattern(patternstr))
|
||||
|
||||
|
||||
class ArgparsePatternAction(argparse.Action):
|
||||
def __init__(self, nargs=1, **kw):
|
||||
super().__init__(nargs=nargs, **kw)
|
||||
|
||||
def __call__(self, parser, args, values, option_string=None):
|
||||
parse_patternfile_line(values[0], args.paths, args.patterns, ShellPattern)
|
||||
|
||||
|
||||
class ArgparsePatternFileAction(argparse.Action):
|
||||
def __init__(self, nargs=1, **kw):
|
||||
super().__init__(nargs=nargs, **kw)
|
||||
|
||||
def __call__(self, parser, args, values, option_string=None):
|
||||
"""Load and parse patterns from a file.
|
||||
Lines empty or starting with '#' after stripping whitespace on both line ends are ignored.
|
||||
"""
|
||||
filename = values[0]
|
||||
with open(filename) as f:
|
||||
self.parse(f, args)
|
||||
|
||||
def parse(self, fobj, args):
|
||||
load_pattern_file(fobj, args.paths, args.patterns)
|
||||
|
||||
|
||||
class ArgparseExcludeFileAction(ArgparsePatternFileAction):
|
||||
def parse(self, fobj, args):
|
||||
load_exclude_file(fobj, args.patterns)
|
||||
|
||||
|
||||
class PatternMatcher:
|
||||
"""Represents a collection of pattern objects to match paths against.
|
||||
|
||||
*fallback* is a boolean value that *match()* returns if no matching patterns are found.
|
||||
|
||||
"""
|
||||
def __init__(self, fallback=None):
|
||||
self._items = []
|
||||
|
||||
# Value to return from match function when none of the patterns match.
|
||||
self.fallback = fallback
|
||||
|
||||
# optimizations
|
||||
self._path_full_patterns = {} # full path -> return value
|
||||
|
||||
# indicates whether the last match() call ended on a pattern for which
|
||||
# we should recurse into any matching folder. Will be set to True or
|
||||
# False when calling match().
|
||||
self.recurse_dir = None
|
||||
|
||||
# whether to recurse into directories when no match is found
|
||||
# TODO: allow modification as a config option?
|
||||
self.recurse_dir_default = True
|
||||
|
||||
self.include_patterns = []
|
||||
|
||||
# TODO: move this info to parse_inclexcl_command and store in PatternBase subclass?
|
||||
self.is_include_cmd = {
|
||||
IECommand.Exclude: False,
|
||||
IECommand.ExcludeNoRecurse: False,
|
||||
IECommand.Include: True
|
||||
}
|
||||
|
||||
def empty(self):
|
||||
return not len(self._items) and not len(self._path_full_patterns)
|
||||
|
||||
def _add(self, pattern, cmd):
|
||||
"""*cmd* is an IECommand value.
|
||||
"""
|
||||
if isinstance(pattern, PathFullPattern):
|
||||
key = pattern.pattern # full, normalized path
|
||||
self._path_full_patterns[key] = cmd
|
||||
else:
|
||||
self._items.append((pattern, cmd))
|
||||
|
||||
def add(self, patterns, cmd):
|
||||
"""Add list of patterns to internal list. *cmd* indicates whether the
|
||||
pattern is an include/exclude pattern, and whether recursion should be
|
||||
done on excluded folders.
|
||||
"""
|
||||
for pattern in patterns:
|
||||
self._add(pattern, cmd)
|
||||
|
||||
def add_includepaths(self, include_paths):
|
||||
"""Used to add inclusion-paths from args.paths (from commandline).
|
||||
"""
|
||||
include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths]
|
||||
self.add(include_patterns, IECommand.Include)
|
||||
self.fallback = not include_patterns
|
||||
self.include_patterns = include_patterns
|
||||
|
||||
def get_unmatched_include_patterns(self):
|
||||
"Note that this only returns patterns added via *add_includepaths*."
|
||||
return [p for p in self.include_patterns if p.match_count == 0]
|
||||
|
||||
def add_inclexcl(self, patterns):
|
||||
"""Add list of patterns (of type CmdTuple) to internal list.
|
||||
"""
|
||||
for pattern, cmd in patterns:
|
||||
self._add(pattern, cmd)
|
||||
|
||||
def match(self, path):
|
||||
"""Return True or False depending on whether *path* is matched.
|
||||
|
||||
If no match is found among the patterns in this matcher, then the value
|
||||
in self.fallback is returned (defaults to None).
|
||||
|
||||
"""
|
||||
path = normalize_path(path)
|
||||
# do a fast lookup for full path matches (note: we do not count such matches):
|
||||
non_existent = object()
|
||||
value = self._path_full_patterns.get(path, non_existent)
|
||||
|
||||
if value is not non_existent:
|
||||
# we have a full path match!
|
||||
# TODO: get from pattern; don't hard-code
|
||||
self.recurse_dir = True
|
||||
return value
|
||||
|
||||
# this is the slow way, if we have many patterns in self._items:
|
||||
for (pattern, cmd) in self._items:
|
||||
if pattern.match(path, normalize=False):
|
||||
self.recurse_dir = pattern.recurse_dir
|
||||
return self.is_include_cmd[cmd]
|
||||
|
||||
# by default we will recurse if there is no match
|
||||
self.recurse_dir = self.recurse_dir_default
|
||||
return self.fallback
|
||||
|
||||
|
||||
def normalize_path(path):
|
||||
"""normalize paths for MacOS (but do nothing on other platforms)"""
|
||||
# HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match.
|
||||
# Windows and Unix filesystems allow different forms, so users always have to enter an exact match.
|
||||
return unicodedata.normalize('NFD', path) if sys.platform == 'darwin' else path
|
||||
|
||||
|
||||
class PatternBase:
|
||||
"""Shared logic for inclusion/exclusion patterns.
|
||||
"""
|
||||
PREFIX = NotImplemented
|
||||
|
||||
def __init__(self, pattern, recurse_dir=False):
|
||||
self.pattern_orig = pattern
|
||||
self.match_count = 0
|
||||
pattern = normalize_path(pattern)
|
||||
self._prepare(pattern)
|
||||
self.recurse_dir = recurse_dir
|
||||
|
||||
def match(self, path, normalize=True):
|
||||
"""Return a boolean indicating whether *path* is matched by this pattern.
|
||||
|
||||
If normalize is True (default), the path will get normalized using normalize_path(),
|
||||
otherwise it is assumed that it already is normalized using that function.
|
||||
"""
|
||||
if normalize:
|
||||
path = normalize_path(path)
|
||||
matches = self._match(path)
|
||||
if matches:
|
||||
self.match_count += 1
|
||||
return matches
|
||||
|
||||
def __repr__(self):
|
||||
return '%s(%s)' % (type(self), self.pattern)
|
||||
|
||||
def __str__(self):
|
||||
return self.pattern_orig
|
||||
|
||||
def _prepare(self, pattern):
|
||||
"Should set the value of self.pattern"
|
||||
raise NotImplementedError
|
||||
|
||||
def _match(self, path):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class PathFullPattern(PatternBase):
|
||||
"""Full match of a path."""
|
||||
PREFIX = "pf"
|
||||
|
||||
def _prepare(self, pattern):
|
||||
self.pattern = os.path.normpath(pattern)
|
||||
|
||||
def _match(self, path):
|
||||
return path == self.pattern
|
||||
|
||||
|
||||
# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
|
||||
# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
|
||||
# separator to the end of the path before matching.
|
||||
|
||||
|
||||
class PathPrefixPattern(PatternBase):
|
||||
"""Literal files or directories listed on the command line
|
||||
for some operations (e.g. extract, but not create).
|
||||
If a directory is specified, all paths that start with that
|
||||
path match as well. A trailing slash makes no difference.
|
||||
"""
|
||||
PREFIX = "pp"
|
||||
|
||||
def _prepare(self, pattern):
|
||||
self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep
|
||||
|
||||
def _match(self, path):
|
||||
return (path + os.path.sep).startswith(self.pattern)
|
||||
|
||||
|
||||
class FnmatchPattern(PatternBase):
|
||||
"""Shell glob patterns to exclude. A trailing slash means to
|
||||
exclude the contents of a directory, but not the directory itself.
|
||||
"""
|
||||
PREFIX = "fm"
|
||||
|
||||
def _prepare(self, pattern):
|
||||
if pattern.endswith(os.path.sep):
|
||||
pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep
|
||||
else:
|
||||
pattern = os.path.normpath(pattern) + os.path.sep + '*'
|
||||
|
||||
self.pattern = pattern
|
||||
|
||||
# fnmatch and re.match both cache compiled regular expressions.
|
||||
# Nevertheless, this is about 10 times faster.
|
||||
self.regex = re.compile(translate(self.pattern))
|
||||
|
||||
def _match(self, path):
|
||||
return (self.regex.match(path + os.path.sep) is not None)
|
||||
|
||||
|
||||
class ShellPattern(PatternBase):
|
||||
"""Shell glob patterns to exclude. A trailing slash means to
|
||||
exclude the contents of a directory, but not the directory itself.
|
||||
"""
|
||||
PREFIX = "sh"
|
||||
|
||||
def _prepare(self, pattern):
|
||||
sep = os.path.sep
|
||||
|
||||
if pattern.endswith(sep):
|
||||
pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep
|
||||
else:
|
||||
pattern = os.path.normpath(pattern) + sep + "**" + sep + "*"
|
||||
|
||||
self.pattern = pattern
|
||||
self.regex = re.compile(shellpattern.translate(self.pattern))
|
||||
|
||||
def _match(self, path):
|
||||
return (self.regex.match(path + os.path.sep) is not None)
|
||||
|
||||
|
||||
class RegexPattern(PatternBase):
|
||||
"""Regular expression to exclude.
|
||||
"""
|
||||
PREFIX = "re"
|
||||
|
||||
def _prepare(self, pattern):
|
||||
self.pattern = pattern
|
||||
self.regex = re.compile(pattern)
|
||||
|
||||
def _match(self, path):
|
||||
# Normalize path separators
|
||||
if os.path.sep != '/':
|
||||
path = path.replace(os.path.sep, '/')
|
||||
|
||||
return (self.regex.search(path) is not None)
|
||||
|
||||
|
||||
_PATTERN_CLASSES = set([
|
||||
FnmatchPattern,
|
||||
PathFullPattern,
|
||||
PathPrefixPattern,
|
||||
RegexPattern,
|
||||
ShellPattern,
|
||||
])
|
||||
|
||||
_PATTERN_CLASS_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_CLASSES)
|
||||
|
||||
CmdTuple = namedtuple('CmdTuple', 'val cmd')
|
||||
|
||||
|
||||
class IECommand(Enum):
|
||||
"""A command that an InclExcl file line can represent.
|
||||
"""
|
||||
RootPath = 1
|
||||
PatternStyle = 2
|
||||
Include = 3
|
||||
Exclude = 4
|
||||
ExcludeNoRecurse = 5
|
||||
|
||||
|
||||
def get_pattern_class(prefix):
|
||||
try:
|
||||
return _PATTERN_CLASS_BY_PREFIX[prefix]
|
||||
except KeyError:
|
||||
raise ValueError("Unknown pattern style: {}".format(prefix)) from None
|
||||
|
||||
|
||||
def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True):
|
||||
"""Read pattern from string and return an instance of the appropriate implementation class.
|
||||
|
||||
"""
|
||||
if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum():
|
||||
(style, pattern) = (pattern[:2], pattern[3:])
|
||||
cls = get_pattern_class(style)
|
||||
else:
|
||||
cls = fallback
|
||||
return cls(pattern, recurse_dir)
|
||||
|
||||
|
||||
def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern):
|
||||
"""Read pattern from string and return an instance of the appropriate implementation class.
|
||||
"""
|
||||
epattern_obj = parse_pattern(pattern_str, fallback)
|
||||
return CmdTuple(epattern_obj, IECommand.Exclude)
|
||||
|
||||
|
||||
def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
|
||||
"""Read a --patterns-from command from string and return a CmdTuple object."""
|
||||
|
||||
cmd_prefix_map = {
|
||||
'-': IECommand.Exclude,
|
||||
'!': IECommand.ExcludeNoRecurse,
|
||||
'+': IECommand.Include,
|
||||
'R': IECommand.RootPath,
|
||||
'r': IECommand.RootPath,
|
||||
'P': IECommand.PatternStyle,
|
||||
'p': IECommand.PatternStyle,
|
||||
}
|
||||
|
||||
try:
|
||||
cmd = cmd_prefix_map[cmd_line_str[0]]
|
||||
|
||||
# remaining text on command-line following the command character
|
||||
remainder_str = cmd_line_str[1:].lstrip()
|
||||
|
||||
if not remainder_str:
|
||||
raise ValueError("Missing pattern/information!")
|
||||
except (IndexError, KeyError, ValueError):
|
||||
raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str))
|
||||
|
||||
if cmd is IECommand.RootPath:
|
||||
# TODO: validate string?
|
||||
val = remainder_str
|
||||
elif cmd is IECommand.PatternStyle:
|
||||
# then remainder_str is something like 're' or 'sh'
|
||||
try:
|
||||
val = get_pattern_class(remainder_str)
|
||||
except ValueError:
|
||||
raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str))
|
||||
else:
|
||||
# determine recurse_dir based on command type
|
||||
recurse_dir = cmd not in [IECommand.ExcludeNoRecurse]
|
||||
val = parse_pattern(remainder_str, fallback, recurse_dir)
|
||||
|
||||
return CmdTuple(val, cmd)
|
||||
|
||||
|
||||
def timestamp(s):
|
||||
"""Convert a --timestamp=s argument to a datetime object"""
|
||||
try:
|
||||
|
|
392
src/borg/patterns.py
Normal file
392
src/borg/patterns.py
Normal file
|
@ -0,0 +1,392 @@
|
|||
import argparse
|
||||
import fnmatch
|
||||
import os.path
|
||||
import re
|
||||
import sys
|
||||
import unicodedata
|
||||
from collections import namedtuple
|
||||
from enum import Enum
|
||||
|
||||
from . import shellpattern
|
||||
from .helpers import clean_lines
|
||||
|
||||
|
||||
def parse_patternfile_line(line, roots, ie_commands, fallback):
    """Interpret one pattern-file line and apply the command it encodes.

    *line* is parsed with parse_inclexcl_command(), using *fallback* as the
    pattern class for patterns that carry no explicit style prefix.

    - a root-path command appends its value to the *roots* list,
    - a pattern-style command selects a new fallback pattern class,
    - any other command (include/exclude) is appended to *ie_commands*.

    Returns the fallback pattern class to use for the lines that follow.
    """
    parsed = parse_inclexcl_command(line, fallback=fallback)
    kind = parsed.cmd

    if kind is IECommand.PatternStyle:
        # nothing is stored for this command, it only changes the style going forward
        return parsed.val

    if kind is IECommand.RootPath:
        roots.append(parsed.val)
    else:
        # it is some kind of include/exclude command
        ie_commands.append(parsed)
    return fallback
|
||||
|
||||
|
||||
def load_pattern_file(fileobj, roots, ie_commands, fallback=None):
    """Feed every line that clean_lines() yields from *fileobj* to parse_patternfile_line().

    Root paths are collected in *roots*, include/exclude commands in *ie_commands*.
    *fallback* is the pattern class used until a line selects another style;
    when it is None, ShellPattern is used.
    """
    # ShellPattern is defined later in this module, so it can not be the default
    # value in the signature and has to be resolved at call time instead.
    style = ShellPattern if fallback is None else fallback
    for line in clean_lines(fileobj):
        # each line may switch the style that applies to the lines after it
        style = parse_patternfile_line(line, roots, ie_commands, style)
|
||||
|
||||
|
||||
def load_exclude_file(fileobj, patterns):
    """Append one exclude command to *patterns* per line that clean_lines() yields from *fileobj*.

    Every line is turned into a CmdTuple by parse_exclude_pattern().
    """
    for exclude_cmd in map(parse_exclude_pattern, clean_lines(fileobj)):
        patterns.append(exclude_cmd)
|
||||
|
||||
|
||||
class ArgparsePatternAction(argparse.Action):
    """argparse action for a single pattern/command given directly as an option value.

    The value is handled like one line of a pattern file, with ShellPattern as
    the pattern class for patterns without a style prefix: root paths are
    appended to ``args.paths``, include/exclude commands to ``args.patterns``.
    """

    def __init__(self, nargs=1, **kw):
        # default to exactly one value; argparse then hands it over as a one-element list
        kw['nargs'] = nargs
        super().__init__(**kw)

    def __call__(self, parser, args, values, option_string=None):
        pattern_line = values[0]
        # the returned fallback style is irrelevant here, there is no "next line"
        parse_patternfile_line(pattern_line, args.paths, args.patterns, ShellPattern)
|
||||
|
||||
|
||||
class ArgparsePatternFileAction(argparse.Action):
    """argparse action that reads patterns/commands from the file named by the option value.

    Subclasses can override parse() to interpret the file contents differently.
    """

    def __init__(self, nargs=1, **kw):
        # default to exactly one value (the file name), delivered as a one-element list
        kw['nargs'] = nargs
        super().__init__(**kw)

    def __call__(self, parser, args, values, option_string=None):
        """Load and parse patterns from a file.

        Lines empty or starting with '#' after stripping whitespace on both line ends are ignored.
        """
        path = values[0]
        with open(path) as stream:
            self.parse(stream, args)

    def parse(self, fobj, args):
        """Parse the open file *fobj* as a pattern file into ``args.paths`` and ``args.patterns``."""
        root_paths = args.paths
        commands = args.patterns
        load_pattern_file(fobj, root_paths, commands)
|
||||
|
||||
|
||||
class ArgparseExcludeFileAction(ArgparsePatternFileAction):
    """Variant of ArgparsePatternFileAction for files that contain exclude patterns only."""

    def parse(self, fobj, args):
        """Parse the open file *fobj* with load_exclude_file(), appending to ``args.patterns``."""
        exclude_commands = args.patterns
        load_exclude_file(fobj, exclude_commands)
|
||||
|
||||
|
||||
class PatternMatcher:
    """Represents a collection of pattern objects to match paths against.

    *fallback* is the value that *match()* returns if no matching patterns are found.

    """
    def __init__(self, fallback=None):
        # (pattern, cmd) pairs; match() tries them in the order they were added.
        self._items = []

        # Value to return from match function when none of the patterns match.
        self.fallback = fallback

        # optimizations: PathFullPattern instances are looked up by dict key
        # instead of being scanned one by one.
        self._path_full_patterns = {}  # full, normalized path -> cmd

        # indicates whether the last match() call ended on a pattern for which
        # we should recurse into any matching folder. Will be set to True or
        # False when calling match().
        self.recurse_dir = None

        # whether to recurse into directories when no match is found
        # TODO: allow modification as a config option?
        self.recurse_dir_default = True

        # patterns created by add_includepaths(), see get_unmatched_include_patterns()
        self.include_patterns = []

        # Maps a command to the value match() returns for a path matched under that command.
        # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass?
        self.is_include_cmd = {
            IECommand.Exclude: False,
            IECommand.ExcludeNoRecurse: False,
            IECommand.Include: True
        }

    def empty(self):
        """Return True if no pattern of any kind has been added yet."""
        return not self._items and not self._path_full_patterns

    def _add(self, pattern, cmd):
        """*cmd* is an IECommand value.
        """
        if isinstance(pattern, PathFullPattern):
            key = pattern.pattern  # full, normalized path
            # a later full-path pattern for the same path replaces the earlier one
            self._path_full_patterns[key] = cmd
        else:
            self._items.append((pattern, cmd))

    def add(self, patterns, cmd):
        """Add list of patterns to internal list. *cmd* indicates whether the
        pattern is an include/exclude pattern, and whether recursion should be
        done on excluded folders.
        """
        for pattern in patterns:
            self._add(pattern, cmd)

    def add_includepaths(self, include_paths):
        """Used to add inclusion-paths from args.paths (from commandline).

        Each path is parsed with PathPrefixPattern as the default style. The
        fallback becomes False if at least one path was given (paths matching
        nothing are then not included) and True otherwise.
        """
        include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths]
        self.add(include_patterns, IECommand.Include)
        self.fallback = not include_patterns
        self.include_patterns = include_patterns

    def get_unmatched_include_patterns(self):
        "Note that this only returns patterns added via *add_includepaths*."
        return [p for p in self.include_patterns if p.match_count == 0]

    def add_inclexcl(self, patterns):
        """Add list of patterns (of type CmdTuple) to internal list.
        """
        for pattern, cmd in patterns:
            self._add(pattern, cmd)

    def match(self, path):
        """Return True or False depending on whether *path* is matched.

        Full-path patterns are checked first, then all other patterns in the
        order they were added; the first matching pattern decides. The result
        is looked up in self.is_include_cmd for the command of that pattern.

        If no match is found among the patterns in this matcher, then the value
        in self.fallback is returned (defaults to None).

        As a side effect, self.recurse_dir is updated on every call.
        """
        path = normalize_path(path)
        # do a fast lookup for full path matches (note: we do not count such matches):
        try:
            cmd = self._path_full_patterns[path]
        except KeyError:
            pass
        else:
            # we have a full path match!
            # Only the command is stored for full-path patterns, so derive the
            # recursion flag from it: everything but ExcludeNoRecurse recurses.
            self.recurse_dir = cmd is not IECommand.ExcludeNoRecurse
            # Translate the command into the include/exclude result, exactly like
            # for all other patterns below. Returning the command itself would make
            # an exclude look like an include, because enum members are truthy.
            return self.is_include_cmd[cmd]

        # this is the slow way, if we have many patterns in self._items:
        for (pattern, cmd) in self._items:
            # path already is normalized, do not normalize it again for each pattern
            if pattern.match(path, normalize=False):
                self.recurse_dir = pattern.recurse_dir
                return self.is_include_cmd[cmd]

        # by default we will recurse if there is no match
        self.recurse_dir = self.recurse_dir_default
        return self.fallback
|
||||
|
||||
|
||||
def normalize_path(path):
    """Return *path* in unicode NFD form on MacOS, return it unchanged on all other platforms."""
    if sys.platform != 'darwin':
        # Windows and Unix filesystems allow different forms, so users always have to enter an exact match.
        return path
    # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match.
    return unicodedata.normalize('NFD', path)
|
||||
|
||||
|
||||
class PatternBase:
    """Shared logic for inclusion/exclusion patterns.

    Subclasses set PREFIX to their style name and implement _prepare() and _match().
    """
    PREFIX = NotImplemented

    def __init__(self, pattern, recurse_dir=False):
        # keep the pattern exactly as given, __str__ hands it back
        self.pattern_orig = pattern
        # number of successful match() calls
        self.match_count = 0
        # the subclass derives self.pattern from the normalized text
        self._prepare(normalize_path(pattern))
        self.recurse_dir = recurse_dir

    def match(self, path, normalize=True):
        """Return a boolean indicating whether *path* is matched by this pattern.

        If normalize is True (default), the path will get normalized using normalize_path(),
        otherwise it is assumed that it already is normalized using that function.

        Every successful match increments self.match_count.
        """
        candidate = normalize_path(path) if normalize else path
        matched = self._match(candidate)
        if matched:
            self.match_count += 1
        return matched

    def __repr__(self):
        return '%s(%s)' % (type(self), self.pattern)

    def __str__(self):
        return self.pattern_orig

    def _prepare(self, pattern):
        "Should set the value of self.pattern"
        raise NotImplementedError

    def _match(self, path):
        "Should return whether the (already normalized) *path* matches self.pattern"
        raise NotImplementedError
|
||||
|
||||
|
||||
class PathFullPattern(PatternBase):
    """Full match of a path.

    The pattern is passed through os.path.normpath() and then has to be equal
    to the path that is checked.
    """
    PREFIX = "pf"

    def _prepare(self, pattern):
        normalized = os.path.normpath(pattern)
        self.pattern = normalized

    def _match(self, path):
        return self.pattern == path
|
||||
|
||||
|
||||
# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
|
||||
# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
|
||||
# separator to the end of the path before matching.
|
||||
|
||||
|
||||
class PathPrefixPattern(PatternBase):
    """Literal files or directories listed on the command line
    for some operations (e.g. extract, but not create).
    If a directory is specified, all paths that start with that
    path match as well.  A trailing slash makes no difference.
    """
    PREFIX = "pp"

    def _prepare(self, pattern):
        sep = os.path.sep
        # normalize, then make sure the pattern ends with exactly one separator
        self.pattern = os.path.normpath(pattern).rstrip(sep) + sep

    def _match(self, path):
        # the separator appended here lets the pattern match the path itself
        # as well as everything below it, but not siblings sharing the prefix
        candidate = path + os.path.sep
        return candidate.startswith(self.pattern)
|
||||
|
||||
|
||||
class FnmatchPattern(PatternBase):
    """Shell glob patterns to exclude.  A trailing slash means to
    exclude the contents of a directory, but not the directory itself.
    """
    PREFIX = "fm"

    def _prepare(self, pattern):
        sep = os.path.sep
        base = os.path.normpath(pattern)

        # check the pattern as given: normpath() drops a trailing separator
        if pattern.endswith(sep):
            glob = base.rstrip(sep) + sep + '*' + sep
        else:
            glob = base + sep + '*'

        self.pattern = glob

        # fnmatch and re.match both cache compiled regular expressions.
        # Nevertheless, this is about 10 times faster.
        self.regex = re.compile(fnmatch.translate(glob))

    def _match(self, path):
        # paths are matched with a separator appended, see _prepare()
        found = self.regex.match(path + os.path.sep)
        return found is not None
|
||||
|
||||
|
||||
class ShellPattern(PatternBase):
    """Shell glob patterns to exclude.  A trailing slash means to
    exclude the contents of a directory, but not the directory itself.
    """
    PREFIX = "sh"

    def _prepare(self, pattern):
        sep = os.path.sep
        base = os.path.normpath(pattern)

        # check the pattern as given: normpath() drops a trailing separator
        if pattern.endswith(sep):
            glob = base.rstrip(sep) + sep + "**" + sep + "*" + sep
        else:
            glob = base + sep + "**" + sep + "*"

        self.pattern = glob
        # the glob is turned into a regular expression by the shellpattern module
        self.regex = re.compile(shellpattern.translate(glob))

    def _match(self, path):
        # paths are matched with a separator appended, see _prepare()
        found = self.regex.match(path + os.path.sep)
        return found is not None
|
||||
|
||||
|
||||
class RegexPattern(PatternBase):
    """Regular expression to exclude.

    The expression is applied with search(), so it may match anywhere in the
    path unless it is anchored.
    """
    PREFIX = "re"

    def _prepare(self, pattern):
        # the pattern text is used as is, only compiled once up front
        self.pattern = pattern
        self.regex = re.compile(pattern)

    def _match(self, path):
        sep = os.path.sep
        # Normalize path separators, so expressions can always be written with '/'
        subject = path if sep == '/' else path.replace(sep, '/')
        return self.regex.search(subject) is not None
|
||||
|
||||
|
||||
# All pattern implementations that can be selected via a style prefix.
_PATTERN_CLASSES = {
    FnmatchPattern,
    PathFullPattern,
    PathPrefixPattern,
    RegexPattern,
    ShellPattern,
}

# Lookup table used by get_pattern_class(): two-letter style prefix -> pattern class.
_PATTERN_CLASS_BY_PREFIX = {pattern_cls.PREFIX: pattern_cls for pattern_cls in _PATTERN_CLASSES}

# Result of parsing a command: *val* is the payload (a pattern object, a root path
# string or a pattern class), *cmd* is the IECommand it belongs to.
CmdTuple = namedtuple('CmdTuple', 'val cmd')
|
||||
|
||||
|
||||
class IECommand(Enum):
    """A command that an InclExcl file line can represent.

    The character that selects each command is defined in parse_inclexcl_command().
    """
    # 'R' / 'r': the rest of the line is a root path
    RootPath = 1
    # 'P' / 'p': the rest of the line names the pattern style for following patterns
    PatternStyle = 2
    # '+': the rest of the line is an include pattern
    Include = 3
    # '-': the rest of the line is an exclude pattern
    Exclude = 4
    # '!': exclude pattern that also turns off recursion into matching directories
    ExcludeNoRecurse = 5
|
||||
|
||||
|
||||
def get_pattern_class(prefix):
    """Return the pattern class registered for the style *prefix* (e.g. 're' or 'sh').

    Raises ValueError if no pattern class uses that prefix.
    """
    try:
        pattern_cls = _PATTERN_CLASS_BY_PREFIX[prefix]
    except KeyError:
        # the KeyError carries no extra information, so keep it out of the traceback
        raise ValueError("Unknown pattern style: {}".format(prefix)) from None
    else:
        return pattern_cls
|
||||
|
||||
|
||||
def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True):
    """Read pattern from string and return an instance of the appropriate implementation class.

    A pattern of the form ``xx:text``, with ``xx`` being two alphanumeric
    characters, selects the pattern class by its style prefix ``xx`` and uses
    only ``text`` as the pattern. Anything else is passed unchanged to the
    *fallback* class. *recurse_dir* is handed on to the pattern object.

    Raises ValueError (from get_pattern_class) for an unknown style prefix.
    """
    has_style_prefix = len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum()
    if not has_style_prefix:
        return fallback(pattern, recurse_dir)

    style, text = pattern[:2], pattern[3:]
    pattern_cls = get_pattern_class(style)
    return pattern_cls(text, recurse_dir)
|
||||
|
||||
|
||||
def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern):
    """Parse *pattern_str* with parse_pattern() and wrap the result as an exclude command.

    *fallback* is the pattern class used if *pattern_str* has no style prefix.
    Returns a CmdTuple of the pattern object and IECommand.Exclude.
    """
    return CmdTuple(parse_pattern(pattern_str, fallback), IECommand.Exclude)
|
||||
|
||||
|
||||
def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
    """Read a --patterns-from command from string and return a CmdTuple object.

    The first character of *cmd_line_str* selects the command, the rest of the
    string (with leading whitespace removed) is its argument. Depending on the
    command, the value of the returned CmdTuple is

    - the argument string itself for a root path,
    - a pattern class for a pattern style,
    - a pattern object for includes/excludes, created by parse_pattern() with
      *fallback* as the pattern class for arguments without a style prefix.

    Raises argparse.ArgumentTypeError if the string is empty, starts with an
    unknown command character, has no argument, or names an unknown pattern style.
    """
    cmd_prefix_map = {
        '-': IECommand.Exclude,
        '!': IECommand.ExcludeNoRecurse,
        '+': IECommand.Include,
        'R': IECommand.RootPath,
        'r': IECommand.RootPath,
        'P': IECommand.PatternStyle,
        'p': IECommand.PatternStyle,
    }

    try:
        # IndexError: empty string, KeyError: unknown command character
        cmd = cmd_prefix_map[cmd_line_str[0]]

        # remaining text on command-line following the command character
        remainder_str = cmd_line_str[1:].lstrip()
        if not remainder_str:
            raise ValueError("Missing pattern/information!")
    except (IndexError, KeyError, ValueError):
        raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str))

    if cmd is IECommand.RootPath:
        # TODO: validate string?
        return CmdTuple(remainder_str, cmd)

    if cmd is IECommand.PatternStyle:
        # then remainder_str is something like 're' or 'sh'
        try:
            style_cls = get_pattern_class(remainder_str)
        except ValueError:
            raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str))
        return CmdTuple(style_cls, cmd)

    # include/exclude command: determine recurse_dir based on command type
    recurse_dir = cmd is not IECommand.ExcludeNoRecurse
    return CmdTuple(parse_pattern(remainder_str, fallback, recurse_dir), cmd)
|
|
@ -33,12 +33,12 @@
|
|||
from ..cache import Cache
|
||||
from ..constants import * # NOQA
|
||||
from ..crypto import bytes_to_long, num_aes_blocks
|
||||
from ..helpers import PatternMatcher, parse_pattern, Location, get_security_dir
|
||||
from ..helpers import Location, get_security_dir
|
||||
from ..helpers import Manifest
|
||||
from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
|
||||
from ..helpers import bin_to_hex
|
||||
from ..helpers import IECommand
|
||||
from ..helpers import MAX_S
|
||||
from ..patterns import IECommand, PatternMatcher, parse_pattern
|
||||
from ..item import Item
|
||||
from ..key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
|
||||
from ..keymanager import RepoIdMismatch, NotABorgKeyFile
|
||||
|
|
|
@ -23,9 +23,6 @@
|
|||
from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
|
||||
from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams
|
||||
from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
|
||||
from ..helpers import load_exclude_file, load_pattern_file
|
||||
from ..helpers import parse_pattern, PatternMatcher
|
||||
from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
|
||||
from ..helpers import swidth_slice
|
||||
from ..helpers import chunkit
|
||||
from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS
|
||||
|
@ -244,463 +241,6 @@ def test(self):
|
|||
)
|
||||
|
||||
|
||||
def check_patterns(files, pattern, expected):
    """Assert that *pattern* matches exactly the *expected* subset of *files*.

    :param files: list of paths to try; each must already be normalized.
    :param pattern: object offering ``match(path)``; a truthy result is a hit.
    :param expected: list of paths that must match, in *files* order, or
        ``None`` meaning every entry of *files* must match.
    """
    # Reject un-normalized fixtures up front (see the assertion message).
    normalized = (path == os.path.normpath(path) for path in files)
    assert all(normalized), "Pattern matchers expect normalized input paths"

    wanted = files if expected is None else expected
    hits = [path for path in files if pattern.match(path)]

    assert hits == wanted
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("/", []),
    ("/home", ["/home"]),
    ("/home///", ["/home"]),
    ("/./home", ["/home"]),
    ("/home/user", ["/home/user"]),
    ("/home/user2", ["/home/user2"]),
    ("/home/user/.bashrc", ["/home/user/.bashrc"]),
])
def test_patterns_full(pattern, expected):
    """Run PathFullPattern over a fixed set of absolute paths."""
    candidates = [
        "/home",
        "/home/user",
        "/home/user2",
        "/home/user/.bashrc",
    ]
    matcher = PathFullPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("", []),
    ("relative", []),
    ("relative/path/", ["relative/path"]),
    ("relative/path", ["relative/path"]),
])
def test_patterns_full_relative(pattern, expected):
    """Run PathFullPattern over a fixed set of relative paths."""
    candidates = [
        "relative/path",
        "relative/path2",
    ]
    matcher = PathFullPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("/", None),
    ("/./", None),
    ("", []),
    ("/home/u", []),
    ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc", ["/etc/server/config", "/etc/server/hosts"]),
    ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv", ["/srv/messages", "/srv/dmesg"]),
])
def test_patterns_prefix(pattern, expected):
    """Run PathPrefixPattern over a fixed set of absolute paths."""
    candidates = [
        "/etc/server/config",
        "/etc/server/hosts",
        "/home",
        "/home/user/.profile",
        "/home/user/.bashrc",
        "/home/user2/.profile",
        "/home/user2/public_html/index.html",
        "/srv/messages",
        "/srv/dmesg",
    ]
    matcher = PathPrefixPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("", []),
    ("foo", []),
    ("relative", ["relative/path1", "relative/two"]),
    ("more", ["more/relative"]),
])
def test_patterns_prefix_relative(pattern, expected):
    """Run PathPrefixPattern over a fixed set of relative paths."""
    candidates = [
        "relative/path1",
        "relative/two",
        "more/relative",
    ]
    matcher = PathPrefixPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("/*", None),
    ("/./*", None),
    ("*", None),
    ("*/*", None),
    ("*///*", None),
    ("/home/u", []),
    ("/home/*",
     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv*", ["/srv/messages", "/srv/dmesg"]),
    ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
])
def test_patterns_fnmatch(pattern, expected):
    """Run FnmatchPattern over a fixed set of absolute paths."""
    candidates = [
        "/etc/server/config",
        "/etc/server/hosts",
        "/home",
        "/home/user/.profile",
        "/home/user/.bashrc",
        "/home/user2/.profile",
        "/home/user2/public_html/index.html",
        "/srv/messages",
        "/srv/dmesg",
        "/home/foo/.thumbnails",
        "/home/foo/bar/.thumbnails",
    ]
    matcher = FnmatchPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("*", None),
    ("**/*", None),
    ("/**/*", None),
    ("/./*", None),
    ("*/*", None),
    ("*///*", None),
    ("/home/u", []),
    ("/home/*",
     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("*/.pr????e", []),
    ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]),
    ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]),
    ("/srv/*", ["/srv/messages", "/srv/dmesg"]),
    ("/srv2/**", ["/srv2", "/srv2/blafasel"]),
    ("/srv2/**/", ["/srv2/blafasel"]),
    ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]),
    ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]),
])
def test_patterns_shell(pattern, expected):
    """Run ShellPattern over a fixed set of absolute paths."""
    candidates = [
        "/etc/server/config",
        "/etc/server/hosts",
        "/home",
        "/home/user/.profile",
        "/home/user/.bashrc",
        "/home/user2/.profile",
        "/home/user2/public_html/index.html",
        "/srv",
        "/srv/messages",
        "/srv/dmesg",
        "/srv2",
        "/srv2/blafasel",
        "/home/foo/.thumbnails",
        "/home/foo/bar/.thumbnails",
    ]
    matcher = ShellPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("", None),
    (".*", None),
    ("^/", None),
    ("^abc$", []),
    ("^[^/]", []),
    ("^(?!/srv|/foo|/opt)",
     ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile",
      "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]),
])
def test_patterns_regex(pattern, expected):
    """Run RegexPattern over fixed paths and check it exposes its source text."""
    candidates = [
        "/srv/data",
        "/foo/bar",
        "/home",
        "/home/user/.profile",
        "/home/user/.bashrc",
        "/home/user2/.profile",
        "/home/user2/public_html/index.html",
        "/opt/log/messages.txt",
        "/opt/log/dmesg.txt",
        "/home/foo/.thumbnails",
        "/home/foo/bar/.thumbnails",
    ]

    matcher = RegexPattern(pattern)
    # Both the string form and the .pattern attribute must echo the input.
    assert str(matcher) == pattern
    assert matcher.pattern == pattern

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
def test_regex_pattern():
    """Check how RegexPattern treats "/" versus a backslash."""
    # The forward slash must match the platform-specific path separator
    slash_only = "^/$"
    backslash_only = r"^\\$"

    assert RegexPattern(slash_only).match("/")
    assert RegexPattern(slash_only).match(os.path.sep)
    assert not RegexPattern(backslash_only).match("/")
|
||||
|
||||
|
||||
def use_normalized_unicode():
    """Return True only when ``sys.platform`` is ``"darwin"``."""
    return sys.platform == "darwin"
|
||||
|
||||
|
||||
def _make_test_patterns(pattern):
    """Build one matcher per pattern class from the same *pattern* text.

    The regex variant wraps the text as ``^<pattern>/foo$``; the other three
    receive *pattern* unchanged.
    """
    anchored_regex = "^{}/foo$".format(pattern)
    matchers = [
        PathPrefixPattern(pattern),
        FnmatchPattern(pattern),
        RegexPattern(anchored_regex),
        ShellPattern(pattern),
    ]
    return matchers
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}"))
def test_composed_unicode_pattern(pattern):
    """A composed-form pattern matches the decomposed path only on darwin."""
    composed_path = "b\N{LATIN SMALL LETTER A WITH ACUTE}/foo"
    decomposed_path = "ba\N{COMBINING ACUTE ACCENT}/foo"

    assert pattern.match(composed_path)
    assert pattern.match(decomposed_path) == use_normalized_unicode()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}"))
def test_decomposed_unicode_pattern(pattern):
    """A decomposed-form pattern matches the composed path only on darwin."""
    composed_path = "b\N{LATIN SMALL LETTER A WITH ACUTE}/foo"
    decomposed_path = "ba\N{COMBINING ACUTE ACCENT}/foo"

    assert pattern.match(composed_path) == use_normalized_unicode()
    assert pattern.match(decomposed_path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1")))
def test_invalid_unicode_pattern(pattern):
    """A pattern holding the latin1-decoded byte 0x80 matches only that text."""
    plain_path = "ba/foo"
    odd_path = str(b"ba\x80/foo", "latin1")

    assert not pattern.match(plain_path)
    assert pattern.match(odd_path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lines, expected", [
    # "None" means all files, i.e. none excluded
    ([], None),
    (["# Comment only"], None),
    (["*"], []),
    (["# Comment",
      "*/something00.txt",
      " *whitespace* ",
      # Whitespace before comment
      " #/ws*",
      # Empty line
      "",
      "# EOF"],
     ["/more/data", "/home", " #/wsfoobar"]),
    (["re:.*"], []),
    # raw string: "\s" is not a valid escape in a normal string literal
    ([r"re:\s"], ["/data/something00.txt", "/more/data", "/home"]),
    ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]),
    (["", "", "",
      "# This is a test with mixed pattern styles",
      # Case-insensitive pattern
      "re:(?i)BAR|ME$",
      "",
      "*whitespace*",
      "fm:*/something00*"],
     ["/more/data"]),
    ([r" re:^\s "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]),
    ([r" re:\s$ "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]),
    (["pp:./"], None),
    (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]),
    (["pp:aaabbb"], None),
    (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
    (["/nomatch", "/more/*"],
     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
    # the order of exclude patterns shouldn't matter
    (["/more/*", "/nomatch"],
     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
])
def test_exclude_patterns_from_file(tmpdir, lines, expected):
    """Write *lines* to an exclude file, load it and check which paths survive.

    :param tmpdir: pytest temporary-directory fixture holding the exclude file.
    :param lines: lines written to the exclude file, joined with newlines.
    :param expected: paths for which the matcher still returns a truthy result;
        ``None`` means all of ``files``.
    """
    files = [
        '/data/something00.txt', '/more/data', '/home',
        ' #/wsfoobar',
        '\tstart/whitespace',
        '/whitespace/end\t',
    ]

    def evaluate(filename):
        """Load the exclude file and return the paths the matcher keeps."""
        patterns = []
        # Close the handle deterministically instead of leaking it.
        with open(filename, "rt") as fh:
            load_exclude_file(fh, patterns)
        matcher = PatternMatcher(fallback=True)
        matcher.add_inclexcl(patterns)
        return [path for path in files if matcher.match(path)]

    exclfile = tmpdir.join("exclude.txt")

    with exclfile.open("wt") as fh:
        fh.write("\n".join(lines))

    assert evaluate(str(exclfile)) == (files if expected is None else expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
    # each case: pattern file lines, expected roots, expected number of patterns
    ([], [], 0),
    (["# Comment only"], [], 0),
    (["- *"], [], 1),
    (["+fm:*/something00.txt",
      "-/data"], [], 2),
    (["R /"], ["/"], 0),
    (["R /",
      "# comment"], ["/"], 0),
    (["# comment",
      "- /data",
      "R /home"], ["/home"], 1),
])
def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
    """Write *lines* to a pattern file and check what load_pattern_file collects.

    :param tmpdir: pytest temporary-directory fixture holding the pattern file.
    :param lines: lines written to the pattern file, joined with newlines.
    :param expected_roots: expected content of the roots list after loading.
    :param expected_numpatterns: expected length of the include/exclude list.
    """
    def evaluate(filename):
        """Load the pattern file; return the roots and the pattern count."""
        roots = []
        inclexclpatterns = []
        # Close the handle deterministically instead of leaking it.
        with open(filename, "rt") as fh:
            load_pattern_file(fh, roots, inclexclpatterns)
        return roots, len(inclexclpatterns)

    patternfile = tmpdir.join("patterns.txt")

    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    roots, numpatterns = evaluate(str(patternfile))
    assert roots == expected_roots
    assert numpatterns == expected_numpatterns
|
||||
|
||||
|
||||
def test_switch_patterns_style():
    """Check the pattern class chosen for each "+" line between "p"/"P" lines."""
    pattern_text = """\
+0_initial_default_is_shell
p fm
+1_fnmatch
P re
+2_regex
+3_more_regex
P pp
+4_pathprefix
p fm
p sh
+5_shell
"""
    source = io.StringIO(pattern_text)
    roots = []
    loaded = []
    load_pattern_file(source, roots, loaded)

    # One loaded entry per "+" line, in file order.
    expected_classes = [
        ShellPattern,
        FnmatchPattern,
        RegexPattern,
        RegexPattern,
        PathPrefixPattern,
        ShellPattern,
    ]
    assert len(loaded) == 6
    for entry, cls in zip(loaded, expected_classes):
        assert isinstance(entry.val, cls)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lines", [
    (["X /data"]),  # illegal pattern type prefix
    (["/data"]),  # need a pattern type prefix
])
def test_load_invalid_patterns_from_file(tmpdir, lines):
    """load_pattern_file must raise ArgumentTypeError for malformed lines.

    :param tmpdir: pytest temporary-directory fixture holding the pattern file.
    :param lines: lines written to the pattern file, joined with newlines.
    """
    patternfile = tmpdir.join("patterns.txt")
    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))
    filename = str(patternfile)

    roots = []
    inclexclpatterns = []
    # The context manager closes the file even though loading raises.
    with open(filename, "rt") as fh:
        # Only the load call itself is expected to raise.
        with pytest.raises(argparse.ArgumentTypeError):
            load_pattern_file(fh, roots, inclexclpatterns)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lines, expected", [
    # "None" means all files, i.e. none excluded
    ([], None),
    (["# Comment only"], None),
    (["- *"], []),
    # default match type is sh: for patterns -> * doesn't match a /
    (["-*/something0?.txt"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
      '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something00.txt"],
     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something0?.txt"],
     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+/*/something0?.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+fm:*/something00.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    # include /home/leo and exclude the rest of /home:
    (["+/home/leo",
      "-/home/*"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
    # wrong order, /home/leo is already excluded by -/home/*:
    (["-/home/*",
      "+/home/leo"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']),
    (["+fm:/home/leo",
      "-/home/"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
])
def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
    """Write *lines* to a pattern file, load it and check which paths survive.

    :param tmpdir: pytest temporary-directory fixture holding the pattern file.
    :param lines: lines written to the pattern file, joined with newlines.
    :param expected: paths for which the matcher still returns a truthy result;
        ``None`` means all of ``files``.
    """
    files = [
        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
        '/home', '/home/leo', '/home/leo/t', '/home/other'
    ]

    def evaluate(filename):
        """Load the pattern file and return the paths the matcher keeps."""
        matcher = PatternMatcher(fallback=True)
        roots = []
        inclexclpatterns = []
        # Close the handle deterministically instead of leaking it.
        with open(filename, "rt") as fh:
            load_pattern_file(fh, roots, inclexclpatterns)
        matcher.add_inclexcl(inclexclpatterns)
        return [path for path in files if matcher.match(path)]

    patternfile = tmpdir.join("patterns.txt")

    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    assert evaluate(str(patternfile)) == (files if expected is None else expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, cls", [
    ("", FnmatchPattern),

    # Default style
    ("*", FnmatchPattern),
    ("/data/*", FnmatchPattern),

    # fnmatch style
    ("fm:", FnmatchPattern),
    ("fm:*", FnmatchPattern),
    ("fm:/data/*", FnmatchPattern),
    ("fm:fm:/data/*", FnmatchPattern),

    # Regular expression
    ("re:", RegexPattern),
    ("re:.*", RegexPattern),
    ("re:^/something/", RegexPattern),
    ("re:re:^/something/", RegexPattern),

    # Path prefix
    ("pp:", PathPrefixPattern),
    ("pp:/", PathPrefixPattern),
    ("pp:/data/", PathPrefixPattern),
    ("pp:pp:/data/", PathPrefixPattern),

    # Shell-pattern style
    ("sh:", ShellPattern),
    ("sh:*", ShellPattern),
    ("sh:/data/*", ShellPattern),
    ("sh:sh:/data/*", ShellPattern),
])
def test_parse_pattern(pattern, cls):
    """parse_pattern must return an instance of the class named by the prefix."""
    parsed = parse_pattern(pattern)

    assert isinstance(parsed, cls)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern", ["aa:", "fo:*", "00:", "x1:abc"])
def test_parse_pattern_error(pattern):
    """parse_pattern must raise ValueError for each of these prefixes."""
    expect_failure = pytest.raises(ValueError)
    with expect_failure:
        parse_pattern(pattern)
|
||||
|
||||
|
||||
def test_pattern_matcher():
    """Exercise PatternMatcher fallback handling and first-match lookup."""
    matcher = PatternMatcher()

    # A fresh matcher has no fallback and therefore answers None.
    assert matcher.fallback is None
    for path in ("", "foo", "bar"):
        assert matcher.match(path) is None

    # add extra entries to aid in testing
    for marker in ("A", "B", "Empty", "FileNotFound"):
        matcher.is_include_cmd[marker] = marker

    matcher.add([RegexPattern("^a")], "A")
    matcher.add([RegexPattern("^b"), RegexPattern("^z")], "B")
    matcher.add([RegexPattern("^$")], "Empty")
    matcher.fallback = "FileNotFound"

    lookups = [
        ("", "Empty"),
        ("aaa", "A"),
        ("bbb", "B"),
        ("ccc", "FileNotFound"),
        ("xyz", "FileNotFound"),
        ("z", "B"),
    ]
    for path, verdict in lookups:
        assert matcher.match(path) == verdict

    with_fallback = PatternMatcher(fallback="hey!")
    assert with_fallback.fallback == "hey!"
|
||||
|
||||
|
||||
def test_chunkerparams():
|
||||
assert ChunkerParams('19,23,21,4095') == (19, 23, 21, 4095)
|
||||
assert ChunkerParams('10,23,16,4095') == (10, 23, 16, 4095)
|
||||
|
|
467
src/borg/testsuite/patterns.py
Normal file
467
src/borg/testsuite/patterns.py
Normal file
|
@ -0,0 +1,467 @@
|
|||
import argparse
|
||||
import io
|
||||
import os.path
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from ..patterns import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
|
||||
from ..patterns import load_exclude_file, load_pattern_file
|
||||
from ..patterns import parse_pattern, PatternMatcher
|
||||
|
||||
|
||||
def check_patterns(files, pattern, expected):
    """Assert that *pattern* matches exactly the *expected* subset of *files*.

    :param files: list of paths to try; each must already be normalized.
    :param pattern: object offering ``match(path)``; a truthy result is a hit.
    :param expected: list of paths that must match, in *files* order, or
        ``None`` meaning every entry of *files* must match.
    """
    # Reject un-normalized fixtures up front (see the assertion message).
    normalized = (path == os.path.normpath(path) for path in files)
    assert all(normalized), "Pattern matchers expect normalized input paths"

    wanted = files if expected is None else expected
    hits = [path for path in files if pattern.match(path)]

    assert hits == wanted
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("/", []),
    ("/home", ["/home"]),
    ("/home///", ["/home"]),
    ("/./home", ["/home"]),
    ("/home/user", ["/home/user"]),
    ("/home/user2", ["/home/user2"]),
    ("/home/user/.bashrc", ["/home/user/.bashrc"]),
])
def test_patterns_full(pattern, expected):
    """Run PathFullPattern over a fixed set of absolute paths."""
    candidates = [
        "/home",
        "/home/user",
        "/home/user2",
        "/home/user/.bashrc",
    ]
    matcher = PathFullPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("", []),
    ("relative", []),
    ("relative/path/", ["relative/path"]),
    ("relative/path", ["relative/path"]),
])
def test_patterns_full_relative(pattern, expected):
    """Run PathFullPattern over a fixed set of relative paths."""
    candidates = [
        "relative/path",
        "relative/path2",
    ]
    matcher = PathFullPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("/", None),
    ("/./", None),
    ("", []),
    ("/home/u", []),
    ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc", ["/etc/server/config", "/etc/server/hosts"]),
    ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv", ["/srv/messages", "/srv/dmesg"]),
])
def test_patterns_prefix(pattern, expected):
    """Run PathPrefixPattern over a fixed set of absolute paths."""
    candidates = [
        "/etc/server/config",
        "/etc/server/hosts",
        "/home",
        "/home/user/.profile",
        "/home/user/.bashrc",
        "/home/user2/.profile",
        "/home/user2/public_html/index.html",
        "/srv/messages",
        "/srv/dmesg",
    ]
    matcher = PathPrefixPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("", []),
    ("foo", []),
    ("relative", ["relative/path1", "relative/two"]),
    ("more", ["more/relative"]),
])
def test_patterns_prefix_relative(pattern, expected):
    """Run PathPrefixPattern over a fixed set of relative paths."""
    candidates = [
        "relative/path1",
        "relative/two",
        "more/relative",
    ]
    matcher = PathPrefixPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("/*", None),
    ("/./*", None),
    ("*", None),
    ("*/*", None),
    ("*///*", None),
    ("/home/u", []),
    ("/home/*",
     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv*", ["/srv/messages", "/srv/dmesg"]),
    ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
])
def test_patterns_fnmatch(pattern, expected):
    """Run FnmatchPattern over a fixed set of absolute paths."""
    candidates = [
        "/etc/server/config",
        "/etc/server/hosts",
        "/home",
        "/home/user/.profile",
        "/home/user/.bashrc",
        "/home/user2/.profile",
        "/home/user2/public_html/index.html",
        "/srv/messages",
        "/srv/dmesg",
        "/home/foo/.thumbnails",
        "/home/foo/bar/.thumbnails",
    ]
    matcher = FnmatchPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("*", None),
    ("**/*", None),
    ("/**/*", None),
    ("/./*", None),
    ("*/*", None),
    ("*///*", None),
    ("/home/u", []),
    ("/home/*",
     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("*/.pr????e", []),
    ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]),
    ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]),
    ("/srv/*", ["/srv/messages", "/srv/dmesg"]),
    ("/srv2/**", ["/srv2", "/srv2/blafasel"]),
    ("/srv2/**/", ["/srv2/blafasel"]),
    ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]),
    ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]),
])
def test_patterns_shell(pattern, expected):
    """Run ShellPattern over a fixed set of absolute paths."""
    candidates = [
        "/etc/server/config",
        "/etc/server/hosts",
        "/home",
        "/home/user/.profile",
        "/home/user/.bashrc",
        "/home/user2/.profile",
        "/home/user2/public_html/index.html",
        "/srv",
        "/srv/messages",
        "/srv/dmesg",
        "/srv2",
        "/srv2/blafasel",
        "/home/foo/.thumbnails",
        "/home/foo/bar/.thumbnails",
    ]
    matcher = ShellPattern(pattern)

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("", None),
    (".*", None),
    ("^/", None),
    ("^abc$", []),
    ("^[^/]", []),
    ("^(?!/srv|/foo|/opt)",
     ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile",
      "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]),
])
def test_patterns_regex(pattern, expected):
    """Run RegexPattern over fixed paths and check it exposes its source text."""
    candidates = [
        "/srv/data",
        "/foo/bar",
        "/home",
        "/home/user/.profile",
        "/home/user/.bashrc",
        "/home/user2/.profile",
        "/home/user2/public_html/index.html",
        "/opt/log/messages.txt",
        "/opt/log/dmesg.txt",
        "/home/foo/.thumbnails",
        "/home/foo/bar/.thumbnails",
    ]

    matcher = RegexPattern(pattern)
    # Both the string form and the .pattern attribute must echo the input.
    assert str(matcher) == pattern
    assert matcher.pattern == pattern

    check_patterns(candidates, matcher, expected)
|
||||
|
||||
|
||||
def test_regex_pattern():
    """Check how RegexPattern treats "/" versus a backslash."""
    # The forward slash must match the platform-specific path separator
    slash_only = "^/$"
    backslash_only = r"^\\$"

    assert RegexPattern(slash_only).match("/")
    assert RegexPattern(slash_only).match(os.path.sep)
    assert not RegexPattern(backslash_only).match("/")
|
||||
|
||||
|
||||
def use_normalized_unicode():
    """Return True only when ``sys.platform`` is ``"darwin"``."""
    return sys.platform == "darwin"
|
||||
|
||||
|
||||
def _make_test_patterns(pattern):
    """Build one matcher per pattern class from the same *pattern* text.

    The regex variant wraps the text as ``^<pattern>/foo$``; the other three
    receive *pattern* unchanged.
    """
    anchored_regex = "^{}/foo$".format(pattern)
    matchers = [
        PathPrefixPattern(pattern),
        FnmatchPattern(pattern),
        RegexPattern(anchored_regex),
        ShellPattern(pattern),
    ]
    return matchers
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}"))
def test_composed_unicode_pattern(pattern):
    """A composed-form pattern matches the decomposed path only on darwin."""
    composed_path = "b\N{LATIN SMALL LETTER A WITH ACUTE}/foo"
    decomposed_path = "ba\N{COMBINING ACUTE ACCENT}/foo"

    assert pattern.match(composed_path)
    assert pattern.match(decomposed_path) == use_normalized_unicode()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}"))
def test_decomposed_unicode_pattern(pattern):
    """A decomposed-form pattern matches the composed path only on darwin."""
    composed_path = "b\N{LATIN SMALL LETTER A WITH ACUTE}/foo"
    decomposed_path = "ba\N{COMBINING ACUTE ACCENT}/foo"

    assert pattern.match(composed_path) == use_normalized_unicode()
    assert pattern.match(decomposed_path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1")))
def test_invalid_unicode_pattern(pattern):
    """A pattern holding the latin1-decoded byte 0x80 matches only that text."""
    plain_path = "ba/foo"
    odd_path = str(b"ba\x80/foo", "latin1")

    assert not pattern.match(plain_path)
    assert pattern.match(odd_path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lines, expected", [
    # "None" means all files, i.e. none excluded
    ([], None),
    (["# Comment only"], None),
    (["*"], []),
    (["# Comment",
      "*/something00.txt",
      " *whitespace* ",
      # Whitespace before comment
      " #/ws*",
      # Empty line
      "",
      "# EOF"],
     ["/more/data", "/home", " #/wsfoobar"]),
    (["re:.*"], []),
    # raw string: "\s" is not a valid escape in a normal string literal
    ([r"re:\s"], ["/data/something00.txt", "/more/data", "/home"]),
    ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]),
    (["", "", "",
      "# This is a test with mixed pattern styles",
      # Case-insensitive pattern
      "re:(?i)BAR|ME$",
      "",
      "*whitespace*",
      "fm:*/something00*"],
     ["/more/data"]),
    ([r" re:^\s "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]),
    ([r" re:\s$ "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]),
    (["pp:./"], None),
    (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]),
    (["pp:aaabbb"], None),
    (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
    (["/nomatch", "/more/*"],
     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
    # the order of exclude patterns shouldn't matter
    (["/more/*", "/nomatch"],
     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
])
def test_exclude_patterns_from_file(tmpdir, lines, expected):
    """Write *lines* to an exclude file, load it and check which paths survive.

    :param tmpdir: pytest temporary-directory fixture holding the exclude file.
    :param lines: lines written to the exclude file, joined with newlines.
    :param expected: paths for which the matcher still returns a truthy result;
        ``None`` means all of ``files``.
    """
    files = [
        '/data/something00.txt', '/more/data', '/home',
        ' #/wsfoobar',
        '\tstart/whitespace',
        '/whitespace/end\t',
    ]

    def evaluate(filename):
        """Load the exclude file and return the paths the matcher keeps."""
        patterns = []
        # Close the handle deterministically instead of leaking it.
        with open(filename, "rt") as fh:
            load_exclude_file(fh, patterns)
        matcher = PatternMatcher(fallback=True)
        matcher.add_inclexcl(patterns)
        return [path for path in files if matcher.match(path)]

    exclfile = tmpdir.join("exclude.txt")

    with exclfile.open("wt") as fh:
        fh.write("\n".join(lines))

    assert evaluate(str(exclfile)) == (files if expected is None else expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
    # each case: pattern file lines, expected roots, expected number of patterns
    ([], [], 0),
    (["# Comment only"], [], 0),
    (["- *"], [], 1),
    (["+fm:*/something00.txt",
      "-/data"], [], 2),
    (["R /"], ["/"], 0),
    (["R /",
      "# comment"], ["/"], 0),
    (["# comment",
      "- /data",
      "R /home"], ["/home"], 1),
])
def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
    """Write *lines* to a pattern file and check what load_pattern_file collects.

    :param tmpdir: pytest temporary-directory fixture holding the pattern file.
    :param lines: lines written to the pattern file, joined with newlines.
    :param expected_roots: expected content of the roots list after loading.
    :param expected_numpatterns: expected length of the include/exclude list.
    """
    def evaluate(filename):
        """Load the pattern file; return the roots and the pattern count."""
        roots = []
        inclexclpatterns = []
        # Close the handle deterministically instead of leaking it.
        with open(filename, "rt") as fh:
            load_pattern_file(fh, roots, inclexclpatterns)
        return roots, len(inclexclpatterns)

    patternfile = tmpdir.join("patterns.txt")

    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    roots, numpatterns = evaluate(str(patternfile))
    assert roots == expected_roots
    assert numpatterns == expected_numpatterns
|
||||
|
||||
|
||||
def test_switch_patterns_style():
    """Check the pattern class chosen for each "+" line between "p"/"P" lines."""
    pattern_text = """\
+0_initial_default_is_shell
p fm
+1_fnmatch
P re
+2_regex
+3_more_regex
P pp
+4_pathprefix
p fm
p sh
+5_shell
"""
    source = io.StringIO(pattern_text)
    roots = []
    loaded = []
    load_pattern_file(source, roots, loaded)

    # One loaded entry per "+" line, in file order.
    expected_classes = [
        ShellPattern,
        FnmatchPattern,
        RegexPattern,
        RegexPattern,
        PathPrefixPattern,
        ShellPattern,
    ]
    assert len(loaded) == 6
    for entry, cls in zip(loaded, expected_classes):
        assert isinstance(entry.val, cls)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lines", [
    (["X /data"]),  # illegal pattern type prefix
    (["/data"]),  # need a pattern type prefix
])
def test_load_invalid_patterns_from_file(tmpdir, lines):
    """load_pattern_file must raise ArgumentTypeError for malformed lines.

    :param tmpdir: pytest temporary-directory fixture holding the pattern file.
    :param lines: lines written to the pattern file, joined with newlines.
    """
    patternfile = tmpdir.join("patterns.txt")
    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))
    filename = str(patternfile)

    roots = []
    inclexclpatterns = []
    # The context manager closes the file even though loading raises.
    with open(filename, "rt") as fh:
        # Only the load call itself is expected to raise.
        with pytest.raises(argparse.ArgumentTypeError):
            load_pattern_file(fh, roots, inclexclpatterns)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lines, expected", [
    # "None" means all files, i.e. none excluded
    ([], None),
    (["# Comment only"], None),
    (["- *"], []),
    # default match type is sh: for patterns -> * doesn't match a /
    (["-*/something0?.txt"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
      '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something00.txt"],
     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something0?.txt"],
     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+/*/something0?.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+fm:*/something00.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    # include /home/leo and exclude the rest of /home:
    (["+/home/leo",
      "-/home/*"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
    # wrong order, /home/leo is already excluded by -/home/*:
    (["-/home/*",
      "+/home/leo"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']),
    (["+fm:/home/leo",
      "-/home/"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
])
def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
    """Check which paths a matcher built from a pattern file accepts.

    ``lines`` are written to a temporary pattern file, loaded into a
    ``PatternMatcher(fallback=True)`` and applied to a fixed list of paths.
    The paths for which ``match`` returns a truthy value must equal
    ``expected``; ``expected is None`` stands for "every path is kept".
    """
    files = [
        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
        '/home', '/home/leo', '/home/leo/t', '/home/other'
    ]

    def evaluate(filename):
        matcher = PatternMatcher(fallback=True)
        roots = []
        inclexclpatterns = []
        # Open via a context manager so the handle is closed deterministically
        # instead of being left for the garbage collector (ResourceWarning).
        with open(filename, "rt") as fd:
            load_pattern_file(fd, roots, inclexclpatterns)
        matcher.add_inclexcl(inclexclpatterns)
        return [path for path in files if matcher.match(path)]

    patternfile = tmpdir.join("patterns.txt")

    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    assert evaluate(str(patternfile)) == (files if expected is None else expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, cls", [
    ("", FnmatchPattern),

    # Default style
    ("*", FnmatchPattern),
    ("/data/*", FnmatchPattern),

    # fnmatch style
    ("fm:", FnmatchPattern),
    ("fm:*", FnmatchPattern),
    ("fm:/data/*", FnmatchPattern),
    ("fm:fm:/data/*", FnmatchPattern),

    # Regular expression
    ("re:", RegexPattern),
    ("re:.*", RegexPattern),
    ("re:^/something/", RegexPattern),
    ("re:re:^/something/", RegexPattern),

    # Path prefix
    ("pp:", PathPrefixPattern),
    ("pp:/", PathPrefixPattern),
    ("pp:/data/", PathPrefixPattern),
    ("pp:pp:/data/", PathPrefixPattern),

    # Shell-pattern style
    ("sh:", ShellPattern),
    ("sh:*", ShellPattern),
    ("sh:/data/*", ShellPattern),
    ("sh:sh:/data/*", ShellPattern),
])
def test_parse_pattern(pattern, cls):
    """Check that ``parse_pattern`` returns an instance of the expected class."""
    parsed = parse_pattern(pattern)
    assert isinstance(parsed, cls)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern", [
    "aa:",
    "fo:*",
    "00:",
    "x1:abc",
])
def test_parse_pattern_error(pattern):
    """Check that ``parse_pattern`` raises ``ValueError`` for each of these prefixes."""
    pytest.raises(ValueError, parse_pattern, pattern)
|
||||
|
||||
|
||||
def test_pattern_matcher():
    """Exercise ``PatternMatcher`` defaults, pattern registration and fallback."""
    matcher = PatternMatcher()

    # A fresh matcher has no fallback and matches nothing.
    assert matcher.fallback is None
    for path in ("", "foo", "bar"):
        assert matcher.match(path) is None

    # add extra entries to aid in testing
    for name in ("A", "B", "Empty", "FileNotFound"):
        matcher.is_include_cmd[name] = name

    matcher.add([RegexPattern("^a")], "A")
    matcher.add([RegexPattern("^b"), RegexPattern("^z")], "B")
    matcher.add([RegexPattern("^$")], "Empty")
    matcher.fallback = "FileNotFound"

    # (path, expected result of match) checked in this order.
    expectations = (
        ("", "Empty"),
        ("aaa", "A"),
        ("bbb", "B"),
        ("ccc", "FileNotFound"),
        ("xyz", "FileNotFound"),
        ("z", "B"),
    )
    for path, wanted in expectations:
        assert matcher.match(path) == wanted

    with_fallback = PatternMatcher(fallback="hey!")
    assert with_fallback.fallback == "hey!"
|
Loading…
Reference in a new issue