1
0
Fork 0
mirror of https://github.com/borgbackup/borg.git synced 2025-02-21 21:57:36 +00:00

Merge pull request #2471 from enkore/issue/2469

Move patterns to module
This commit is contained in:
TW 2017-05-01 23:37:19 +02:00 committed by GitHub
commit 7c9a57bee5
7 changed files with 865 additions and 851 deletions

View file

@ -36,7 +36,7 @@
from .helpers import bin_to_hex
from .helpers import safe_ns
from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
from .helpers import PathPrefixPattern, FnmatchPattern, IECommand
from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
from .item import Item, ArchiveItem
from .key import key_factory
from .platform import acl_get, acl_set, set_flags, get_flags, swidth

View file

@ -51,15 +51,15 @@
from .helpers import hardlinkable
from .helpers import StableDict
from .helpers import check_extension_modules
from .helpers import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
from .helpers import log_multi
from .helpers import PatternMatcher
from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
from .helpers import ErrorIgnoringTextIOWrapper
from .helpers import ProgressIndicatorPercent
from .helpers import basic_json_data, json_print
from .helpers import replace_placeholders
from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
from .patterns import PatternMatcher
from .item import Item
from .key import key_creator, tam_required_file, tam_required, RepoKey, PassphraseKey
from .keymanager import KeyManager

View file

@ -18,14 +18,11 @@
import textwrap
import threading
import time
import unicodedata
import uuid
from binascii import hexlify
from collections import namedtuple, deque, abc, Counter
from datetime import datetime, timezone, timedelta
from enum import Enum
from fnmatch import translate
from functools import wraps, partial, lru_cache
from functools import partial, lru_cache
from itertools import islice
from operator import attrgetter
from string import Formatter
@ -42,7 +39,6 @@
from . import chunker
from . import crypto
from . import hashindex
from . import shellpattern
from .constants import * # NOQA
@ -389,387 +385,6 @@ def parse_timestamp(timestamp):
return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
def parse_patternfile_line(line, roots, ie_commands, fallback):
    """Interpret one pattern-file line and record its effect.

    A root-path command appends its value to *roots*, an include/exclude
    command is appended to *ie_commands*, and a pattern-style command
    replaces the fallback pattern class.

    Returns the pattern class to use as fallback for the following lines.
    """
    parsed = parse_inclexcl_command(line, fallback=fallback)
    kind = parsed.cmd
    if kind is IECommand.PatternStyle:
        # switches the default pattern style for subsequent lines
        return parsed.val
    if kind is IECommand.RootPath:
        roots.append(parsed.val)
    else:
        # every remaining command is an include/exclude rule
        ie_commands.append(parsed)
    return fallback
def load_pattern_file(fileobj, roots, ie_commands, fallback=None):
    """Process each line yielded by clean_lines(fileobj) as a pattern-file line.

    Root paths are collected in *roots* and include/exclude commands in
    *ie_commands*. *fallback* is the initial pattern class for lines without
    an explicit style; ShellPattern is used when it is None.
    """
    # ShellPattern is defined later in this module, so it is resolved at call time
    style = ShellPattern if fallback is None else fallback
    for entry in clean_lines(fileobj):
        style = parse_patternfile_line(entry, roots, ie_commands, style)
def load_exclude_file(fileobj, patterns):
    """Append an exclude command to *patterns* for each line yielded by clean_lines(fileobj)."""
    add = patterns.append
    for text in clean_lines(fileobj):
        add(parse_exclude_pattern(text))
class ArgparsePatternAction(argparse.Action):
    """argparse action that treats its single argument as one pattern-file line."""

    def __init__(self, nargs=1, **kw):
        super().__init__(nargs=nargs, **kw)

    def __call__(self, parser, args, values, option_string=None):
        # root paths go to args.paths, include/exclude commands to args.patterns
        line = values[0]
        parse_patternfile_line(line, args.paths, args.patterns, ShellPattern)
class ArgparsePatternFileAction(argparse.Action):
    """argparse action that opens the named file and parses it as a pattern file."""

    def __init__(self, nargs=1, **kw):
        super().__init__(nargs=nargs, **kw)

    def __call__(self, parser, args, values, option_string=None):
        """Open the file named by the first value and pass it to self.parse().

        NOTE(review): which lines are skipped (empty lines, '#' comments) is
        decided by clean_lines(), which is not defined in this block.
        """
        path = values[0]
        with open(path) as handle:
            self.parse(handle, args)

    def parse(self, fobj, args):
        """Load root paths into args.paths and include/exclude commands into args.patterns."""
        load_pattern_file(fobj, args.paths, args.patterns)
class ArgparseExcludeFileAction(ArgparsePatternFileAction):
    """Variant of ArgparsePatternFileAction that reads the file as a list of exclude patterns."""

    def parse(self, fobj, args):
        """Append one exclude command per pattern line to args.patterns."""
        load_exclude_file(fobj, args.patterns)
class PatternMatcher:
    """Represents a collection of pattern objects to match paths against.

    *fallback* is the value that *match()* returns if no matching patterns are found.
    """

    def __init__(self, fallback=None):
        # (pattern, cmd) pairs, tried in insertion order by match()
        self._items = []

        # Value to return from match function when none of the patterns match.
        self.fallback = fallback

        # optimization: full-path patterns are looked up directly by path
        self._path_full_patterns = {}  # full, normalized path -> IECommand

        # indicates whether the last match() call ended on a pattern for which
        # we should recurse into any matching folder. Will be set to True or
        # False when calling match().
        self.recurse_dir = None

        # whether to recurse into directories when no match is found
        # TODO: allow modification as a config option?
        self.recurse_dir_default = True

        # patterns added via add_includepaths(), see get_unmatched_include_patterns()
        self.include_patterns = []

        # maps a command to the boolean that match() returns for it
        # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass?
        self.is_include_cmd = {
            IECommand.Exclude: False,
            IECommand.ExcludeNoRecurse: False,
            IECommand.Include: True
        }

    def empty(self):
        """Return True if no pattern of any kind has been added."""
        return not len(self._items) and not len(self._path_full_patterns)

    def _add(self, pattern, cmd):
        """Register a single *pattern*; *cmd* is an IECommand value."""
        if isinstance(pattern, PathFullPattern):
            key = pattern.pattern  # full, normalized path
            self._path_full_patterns[key] = cmd
        else:
            self._items.append((pattern, cmd))

    def add(self, patterns, cmd):
        """Add list of patterns to internal list. *cmd* indicates whether the
        pattern is an include/exclude pattern, and whether recursion should be
        done on excluded folders.
        """
        for pattern in patterns:
            self._add(pattern, cmd)

    def add_includepaths(self, include_paths):
        """Used to add inclusion-paths from args.paths (from commandline).

        The fallback becomes True only when *include_paths* is empty.
        """
        include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths]
        self.add(include_patterns, IECommand.Include)
        self.fallback = not include_patterns
        self.include_patterns = include_patterns

    def get_unmatched_include_patterns(self):
        "Note that this only returns patterns added via *add_includepaths*."
        return [p for p in self.include_patterns if p.match_count == 0]

    def add_inclexcl(self, patterns):
        """Add list of patterns (of type CmdTuple) to internal list."""
        for pattern, cmd in patterns:
            self._add(pattern, cmd)

    def match(self, path):
        """Return True or False depending on whether *path* is matched.

        If no match is found among the patterns in this matcher, then the value
        in self.fallback is returned (defaults to None).

        As a side effect, self.recurse_dir is set for the pattern that decided
        the result (or to self.recurse_dir_default if none matched).
        """
        path = normalize_path(path)
        # do a fast lookup for full path matches (note: we do not count such matches):
        non_existent = object()
        value = self._path_full_patterns.get(path, non_existent)

        if value is not non_existent:
            # we have a full path match! *value* is the command stored by _add().
            # Only the no-recurse exclude command suppresses recursion, like in
            # parse_inclexcl_command().
            self.recurse_dir = value is not IECommand.ExcludeNoRecurse
            # translate the command to a boolean exactly like the slow path
            # below does; returning the (always truthy) command itself would
            # turn a full-path exclude into an include.
            return self.is_include_cmd[value]

        # this is the slow way, if we have many patterns in self._items:
        for (pattern, cmd) in self._items:
            if pattern.match(path, normalize=False):
                self.recurse_dir = pattern.recurse_dir
                return self.is_include_cmd[cmd]

        # by default we will recurse if there is no match
        self.recurse_dir = self.recurse_dir_default
        return self.fallback
def normalize_path(path):
    """Return *path* in NFD form on macOS; return it unchanged on every other platform."""
    # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match.
    # Windows and Unix filesystems allow different forms, so users always have to enter an exact match.
    if sys.platform != 'darwin':
        return path
    return unicodedata.normalize('NFD', path)
class PatternBase:
    """Common behaviour of all inclusion/exclusion pattern classes.

    Subclasses provide _prepare() and _match(); this base class keeps the
    original pattern text, the match counter and the recurse_dir flag.
    """
    PREFIX = NotImplemented

    def __init__(self, pattern, recurse_dir=False):
        self.pattern_orig = pattern
        self.match_count = 0
        # subclasses receive the platform-normalized pattern text
        self._prepare(normalize_path(pattern))
        self.recurse_dir = recurse_dir

    def match(self, path, normalize=True):
        """Tell whether *path* is matched by this pattern and count the hit.

        With normalize=True (the default) the path is passed through
        normalize_path() first; otherwise it is assumed to be normalized already.
        """
        candidate = normalize_path(path) if normalize else path
        hit = self._match(candidate)
        if hit:
            self.match_count += 1
        return hit

    def __repr__(self):
        return '%s(%s)' % (type(self), self.pattern)

    def __str__(self):
        return self.pattern_orig

    def _prepare(self, pattern):
        "Should set the value of self.pattern"
        raise NotImplementedError

    def _match(self, path):
        "Should return whether the (already normalized) *path* matches"
        raise NotImplementedError
class PathFullPattern(PatternBase):
    """Matches only a path that is equal to the normalized pattern."""
    PREFIX = "pf"

    def _prepare(self, pattern):
        normalized = os.path.normpath(pattern)
        self.pattern = normalized

    def _match(self, path):
        return self.pattern == path
# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
# separator to the end of the path before matching.
class PathPrefixPattern(PatternBase):
    """Literal files or directories listed on the command line
    for some operations (e.g. extract, but not create).

    A directory pattern also matches every path below that directory.
    Whether the pattern ends with a slash makes no difference.
    """
    PREFIX = "pp"

    def _prepare(self, pattern):
        sep = os.path.sep
        # always keep exactly one trailing separator on the stored pattern
        self.pattern = os.path.normpath(pattern).rstrip(sep) + sep

    def _match(self, path):
        # the separator appended here lets "dir" match "dir" itself but not "dir2"
        terminated = path + os.path.sep
        return terminated.startswith(self.pattern)
class FnmatchPattern(PatternBase):
    """fnmatch-style glob patterns to exclude. With a trailing slash only the
    contents of a directory are matched, not the directory itself.
    """
    PREFIX = "fm"

    def _prepare(self, pattern):
        sep = os.path.sep
        if pattern.endswith(sep):
            glob = os.path.normpath(pattern).rstrip(sep) + sep + '*' + sep
        else:
            glob = os.path.normpath(pattern) + sep + '*'
        self.pattern = glob

        # fnmatch and re.match both cache compiled regular expressions.
        # Nevertheless, this is about 10 times faster.
        self.regex = re.compile(translate(glob))

    def _match(self, path):
        terminated = path + os.path.sep
        return self.regex.match(terminated) is not None
class ShellPattern(PatternBase):
    """Shell glob patterns (translated by shellpattern.translate) to exclude.
    With a trailing slash only the contents of a directory are matched, not
    the directory itself.
    """
    PREFIX = "sh"

    def _prepare(self, pattern):
        sep = os.path.sep
        # suffix appended so that everything below a matching path matches too
        below = sep + "**" + sep + "*"
        if pattern.endswith(sep):
            glob = os.path.normpath(pattern).rstrip(sep) + below + sep
        else:
            glob = os.path.normpath(pattern) + below
        self.pattern = glob
        self.regex = re.compile(shellpattern.translate(glob))

    def _match(self, path):
        terminated = path + os.path.sep
        return self.regex.match(terminated) is not None
class RegexPattern(PatternBase):
    """Regular expression to exclude; it is searched for anywhere in the path."""
    PREFIX = "re"

    def _prepare(self, pattern):
        self.pattern = pattern
        self.regex = re.compile(pattern)

    def _match(self, path):
        # Normalize path separators so that expressions can always use '/'
        sep = os.path.sep
        subject = path if sep == '/' else path.replace(sep, '/')
        return self.regex.search(subject) is not None
# All pattern implementations that can be selected by a style prefix.
_PATTERN_CLASSES = {
    FnmatchPattern,
    PathFullPattern,
    PathPrefixPattern,
    RegexPattern,
    ShellPattern,
}

# Maps each class's PREFIX (e.g. "fm", "re") to the class itself.
_PATTERN_CLASS_BY_PREFIX = {cls.PREFIX: cls for cls in _PATTERN_CLASSES}

# Result of parsing a command: *val* is the parsed value, *cmd* the IECommand.
CmdTuple = namedtuple('CmdTuple', 'val cmd')
class IECommand(Enum):
    """Kinds of command that a line of an include/exclude file can carry."""

    # 'R' / 'r' lines: a root path
    RootPath = 1
    # 'P' / 'p' lines: select the default pattern style
    PatternStyle = 2
    # '+' lines
    Include = 3
    # '-' lines
    Exclude = 4
    # '!' lines: exclude without recursing into the directory
    ExcludeNoRecurse = 5
def get_pattern_class(prefix):
    """Return the pattern class registered for the style *prefix*.

    Raises ValueError if *prefix* is not a known pattern style.
    """
    if prefix in _PATTERN_CLASS_BY_PREFIX:
        return _PATTERN_CLASS_BY_PREFIX[prefix]
    raise ValueError("Unknown pattern style: {}".format(prefix)) from None
def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True):
    """Build a pattern object from *pattern*.

    A leading two-character alphanumeric style followed by ':' (e.g. "re:")
    selects the implementation class; otherwise *fallback* is used.
    """
    has_style = len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum()
    if not has_style:
        return fallback(pattern, recurse_dir)
    style, text = pattern[:2], pattern[3:]
    return get_pattern_class(style)(text, recurse_dir)
def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern):
    """Parse *pattern_str* with parse_pattern() and wrap the result as an Exclude command."""
    return CmdTuple(parse_pattern(pattern_str, fallback), IECommand.Exclude)
def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
    """Parse one --patterns-from command line into a CmdTuple.

    The first character selects the command and the rest (with leading
    whitespace removed) is its argument. Raises argparse.ArgumentTypeError
    if the command character is unknown, the argument is missing, or a
    pattern style is invalid.
    """
    cmd_prefix_map = {
        '-': IECommand.Exclude,
        '!': IECommand.ExcludeNoRecurse,
        '+': IECommand.Include,
        'R': IECommand.RootPath,
        'r': IECommand.RootPath,
        'P': IECommand.PatternStyle,
        'p': IECommand.PatternStyle,
    }

    try:
        cmd = cmd_prefix_map[cmd_line_str[0]]

        # remaining text on command-line following the command character
        remainder_str = cmd_line_str[1:].lstrip()

        if not remainder_str:
            raise ValueError("Missing pattern/information!")
    except (IndexError, KeyError, ValueError):
        raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str))

    if cmd is IECommand.RootPath:
        # TODO: validate string?
        return CmdTuple(remainder_str, cmd)

    if cmd is IECommand.PatternStyle:
        # then remainder_str is something like 're' or 'sh'
        try:
            style_cls = get_pattern_class(remainder_str)
        except ValueError:
            raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str))
        return CmdTuple(style_cls, cmd)

    # include/exclude command: only the no-recurse exclude disables recursion
    recurse_dir = cmd is not IECommand.ExcludeNoRecurse
    return CmdTuple(parse_pattern(remainder_str, fallback, recurse_dir), cmd)
def timestamp(s):
"""Convert a --timestamp=s argument to a datetime object"""
try:

392
src/borg/patterns.py Normal file
View file

@ -0,0 +1,392 @@
import argparse
import fnmatch
import os.path
import re
import sys
import unicodedata
from collections import namedtuple
from enum import Enum
from . import shellpattern
from .helpers import clean_lines
def parse_patternfile_line(line, roots, ie_commands, fallback):
    """Interpret one pattern-file line and record its effect.

    A root-path command appends its value to *roots*, an include/exclude
    command is appended to *ie_commands*, and a pattern-style command
    replaces the fallback pattern class.

    Returns the pattern class to use as fallback for the following lines.
    """
    parsed = parse_inclexcl_command(line, fallback=fallback)
    kind = parsed.cmd
    if kind is IECommand.PatternStyle:
        # switches the default pattern style for subsequent lines
        return parsed.val
    if kind is IECommand.RootPath:
        roots.append(parsed.val)
    else:
        # every remaining command is an include/exclude rule
        ie_commands.append(parsed)
    return fallback
def load_pattern_file(fileobj, roots, ie_commands, fallback=None):
    """Process each line yielded by clean_lines(fileobj) as a pattern-file line.

    Root paths are collected in *roots* and include/exclude commands in
    *ie_commands*. *fallback* is the initial pattern class for lines without
    an explicit style; ShellPattern is used when it is None.
    """
    # ShellPattern is defined later in this module, so it is resolved at call time
    style = ShellPattern if fallback is None else fallback
    for entry in clean_lines(fileobj):
        style = parse_patternfile_line(entry, roots, ie_commands, style)
def load_exclude_file(fileobj, patterns):
    """Append an exclude command to *patterns* for each line yielded by clean_lines(fileobj)."""
    add = patterns.append
    for text in clean_lines(fileobj):
        add(parse_exclude_pattern(text))
class ArgparsePatternAction(argparse.Action):
    """argparse action that treats its single argument as one pattern-file line."""

    def __init__(self, nargs=1, **kw):
        super().__init__(nargs=nargs, **kw)

    def __call__(self, parser, args, values, option_string=None):
        # root paths go to args.paths, include/exclude commands to args.patterns
        line = values[0]
        parse_patternfile_line(line, args.paths, args.patterns, ShellPattern)
class ArgparsePatternFileAction(argparse.Action):
    """argparse action that opens the named file and parses it as a pattern file."""

    def __init__(self, nargs=1, **kw):
        super().__init__(nargs=nargs, **kw)

    def __call__(self, parser, args, values, option_string=None):
        """Open the file named by the first value and pass it to self.parse().

        NOTE(review): which lines are skipped (empty lines, '#' comments) is
        decided by clean_lines(), which is not defined in this block.
        """
        path = values[0]
        with open(path) as handle:
            self.parse(handle, args)

    def parse(self, fobj, args):
        """Load root paths into args.paths and include/exclude commands into args.patterns."""
        load_pattern_file(fobj, args.paths, args.patterns)
class ArgparseExcludeFileAction(ArgparsePatternFileAction):
    """Variant of ArgparsePatternFileAction that reads the file as a list of exclude patterns."""

    def parse(self, fobj, args):
        """Append one exclude command per pattern line to args.patterns."""
        load_exclude_file(fobj, args.patterns)
class PatternMatcher:
    """Represents a collection of pattern objects to match paths against.

    *fallback* is the value that *match()* returns if no matching patterns are found.
    """

    def __init__(self, fallback=None):
        # (pattern, cmd) pairs, tried in insertion order by match()
        self._items = []

        # Value to return from match function when none of the patterns match.
        self.fallback = fallback

        # optimization: full-path patterns are looked up directly by path
        self._path_full_patterns = {}  # full, normalized path -> IECommand

        # indicates whether the last match() call ended on a pattern for which
        # we should recurse into any matching folder. Will be set to True or
        # False when calling match().
        self.recurse_dir = None

        # whether to recurse into directories when no match is found
        # TODO: allow modification as a config option?
        self.recurse_dir_default = True

        # patterns added via add_includepaths(), see get_unmatched_include_patterns()
        self.include_patterns = []

        # maps a command to the boolean that match() returns for it
        # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass?
        self.is_include_cmd = {
            IECommand.Exclude: False,
            IECommand.ExcludeNoRecurse: False,
            IECommand.Include: True
        }

    def empty(self):
        """Return True if no pattern of any kind has been added."""
        return not len(self._items) and not len(self._path_full_patterns)

    def _add(self, pattern, cmd):
        """Register a single *pattern*; *cmd* is an IECommand value."""
        if isinstance(pattern, PathFullPattern):
            key = pattern.pattern  # full, normalized path
            self._path_full_patterns[key] = cmd
        else:
            self._items.append((pattern, cmd))

    def add(self, patterns, cmd):
        """Add list of patterns to internal list. *cmd* indicates whether the
        pattern is an include/exclude pattern, and whether recursion should be
        done on excluded folders.
        """
        for pattern in patterns:
            self._add(pattern, cmd)

    def add_includepaths(self, include_paths):
        """Used to add inclusion-paths from args.paths (from commandline).

        The fallback becomes True only when *include_paths* is empty.
        """
        include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths]
        self.add(include_patterns, IECommand.Include)
        self.fallback = not include_patterns
        self.include_patterns = include_patterns

    def get_unmatched_include_patterns(self):
        "Note that this only returns patterns added via *add_includepaths*."
        return [p for p in self.include_patterns if p.match_count == 0]

    def add_inclexcl(self, patterns):
        """Add list of patterns (of type CmdTuple) to internal list."""
        for pattern, cmd in patterns:
            self._add(pattern, cmd)

    def match(self, path):
        """Return True or False depending on whether *path* is matched.

        If no match is found among the patterns in this matcher, then the value
        in self.fallback is returned (defaults to None).

        As a side effect, self.recurse_dir is set for the pattern that decided
        the result (or to self.recurse_dir_default if none matched).
        """
        path = normalize_path(path)
        # do a fast lookup for full path matches (note: we do not count such matches):
        non_existent = object()
        value = self._path_full_patterns.get(path, non_existent)

        if value is not non_existent:
            # we have a full path match! *value* is the command stored by _add().
            # Only the no-recurse exclude command suppresses recursion, like in
            # parse_inclexcl_command().
            self.recurse_dir = value is not IECommand.ExcludeNoRecurse
            # translate the command to a boolean exactly like the slow path
            # below does; returning the (always truthy) command itself would
            # turn a full-path exclude into an include.
            return self.is_include_cmd[value]

        # this is the slow way, if we have many patterns in self._items:
        for (pattern, cmd) in self._items:
            if pattern.match(path, normalize=False):
                self.recurse_dir = pattern.recurse_dir
                return self.is_include_cmd[cmd]

        # by default we will recurse if there is no match
        self.recurse_dir = self.recurse_dir_default
        return self.fallback
def normalize_path(path):
    """Return *path* in NFD form on macOS; return it unchanged on every other platform."""
    # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match.
    # Windows and Unix filesystems allow different forms, so users always have to enter an exact match.
    if sys.platform != 'darwin':
        return path
    return unicodedata.normalize('NFD', path)
class PatternBase:
    """Common behaviour of all inclusion/exclusion pattern classes.

    Subclasses provide _prepare() and _match(); this base class keeps the
    original pattern text, the match counter and the recurse_dir flag.
    """
    PREFIX = NotImplemented

    def __init__(self, pattern, recurse_dir=False):
        self.pattern_orig = pattern
        self.match_count = 0
        # subclasses receive the platform-normalized pattern text
        self._prepare(normalize_path(pattern))
        self.recurse_dir = recurse_dir

    def match(self, path, normalize=True):
        """Tell whether *path* is matched by this pattern and count the hit.

        With normalize=True (the default) the path is passed through
        normalize_path() first; otherwise it is assumed to be normalized already.
        """
        candidate = normalize_path(path) if normalize else path
        hit = self._match(candidate)
        if hit:
            self.match_count += 1
        return hit

    def __repr__(self):
        return '%s(%s)' % (type(self), self.pattern)

    def __str__(self):
        return self.pattern_orig

    def _prepare(self, pattern):
        "Should set the value of self.pattern"
        raise NotImplementedError

    def _match(self, path):
        "Should return whether the (already normalized) *path* matches"
        raise NotImplementedError
class PathFullPattern(PatternBase):
    """Matches only a path that is equal to the normalized pattern."""
    PREFIX = "pf"

    def _prepare(self, pattern):
        normalized = os.path.normpath(pattern)
        self.pattern = normalized

    def _match(self, path):
        return self.pattern == path
# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
# separator to the end of the path before matching.
class PathPrefixPattern(PatternBase):
    """Literal files or directories listed on the command line
    for some operations (e.g. extract, but not create).

    A directory pattern also matches every path below that directory.
    Whether the pattern ends with a slash makes no difference.
    """
    PREFIX = "pp"

    def _prepare(self, pattern):
        sep = os.path.sep
        # always keep exactly one trailing separator on the stored pattern
        self.pattern = os.path.normpath(pattern).rstrip(sep) + sep

    def _match(self, path):
        # the separator appended here lets "dir" match "dir" itself but not "dir2"
        terminated = path + os.path.sep
        return terminated.startswith(self.pattern)
class FnmatchPattern(PatternBase):
    """fnmatch-style glob patterns to exclude. With a trailing slash only the
    contents of a directory are matched, not the directory itself.
    """
    PREFIX = "fm"

    def _prepare(self, pattern):
        sep = os.path.sep
        if pattern.endswith(sep):
            glob = os.path.normpath(pattern).rstrip(sep) + sep + '*' + sep
        else:
            glob = os.path.normpath(pattern) + sep + '*'
        self.pattern = glob

        # fnmatch and re.match both cache compiled regular expressions.
        # Nevertheless, this is about 10 times faster.
        self.regex = re.compile(fnmatch.translate(glob))

    def _match(self, path):
        terminated = path + os.path.sep
        return self.regex.match(terminated) is not None
class ShellPattern(PatternBase):
    """Shell glob patterns (translated by shellpattern.translate) to exclude.
    With a trailing slash only the contents of a directory are matched, not
    the directory itself.
    """
    PREFIX = "sh"

    def _prepare(self, pattern):
        sep = os.path.sep
        # suffix appended so that everything below a matching path matches too
        below = sep + "**" + sep + "*"
        if pattern.endswith(sep):
            glob = os.path.normpath(pattern).rstrip(sep) + below + sep
        else:
            glob = os.path.normpath(pattern) + below
        self.pattern = glob
        self.regex = re.compile(shellpattern.translate(glob))

    def _match(self, path):
        terminated = path + os.path.sep
        return self.regex.match(terminated) is not None
class RegexPattern(PatternBase):
    """Regular expression to exclude; it is searched for anywhere in the path."""
    PREFIX = "re"

    def _prepare(self, pattern):
        self.pattern = pattern
        self.regex = re.compile(pattern)

    def _match(self, path):
        # Normalize path separators so that expressions can always use '/'
        sep = os.path.sep
        subject = path if sep == '/' else path.replace(sep, '/')
        return self.regex.search(subject) is not None
# All pattern implementations that can be selected by a style prefix.
_PATTERN_CLASSES = {
    FnmatchPattern,
    PathFullPattern,
    PathPrefixPattern,
    RegexPattern,
    ShellPattern,
}

# Maps each class's PREFIX (e.g. "fm", "re") to the class itself.
_PATTERN_CLASS_BY_PREFIX = {cls.PREFIX: cls for cls in _PATTERN_CLASSES}

# Result of parsing a command: *val* is the parsed value, *cmd* the IECommand.
CmdTuple = namedtuple('CmdTuple', 'val cmd')
class IECommand(Enum):
    """Kinds of command that a line of an include/exclude file can carry."""

    # 'R' / 'r' lines: a root path
    RootPath = 1
    # 'P' / 'p' lines: select the default pattern style
    PatternStyle = 2
    # '+' lines
    Include = 3
    # '-' lines
    Exclude = 4
    # '!' lines: exclude without recursing into the directory
    ExcludeNoRecurse = 5
def get_pattern_class(prefix):
    """Return the pattern class registered for the style *prefix*.

    Raises ValueError if *prefix* is not a known pattern style.
    """
    if prefix in _PATTERN_CLASS_BY_PREFIX:
        return _PATTERN_CLASS_BY_PREFIX[prefix]
    raise ValueError("Unknown pattern style: {}".format(prefix)) from None
def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True):
    """Build a pattern object from *pattern*.

    A leading two-character alphanumeric style followed by ':' (e.g. "re:")
    selects the implementation class; otherwise *fallback* is used.
    """
    has_style = len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum()
    if not has_style:
        return fallback(pattern, recurse_dir)
    style, text = pattern[:2], pattern[3:]
    return get_pattern_class(style)(text, recurse_dir)
def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern):
    """Parse *pattern_str* with parse_pattern() and wrap the result as an Exclude command."""
    return CmdTuple(parse_pattern(pattern_str, fallback), IECommand.Exclude)
def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
    """Parse one --patterns-from command line into a CmdTuple.

    The first character selects the command and the rest (with leading
    whitespace removed) is its argument. Raises argparse.ArgumentTypeError
    if the command character is unknown, the argument is missing, or a
    pattern style is invalid.
    """
    cmd_prefix_map = {
        '-': IECommand.Exclude,
        '!': IECommand.ExcludeNoRecurse,
        '+': IECommand.Include,
        'R': IECommand.RootPath,
        'r': IECommand.RootPath,
        'P': IECommand.PatternStyle,
        'p': IECommand.PatternStyle,
    }

    try:
        cmd = cmd_prefix_map[cmd_line_str[0]]

        # remaining text on command-line following the command character
        remainder_str = cmd_line_str[1:].lstrip()

        if not remainder_str:
            raise ValueError("Missing pattern/information!")
    except (IndexError, KeyError, ValueError):
        raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str))

    if cmd is IECommand.RootPath:
        # TODO: validate string?
        return CmdTuple(remainder_str, cmd)

    if cmd is IECommand.PatternStyle:
        # then remainder_str is something like 're' or 'sh'
        try:
            style_cls = get_pattern_class(remainder_str)
        except ValueError:
            raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str))
        return CmdTuple(style_cls, cmd)

    # include/exclude command: only the no-recurse exclude disables recursion
    recurse_dir = cmd is not IECommand.ExcludeNoRecurse
    return CmdTuple(parse_pattern(remainder_str, fallback, recurse_dir), cmd)

View file

@ -33,12 +33,12 @@
from ..cache import Cache
from ..constants import * # NOQA
from ..crypto import bytes_to_long, num_aes_blocks
from ..helpers import PatternMatcher, parse_pattern, Location, get_security_dir
from ..helpers import Location, get_security_dir
from ..helpers import Manifest
from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
from ..helpers import bin_to_hex
from ..helpers import IECommand
from ..helpers import MAX_S
from ..patterns import IECommand, PatternMatcher, parse_pattern
from ..item import Item
from ..key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
from ..keymanager import RepoIdMismatch, NotABorgKeyFile

View file

@ -23,9 +23,6 @@
from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams
from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
from ..helpers import load_exclude_file, load_pattern_file
from ..helpers import parse_pattern, PatternMatcher
from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
from ..helpers import swidth_slice
from ..helpers import chunkit
from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS
@ -244,463 +241,6 @@ def test(self):
)
def check_patterns(files, pattern, expected):
    """Assert that *pattern* matches exactly the *expected* subset of *files*.

    *expected* of None means that every entry of *files* must match.
    """
    assert all(name == os.path.normpath(name) for name in files), "Pattern matchers expect normalized input paths"
    wanted = files if expected is None else expected
    hits = [name for name in files if pattern.match(name)]
    assert hits == wanted
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("/", []),
    ("/home", ["/home"]),
    ("/home///", ["/home"]),
    ("/./home", ["/home"]),
    ("/home/user", ["/home/user"]),
    ("/home/user2", ["/home/user2"]),
    ("/home/user/.bashrc", ["/home/user/.bashrc"]),
])
def test_patterns_full(pattern, expected):
    """PathFullPattern matches only the exact (normalized) absolute path."""
    candidates = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc", ]
    matcher = PathFullPattern(pattern)
    check_patterns(candidates, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("", []),
    ("relative", []),
    ("relative/path/", ["relative/path"]),
    ("relative/path", ["relative/path"]),
])
def test_patterns_full_relative(pattern, expected):
    """PathFullPattern matches only the exact (normalized) relative path."""
    candidates = ["relative/path", "relative/path2", ]
    matcher = PathFullPattern(pattern)
    check_patterns(candidates, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("/", None),
    ("/./", None),
    ("", []),
    ("/home/u", []),
    ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc", ["/etc/server/config", "/etc/server/hosts"]),
    ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv", ["/srv/messages", "/srv/dmesg"]),
])
def test_patterns_prefix(pattern, expected):
    """PathPrefixPattern matches absolute paths at or below the given prefix."""
    candidates = [
        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg",
    ]
    matcher = PathPrefixPattern(pattern)
    check_patterns(candidates, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("", []),
    ("foo", []),
    ("relative", ["relative/path1", "relative/two"]),
    ("more", ["more/relative"]),
])
def test_patterns_prefix_relative(pattern, expected):
    """PathPrefixPattern matches relative paths at or below the given prefix."""
    candidates = ["relative/path1", "relative/two", "more/relative"]
    matcher = PathPrefixPattern(pattern)
    check_patterns(candidates, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("/*", None),
    ("/./*", None),
    ("*", None),
    ("*/*", None),
    ("*///*", None),
    ("/home/u", []),
    ("/home/*",
     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv*", ["/srv/messages", "/srv/dmesg"]),
    ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
])
def test_patterns_fnmatch(pattern, expected):
    """FnmatchPattern globbing, where '*' also spans path separators."""
    candidates = [
        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg",
        "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
    ]
    matcher = FnmatchPattern(pattern)
    check_patterns(candidates, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("*", None),
    ("**/*", None),
    ("/**/*", None),
    ("/./*", None),
    ("*/*", None),
    ("*///*", None),
    ("/home/u", []),
    ("/home/*",
     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("*/.pr????e", []),
    ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]),
    ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]),
    ("/srv/*", ["/srv/messages", "/srv/dmesg"]),
    ("/srv2/**", ["/srv2", "/srv2/blafasel"]),
    ("/srv2/**/", ["/srv2/blafasel"]),
    ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]),
    ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]),
])
def test_patterns_shell(pattern, expected):
    """ShellPattern globbing, including the '**' wildcard."""
    candidates = [
        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv", "/srv/messages", "/srv/dmesg",
        "/srv2", "/srv2/blafasel", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
    ]
    matcher = ShellPattern(pattern)
    check_patterns(candidates, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("", None),
    (".*", None),
    ("^/", None),
    ("^abc$", []),
    ("^[^/]", []),
    ("^(?!/srv|/foo|/opt)",
     ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile",
      "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]),
])
def test_patterns_regex(pattern, expected):
    """RegexPattern keeps the pattern text unchanged and searches it in each path."""
    candidates = [
        '/srv/data', '/foo/bar', '/home',
        '/home/user/.profile', '/home/user/.bashrc',
        '/home/user2/.profile', '/home/user2/public_html/index.html',
        '/opt/log/messages.txt', '/opt/log/dmesg.txt',
        "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
    ]

    matcher = RegexPattern(pattern)
    # neither str() nor .pattern may alter the expression given by the user
    assert str(matcher) == pattern
    assert matcher.pattern == pattern

    check_patterns(candidates, matcher, expected)
def test_regex_pattern():
    """A regex written with "/" matches both "/" and the platform's path separator."""
    # The forward slash must match the platform-specific path separator
    for path in ("/", os.path.sep):
        assert RegexPattern("^/$").match(path)

    # A pattern for a literal backslash must not match a forward slash.
    backslash_only = RegexPattern(r"^\\$")
    assert not backslash_only.match("/")
def use_normalized_unicode():
    """Return True when running on "darwin", the only platform listed here, and False otherwise."""
    return sys.platform == "darwin"
def _make_test_patterns(pattern):
    """Build one matcher of each pattern style from the text *pattern*.

    The regex variant embeds *pattern* between "^" and "/foo$"; the other
    styles receive *pattern* unchanged.
    """
    prefix = PathPrefixPattern(pattern)
    fnmatch_style = FnmatchPattern(pattern)
    regex = RegexPattern("^{}/foo$".format(pattern))
    shell = ShellPattern(pattern)
    return [prefix, fnmatch_style, regex, shell]
@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}"))
def test_composed_unicode_pattern(pattern):
    """A pattern written with a precomposed character matches the decomposed path only when
    use_normalized_unicode() is true."""
    composed_path = "b\N{LATIN SMALL LETTER A WITH ACUTE}/foo"
    decomposed_path = "ba\N{COMBINING ACUTE ACCENT}/foo"

    assert pattern.match(composed_path)
    assert pattern.match(decomposed_path) == use_normalized_unicode()
@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}"))
def test_decomposed_unicode_pattern(pattern):
    """A pattern written with a combining accent matches the precomposed path only when
    use_normalized_unicode() is true."""
    composed_path = "b\N{LATIN SMALL LETTER A WITH ACUTE}/foo"
    decomposed_path = "ba\N{COMBINING ACUTE ACCENT}/foo"

    assert pattern.match(composed_path) == use_normalized_unicode()
    assert pattern.match(decomposed_path)
@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1")))
def test_invalid_unicode_pattern(pattern):
    """A pattern containing the latin1-decoded byte 0x80 matches only paths containing it too."""
    path_without_byte = "ba/foo"
    path_with_byte = str(b"ba\x80/foo", "latin1")

    assert not pattern.match(path_without_byte)
    assert pattern.match(path_with_byte)
@pytest.mark.parametrize("lines, expected", [
    # "None" means all files, i.e. none excluded
    ([], None),
    (["# Comment only"], None),
    (["*"], []),
    (["# Comment",
      "*/something00.txt",
      " *whitespace* ",
      # Whitespace before comment
      " #/ws*",
      # Empty line
      "",
      "# EOF"],
     ["/more/data", "/home", " #/wsfoobar"]),
    (["re:.*"], []),
    # raw string: "\s" is not a valid escape sequence in a normal string literal
    ([r"re:\s"], ["/data/something00.txt", "/more/data", "/home"]),
    ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]),
    (["", "", "",
      "# This is a test with mixed pattern styles",
      # Case-insensitive pattern
      "re:(?i)BAR|ME$",
      "",
      "*whitespace*",
      "fm:*/something00*"],
     ["/more/data"]),
    ([r" re:^\s "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]),
    ([r" re:\s$ "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]),
    (["pp:./"], None),
    (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]),
    (["pp:aaabbb"], None),
    (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
    (["/nomatch", "/more/*"],
     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
    # the order of exclude patterns shouldn't matter
    (["/more/*", "/nomatch"],
     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
])
def test_exclude_patterns_from_file(tmpdir, lines, expected):
    """Write *lines* to an exclude file, load it and check which sample paths still match.

    :param tmpdir: pytest temporary directory fixture, used to hold the exclude file
    :param lines: lines written to the exclude file, joined with newlines
    :param expected: paths the matcher must still report, or None for "all of them"
    """
    files = [
        '/data/something00.txt', '/more/data', '/home',
        ' #/wsfoobar',
        '\tstart/whitespace',
        '/whitespace/end\t',
    ]

    def evaluate(filename):
        patterns = []
        # Close the file deterministically instead of leaking the handle.
        with open(filename, "rt") as fh:
            load_exclude_file(fh, patterns)
        matcher = PatternMatcher(fallback=True)
        matcher.add_inclexcl(patterns)
        return [path for path in files if matcher.match(path)]

    exclfile = tmpdir.join("exclude.txt")

    with exclfile.open("wt") as fh:
        fh.write("\n".join(lines))

    assert evaluate(str(exclfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
    # each case: pattern file lines, roots expected afterwards, number of patterns expected
    ([], [], 0),
    (["# Comment only"], [], 0),
    (["- *"], [], 1),
    (["+fm:*/something00.txt",
      "-/data"], [], 2),
    (["R /"], ["/"], 0),
    (["R /",
      "# comment"], ["/"], 0),
    (["# comment",
      "- /data",
      "R /home"], ["/home"], 1),
])
def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
    """Load a pattern file and check the collected roots and the number of patterns.

    :param tmpdir: pytest temporary directory fixture, used to hold the pattern file
    :param lines: lines written to the pattern file, joined with newlines
    :param expected_roots: roots list expected after loading
    :param expected_numpatterns: expected length of the include/exclude pattern list
    """
    def evaluate(filename):
        roots = []
        inclexclpatterns = []
        # Close the file deterministically instead of leaking the handle.
        with open(filename, "rt") as fh:
            load_pattern_file(fh, roots, inclexclpatterns)
        return roots, len(inclexclpatterns)

    patternfile = tmpdir.join("patterns.txt")

    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    roots, numpatterns = evaluate(str(patternfile))
    assert roots == expected_roots
    assert numpatterns == expected_numpatterns
def test_switch_patterns_style():
    """Check which pattern class each "+" line gets as style-switch lines are interleaved."""
    pattern_text = """\
+0_initial_default_is_shell
p fm
+1_fnmatch
P re
+2_regex
+3_more_regex
P pp
+4_pathprefix
p fm
p sh
+5_shell
"""
    roots = []
    loaded = []
    load_pattern_file(io.StringIO(pattern_text), roots, loaded)

    # One class per "+" line, in file order.
    expected_classes = [
        ShellPattern,
        FnmatchPattern,
        RegexPattern,
        RegexPattern,
        PathPrefixPattern,
        ShellPattern,
    ]
    assert len(loaded) == 6
    for index, expected_cls in enumerate(expected_classes):
        assert isinstance(loaded[index].val, expected_cls)
@pytest.mark.parametrize("lines", [
    (["X /data"]),  # illegal pattern type prefix
    (["/data"]),  # need a pattern type prefix
])
def test_load_invalid_patterns_from_file(tmpdir, lines):
    """Loading a pattern file with a malformed line must raise argparse.ArgumentTypeError.

    :param tmpdir: pytest temporary directory fixture, used to hold the pattern file
    :param lines: lines written to the pattern file, joined with newlines
    """
    patternfile = tmpdir.join("patterns.txt")
    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))
    filename = str(patternfile)

    roots = []
    inclexclpatterns = []
    # Close the file deterministically, and keep the raises-body down to the one
    # call that is expected to fail so unrelated errors are not masked.
    with open(filename, "rt") as fh:
        with pytest.raises(argparse.ArgumentTypeError):
            load_pattern_file(fh, roots, inclexclpatterns)
@pytest.mark.parametrize("lines, expected", [
    # "None" means all files, i.e. none excluded
    ([], None),
    (["# Comment only"], None),
    (["- *"], []),
    # default match type is sh: for patterns -> * doesn't match a /
    (["-*/something0?.txt"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
      '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something00.txt"],
     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something0?.txt"],
     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+/*/something0?.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+fm:*/something00.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    # include /home/leo and exclude the rest of /home:
    (["+/home/leo",
      "-/home/*"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
    # wrong order, /home/leo is already excluded by -/home/*:
    (["-/home/*",
      "+/home/leo"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']),
    (["+fm:/home/leo",
      "-/home/"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
])
def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
    """Load include/exclude patterns from a file and check which sample paths still match.

    :param tmpdir: pytest temporary directory fixture, used to hold the pattern file
    :param lines: lines written to the pattern file, joined with newlines
    :param expected: paths the matcher must still report, or None for "all of them"
    """
    files = [
        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
        '/home', '/home/leo', '/home/leo/t', '/home/other'
    ]

    def evaluate(filename):
        matcher = PatternMatcher(fallback=True)
        roots = []
        inclexclpatterns = []
        # Close the file deterministically instead of leaking the handle.
        with open(filename, "rt") as fh:
            load_pattern_file(fh, roots, inclexclpatterns)
        matcher.add_inclexcl(inclexclpatterns)
        return [path for path in files if matcher.match(path)]

    patternfile = tmpdir.join("patterns.txt")

    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    assert evaluate(str(patternfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("pattern, cls", [
    ("", FnmatchPattern),

    # Default style
    ("*", FnmatchPattern),
    ("/data/*", FnmatchPattern),

    # fnmatch style
    ("fm:", FnmatchPattern),
    ("fm:*", FnmatchPattern),
    ("fm:/data/*", FnmatchPattern),
    ("fm:fm:/data/*", FnmatchPattern),

    # Regular expression
    ("re:", RegexPattern),
    ("re:.*", RegexPattern),
    ("re:^/something/", RegexPattern),
    ("re:re:^/something/", RegexPattern),

    # Path prefix
    ("pp:", PathPrefixPattern),
    ("pp:/", PathPrefixPattern),
    ("pp:/data/", PathPrefixPattern),
    ("pp:pp:/data/", PathPrefixPattern),

    # Shell-pattern style
    ("sh:", ShellPattern),
    ("sh:*", ShellPattern),
    ("sh:/data/*", ShellPattern),
    ("sh:sh:/data/*", ShellPattern),
])
def test_parse_pattern(pattern, cls):
    """parse_pattern() must return an instance of the class selected by the style prefix."""
    parsed = parse_pattern(pattern)
    assert isinstance(parsed, cls)
@pytest.mark.parametrize("pattern", [
    "aa:",
    "fo:*",
    "00:",
    "x1:abc",
])
def test_parse_pattern_error(pattern):
    """parse_pattern() must raise ValueError for each of these prefixes."""
    with pytest.raises(ValueError):
        parse_pattern(pattern)
def test_pattern_matcher():
    """Exercise PatternMatcher: empty matcher, added pattern groups and the fallback value."""
    pm = PatternMatcher()

    # With nothing added, the fallback is None and so is every match result.
    assert pm.fallback is None
    for path in ["", "foo", "bar"]:
        assert pm.match(path) is None

    # add extra entries to aid in testing
    for target in ["A", "B", "Empty", "FileNotFound"]:
        pm.is_include_cmd[target] = target

    pm.add([RegexPattern("^a")], "A")
    pm.add([RegexPattern("^b"), RegexPattern("^z")], "B")
    pm.add([RegexPattern("^$")], "Empty")
    pm.fallback = "FileNotFound"

    expectations = [
        ("", "Empty"),
        ("aaa", "A"),
        ("bbb", "B"),
        ("ccc", "FileNotFound"),
        ("xyz", "FileNotFound"),
        ("z", "B"),
    ]
    for path, value in expectations:
        assert pm.match(path) == value

    # The fallback can also be given at construction time.
    assert PatternMatcher(fallback="hey!").fallback == "hey!"
def test_chunkerparams():
assert ChunkerParams('19,23,21,4095') == (19, 23, 21, 4095)
assert ChunkerParams('10,23,16,4095') == (10, 23, 16, 4095)

View file

@ -0,0 +1,467 @@
import argparse
import io
import os.path
import sys
import pytest
from ..patterns import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
from ..patterns import load_exclude_file, load_pattern_file
from ..patterns import parse_pattern, PatternMatcher
def check_patterns(files, pattern, expected):
    """Assert that *pattern* matches exactly the *expected* subset of *files*.

    *files* must already be normalized paths. Passing ``None`` as *expected*
    means every entry of *files* has to match.
    """
    assert all(f == os.path.normpath(f) for f in files), "Pattern matchers expect normalized input paths"

    if expected is None:
        expected = files

    matched = list(filter(pattern.match, files))

    assert matched == expected
@pytest.mark.parametrize("pattern, expected", [
    # An expected value of None would be shorthand for "every sample path matches".
    ("/", []),
    ("/home", ["/home"]),
    ("/home///", ["/home"]),
    ("/./home", ["/home"]),
    ("/home/user", ["/home/user"]),
    ("/home/user2", ["/home/user2"]),
    ("/home/user/.bashrc", ["/home/user/.bashrc"]),
])
def test_patterns_full(pattern, expected):
    """PathFullPattern built from *pattern* must match exactly the *expected* absolute paths."""
    paths = [
        "/home",
        "/home/user",
        "/home/user2",
        "/home/user/.bashrc",
    ]
    matcher = PathFullPattern(pattern)
    check_patterns(paths, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # An expected value of None would be shorthand for "every sample path matches".
    ("", []),
    ("relative", []),
    ("relative/path/", ["relative/path"]),
    ("relative/path", ["relative/path"]),
])
def test_patterns_full_relative(pattern, expected):
    """PathFullPattern built from *pattern* must match exactly the *expected* relative paths."""
    paths = [
        "relative/path",
        "relative/path2",
    ]
    matcher = PathFullPattern(pattern)
    check_patterns(paths, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # An expected value of None is shorthand for "every sample path matches".
    ("/", None),
    ("/./", None),
    ("", []),
    ("/home/u", []),
    ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc", ["/etc/server/config", "/etc/server/hosts"]),
    ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv", ["/srv/messages", "/srv/dmesg"]),
])
def test_patterns_prefix(pattern, expected):
    """PathPrefixPattern built from *pattern* must match exactly the *expected* absolute paths."""
    paths = [
        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg",
    ]
    matcher = PathPrefixPattern(pattern)
    check_patterns(paths, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # An expected value of None would be shorthand for "every sample path matches".
    ("", []),
    ("foo", []),
    ("relative", ["relative/path1", "relative/two"]),
    ("more", ["more/relative"]),
])
def test_patterns_prefix_relative(pattern, expected):
    """PathPrefixPattern built from *pattern* must match exactly the *expected* relative paths."""
    paths = [
        "relative/path1",
        "relative/two",
        "more/relative",
    ]
    matcher = PathPrefixPattern(pattern)
    check_patterns(paths, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # An expected value of None is shorthand for "every sample path matches".
    ("/*", None),
    ("/./*", None),
    ("*", None),
    ("*/*", None),
    ("*///*", None),
    ("/home/u", []),
    ("/home/*",
     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv*", ["/srv/messages", "/srv/dmesg"]),
    ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
])
def test_patterns_fnmatch(pattern, expected):
    """FnmatchPattern built from *pattern* must match exactly the *expected* sample paths."""
    paths = [
        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg",
        "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
    ]
    matcher = FnmatchPattern(pattern)
    check_patterns(paths, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # An expected value of None is shorthand for "every sample path matches".
    ("*", None),
    ("**/*", None),
    ("/**/*", None),
    ("/./*", None),
    ("*/*", None),
    ("*///*", None),
    ("/home/u", []),
    ("/home/*",
     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("*/.pr????e", []),
    ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]),
    ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]),
    ("/srv/*", ["/srv/messages", "/srv/dmesg"]),
    ("/srv2/**", ["/srv2", "/srv2/blafasel"]),
    ("/srv2/**/", ["/srv2/blafasel"]),
    ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]),
    ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]),
])
def test_patterns_shell(pattern, expected):
    """ShellPattern built from *pattern* must match exactly the *expected* sample paths."""
    paths = [
        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv", "/srv/messages", "/srv/dmesg",
        "/srv2", "/srv2/blafasel", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
    ]
    matcher = ShellPattern(pattern)
    check_patterns(paths, matcher, expected)
@pytest.mark.parametrize("pattern, expected", [
    # An expected value of None is shorthand for "every sample path matches".
    ("", None),
    (".*", None),
    ("^/", None),
    ("^abc$", []),
    ("^[^/]", []),
    ("^(?!/srv|/foo|/opt)",
     ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile",
      "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]),
])
def test_patterns_regex(pattern, expected):
    """Check a RegexPattern's string form, its ``pattern`` attribute and the paths it matches."""
    paths = [
        '/srv/data', '/foo/bar', '/home',
        '/home/user/.profile', '/home/user/.bashrc',
        '/home/user2/.profile', '/home/user2/public_html/index.html',
        '/opt/log/messages.txt', '/opt/log/dmesg.txt',
        "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
    ]
    regex = RegexPattern(pattern)

    # The pattern text must round-trip unchanged through the object.
    assert str(regex) == pattern
    assert regex.pattern == pattern

    check_patterns(paths, regex, expected)
def test_regex_pattern():
    """A regex written with "/" matches both "/" and the platform's path separator."""
    # The forward slash must match the platform-specific path separator
    for path in ("/", os.path.sep):
        assert RegexPattern("^/$").match(path)

    # A pattern for a literal backslash must not match a forward slash.
    backslash_only = RegexPattern(r"^\\$")
    assert not backslash_only.match("/")
def use_normalized_unicode():
    """Return True when running on "darwin", the only platform listed here, and False otherwise."""
    return sys.platform == "darwin"
def _make_test_patterns(pattern):
    """Build one matcher of each pattern style from the text *pattern*.

    The regex variant embeds *pattern* between "^" and "/foo$"; the other
    styles receive *pattern* unchanged.
    """
    prefix = PathPrefixPattern(pattern)
    fnmatch_style = FnmatchPattern(pattern)
    regex = RegexPattern("^{}/foo$".format(pattern))
    shell = ShellPattern(pattern)
    return [prefix, fnmatch_style, regex, shell]
@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}"))
def test_composed_unicode_pattern(pattern):
    """A pattern written with a precomposed character matches the decomposed path only when
    use_normalized_unicode() is true."""
    composed_path = "b\N{LATIN SMALL LETTER A WITH ACUTE}/foo"
    decomposed_path = "ba\N{COMBINING ACUTE ACCENT}/foo"

    assert pattern.match(composed_path)
    assert pattern.match(decomposed_path) == use_normalized_unicode()
@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}"))
def test_decomposed_unicode_pattern(pattern):
    """A pattern written with a combining accent matches the precomposed path only when
    use_normalized_unicode() is true."""
    composed_path = "b\N{LATIN SMALL LETTER A WITH ACUTE}/foo"
    decomposed_path = "ba\N{COMBINING ACUTE ACCENT}/foo"

    assert pattern.match(composed_path) == use_normalized_unicode()
    assert pattern.match(decomposed_path)
@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1")))
def test_invalid_unicode_pattern(pattern):
    """A pattern containing the latin1-decoded byte 0x80 matches only paths containing it too."""
    path_without_byte = "ba/foo"
    path_with_byte = str(b"ba\x80/foo", "latin1")

    assert not pattern.match(path_without_byte)
    assert pattern.match(path_with_byte)
@pytest.mark.parametrize("lines, expected", [
    # "None" means all files, i.e. none excluded
    ([], None),
    (["# Comment only"], None),
    (["*"], []),
    (["# Comment",
      "*/something00.txt",
      " *whitespace* ",
      # Whitespace before comment
      " #/ws*",
      # Empty line
      "",
      "# EOF"],
     ["/more/data", "/home", " #/wsfoobar"]),
    (["re:.*"], []),
    # raw string: "\s" is not a valid escape sequence in a normal string literal
    ([r"re:\s"], ["/data/something00.txt", "/more/data", "/home"]),
    ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]),
    (["", "", "",
      "# This is a test with mixed pattern styles",
      # Case-insensitive pattern
      "re:(?i)BAR|ME$",
      "",
      "*whitespace*",
      "fm:*/something00*"],
     ["/more/data"]),
    ([r" re:^\s "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]),
    ([r" re:\s$ "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]),
    (["pp:./"], None),
    (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]),
    (["pp:aaabbb"], None),
    (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
    (["/nomatch", "/more/*"],
     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
    # the order of exclude patterns shouldn't matter
    (["/more/*", "/nomatch"],
     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
])
def test_exclude_patterns_from_file(tmpdir, lines, expected):
    """Write *lines* to an exclude file, load it and check which sample paths still match.

    :param tmpdir: pytest temporary directory fixture, used to hold the exclude file
    :param lines: lines written to the exclude file, joined with newlines
    :param expected: paths the matcher must still report, or None for "all of them"
    """
    files = [
        '/data/something00.txt', '/more/data', '/home',
        ' #/wsfoobar',
        '\tstart/whitespace',
        '/whitespace/end\t',
    ]

    def evaluate(filename):
        patterns = []
        # Close the file deterministically instead of leaking the handle.
        with open(filename, "rt") as fh:
            load_exclude_file(fh, patterns)
        matcher = PatternMatcher(fallback=True)
        matcher.add_inclexcl(patterns)
        return [path for path in files if matcher.match(path)]

    exclfile = tmpdir.join("exclude.txt")

    with exclfile.open("wt") as fh:
        fh.write("\n".join(lines))

    assert evaluate(str(exclfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
    # each case: pattern file lines, roots expected afterwards, number of patterns expected
    ([], [], 0),
    (["# Comment only"], [], 0),
    (["- *"], [], 1),
    (["+fm:*/something00.txt",
      "-/data"], [], 2),
    (["R /"], ["/"], 0),
    (["R /",
      "# comment"], ["/"], 0),
    (["# comment",
      "- /data",
      "R /home"], ["/home"], 1),
])
def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
    """Load a pattern file and check the collected roots and the number of patterns.

    :param tmpdir: pytest temporary directory fixture, used to hold the pattern file
    :param lines: lines written to the pattern file, joined with newlines
    :param expected_roots: roots list expected after loading
    :param expected_numpatterns: expected length of the include/exclude pattern list
    """
    def evaluate(filename):
        roots = []
        inclexclpatterns = []
        # Close the file deterministically instead of leaking the handle.
        with open(filename, "rt") as fh:
            load_pattern_file(fh, roots, inclexclpatterns)
        return roots, len(inclexclpatterns)

    patternfile = tmpdir.join("patterns.txt")

    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    roots, numpatterns = evaluate(str(patternfile))
    assert roots == expected_roots
    assert numpatterns == expected_numpatterns
def test_switch_patterns_style():
    """Check which pattern class each "+" line gets as style-switch lines are interleaved."""
    pattern_text = """\
+0_initial_default_is_shell
p fm
+1_fnmatch
P re
+2_regex
+3_more_regex
P pp
+4_pathprefix
p fm
p sh
+5_shell
"""
    roots = []
    loaded = []
    load_pattern_file(io.StringIO(pattern_text), roots, loaded)

    # One class per "+" line, in file order.
    expected_classes = [
        ShellPattern,
        FnmatchPattern,
        RegexPattern,
        RegexPattern,
        PathPrefixPattern,
        ShellPattern,
    ]
    assert len(loaded) == 6
    for index, expected_cls in enumerate(expected_classes):
        assert isinstance(loaded[index].val, expected_cls)
@pytest.mark.parametrize("lines", [
    (["X /data"]),  # illegal pattern type prefix
    (["/data"]),  # need a pattern type prefix
])
def test_load_invalid_patterns_from_file(tmpdir, lines):
    """Loading a pattern file with a malformed line must raise argparse.ArgumentTypeError.

    :param tmpdir: pytest temporary directory fixture, used to hold the pattern file
    :param lines: lines written to the pattern file, joined with newlines
    """
    patternfile = tmpdir.join("patterns.txt")
    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))
    filename = str(patternfile)

    roots = []
    inclexclpatterns = []
    # Close the file deterministically, and keep the raises-body down to the one
    # call that is expected to fail so unrelated errors are not masked.
    with open(filename, "rt") as fh:
        with pytest.raises(argparse.ArgumentTypeError):
            load_pattern_file(fh, roots, inclexclpatterns)
@pytest.mark.parametrize("lines, expected", [
    # "None" means all files, i.e. none excluded
    ([], None),
    (["# Comment only"], None),
    (["- *"], []),
    # default match type is sh: for patterns -> * doesn't match a /
    (["-*/something0?.txt"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
      '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something00.txt"],
     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["-fm:*/something0?.txt"],
     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+/*/something0?.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    (["+fm:*/something00.txt",
      "-/data"],
     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
    # include /home/leo and exclude the rest of /home:
    (["+/home/leo",
      "-/home/*"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
    # wrong order, /home/leo is already excluded by -/home/*:
    (["-/home/*",
      "+/home/leo"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']),
    (["+fm:/home/leo",
      "-/home/"],
     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
])
def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
    """Load include/exclude patterns from a file and check which sample paths still match.

    :param tmpdir: pytest temporary directory fixture, used to hold the pattern file
    :param lines: lines written to the pattern file, joined with newlines
    :param expected: paths the matcher must still report, or None for "all of them"
    """
    files = [
        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
        '/home', '/home/leo', '/home/leo/t', '/home/other'
    ]

    def evaluate(filename):
        matcher = PatternMatcher(fallback=True)
        roots = []
        inclexclpatterns = []
        # Close the file deterministically instead of leaking the handle.
        with open(filename, "rt") as fh:
            load_pattern_file(fh, roots, inclexclpatterns)
        matcher.add_inclexcl(inclexclpatterns)
        return [path for path in files if matcher.match(path)]

    patternfile = tmpdir.join("patterns.txt")

    with patternfile.open("wt") as fh:
        fh.write("\n".join(lines))

    assert evaluate(str(patternfile)) == (files if expected is None else expected)
@pytest.mark.parametrize("pattern, cls", [
    ("", FnmatchPattern),

    # Default style
    ("*", FnmatchPattern),
    ("/data/*", FnmatchPattern),

    # fnmatch style
    ("fm:", FnmatchPattern),
    ("fm:*", FnmatchPattern),
    ("fm:/data/*", FnmatchPattern),
    ("fm:fm:/data/*", FnmatchPattern),

    # Regular expression
    ("re:", RegexPattern),
    ("re:.*", RegexPattern),
    ("re:^/something/", RegexPattern),
    ("re:re:^/something/", RegexPattern),

    # Path prefix
    ("pp:", PathPrefixPattern),
    ("pp:/", PathPrefixPattern),
    ("pp:/data/", PathPrefixPattern),
    ("pp:pp:/data/", PathPrefixPattern),

    # Shell-pattern style
    ("sh:", ShellPattern),
    ("sh:*", ShellPattern),
    ("sh:/data/*", ShellPattern),
    ("sh:sh:/data/*", ShellPattern),
])
def test_parse_pattern(pattern, cls):
    """parse_pattern() must return an instance of the class selected by the style prefix."""
    parsed = parse_pattern(pattern)
    assert isinstance(parsed, cls)
@pytest.mark.parametrize("pattern", [
    "aa:",
    "fo:*",
    "00:",
    "x1:abc",
])
def test_parse_pattern_error(pattern):
    """parse_pattern() must raise ValueError for each of these prefixes."""
    with pytest.raises(ValueError):
        parse_pattern(pattern)
def test_pattern_matcher():
    """Exercise PatternMatcher: empty matcher, added pattern groups and the fallback value."""
    pm = PatternMatcher()

    # With nothing added, the fallback is None and so is every match result.
    assert pm.fallback is None
    for path in ["", "foo", "bar"]:
        assert pm.match(path) is None

    # add extra entries to aid in testing
    for target in ["A", "B", "Empty", "FileNotFound"]:
        pm.is_include_cmd[target] = target

    pm.add([RegexPattern("^a")], "A")
    pm.add([RegexPattern("^b"), RegexPattern("^z")], "B")
    pm.add([RegexPattern("^$")], "Empty")
    pm.fallback = "FileNotFound"

    expectations = [
        ("", "Empty"),
        ("aaa", "A"),
        ("bbb", "B"),
        ("ccc", "FileNotFound"),
        ("xyz", "FileNotFound"),
        ("z", "B"),
    ]
    for path, value in expectations:
        assert pm.match(path) == value

    # The fallback can also be given at construction time.
    assert PatternMatcher(fallback="hey!").fallback == "hey!"