Merge pull request #2334 from ThomasWaldmann/precise-pattern

add PathFullPattern / optimization for it
This commit is contained in:
TW 2017-03-27 01:15:24 +02:00 committed by GitHub
commit f2e9e862d8
3 changed files with 82 additions and 5 deletions

View File

@ -1604,11 +1604,27 @@ class Archiver:
regular expression syntax is described in the `Python documentation for
the re module <https://docs.python.org/3/library/re.html>`_.
Prefix path, selector `pp:`
Path prefix, selector `pp:`
This pattern style is useful to match whole sub-directories. The pattern
`pp:/data/bar` matches `/data/bar` and everything therein.
Path full-match, selector `pf:`
This pattern style is useful to match whole paths.
This is a kind of pseudo-pattern, as it cannot have any variable or
unspecified parts - the full, precise path must be given.
`pf:/data/foo.txt` matches `/data/foo.txt` only.
Implementation note: this is implemented via very time-efficient O(1)
hashtable lookups (this means you can have huge amounts of such patterns
without impacting performance much).
Due to that, this kind of pattern does not respect any context or order.
If you use such a pattern to include a file, it will always be included
(if the directory recursion encounters it).
Other include/exclude patterns that would normally match will be ignored.
The same logic applies to exclude patterns.
Exclusions can be passed via the command line option `--exclude`. When used
from within a shell the patterns should be quoted to protect them from
expansion.

View File

@ -451,23 +451,42 @@ class PatternMatcher:
# Value to return from match function when none of the patterns match.
self.fallback = fallback
# optimizations
self._path_full_patterns = {} # full path -> return value
def empty(self):
    """Return True when no pattern of any kind has been registered.

    Patterns are kept in two containers: the ordered list ``self._items``
    and the full-path lookup dict ``self._path_full_patterns``. The matcher
    is only empty when both of them are empty.
    """
    # Full-path patterns never enter self._items, so checking the list alone
    # would wrongly report "empty" for a matcher holding only such patterns.
    return not self._items and not self._path_full_patterns
def _add(self, pattern, value):
    """Register one pattern together with the value to return when it matches.

    A PathFullPattern is stored in the ``_path_full_patterns`` dict, keyed by
    its ``pattern`` attribute; any other pattern is appended to ``_items`` as
    a ``(pattern, value)`` tuple.
    """
    if not isinstance(pattern, PathFullPattern):
        self._items.append((pattern, value))
        return
    # The key is the pattern's full, normalized path. Adding the same path
    # again overwrites the previously stored value.
    self._path_full_patterns[pattern.pattern] = value
def add(self, patterns, value):
    """Add list of patterns to the matcher.

    The given value is returned from the match function when one of the
    given patterns matches.

    :param patterns: iterable of pattern objects
    :param value: value associated with every pattern in ``patterns``
    """
    # Each pattern goes through _add exactly once, and _add alone chooses the
    # container. Extending self._items here as well would register every
    # pattern twice.
    for pattern in patterns:
        self._add(pattern, value)
def add_inclexcl(self, patterns):
    """Add list of patterns (of type InclExclPattern) to the matcher.

    Each element is unpacked into ``(pattern, ptype)``; the ptype member is
    returned from the match function when that pattern matches.

    :param patterns: iterable of two-element (pattern, ptype) items
    """
    # Each pattern goes through _add exactly once, and _add alone chooses the
    # container. Extending self._items here as well would register every
    # pattern twice.
    for pattern, pattern_type in patterns:
        self._add(pattern, pattern_type)
def match(self, path):
path = normalize_path(path)
# do a fast lookup for full path matches (note: we do not count such matches):
non_existent = object()
value = self._path_full_patterns.get(path, non_existent)
if value is not non_existent:
# we have a full path match!
return value
# this is the slow way, if we have many patterns in self._items:
for (pattern, value) in self._items:
if pattern.match(path, normalize=False):
return value
@ -518,6 +537,17 @@ class PatternBase:
raise NotImplementedError
class PathFullPattern(PatternBase):
    """Pattern that matches one exact path (selector prefix ``pf``).

    There are no wildcards or variable parts: a path matches only when it is
    equal to the stored pattern.
    """

    PREFIX = "pf"

    def _prepare(self, pattern):
        # Store the os.path.normpath form of the pattern, so redundant
        # separators and "." segments are collapsed before comparing.
        normalized = os.path.normpath(pattern)
        self.pattern = normalized

    def _match(self, path):
        # Plain string equality against the normalized pattern.
        is_same_path = path == self.pattern
        return is_same_path
# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
# separator to the end of the path before matching.
@ -600,6 +630,7 @@ class RegexPattern(PatternBase):
_PATTERN_STYLES = set([
FnmatchPattern,
PathFullPattern,
PathPrefixPattern,
RegexPattern,
ShellPattern,

View File

@ -25,7 +25,8 @@ from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams,
from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
from ..helpers import load_exclude_file, load_pattern_file
from ..helpers import CompressionSpec, ComprSpec, CompressionDecider1, CompressionDecider2
from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
from ..helpers import parse_pattern, PatternMatcher
from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
from ..helpers import swidth_slice
from ..helpers import chunkit
from ..helpers import safe_ns, safe_s
@ -254,6 +255,35 @@ def check_patterns(files, pattern, expected):
assert matched == (files if expected is None else expected)
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("/", []),
    ("/home", ["/home"]),
    ("/home///", ["/home"]),
    ("/./home", ["/home"]),
    ("/home/user", ["/home/user"]),
    ("/home/user2", ["/home/user2"]),
    ("/home/user/.bashrc", ["/home/user/.bashrc"]),
])
def test_patterns_full(pattern, expected):
    """Check PathFullPattern against a fixed set of absolute paths.

    Each case gives a pattern and the sub-list of paths expected to match.
    """
    candidate_paths = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc"]
    full_pattern = PathFullPattern(pattern)
    check_patterns(candidate_paths, full_pattern, expected)
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("", []),
    ("relative", []),
    ("relative/path/", ["relative/path"]),
    ("relative/path", ["relative/path"]),
])
def test_patterns_full_relative(pattern, expected):
    """Check PathFullPattern against a fixed set of relative paths.

    Each case gives a pattern and the sub-list of paths expected to match.
    """
    candidate_paths = ["relative/path", "relative/path2"]
    full_pattern = PathFullPattern(pattern)
    check_patterns(candidate_paths, full_pattern, expected)
@pytest.mark.parametrize("pattern, expected", [
# "None" means all files, i.e. all match the given pattern
("/", None),