mirror of https://github.com/borgbackup/borg.git
Merge pull request #2334 from ThomasWaldmann/precise-pattern
add PathFullPattern / optimization for it
This commit is contained in:
commit
f2e9e862d8
|
@ -1604,11 +1604,27 @@ class Archiver:
|
|||
regular expression syntax is described in the `Python documentation for
|
||||
the re module <https://docs.python.org/3/library/re.html>`_.
|
||||
|
||||
Prefix path, selector `pp:`
|
||||
Path prefix, selector `pp:`
|
||||
|
||||
This pattern style is useful to match whole sub-directories. The pattern
|
||||
`pp:/data/bar` matches `/data/bar` and everything therein.
|
||||
|
||||
Path full-match, selector `pf:`
|
||||
|
||||
This pattern style is useful to match whole paths.
|
||||
This is kind of a pseudo pattern as it cannot have any variable or
|
||||
unspecified parts - the full, precise path must be given.
|
||||
`pf:/data/foo.txt` matches `/data/foo.txt` only.
|
||||
|
||||
Implementation note: this is implemented via very time-efficient O(1)
|
||||
hashtable lookups (this means you can have huge amounts of such patterns
|
||||
without impacting performance much).
|
||||
Due to that, this kind of pattern does not respect any context or order.
|
||||
If you use such a pattern to include a file, it will always be included
|
||||
(if the directory recursion encounters it).
|
||||
Other include/exclude patterns that would normally match will be ignored.
|
||||
The same logic applies to exclude patterns.
|
||||
|
||||
Exclusions can be passed via the command line option `--exclude`. When used
|
||||
from within a shell the patterns should be quoted to protect them from
|
||||
expansion.
|
||||
|
|
|
@ -451,23 +451,42 @@ class PatternMatcher:
|
|||
# Value to return from match function when none of the patterns match.
|
||||
self.fallback = fallback
|
||||
|
||||
# optimizations
|
||||
self._path_full_patterns = {} # full path -> return value
|
||||
|
||||
def empty(self):
|
||||
return not len(self._items)
|
||||
return not len(self._items) and not len(self._path_full_patterns)
|
||||
|
||||
def _add(self, pattern, value):
|
||||
if isinstance(pattern, PathFullPattern):
|
||||
key = pattern.pattern # full, normalized path
|
||||
self._path_full_patterns[key] = value
|
||||
else:
|
||||
self._items.append((pattern, value))
|
||||
|
||||
def add(self, patterns, value):
|
||||
"""Add list of patterns to internal list. The given value is returned from the match function when one of the
|
||||
given patterns matches.
|
||||
"""
|
||||
self._items.extend((i, value) for i in patterns)
|
||||
for pattern in patterns:
|
||||
self._add(pattern, value)
|
||||
|
||||
def add_inclexcl(self, patterns):
|
||||
"""Add list of patterns (of type InclExclPattern) to internal list. The patterns ptype member is returned from
|
||||
the match function when one of the given patterns matches.
|
||||
"""
|
||||
self._items.extend(patterns)
|
||||
for pattern, pattern_type in patterns:
|
||||
self._add(pattern, pattern_type)
|
||||
|
||||
def match(self, path):
|
||||
path = normalize_path(path)
|
||||
# do a fast lookup for full path matches (note: we do not count such matches):
|
||||
non_existent = object()
|
||||
value = self._path_full_patterns.get(path, non_existent)
|
||||
if value is not non_existent:
|
||||
# we have a full path match!
|
||||
return value
|
||||
# this is the slow way, if we have many patterns in self._items:
|
||||
for (pattern, value) in self._items:
|
||||
if pattern.match(path, normalize=False):
|
||||
return value
|
||||
|
@ -518,6 +537,17 @@ class PatternBase:
|
|||
raise NotImplementedError
|
||||
|
||||
|
||||
class PathFullPattern(PatternBase):
|
||||
"""Full match of a path."""
|
||||
PREFIX = "pf"
|
||||
|
||||
def _prepare(self, pattern):
|
||||
self.pattern = os.path.normpath(pattern)
|
||||
|
||||
def _match(self, path):
|
||||
return path == self.pattern
|
||||
|
||||
|
||||
# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
|
||||
# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
|
||||
# separator to the end of the path before matching.
|
||||
|
@ -600,6 +630,7 @@ class RegexPattern(PatternBase):
|
|||
|
||||
_PATTERN_STYLES = set([
|
||||
FnmatchPattern,
|
||||
PathFullPattern,
|
||||
PathPrefixPattern,
|
||||
RegexPattern,
|
||||
ShellPattern,
|
||||
|
|
|
@ -25,7 +25,8 @@ from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams,
|
|||
from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
|
||||
from ..helpers import load_exclude_file, load_pattern_file
|
||||
from ..helpers import CompressionSpec, ComprSpec, CompressionDecider1, CompressionDecider2
|
||||
from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
|
||||
from ..helpers import parse_pattern, PatternMatcher
|
||||
from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
|
||||
from ..helpers import swidth_slice
|
||||
from ..helpers import chunkit
|
||||
from ..helpers import safe_ns, safe_s
|
||||
|
@ -254,6 +255,35 @@ def check_patterns(files, pattern, expected):
|
|||
assert matched == (files if expected is None else expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
|
||||
# "None" means all files, i.e. all match the given pattern
|
||||
("/", []),
|
||||
("/home", ["/home"]),
|
||||
("/home///", ["/home"]),
|
||||
("/./home", ["/home"]),
|
||||
("/home/user", ["/home/user"]),
|
||||
("/home/user2", ["/home/user2"]),
|
||||
("/home/user/.bashrc", ["/home/user/.bashrc"]),
|
||||
])
|
||||
def test_patterns_full(pattern, expected):
|
||||
files = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc", ]
|
||||
|
||||
check_patterns(files, PathFullPattern(pattern), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
|
||||
# "None" means all files, i.e. all match the given pattern
|
||||
("", []),
|
||||
("relative", []),
|
||||
("relative/path/", ["relative/path"]),
|
||||
("relative/path", ["relative/path"]),
|
||||
])
|
||||
def test_patterns_full_relative(pattern, expected):
|
||||
files = ["relative/path", "relative/path2", ]
|
||||
|
||||
check_patterns(files, PathFullPattern(pattern), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
|
||||
# "None" means all files, i.e. all match the given pattern
|
||||
("/", None),
|
||||
|
|
Loading…
Reference in New Issue