mirror of https://github.com/borgbackup/borg.git
Merge pull request #2334 from ThomasWaldmann/precise-pattern
add PathFullPattern / optimization for it
This commit is contained in:
commit
f2e9e862d8
|
@ -1604,11 +1604,27 @@ class Archiver:
|
||||||
regular expression syntax is described in the `Python documentation for
|
regular expression syntax is described in the `Python documentation for
|
||||||
the re module <https://docs.python.org/3/library/re.html>`_.
|
the re module <https://docs.python.org/3/library/re.html>`_.
|
||||||
|
|
||||||
Prefix path, selector `pp:`
|
Path prefix, selector `pp:`
|
||||||
|
|
||||||
This pattern style is useful to match whole sub-directories. The pattern
|
This pattern style is useful to match whole sub-directories. The pattern
|
||||||
`pp:/data/bar` matches `/data/bar` and everything therein.
|
`pp:/data/bar` matches `/data/bar` and everything therein.
|
||||||
|
|
||||||
|
Path full-match, selector `pf:`
|
||||||
|
|
||||||
|
This pattern style is useful to match whole paths.
|
||||||
|
This is kind of a pseudo pattern as it cannot have any variable or
|
||||||
|
unspecified parts - the full, precise path must be given.
|
||||||
|
`pf:/data/foo.txt` matches `/data/foo.txt` only.
|
||||||
|
|
||||||
|
Implementation note: this is implemented via very time-efficient O(1)
|
||||||
|
hashtable lookups (this means you can have huge amounts of such patterns
|
||||||
|
without impacting performance much).
|
||||||
|
Due to that, this kind of pattern does not respect any context or order.
|
||||||
|
If you use such a pattern to include a file, it will always be included
|
||||||
|
(if the directory recursion encounters it).
|
||||||
|
Other include/exclude patterns that would normally match will be ignored.
|
||||||
|
The same logic applies when such a pattern is used to exclude a file.
|
||||||
|
|
||||||
Exclusions can be passed via the command line option `--exclude`. When used
|
Exclusions can be passed via the command line option `--exclude`. When used
|
||||||
from within a shell the patterns should be quoted to protect them from
|
from within a shell the patterns should be quoted to protect them from
|
||||||
expansion.
|
expansion.
|
||||||
|
|
|
@ -451,23 +451,42 @@ class PatternMatcher:
|
||||||
# Value to return from match function when none of the patterns match.
|
# Value to return from match function when none of the patterns match.
|
||||||
self.fallback = fallback
|
self.fallback = fallback
|
||||||
|
|
||||||
|
# optimizations
|
||||||
|
self._path_full_patterns = {} # full path -> return value
|
||||||
|
|
||||||
def empty(self):
|
def empty(self):
|
||||||
return not len(self._items)
|
return not len(self._items) and not len(self._path_full_patterns)
|
||||||
|
|
||||||
|
def _add(self, pattern, value):
|
||||||
|
if isinstance(pattern, PathFullPattern):
|
||||||
|
key = pattern.pattern # full, normalized path
|
||||||
|
self._path_full_patterns[key] = value
|
||||||
|
else:
|
||||||
|
self._items.append((pattern, value))
|
||||||
|
|
||||||
def add(self, patterns, value):
|
def add(self, patterns, value):
|
||||||
"""Add list of patterns to internal list. The given value is returned from the match function when one of the
|
"""Add list of patterns to internal list. The given value is returned from the match function when one of the
|
||||||
given patterns matches.
|
given patterns matches.
|
||||||
"""
|
"""
|
||||||
self._items.extend((i, value) for i in patterns)
|
for pattern in patterns:
|
||||||
|
self._add(pattern, value)
|
||||||
|
|
||||||
def add_inclexcl(self, patterns):
|
def add_inclexcl(self, patterns):
|
||||||
"""Add list of patterns (of type InclExclPattern) to internal list. The pattern's ptype member is returned from
|
"""Add list of patterns (of type InclExclPattern) to internal list. The pattern's ptype member is returned from
|
||||||
the match function when one of the given patterns matches.
|
the match function when one of the given patterns matches.
|
||||||
"""
|
"""
|
||||||
self._items.extend(patterns)
|
for pattern, pattern_type in patterns:
|
||||||
|
self._add(pattern, pattern_type)
|
||||||
|
|
||||||
def match(self, path):
|
def match(self, path):
|
||||||
path = normalize_path(path)
|
path = normalize_path(path)
|
||||||
|
# do a fast lookup for full path matches (note: we do not count such matches):
|
||||||
|
non_existent = object()
|
||||||
|
value = self._path_full_patterns.get(path, non_existent)
|
||||||
|
if value is not non_existent:
|
||||||
|
# we have a full path match!
|
||||||
|
return value
|
||||||
|
# this is the slow way, if we have many patterns in self._items:
|
||||||
for (pattern, value) in self._items:
|
for (pattern, value) in self._items:
|
||||||
if pattern.match(path, normalize=False):
|
if pattern.match(path, normalize=False):
|
||||||
return value
|
return value
|
||||||
|
@ -518,6 +537,17 @@ class PatternBase:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
class PathFullPattern(PatternBase):
|
||||||
|
"""Full match of a path."""
|
||||||
|
PREFIX = "pf"
|
||||||
|
|
||||||
|
def _prepare(self, pattern):
|
||||||
|
self.pattern = os.path.normpath(pattern)
|
||||||
|
|
||||||
|
def _match(self, path):
|
||||||
|
return path == self.pattern
|
||||||
|
|
||||||
|
|
||||||
# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
|
# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
|
||||||
# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
|
# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
|
||||||
# separator to the end of the path before matching.
|
# separator to the end of the path before matching.
|
||||||
|
@ -600,6 +630,7 @@ class RegexPattern(PatternBase):
|
||||||
|
|
||||||
_PATTERN_STYLES = set([
|
_PATTERN_STYLES = set([
|
||||||
FnmatchPattern,
|
FnmatchPattern,
|
||||||
|
PathFullPattern,
|
||||||
PathPrefixPattern,
|
PathPrefixPattern,
|
||||||
RegexPattern,
|
RegexPattern,
|
||||||
ShellPattern,
|
ShellPattern,
|
||||||
|
|
|
@ -25,7 +25,8 @@ from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams,
|
||||||
from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
|
from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
|
||||||
from ..helpers import load_exclude_file, load_pattern_file
|
from ..helpers import load_exclude_file, load_pattern_file
|
||||||
from ..helpers import CompressionSpec, ComprSpec, CompressionDecider1, CompressionDecider2
|
from ..helpers import CompressionSpec, ComprSpec, CompressionDecider1, CompressionDecider2
|
||||||
from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
|
from ..helpers import parse_pattern, PatternMatcher
|
||||||
|
from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
|
||||||
from ..helpers import swidth_slice
|
from ..helpers import swidth_slice
|
||||||
from ..helpers import chunkit
|
from ..helpers import chunkit
|
||||||
from ..helpers import safe_ns, safe_s
|
from ..helpers import safe_ns, safe_s
|
||||||
|
@ -254,6 +255,35 @@ def check_patterns(files, pattern, expected):
|
||||||
assert matched == (files if expected is None else expected)
|
assert matched == (files if expected is None else expected)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("pattern, expected", [
|
||||||
|
# "None" means all files, i.e. all match the given pattern
|
||||||
|
("/", []),
|
||||||
|
("/home", ["/home"]),
|
||||||
|
("/home///", ["/home"]),
|
||||||
|
("/./home", ["/home"]),
|
||||||
|
("/home/user", ["/home/user"]),
|
||||||
|
("/home/user2", ["/home/user2"]),
|
||||||
|
("/home/user/.bashrc", ["/home/user/.bashrc"]),
|
||||||
|
])
|
||||||
|
def test_patterns_full(pattern, expected):
|
||||||
|
files = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc", ]
|
||||||
|
|
||||||
|
check_patterns(files, PathFullPattern(pattern), expected)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("pattern, expected", [
|
||||||
|
# "None" means all files, i.e. all match the given pattern
|
||||||
|
("", []),
|
||||||
|
("relative", []),
|
||||||
|
("relative/path/", ["relative/path"]),
|
||||||
|
("relative/path", ["relative/path"]),
|
||||||
|
])
|
||||||
|
def test_patterns_full_relative(pattern, expected):
|
||||||
|
files = ["relative/path", "relative/path2", ]
|
||||||
|
|
||||||
|
check_patterns(files, PathFullPattern(pattern), expected)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("pattern, expected", [
|
@pytest.mark.parametrize("pattern, expected", [
|
||||||
# "None" means all files, i.e. all match the given pattern
|
# "None" means all files, i.e. all match the given pattern
|
||||||
("/", None),
|
("/", None),
|
||||||
|
|
Loading…
Reference in New Issue