mirror of https://github.com/borgbackup/borg.git
optimize PathFullPattern matching for O(1) time
For a `borg create` run using a patterns file with 15,000 PathFullPattern excludes that excluded almost all files in the input data set, the run time was ~60s before this optimization and ~1s after it.
This commit is contained in:
parent
ebd928795e
commit
93feb75411
|
@ -451,23 +451,42 @@ class PatternMatcher:
|
|||
# Value to return from match function when none of the patterns match.
|
||||
self.fallback = fallback
|
||||
|
||||
# optimizations
|
||||
self._path_full_patterns = {} # full path -> return value
|
||||
|
||||
def empty(self):
    """Return True when no pattern has been registered at all.

    Both stores are consulted: the ordered ``_items`` list and the
    ``_path_full_patterns`` dict that holds the full-path patterns.
    Checking only ``_items`` would report a matcher that contains nothing
    but full-path patterns as empty.
    """
    return not self._items and not self._path_full_patterns
|
||||
|
||||
def _add(self, pattern, value):
    """Store one pattern together with the value to return when it matches.

    ``PathFullPattern`` instances are kept in the ``_path_full_patterns``
    dict, keyed by their ``pattern`` attribute (the full, normalized path).
    Every other pattern is appended to the ordered ``_items`` list as a
    ``(pattern, value)`` tuple.
    """
    if not isinstance(pattern, PathFullPattern):
        self._items.append((pattern, value))
        return
    # A later registration for the same full path overwrites the earlier one.
    self._path_full_patterns[pattern.pattern] = value
|
||||
|
||||
def add(self, patterns, value):
    """Register every pattern in *patterns* with the same result value.

    The given value is returned from the match function when one of the
    given patterns matches.

    Each pattern is handed to ``_add`` exactly once, which decides where it
    is stored. Nothing is appended to ``_items`` directly here, so patterns
    are not registered twice and *patterns* is iterated only a single time.
    """
    for pattern in patterns:
        self._add(pattern, value)
|
||||
|
||||
def add_inclexcl(self, patterns):
    """Register include/exclude patterns, each carrying its own result value.

    Every element of *patterns* must unpack into ``(pattern, pattern_type)``;
    the ``pattern_type`` part is what the match function returns when that
    pattern matches. (Presumably these are InclExclPattern items, as in the
    original docstring - confirm against the caller.)

    Each pair is handed to ``_add`` exactly once, which decides where it is
    stored. Nothing is appended to ``_items`` directly here, so patterns are
    not registered twice.
    """
    for pattern, pattern_type in patterns:
        self._add(pattern, pattern_type)
|
||||
|
||||
def match(self, path):
|
||||
path = normalize_path(path)
|
||||
# do a fast lookup for full path matches (note: we do not count such matches):
|
||||
non_existent = object()
|
||||
value = self._path_full_patterns.get(path, non_existent)
|
||||
if value is not non_existent:
|
||||
# we have a full path match!
|
||||
return value
|
||||
# this is the slow way, if we have many patterns in self._items:
|
||||
for (pattern, value) in self._items:
|
||||
if pattern.match(path, normalize=False):
|
||||
return value
|
||||
|
|
Loading…
Reference in New Issue