optimize PathFullPattern matching for O(1) time

For a borg create run using a patterns file with 15,000 PathFullPattern excludes
that excluded almost all files in the input data set:
- before this optimization: ~60s
- after this optimization: ~1s
This commit is contained in:
Thomas Waldmann 2017-03-24 06:06:02 +01:00
parent ebd928795e
commit 93feb75411
1 changed file with 22 additions and 3 deletions

View File

@ -451,23 +451,42 @@ class PatternMatcher:
# Value to return from match function when none of the patterns match. # Value to return from match function when none of the patterns match.
self.fallback = fallback self.fallback = fallback
# optimizations
self._path_full_patterns = {} # full path -> return value
def empty(self): def empty(self):
return not len(self._items) return not len(self._items) and not len(self._path_full_patterns)
def _add(self, pattern, value):
    """Store a single pattern together with the value to return when it matches.

    PathFullPattern instances go into the ``_path_full_patterns`` dict, keyed
    by ``pattern.pattern``. Every other pattern is appended to ``_items`` as a
    ``(pattern, value)`` tuple.
    """
    if not isinstance(pattern, PathFullPattern):
        self._items.append((pattern, value))
        return
    # pattern.pattern holds the full, normalized path; it serves as the dict key.
    # NOTE(review): adding the same path a second time replaces the stored value.
    full_path = pattern.pattern
    self._path_full_patterns[full_path] = value
def add(self, patterns, value): def add(self, patterns, value):
"""Add list of patterns to internal list. The given value is returned from the match function when one of the """Add list of patterns to internal list. The given value is returned from the match function when one of the
given patterns matches. given patterns matches.
""" """
self._items.extend((i, value) for i in patterns) for pattern in patterns:
self._add(pattern, value)
def add_inclexcl(self, patterns): def add_inclexcl(self, patterns):
"""Add list of patterns (of type InclExclPattern) to internal list. The patterns ptype member is returned from """Add list of patterns (of type InclExclPattern) to internal list. The patterns ptype member is returned from
the match function when one of the given patterns matches. the match function when one of the given patterns matches.
""" """
self._items.extend(patterns) for pattern, pattern_type in patterns:
self._add(pattern, pattern_type)
def match(self, path): def match(self, path):
path = normalize_path(path) path = normalize_path(path)
# do a fast lookup for full path matches (note: we do not count such matches):
non_existent = object()
value = self._path_full_patterns.get(path, non_existent)
if value is not non_existent:
# we have a full path match!
return value
# this is the slow way, if we have many patterns in self._items:
for (pattern, value) in self._items: for (pattern, value) in self._items:
if pattern.match(path, normalize=False): if pattern.match(path, normalize=False):
return value return value