optimize PathFullPattern matching for O(1) time

For a borg create run using a patterns file with 15.000 PathFullPattern excludes
that excluded almost all files in the input data set:
- before this optimization: ~60s
- after this optimization: ~1s
This commit is contained in:
Thomas Waldmann 2017-03-24 06:06:02 +01:00
parent ebd928795e
commit 93feb75411
1 changed files with 22 additions and 3 deletions

View File

@ -451,23 +451,42 @@ class PatternMatcher:
# Value to return from match function when none of the patterns match.
self.fallback = fallback
# optimizations
self._path_full_patterns = {} # full path -> return value
def empty(self):
return not len(self._items)
return not len(self._items) and not len(self._path_full_patterns)
def _add(self, pattern, value):
if isinstance(pattern, PathFullPattern):
key = pattern.pattern # full, normalized path
self._path_full_patterns[key] = value
else:
self._items.append((pattern, value))
def add(self, patterns, value):
"""Add list of patterns to internal list. The given value is returned from the match function when one of the
given patterns matches.
"""
self._items.extend((i, value) for i in patterns)
for pattern in patterns:
self._add(pattern, value)
def add_inclexcl(self, patterns):
"""Add list of patterns (of type InclExclPattern) to internal list. The patterns ptype member is returned from
the match function when one of the given patterns matches.
"""
self._items.extend(patterns)
for pattern, pattern_type in patterns:
self._add(pattern, pattern_type)
def match(self, path):
path = normalize_path(path)
# do a fast lookup for full path matches (note: we do not count such matches):
non_existent = object()
value = self._path_full_patterns.get(path, non_existent)
if value is not non_existent:
# we have a full path match!
return value
# this is the slow way, if we have many patterns in self._items:
for (pattern, value) in self._items:
if pattern.match(path, normalize=False):
return value