Reduce code duplication in inclusion/exclusion pattern logic

The two classes for applying inclusion and exclusion patterns contained unnecessarily duplicated logic. The introduction of a shared base class allows for easier reuse, especially considering that two more classes are going to be added in forthcoming changes (regular expressions and shell-style patterns).
2025-03-03 18:27:01 +00:00 · 2016-01-13 14:26:50 +01:00 · 2016-01-13 14:26:50 +01:00 · 93c9c49250
commit 93c9c49250
parent 5d40eba175
1 changed files with 39 additions and 34 deletions
--- a/borg/helpers.py
+++ b/borg/helpers.py
@ -270,12 +270,6 @@ def exclude_path(path, patterns):
    return False


-# For both IncludePattern and ExcludePattern, we require that
-# the pattern either match the whole path or an initial segment
-# of the path up to but not including a path separator.  To
-# unify the two cases, we add a path separator to the end of
-# the path before matching.
-
 def normalized(func):
    """ Decorator for the Pattern match methods, returning a wrapper that
    normalizes OSX paths to match the normalized pattern on OSX, and
@ -294,11 +288,8 @@ def normalize_wrapper(self, path):
        return func


-class IncludePattern:
-    """Literal files or directories listed on the command line
-    for some operations (e.g. extract, but not create).
-    If a directory is specified, all paths that start with that
-    path match as well.  A trailing slash makes no difference.
+class PatternBase:
+    """Shared logic for inclusion/exclusion patterns.
    """
    def __init__(self, pattern):
        self.pattern_orig = pattern
@ -307,13 +298,15 @@ def __init__(self, pattern):
        if sys.platform in ('darwin',):
            pattern = unicodedata.normalize("NFD", pattern)

-        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep
+        self._prepare(pattern)

    @normalized
    def match(self, path):
-        matches = (path+os.path.sep).startswith(self.pattern)
+        matches = self._match(path)
+
        if matches:
            self.match_count += 1
+
        return matches

    def __repr__(self):
@ -322,39 +315,51 @@ def __repr__(self):
    def __str__(self):
        return self.pattern_orig

+    def _prepare(self, pattern):
+        raise NotImplementedError

-class ExcludePattern(IncludePattern):
+    def _match(self, path):
+        raise NotImplementedError
+
+
+# For both IncludePattern and ExcludePattern, we require that
+# the pattern either match the whole path or an initial segment
+# of the path up to but not including a path separator.  To
+# unify the two cases, we add a path separator to the end of
+# the path before matching.
+
+
+class IncludePattern(PatternBase):
+    """Literal files or directories listed on the command line
+    for some operations (e.g. extract, but not create).
+    If a directory is specified, all paths that start with that
+    path match as well.  A trailing slash makes no difference.
+    """
+    def _prepare(self, pattern):
+        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep
+
+    def _match(self, path):
+        return (path + os.path.sep).startswith(self.pattern)
+
+
+class ExcludePattern(PatternBase):
    """Shell glob patterns to exclude.  A trailing slash means to
    exclude the contents of a directory, but not the directory itself.
    """
-    def __init__(self, pattern):
-        self.pattern_orig = pattern
-        self.match_count = 0
-
+    def _prepare(self, pattern):
        if pattern.endswith(os.path.sep):
-            self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep
+            pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep
        else:
-            self.pattern = os.path.normpath(pattern)+os.path.sep+'*'
+            pattern = os.path.normpath(pattern) + os.path.sep+'*'

-        if sys.platform in ('darwin',):
-            self.pattern = unicodedata.normalize("NFD", self.pattern)
+        self.pattern = pattern

        # fnmatch and re.match both cache compiled regular expressions.
        # Nevertheless, this is about 10 times faster.
        self.regex = re.compile(translate(self.pattern))

-    @normalized
-    def match(self, path):
-        matches = self.regex.match(path+os.path.sep) is not None
-        if matches:
-            self.match_count += 1
-        return matches
-
-    def __repr__(self):
-        return '%s(%s)' % (type(self), self.pattern)
-
-    def __str__(self):
-        return self.pattern_orig
+    def _match(self, path):
+        return (self.regex.match(path + os.path.sep) is not None)


 def timestamp(s):