Move pattern normalization decision into decorator

Using a decorator moves the duplicate code in the init methods into a single decorator function, while still retaining the same runtime overhead (zero for the non-OSX path, one extra function call plus the call to unicodedata.normalize for OSX). The pattern classes are visually much cleaner, and the duplicate code is limited to the two lines that normalize the pattern on OSX. Because the decoration happens at class definition time (vs. instance init time for the previous approach), the OSX and non-OSX test cases can no longer be run in the same test run, so I also removed the OSX test case monkey patching and uncommented the platform skipif decorator.
2015-09-09 15:00:58 -04:00 · 2015-09-09 15:00:58 -04:00 · 13ddfdf4a3
parent cc13f3db97
commit 13ddfdf4a3
2 changed files with 26 additions and 41 deletions
--- a/borg/helpers.py
+++ b/borg/helpers.py
@ -1,6 +1,7 @@
 import argparse
 import binascii
 from collections import namedtuple
+from functools import wraps
 import grp
 import os
 import pwd
@ -222,9 +223,22 @@ def exclude_path(path, patterns):
 # unify the two cases, we add a path separator to the end of
 # the path before matching.

-##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-##### For discussion only, don't merge this code!
-##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+def normalized(func):
+    """ Decorator for the Pattern match methods, returning a wrapper that
+    normalizes OSX paths to match the normalized pattern on OSX, and 
+    returning the original method on other platforms"""
+    @wraps(func)
+    def normalize_wrapper(self, path):
+        return func(self, unicodedata.normalize("NFD", path))
+
+    if sys.platform in ('darwin',):
+        # HFS+ converts paths to a canonical form, so users shouldn't be
+        # required to enter an exact match
+        return normalize_wrapper
+    else:
+        # Windows and Unix filesystems allow different forms, so users
+        # always have to enter an exact match
+        return func

 class IncludePattern:
    """Literal files or directories listed on the command line
@ -233,23 +247,15 @@ class IncludePattern:
    path match as well.  A trailing slash makes no difference.
    """
    def __init__(self, pattern):
-        def match(path):
-            return (path+os.path.sep).startswith(self.pattern)
-
-        # HFS+ converts paths to a canonical form, so users shouldn't be
-        # required to enter an exact match
        if sys.platform in ('darwin',):
-            # repository paths will be mostly in NFD, as the OSX exception list
-            # to NFD is small, so normalize to that form for best performance
            pattern = unicodedata.normalize("NFD", pattern)
-            self.match = lambda p: match(unicodedata.normalize("NFD", p))
-        # Windows and Unix filesystems allow different forms, so users
-        # always have to enter an exact match
-        else:
-            self.match = match

        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep

+    @normalized
+    def match(self, path):
+        return (path+os.path.sep).startswith(self.pattern)
+
    def __repr__(self):
        return '%s(%s)' % (type(self), self.pattern)

@ -259,30 +265,22 @@ class ExcludePattern(IncludePattern):
    exclude the contents of a directory, but not the directory itself.
    """
    def __init__(self, pattern):
-        def match(path):
-            return self.regex.match(path+os.path.sep) is not None
-
        if pattern.endswith(os.path.sep):
            self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep
        else:
            self.pattern = os.path.normpath(pattern)+os.path.sep+'*'

-        # HFS+ converts paths to a canonical form, so users shouldn't be
-        # required to enter an exact match
        if sys.platform in ('darwin',):
-            # repository paths will be mostly in NFD, as the OSX exception list
-            # to NFD is small, so normalize to that form for best performance
            self.pattern = unicodedata.normalize("NFD", self.pattern)
-            self.match = lambda p: match(unicodedata.normalize("NFD", p))
-        # Windows and Unix filesystems allow different forms, so users
-        # always have to enter an exact match
-        else:
-            self.match = match

        # fnmatch and re.match both cache compiled regular expressions.
        # Nevertheless, this is about 10 times faster.
        self.regex = re.compile(translate(self.pattern))

+    @normalized
+    def match(self, path):
+        return self.regex.match(path+os.path.sep) is not None
+
    def __repr__(self):
        return '%s(%s)' % (type(self), self.pattern)

--- a/borg/testsuite/helpers.py
+++ b/borg/testsuite/helpers.py
@ -212,21 +212,8 @@ class PatternNonAsciiTestCase(BaseTestCase):
        assert e.match(str(b"ba\x80/foo", 'latin1'))


-#@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test')
+@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test')
 class OSXPatternNormalizationTestCase(BaseTestCase):
-    # monkey patch sys.platform to allow testing on non-OSX during development
-    # remove and uncomment OSX-only decorator before push
-    def setUp(self):
-        self.oldplatform = sys.platform
-        sys.platform = 'darwin'
-        pass
-
-    # monkey patch sys.platform to allow testing on non-OSX during development
-    # remove and uncomment OSX-only decorator before push
-    def tearDown(self):
-        sys.platform = self.oldplatform
-        pass
-        
    def testComposedUnicode(self):
        pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
        i = IncludePattern(pattern)