Reduce code duplication in inclusion/exclusion pattern logic

The two classes for applying inclusion and exclusion patterns contained
unnecessarily duplicated logic. The introduction of a shared base class
allows for easier reuse, especially considering that two more classes
are going to be added in forthcoming changes (regular expressions and
shell-style patterns).
This commit is contained in:
Michael Hanselmann 2016-01-13 14:26:50 +01:00
parent 5d40eba175
commit 93c9c49250
1 changed files with 39 additions and 34 deletions

View File

@ -270,12 +270,6 @@ def exclude_path(path, patterns):
return False
# For both IncludePattern and ExcludePattern, we require that
# the pattern either match the whole path or an initial segment
# of the path up to but not including a path separator. To
# unify the two cases, we add a path separator to the end of
# the path before matching.
def normalized(func):
""" Decorator for the Pattern match methods, returning a wrapper that
normalizes OSX paths to match the normalized pattern on OSX, and
@ -294,11 +288,8 @@ def normalized(func):
return func
class IncludePattern:
"""Literal files or directories listed on the command line
for some operations (e.g. extract, but not create).
If a directory is specified, all paths that start with that
path match as well. A trailing slash makes no difference.
class PatternBase:
"""Shared logic for inclusion/exclusion patterns.
"""
def __init__(self, pattern):
self.pattern_orig = pattern
@ -307,13 +298,15 @@ class IncludePattern:
if sys.platform in ('darwin',):
pattern = unicodedata.normalize("NFD", pattern)
self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep
self._prepare(pattern)
@normalized
def match(self, path):
matches = (path+os.path.sep).startswith(self.pattern)
matches = self._match(path)
if matches:
self.match_count += 1
return matches
def __repr__(self):
@ -322,39 +315,51 @@ class IncludePattern:
def __str__(self):
return self.pattern_orig
def _prepare(self, pattern):
raise NotImplementedError
class ExcludePattern(IncludePattern):
def _match(self, path):
raise NotImplementedError
# For both IncludePattern and ExcludePattern, we require that
# the pattern either match the whole path or an initial segment
# of the path up to but not including a path separator. To
# unify the two cases, we add a path separator to the end of
# the path before matching.
class IncludePattern(PatternBase):
"""Literal files or directories listed on the command line
for some operations (e.g. extract, but not create).
If a directory is specified, all paths that start with that
path match as well. A trailing slash makes no difference.
"""
def _prepare(self, pattern):
self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep
def _match(self, path):
return (path + os.path.sep).startswith(self.pattern)
class ExcludePattern(PatternBase):
"""Shell glob patterns to exclude. A trailing slash means to
exclude the contents of a directory, but not the directory itself.
"""
def __init__(self, pattern):
self.pattern_orig = pattern
self.match_count = 0
def _prepare(self, pattern):
if pattern.endswith(os.path.sep):
self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep
pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep
else:
self.pattern = os.path.normpath(pattern)+os.path.sep+'*'
pattern = os.path.normpath(pattern) + os.path.sep+'*'
if sys.platform in ('darwin',):
self.pattern = unicodedata.normalize("NFD", self.pattern)
self.pattern = pattern
# fnmatch and re.match both cache compiled regular expressions.
# Nevertheless, this is about 10 times faster.
self.regex = re.compile(translate(self.pattern))
@normalized
def match(self, path):
matches = self.regex.match(path+os.path.sep) is not None
if matches:
self.match_count += 1
return matches
def __repr__(self):
return '%s(%s)' % (type(self), self.pattern)
def __str__(self):
return self.pattern_orig
def _match(self, path):
return (self.regex.match(path + os.path.sep) is not None)
def timestamp(s):