From 2c7ab8595dc69b092d6922ce5a54c8adccdaab53 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Fri, 15 Jan 2016 16:52:55 +0100 Subject: [PATCH 1/2] Refactor Unicode pattern tests The unit tests for Unicode in path patterns contained a lot of unnecessary duplication. One set of duplication was for Mac OS X (also known as Darwin) as it normalizes Unicode in paths to NFD. Then each test case was repeated for every type of pattern. With this change the tests become parametrized using py.test. The duplicated code has been removed. --- borg/testsuite/helpers.py | 95 +++++++++------------------------------ 1 file changed, 20 insertions(+), 75 deletions(-) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 24ea572e4..b919cda36 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -233,88 +233,33 @@ def test_regex_pattern(): assert not ExcludeRegex(r"^\\$").match("/") -@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test') -class PatternNonAsciiTestCase(BaseTestCase): - def testComposedUnicode(self): - pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' - i = IncludePattern(pattern) - e = ExcludePattern(pattern) - er = ExcludeRegex("^{}/foo$".format(pattern)) - - assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert not er.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - def testDecomposedUnicode(self): - pattern = 'ba\N{COMBINING ACUTE ACCENT}' - i = IncludePattern(pattern) - e = ExcludePattern(pattern) - er = ExcludeRegex("^{}/foo$".format(pattern)) - - assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert not er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert er.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - def testInvalidUnicode(self): - pattern = str(b'ba\x80', 'latin1') - i = IncludePattern(pattern) - e = ExcludePattern(pattern) - er = ExcludeRegex("^{}/foo$".format(pattern)) - - assert not i.match("ba/foo") - assert i.match(str(b"ba\x80/foo", 'latin1')) - assert not e.match("ba/foo") - assert e.match(str(b"ba\x80/foo", 'latin1')) - assert not er.match("ba/foo") - assert er.match(str(b"ba\x80/foo", 'latin1')) +def use_normalized_unicode(): + return sys.platform in ("darwin",) -@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') -class OSXPatternNormalizationTestCase(BaseTestCase): - def testComposedUnicode(self): - pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' - i = IncludePattern(pattern) - e = ExcludePattern(pattern) - er = ExcludeRegex("^{}/foo$".format(pattern)) +def _make_test_patterns(pattern): + return [IncludePattern(pattern), + ExcludePattern(pattern), + ExcludeRegex("^{}/foo$".format(pattern)), + ] - assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert er.match("ba\N{COMBINING ACUTE ACCENT}/foo") - def testDecomposedUnicode(self): - pattern = 'ba\N{COMBINING ACUTE ACCENT}' - i = IncludePattern(pattern) - e = ExcludePattern(pattern) - er = ExcludeRegex("^{}/foo$".format(pattern)) +@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}")) +def test_composed_unicode_pattern(pattern): + assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") == use_normalized_unicode() - assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert er.match("ba\N{COMBINING ACUTE ACCENT}/foo") - def testInvalidUnicode(self): - pattern = str(b'ba\x80', 'latin1') - i = IncludePattern(pattern) - e = ExcludePattern(pattern) - er = ExcludeRegex("^{}/foo$".format(pattern)) +@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}")) +def test_decomposed_unicode_pattern(pattern): + assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") == use_normalized_unicode() + assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") - assert not i.match("ba/foo") - assert i.match(str(b"ba\x80/foo", 'latin1')) - assert not e.match("ba/foo") - assert e.match(str(b"ba\x80/foo", 'latin1')) - assert not er.match("ba/foo") - assert er.match(str(b"ba\x80/foo", 'latin1')) + +@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1"))) +def test_invalid_unicode_pattern(pattern): + assert not pattern.match("ba/foo") + assert pattern.match(str(b"ba\x80/foo", "latin1")) @pytest.mark.parametrize("lines, expected", [ From 3a39ddbd83a77b6ed4b3c80b3deb546f2f8efb67 Mon Sep 17 00:00:00 2001 From: Michael Hanselmann Date: Fri, 15 Jan 2016 15:41:02 +0100 Subject: [PATCH 2/2] Rename pattern classes for consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The class names “IncludePattern” and “ExcludePattern” may have been appropriate when they were the only styles. With the recent addition of regular expression support and with at least one more style being added in forthcoming changes these classes should be renamed to be more descriptive. “ExcludeRegex” is also renamed to match the new names. --- borg/archiver.py | 4 ++-- borg/helpers.py | 16 +++++++-------- borg/testsuite/helpers.py | 42 +++++++++++++++++++-------------------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 7ba11a039..588fe3954 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -17,7 +17,7 @@ import traceback from . import __version__ from .helpers import Error, location_validator, format_time, format_file_size, \ - format_file_mode, parse_pattern, IncludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ + format_file_mode, parse_pattern, PathPrefixPattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, prune_within, prune_split, unhexlify, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \ @@ -314,7 +314,7 @@ class Archiver: while dirs: archive.extract_item(dirs.pop(-1)) for pattern in (patterns or []): - if isinstance(pattern, IncludePattern) and pattern.match_count == 0: + if isinstance(pattern, PathPrefixPattern) and pattern.match_count == 0: self.print_warning("Include pattern '%s' never matched.", pattern) return self.exit_code diff --git a/borg/helpers.py b/borg/helpers.py index 4647d78d7..ce344e3f5 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -259,7 +259,7 @@ def update_excludes(args): def adjust_patterns(paths, excludes): if paths: - return (excludes or []) + [IncludePattern(path) for path in paths] + [ExcludePattern('*')] + return (excludes or []) + [PathPrefixPattern(path) for path in paths] + [FnmatchPattern('*')] else: return excludes @@ -270,7 +270,7 @@ def exclude_path(path, patterns): """ for pattern in (patterns or []): if pattern.match(path): - return isinstance(pattern, (ExcludePattern, ExcludeRegex)) + return isinstance(pattern, (FnmatchPattern, RegexPattern)) return False @@ -326,14 +326,14 @@ class PatternBase: raise NotImplementedError -# For both IncludePattern and ExcludePattern, we require that +# For both PathPrefixPattern and FnmatchPattern, we require that # the pattern either match the whole path or an initial segment # of the path up to but not including a path separator. To # unify the two cases, we add a path separator to the end of # the path before matching. -class IncludePattern(PatternBase): +class PathPrefixPattern(PatternBase): """Literal files or directories listed on the command line for some operations (e.g. extract, but not create). If a directory is specified, all paths that start with that @@ -346,7 +346,7 @@ class IncludePattern(PatternBase): return (path + os.path.sep).startswith(self.pattern) -class ExcludePattern(PatternBase): +class FnmatchPattern(PatternBase): """Shell glob patterns to exclude. A trailing slash means to exclude the contents of a directory, but not the directory itself. """ @@ -366,7 +366,7 @@ class ExcludePattern(PatternBase): return (self.regex.match(path + os.path.sep) is not None) -class ExcludeRegex(PatternBase): +class RegexPattern(PatternBase): """Regular expression to exclude. """ def _prepare(self, pattern): @@ -383,8 +383,8 @@ class ExcludeRegex(PatternBase): _DEFAULT_PATTERN_STYLE = "fm" _PATTERN_STYLES = { - "fm": ExcludePattern, - "re": ExcludeRegex, + "fm": FnmatchPattern, + "re": RegexPattern, } diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index b919cda36..3da955d7f 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -9,8 +9,8 @@ import sys import msgpack import msgpack.fallback -from ..helpers import adjust_patterns, exclude_path, Location, format_file_size, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \ - prune_within, prune_split, get_cache_dir, Statistics, is_slow_msgpack, yes, ExcludeRegex, \ +from ..helpers import adjust_patterns, exclude_path, Location, format_file_size, format_timedelta, PathPrefixPattern, FnmatchPattern, make_path_safe, \ + prune_within, prune_split, get_cache_dir, Statistics, is_slow_msgpack, yes, RegexPattern, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \ ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern from . import BaseTestCase, environment_variable, FakeInputs @@ -193,7 +193,7 @@ def test_patterns(paths, excludes, expected): '/var/log/messages', '/var/log/dmesg', ] - check_patterns(files, paths, [ExcludePattern(p) for p in excludes], expected) + check_patterns(files, paths, [FnmatchPattern(p) for p in excludes], expected) @pytest.mark.parametrize("paths, excludes, expected", [ @@ -218,7 +218,7 @@ def test_patterns_regex(paths, excludes, expected): patterns = [] for i in excludes: - pat = ExcludeRegex(i) + pat = RegexPattern(i) assert str(pat) == i assert pat.pattern == i patterns.append(pat) @@ -228,9 +228,9 @@ def test_patterns_regex(paths, excludes, expected): def test_regex_pattern(): # The forward slash must match the platform-specific path separator - assert ExcludeRegex("^/$").match("/") - assert ExcludeRegex("^/$").match(os.path.sep) - assert not ExcludeRegex(r"^\\$").match("/") + assert RegexPattern("^/$").match("/") + assert RegexPattern("^/$").match(os.path.sep) + assert not RegexPattern(r"^\\$").match("/") def use_normalized_unicode(): @@ -238,9 +238,9 @@ def use_normalized_unicode(): def _make_test_patterns(pattern): - return [IncludePattern(pattern), - ExcludePattern(pattern), - ExcludeRegex("^{}/foo$".format(pattern)), + return [PathPrefixPattern(pattern), + FnmatchPattern(pattern), + RegexPattern("^{}/foo$".format(pattern)), ] @@ -311,23 +311,23 @@ def test_patterns_from_file(tmpdir, lines, expected): @pytest.mark.parametrize("pattern, cls", [ - ("", ExcludePattern), + ("", FnmatchPattern), # Default style - ("*", ExcludePattern), - ("/data/*", ExcludePattern), + ("*", FnmatchPattern), + ("/data/*", FnmatchPattern), # fnmatch style - ("fm:", ExcludePattern), - ("fm:*", ExcludePattern), - ("fm:/data/*", ExcludePattern), - ("fm:fm:/data/*", ExcludePattern), + ("fm:", FnmatchPattern), + ("fm:*", FnmatchPattern), + ("fm:/data/*", FnmatchPattern), + ("fm:fm:/data/*", FnmatchPattern), # Regular expression - ("re:", ExcludeRegex), - ("re:.*", ExcludeRegex), - ("re:^/something/", ExcludeRegex), - ("re:re:^/something/", ExcludeRegex), + ("re:", RegexPattern), + ("re:.*", RegexPattern), + ("re:^/something/", RegexPattern), + ("re:re:^/something/", RegexPattern), ]) def test_parse_pattern(pattern, cls): assert isinstance(parse_pattern(pattern), cls)