mirror of https://github.com/borgbackup/borg.git
Merge pull request #193 from edgewood/osxPathNormalization
Normalize paths before pattern matching on OS X
This commit is contained in:
commit
638204fd0e
|
@ -1,12 +1,15 @@
|
|||
import argparse
|
||||
import binascii
|
||||
from collections import namedtuple
|
||||
from functools import wraps
|
||||
import grp
|
||||
import os
|
||||
import pwd
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import unicodedata
|
||||
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from fnmatch import translate
|
||||
from operator import attrgetter
|
||||
|
@ -220,6 +223,23 @@ def exclude_path(path, patterns):
|
|||
# unify the two cases, we add a path separator to the end of
|
||||
# the path before matching.
|
||||
|
||||
def normalized(func):
    """Decorate a Pattern ``match`` method with OS X path normalization.

    On OS X (``sys.platform`` is ``'darwin'``) a wrapper is returned that
    converts the incoming path to Unicode NFD form before handing it to
    *func*, so the path is compared in the same form as the pattern.
    On every other platform *func* itself is returned, unwrapped.
    """
    if sys.platform not in ('darwin',):
        # Windows and Unix filesystems allow different forms, so users
        # always have to enter an exact match
        return func

    # HFS+ converts paths to a canonical form, so users shouldn't be
    # required to enter an exact match
    @wraps(func)
    def normalize_wrapper(self, path):
        decomposed = unicodedata.normalize("NFD", path)
        return func(self, decomposed)

    return normalize_wrapper
|
||||
|
||||
class IncludePattern:
|
||||
"""Literal files or directories listed on the command line
|
||||
for some operations (e.g. extract, but not create).
|
||||
|
@ -227,8 +247,12 @@ class IncludePattern:
|
|||
path match as well. A trailing slash makes no difference.
|
||||
"""
|
||||
def __init__(self, pattern):
|
||||
if sys.platform in ('darwin',):
|
||||
pattern = unicodedata.normalize("NFD", pattern)
|
||||
|
||||
self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep
|
||||
|
||||
@normalized
def match(self, path):
    """Return True if *path*, with a separator appended, starts with
    ``self.pattern``."""
    # The separator is appended first so the comparison is made against a
    # separator-terminated path rather than the bare path.
    terminated = path + os.path.sep
    return terminated.startswith(self.pattern)
|
||||
|
||||
|
@ -245,10 +269,15 @@ class ExcludePattern(IncludePattern):
|
|||
self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep
|
||||
else:
|
||||
self.pattern = os.path.normpath(pattern)+os.path.sep+'*'
|
||||
|
||||
if sys.platform in ('darwin',):
|
||||
self.pattern = unicodedata.normalize("NFD", self.pattern)
|
||||
|
||||
# fnmatch and re.match both cache compiled regular expressions.
|
||||
# Nevertheless, this is about 10 times faster.
|
||||
self.regex = re.compile(translate(self.pattern))
|
||||
|
||||
@normalized
def match(self, path):
    """Return True if ``self.regex`` matches *path* with a separator
    appended, False otherwise."""
    terminated = path + os.path.sep
    found = self.regex.match(terminated)
    return found is not None
|
||||
|
||||
|
|
|
@ -3,9 +3,10 @@ from time import mktime, strptime
|
|||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
import pytest
|
||||
import sys
|
||||
import msgpack
|
||||
|
||||
from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \
|
||||
from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \
|
||||
prune_within, prune_split, \
|
||||
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams
|
||||
from . import BaseTestCase
|
||||
|
@ -178,6 +179,72 @@ class PatternTestCase(BaseTestCase):
|
|||
['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test')
class PatternNonAsciiTestCase(BaseTestCase):
    """Non-ASCII pattern matching on every platform except OS X.

    These tests expect composed and decomposed spellings of the same
    character to be treated as different paths.
    """

    def testComposedUnicode(self):
        composed = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
        decomposed = 'ba\N{COMBINING ACUTE ACCENT}'
        matchers = (IncludePattern(composed), ExcludePattern(composed))

        for matcher in matchers:
            # A composed pattern only matches the composed path.
            assert matcher.match(composed + "/foo")
            assert not matcher.match(decomposed + "/foo")

    def testDecomposedUnicode(self):
        composed = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
        decomposed = 'ba\N{COMBINING ACUTE ACCENT}'
        matchers = (IncludePattern(decomposed), ExcludePattern(decomposed))

        for matcher in matchers:
            # A decomposed pattern only matches the decomposed path.
            assert not matcher.match(composed + "/foo")
            assert matcher.match(decomposed + "/foo")

    def testInvalidUnicode(self):
        raw = b'ba\x80'.decode('latin1')
        matchers = (IncludePattern(raw), ExcludePattern(raw))

        for matcher in matchers:
            assert not matcher.match("ba/foo")
            assert matcher.match(raw + "/foo")
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test')
class OSXPatternNormalizationTestCase(BaseTestCase):
    """Non-ASCII pattern matching on OS X only.

    These tests expect composed and decomposed spellings of the same
    character to match each other, whichever form the pattern uses.
    """

    def testComposedUnicode(self):
        composed = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
        decomposed = 'ba\N{COMBINING ACUTE ACCENT}'
        matchers = (IncludePattern(composed), ExcludePattern(composed))

        for matcher in matchers:
            # Both spellings of the path match a composed pattern.
            assert matcher.match(composed + "/foo")
            assert matcher.match(decomposed + "/foo")

    def testDecomposedUnicode(self):
        composed = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
        decomposed = 'ba\N{COMBINING ACUTE ACCENT}'
        matchers = (IncludePattern(decomposed), ExcludePattern(decomposed))

        for matcher in matchers:
            # Both spellings of the path match a decomposed pattern.
            assert matcher.match(composed + "/foo")
            assert matcher.match(decomposed + "/foo")

    def testInvalidUnicode(self):
        raw = b'ba\x80'.decode('latin1')
        matchers = (IncludePattern(raw), ExcludePattern(raw))

        for matcher in matchers:
            assert not matcher.match("ba/foo")
            assert matcher.match(raw + "/foo")
|
||||
|
||||
|
||||
def test_compression_specs():
|
||||
with pytest.raises(ValueError):
|
||||
CompressionSpec('')
|
||||
|
|
Loading…
Reference in New Issue