mirror of
https://github.com/borgbackup/borg.git
synced 2024-12-25 01:06:50 +00:00
Add shell-style pattern syntax
The fnmatch module in Python's standard library implements a pattern format for paths which is similar to shell patterns. However, “*” matches any character including path separators. This newly introduced pattern syntax with the selector “sh” no longer matches the path separator with “*”. Instead “**/” can be used to match zero or more directory levels.
This commit is contained in:
parent
382b79212b
commit
c7fb598ab9
6 changed files with 268 additions and 11 deletions
|
@ -642,11 +642,20 @@ def do_break_lock(self, args):
|
|||
matching any single character specified, including ranges, and '[!...]'
|
||||
matching any character not specified. For the purpose of these patterns,
|
||||
the path separator ('\\' for Windows and '/' on other systems) is not
|
||||
treated specially. For a path to match a pattern, it must completely
|
||||
match from start to end, or must match from the start to just before
|
||||
a path separator. Except for the root path, paths will never end in the
|
||||
path separator when matching is attempted. Thus, if a given pattern ends
|
||||
in a path separator, a '*' is appended before matching is attempted.
|
||||
treated specially. Wrap meta-characters in brackets for a literal match
|
||||
(i.e. `[?]` to match the literal character `?`). For a path to match
|
||||
a pattern, it must completely match from start to end, or must match from
|
||||
the start to just before a path separator. Except for the root path,
|
||||
paths will never end in the path separator when matching is attempted.
|
||||
Thus, if a given pattern ends in a path separator, a '*' is appended
|
||||
before matching is attempted.
|
||||
|
||||
Shell-style patterns, selector `sh:`
|
||||
|
||||
Like fnmatch patterns these are similar to shell patterns. The difference
|
||||
is that the pattern may include `**/` for matching zero or more directory
|
||||
levels, `*` for matching zero or more arbitrary characters with the
|
||||
exception of any path separator.
|
||||
|
||||
Regular expressions, selector `re:`
|
||||
|
||||
|
@ -701,6 +710,7 @@ def do_break_lock(self, args):
|
|||
*.tmp
|
||||
fm:aa:something/*
|
||||
re:^/home/[^/]\.tmp/
|
||||
sh:/home/*/.thumbnails
|
||||
EOF
|
||||
$ borg create --exclude-from exclude.txt backup /
|
||||
''')
|
||||
|
|
|
@ -30,6 +30,7 @@ def get_terminal_size(fallback=(80, 24)):
|
|||
from . import hashindex
|
||||
from . import chunker
|
||||
from . import crypto
|
||||
from . import shellpattern
|
||||
import msgpack
|
||||
import msgpack.fallback
|
||||
|
||||
|
@ -332,11 +333,9 @@ def _match(self, path):
|
|||
raise NotImplementedError
|
||||
|
||||
|
||||
# For both PathPrefixPattern and FnmatchPattern, we require that
|
||||
# the pattern either match the whole path or an initial segment
|
||||
# of the path up to but not including a path separator. To
|
||||
# unify the two cases, we add a path separator to the end of
|
||||
# the path before matching.
|
||||
# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
|
||||
# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
|
||||
# separator to the end of the path before matching.
|
||||
|
||||
|
||||
class PathPrefixPattern(PatternBase):
|
||||
|
@ -376,6 +375,27 @@ def _match(self, path):
|
|||
return (self.regex.match(path + os.path.sep) is not None)
|
||||
|
||||
|
||||
class ShellPattern(PatternBase):
    """Exclusion pattern written in shell-glob syntax (selector ``sh``).

    A pattern with a trailing path separator excludes what is inside a
    directory while leaving the directory entry itself alone.
    """
    PREFIX = "sh"

    def _prepare(self, pattern):
        # Normalise the user-supplied pattern, then extend it with "**/*" so
        # that everything below a matched path is covered as well.
        sep = os.path.sep
        normalized = os.path.normpath(pattern)

        if pattern.endswith(sep):
            # Keep a trailing separator: only the directory's contents match.
            pieces = [normalized.rstrip(sep), "**", "*", ""]
        else:
            pieces = [normalized, "**", "*"]

        self.pattern = sep.join(pieces)
        self.regex = re.compile(shellpattern.translate(self.pattern))

    def _match(self, path):
        # A separator is appended so that a whole path and a leading path
        # segment are matched by the same expression.
        candidate = path + os.path.sep
        return self.regex.match(candidate) is not None
|
||||
|
||||
|
||||
class RegexPattern(PatternBase):
|
||||
"""Regular expression to exclude.
|
||||
"""
|
||||
|
@ -397,6 +417,7 @@ def _match(self, path):
|
|||
FnmatchPattern,
|
||||
PathPrefixPattern,
|
||||
RegexPattern,
|
||||
ShellPattern,
|
||||
])
|
||||
|
||||
_PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES)
|
||||
|
|
62
borg/shellpattern.py
Normal file
62
borg/shellpattern.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
import re
|
||||
import os
|
||||
|
||||
|
||||
def translate(pat):
    """Translate a shell-style pattern to a regular expression.

    The pattern may include "**<sep>" (<sep> stands for the platform-specific path separator; "/" on POSIX systems) for
    matching zero or more directory levels and "*" for matching zero or more arbitrary characters with the exception of
    any path separator. "?" matches exactly one character other than the path separator. Wrap meta-characters in
    brackets for a literal match (i.e. "[?]" to match the literal character "?"). The contents of a bracket set are
    copied into the regular expression as a character class, so a set (especially a negated one such as "[!a]") can
    match the path separator.

    :param pat: shell-style pattern string
    :return: regular expression source string; it starts with the inline flags "(?ms)" and is anchored at the end
             with "\\Z", so it is meant to be used with ``re.match``

    This function is derived from the "fnmatch" module distributed with the Python standard library.

    Copyright (C) 2001-2016 Python Software Foundation. All rights reserved.

    TODO: support {alt1,alt2} shell-style alternatives

    """
    sep = os.path.sep
    # Escaped once up front; the separator is a backslash on Windows and must not be taken as a regex escape.
    esc_sep = re.escape(sep)
    n = len(pat)
    i = 0
    parts = []

    while i < n:
        c = pat[i]
        i += 1

        if c == "*":
            if i + 1 < n and pat[i] == "*" and pat[i + 1] == sep:
                # **/ == wildcard for 0+ full (relative) directory names with trailing slashes; the forward slash
                # stands for the platform-specific path separator
                parts.append("(?:[^%s]*%s)*" % (esc_sep, esc_sep))
                i += 2
            else:
                # * == wildcard for name parts (does not cross path separator)
                parts.append("[^%s]*" % esc_sep)
        elif c == "?":
            # ? == any single character excluding path separator
            parts.append("[^%s]" % esc_sep)
        elif c == "[":
            # Find the closing bracket. A "!" (negation) and a "]" directly after the opening bracket belong to the
            # set and do not terminate it.
            j = i
            if j < n and pat[j] == "!":
                j += 1
            if j < n and pat[j] == "]":
                j += 1
            while j < n and pat[j] != "]":
                j += 1
            if j >= n:
                # No closing bracket: treat "[" as a literal character
                parts.append("\\[")
            else:
                stuff = pat[i:j].replace("\\", "\\\\")
                i = j + 1
                if stuff[0] == "!":
                    # Shell negation "!" becomes regex negation "^"
                    stuff = "^" + stuff[1:]
                elif stuff[0] == "^":
                    # A leading "^" is a literal member in shell syntax, so keep the regex from negating the set
                    stuff = "\\" + stuff
                parts.append("[%s]" % stuff)
        else:
            parts.append(re.escape(c))

    # Global inline flags have to be at the very start of the expression: placing them at the end is deprecated since
    # Python 3.6 and rejected with re.error by Python 3.11 and newer.
    return "(?ms)" + "".join(parts) + r"\Z"
|
|
@ -12,7 +12,8 @@
|
|||
from ..helpers import Location, format_file_size, format_timedelta, PathPrefixPattern, FnmatchPattern, make_path_safe, \
|
||||
prune_within, prune_split, get_cache_dir, Statistics, is_slow_msgpack, yes, RegexPattern, \
|
||||
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
|
||||
ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, PatternMatcher
|
||||
ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, PatternMatcher, \
|
||||
ShellPattern
|
||||
from . import BaseTestCase, environment_variable, FakeInputs
|
||||
|
||||
|
||||
|
@ -234,6 +235,45 @@ def test_patterns_fnmatch(pattern, expected):
|
|||
check_patterns(files, FnmatchPattern(pattern), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
    # "None" means all files, i.e. all match the given pattern
    ("*", None),
    ("**/*", None),
    ("/**/*", None),
    ("/./*", None),
    ("*/*", None),
    ("*///*", None),
    ("/home/u", []),
    ("/home/*",
     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
    ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]),
    ("*/.pr????e", []),
    ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
    ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
    ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]),
    ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]),
    ("/srv/*", ["/srv/messages", "/srv/dmesg"]),
    ("/srv2/**", ["/srv2", "/srv2/blafasel"]),
    ("/srv2/**/", ["/srv2/blafasel"]),
    ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]),
    ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]),
])
def test_patterns_shell(pattern, expected):
    """Run a ShellPattern built from *pattern* against a fixed file list via check_patterns."""
    # Fixed set of paths every parametrized pattern is evaluated against.
    files = [
        "/etc/server/config",
        "/etc/server/hosts",
        "/home",
        "/home/user/.profile",
        "/home/user/.bashrc",
        "/home/user2/.profile",
        "/home/user2/public_html/index.html",
        "/srv",
        "/srv/messages",
        "/srv/dmesg",
        "/srv2",
        "/srv2/blafasel",
        "/home/foo/.thumbnails",
        "/home/foo/bar/.thumbnails",
    ]

    matcher = ShellPattern(pattern)
    check_patterns(files, matcher, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("pattern, expected", [
|
||||
# "None" means all files, i.e. all match the given pattern
|
||||
("", None),
|
||||
|
@ -276,6 +316,7 @@ def _make_test_patterns(pattern):
|
|||
return [PathPrefixPattern(pattern),
|
||||
FnmatchPattern(pattern),
|
||||
RegexPattern("^{}/foo$".format(pattern)),
|
||||
ShellPattern(pattern),
|
||||
]
|
||||
|
||||
|
||||
|
@ -374,6 +415,12 @@ def evaluate(filename):
|
|||
("pp:/", PathPrefixPattern),
|
||||
("pp:/data/", PathPrefixPattern),
|
||||
("pp:pp:/data/", PathPrefixPattern),
|
||||
|
||||
# Shell-pattern style
|
||||
("sh:", ShellPattern),
|
||||
("sh:*", ShellPattern),
|
||||
("sh:/data/*", ShellPattern),
|
||||
("sh:sh:/data/*", ShellPattern),
|
||||
])
|
||||
def test_parse_pattern(pattern, cls):
|
||||
assert isinstance(parse_pattern(pattern), cls)
|
||||
|
|
113
borg/testsuite/shellpattern.py
Normal file
113
borg/testsuite/shellpattern.py
Normal file
|
@ -0,0 +1,113 @@
|
|||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from .. import shellpattern
|
||||
|
||||
|
||||
def check(path, pattern):
    """Return True when *path* matches the regular expression translated from the shell-style *pattern*."""
    regex_source = shellpattern.translate(pattern)
    return re.match(regex_source, path) is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("path, patterns", [
    # Literal string
    ("foo/bar", ["foo/bar"]),
    ("foo\\bar", ["foo\\bar"]),

    # Non-ASCII
    ("foo/c/\u0152/e/bar", ["foo/*/\u0152/*/bar", "*/*/\u0152/*/*", "**/\u0152/*/*"]),
    ("\u00e4\u00f6\u00dc", ["???", "*", "\u00e4\u00f6\u00dc", "[\u00e4][\u00f6][\u00dc]"]),

    # Question mark
    ("foo", ["fo?"]),
    ("foo", ["f?o"]),
    ("foo", ["f??"]),
    ("foo", ["?oo"]),
    ("foo", ["?o?"]),
    ("foo", ["??o"]),
    ("foo", ["???"]),

    # Single asterisk
    ("", ["*"]),
    ("foo", ["*", "**", "***"]),
    ("foo", ["foo*"]),
    ("foobar", ["foo*"]),
    ("foobar", ["foo*bar"]),
    ("foobarbaz", ["foo*baz"]),
    ("bar", ["*bar"]),
    ("foobar", ["*bar"]),
    ("foo/bar", ["foo/*bar"]),
    ("foo/bar", ["foo/*ar"]),
    ("foo/bar", ["foo/*r"]),
    ("foo/bar", ["foo/*"]),
    ("foo/bar", ["foo*/bar"]),
    ("foo/bar", ["fo*/bar"]),
    ("foo/bar", ["f*/bar"]),
    ("foo/bar", ["*/bar"]),

    # Double asterisk (matches 0..n directory layers)
    ("foo/bar", ["foo/**/bar"]),
    ("foo/1/bar", ["foo/**/bar"]),
    ("foo/1/22/333/bar", ["foo/**/bar"]),
    ("foo/", ["foo/**/"]),
    ("foo/1/", ["foo/**/"]),
    ("foo/1/22/333/", ["foo/**/"]),
    ("bar", ["**/bar"]),
    ("1/bar", ["**/bar"]),
    ("1/22/333/bar", ["**/bar"]),
    ("foo/bar/baz", ["foo/**/*"]),

    # Set
    ("foo1", ["foo[12]"]),
    ("foo2", ["foo[12]"]),
    ("foo2/bar", ["foo[12]/*"]),
    ("f??f", ["f??f", "f[?][?]f"]),
    ("foo]", ["foo[]]"]),

    # Inverted set
    ("foo3", ["foo[!12]"]),
    ("foo^", ["foo[^!]"]),
    ("foo!", ["foo[^!]"]),
])
def test_match(path, patterns):
    """Every pattern in *patterns* must match *path*."""
    for candidate in patterns:
        assert check(path, candidate)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("path, patterns", [
    ("", ["?", "[]"]),
    ("foo", ["foo?"]),
    ("foo", ["?foo"]),
    ("foo", ["f?oo"]),

    # do not match path separator
    ("foo/ar", ["foo?ar"]),

    # do not match/cross over os.path.sep
    ("foo/bar", ["*"]),
    ("foo/bar", ["foo*bar"]),
    ("foo/bar", ["foo*ar"]),
    ("foo/bar", ["fo*bar"]),
    ("foo/bar", ["fo*ar"]),

    # Double asterisk
    ("foobar", ["foo/**/bar"]),

    # Two asterisks without slash do not match directory separator
    ("foo/bar", ["**"]),

    # Double asterisk not matching filename
    ("foo/bar", ["**/"]),

    # Set
    ("foo3", ["foo[12]"]),

    # Inverted set
    ("foo1", ["foo[!12]"]),
    ("foo2", ["foo[!12]"]),
])
def test_mismatch(path, patterns):
    """None of the patterns in *patterns* may match *path*."""
    for candidate in patterns:
        assert not check(path, candidate)
|
|
@ -240,6 +240,10 @@ Examples
|
|||
$ borg create /mnt/backup::my-files /home \
|
||||
--exclude 're:^/home/[^/]+/\.thumbnails/'
|
||||
|
||||
# Do the same using a shell-style pattern
|
||||
$ borg create /mnt/backup::my-files /home \
|
||||
--exclude 'sh:/home/*/.thumbnails'
|
||||
|
||||
# Backup the root filesystem into an archive named "root-YYYY-MM-DD"
|
||||
# use zlib compression (good, but slow) - default is no compression
|
||||
NAME="root-`date +%Y-%m-%d`"
|
||||
|
|
Loading…
Reference in a new issue