mirror of
https://github.com/borgbackup/borg.git
synced 2025-01-01 12:45:34 +00:00
Merge pull request #7619 from DavidRambo/master
Add support and tests for shell-style alternatives.
This commit is contained in:
commit
021c9b656c
3 changed files with 95 additions and 4 deletions
|
@ -59,7 +59,8 @@ class HelpMixIn:
|
|||
Like fnmatch patterns these are similar to shell patterns. The difference
|
||||
is that the pattern may include ``**/`` for matching zero or more directory
|
||||
levels, ``*`` for matching zero or more arbitrary characters with the
|
||||
exception of any path separator. A leading path separator is always removed.
|
||||
exception of any path separator, ``{}`` containing comma-separated
|
||||
alternative patterns. A leading path separator is always removed.
|
||||
|
||||
`Regular expressions <https://docs.python.org/3/library/re.html>`_, selector ``re:``
|
||||
Unlike shell patterns, regular expressions are not required to match the full
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import os
|
||||
import re
|
||||
from queue import LifoQueue
|
||||
|
||||
|
||||
def translate(pat, match_end=r"\Z"):
|
||||
|
@ -17,10 +18,9 @@ def translate(pat, match_end=r"\Z"):
|
|||
|
||||
:copyright: 2001-2016 Python Software Foundation. All rights reserved.
|
||||
:license: PSFLv2
|
||||
|
||||
TODO: support {alt1,alt2} shell-style alternatives
|
||||
|
||||
"""
|
||||
pat = _translate_alternatives(pat)
|
||||
|
||||
sep = os.path.sep
|
||||
n = len(pat)
|
||||
i = 0
|
||||
|
@ -60,7 +60,82 @@ def translate(pat, match_end=r"\Z"):
|
|||
elif stuff[0] == "^":
|
||||
stuff = "\\" + stuff
|
||||
res += "[%s]" % stuff
|
||||
elif c in "(|)":
|
||||
if i > 0 and pat[i - 1] != "\\":
|
||||
res += c
|
||||
else:
|
||||
res += re.escape(c)
|
||||
|
||||
return "(?ms)" + res + match_end
|
||||
|
||||
|
||||
def _parse_braces(pat):
|
||||
"""Returns the index values of paired braces in `pat` as a list of tuples.
|
||||
|
||||
The dict's keys are the indexes corresponding to opening braces. Initially,
|
||||
they are set to a value of `None`. Once a corresponding closing brace is found,
|
||||
the value is updated. All dict keys with a positive int value are valid pairs.
|
||||
|
||||
Cannot rely on re.match("[^\\(\\\\)*]?{.*[^\\(\\\\)*]}") because, while it
|
||||
does handle unpaired braces and nested pairs of braces, it misses sequences
|
||||
of paired braces. E.g.: "{foo,bar}{bar,baz}" would translate, incorrectly, to
|
||||
"(foo|bar\\}\\{bar|baz)" instead of, correctly, to "(foo|bar)(bar|baz)"
|
||||
|
||||
So this function parses in a left-to-right fashion, tracking pairs with a LIFO
|
||||
queue: pushing opening braces on and popping them off when finding a closing
|
||||
brace.
|
||||
"""
|
||||
curly_q = LifoQueue()
|
||||
pairs: dict[int, int] = dict()
|
||||
|
||||
for idx, c in enumerate(pat):
|
||||
if c == "{":
|
||||
if idx == 0 or pat[idx - 1] != "\\":
|
||||
# Opening brace is not escaped.
|
||||
# Add to dict
|
||||
pairs[idx] = None
|
||||
# Add to queue
|
||||
curly_q.put(idx)
|
||||
if c == "}" and curly_q.qsize():
|
||||
# If queue is empty, then cannot close pair.
|
||||
if idx > 0 and pat[idx - 1] != "\\":
|
||||
# Closing brace is not escaped.
|
||||
# Pop off the index of the corresponding opening brace, which
|
||||
# provides the key in the dict of pairs, and set its value.
|
||||
pairs[curly_q.get()] = idx
|
||||
return [(opening, closing) for opening, closing in pairs.items() if closing is not None]
|
||||
|
||||
|
||||
def _translate_alternatives(pat):
    """Rewrite shell-style alternative groups in `pat` as regular expression groups.

    For example: {alt1,alt2} -> (alt1|alt2)

    Every brace pair reported by `_parse_braces` is examined. Unescaped commas
    between the braces are turned into pipes, and the braces themselves become
    parentheses if at least one unescaped comma or pipe was found between them.
    Brace pairs without such a separator are left untouched.

    Note that the scan covers everything between the braces, including nested
    groups, and that an unescaped pipe counts like a comma: a parent group is
    processed before its children, so a child's commas are already pipes by the
    time the child itself is examined.

    A backslash-escaped comma stays a comma, e.g. the shell expands "{a\\,b}.txt"
    to "{a,b}.txt", whereas "{a\\,,b}.txt" expands to "a,.txt" and "b.txt".
    """
    chars = list(pat)  # strings are immutable, so edit a list of characters

    for start, end in _parse_braces(pat):
        separators = 0

        for pos in range(start + 1, end):
            # Only unescaped commas and pipes act as separators.
            if chars[pos] not in ",|" or chars[pos - 1] == "\\":
                continue
            chars[pos] = "|"
            separators += 1

        # Braces without any separator are not an alternative group.
        if separators:
            chars[start], chars[end] = "(", ")"

    return "".join(chars)
|
||||
|
|
|
@ -66,6 +66,16 @@ def check(path, pattern):
|
|||
("foo3", ["foo[!12]"]),
|
||||
("foo^", ["foo[^!]"]),
|
||||
("foo!", ["foo[^!]"]),
|
||||
# Group
|
||||
("foo1", ["{foo1,foo2}"]),
|
||||
("foo2", ["foo{1,2}"]),
|
||||
("foo", ["foo{,1,2}"]),
|
||||
("foo1", ["{foo{1,2},bar}"]),
|
||||
("bar", ["{foo{1,2},bar}"]),
|
||||
("{foo", ["{foo{,bar}"]),
|
||||
("{foobar", ["{foo{,bar}"]),
|
||||
("{foo},bar}", ["{foo},bar}"]),
|
||||
("bar/foobar", ["**/foo{ba[!z]*,[0-9]}"]),
|
||||
],
|
||||
)
|
||||
def test_match(path, patterns):
|
||||
|
@ -99,6 +109,11 @@ def test_match(path, patterns):
|
|||
# Inverted set
|
||||
("foo1", ["foo[!12]"]),
|
||||
("foo2", ["foo[!12]"]),
|
||||
# Group
|
||||
("foo", ["{foo1,foo2}"]),
|
||||
("foo", ["foo{1,2}"]),
|
||||
("foo{1,2}", ["foo{1,2}"]),
|
||||
("bar/foobaz", ["**/foo{ba[!z]*,[0-9]}"]),
|
||||
],
|
||||
)
|
||||
def test_mismatch(path, patterns):
|
||||
|
|
Loading…
Reference in a new issue