diff --git a/src/borg/helpers/shellpattern.py b/src/borg/helpers/shellpattern.py index af7560ff..7dc3ee09 100644 --- a/src/borg/helpers/shellpattern.py +++ b/src/borg/helpers/shellpattern.py @@ -1,5 +1,6 @@ import os import re +from queue import LifoQueue def translate(pat, match_end=r"\Z"): @@ -17,10 +18,9 @@ def translate(pat, match_end=r"\Z"): :copyright: 2001-2016 Python Software Foundation. All rights reserved. :license: PSFLv2 - - TODO: support {alt1,alt2} shell-style alternatives - """ + pat = _translate_alternatives(pat) + sep = os.path.sep n = len(pat) i = 0 @@ -60,7 +60,82 @@ def translate(pat, match_end=r"\Z"): elif stuff[0] == "^": stuff = "\\" + stuff res += "[%s]" % stuff + elif c in "(|)": + if i > 0 and pat[i - 1] != "\\": + res += c else: res += re.escape(c) return "(?ms)" + res + match_end + + +def _parse_braces(pat): + """Returns the index values of paired braces in `pat` as a list of tuples. + + The dict's keys are the indexes corresponding to opening braces. Initially, + they are set to a value of `None`. Once a corresponding closing brace is found, + the value is updated. All dict keys with a positive int value are valid pairs. + + Cannot rely on re.match("[^\\(\\\\)*]?{.*[^\\(\\\\)*]}") because, while it + does handle unpaired braces and nested pairs of braces, it misses sequences + of paired braces. E.g.: "{foo,bar}{bar,baz}" would translate, incorrectly, to + "(foo|bar\\}\\{bar|baz)" instead of, correctly, to "(foo|bar)(bar|baz)" + + So this function parses in a left-to-right fashion, tracking pairs with a LIFO + queue: pushing opening braces on and popping them off when finding a closing + brace. + """ + curly_q = LifoQueue() + pairs: dict[int, int] = dict() + + for idx, c in enumerate(pat): + if c == "{": + if idx == 0 or pat[idx - 1] != "\\": + # Opening brace is not escaped. + # Add to dict + pairs[idx] = None + # Add to queue + curly_q.put(idx) + if c == "}" and curly_q.qsize(): + # If queue is empty, then cannot close pair. + if idx > 0 and pat[idx - 1] != "\\": + # Closing brace is not escaped. + # Pop off the index of the corresponding opening brace, which + # provides the key in the dict of pairs, and set its value. + pairs[curly_q.get()] = idx + return [(opening, closing) for opening, closing in pairs.items() if closing is not None] + + +def _translate_alternatives(pat): + """Translates the shell-style alternative portions of the pattern to regular expression groups. + + For example: {alt1,alt2} -> (alt1|alt2) + """ + # Parse pattern for paired braces. + brace_pairs = _parse_braces(pat) + + pat_list = list(pat) # Convert to list in order to subscript characters. + + # Convert non-escaped commas within groups to pipes. + # Passing, e.g. "{a\,b}.txt" to the shell expands to "{a,b}.txt", whereas + # "{a\,,b}.txt" expands to "a,.txt" and "b.txt" + for opening, closing in brace_pairs: + commas = 0 + + for i in range(opening + 1, closing): # Convert non-escaped commas to pipes. + if pat_list[i] == ",": + if i == opening or pat_list[i - 1] != "\\": + pat_list[i] = "|" + commas += 1 + elif pat_list[i] == "|" and (i == opening or pat_list[i - 1] != "\\"): + # Nested groups have their commas converted to pipes when traversing the parent group. + # So in order to confirm the presence of a comma in the original, shell-style pattern, + # we must also check for a pipe. + commas += 1 + + # Convert paired braces into parentheses, but only if at least one comma is present. + if commas > 0: + pat_list[opening] = "(" + pat_list[closing] = ")" + + return "".join(pat_list) diff --git a/src/borg/testsuite/shellpattern.py b/src/borg/testsuite/shellpattern.py index 5da7426e..e8b1acd1 100644 --- a/src/borg/testsuite/shellpattern.py +++ b/src/borg/testsuite/shellpattern.py @@ -66,6 +66,16 @@ def check(path, pattern): ("foo3", ["foo[!12]"]), ("foo^", ["foo[^!]"]), ("foo!", ["foo[^!]"]), + # Group + ("foo1", ["{foo1,foo2}"]), + ("foo2", ["foo{1,2}"]), + ("foo", ["foo{,1,2}"]), + ("foo1", ["{foo{1,2},bar}"]), + ("bar", ["{foo{1,2},bar}"]), + ("{foo", ["{foo{,bar}"]), + ("{foobar", ["{foo{,bar}"]), + ("{foo},bar}", ["{foo},bar}"]), + ("bar/foobar", ["**/foo{ba[!z]*,[0-9]}"]), ], ) def test_match(path, patterns): @@ -99,6 +109,11 @@ def test_match(path, patterns): # Inverted set ("foo1", ["foo[!12]"]), ("foo2", ["foo[!12]"]), + # Group + ("foo", ["{foo1,foo2}"]), + ("foo", ["foo{1,2}"]), + ("foo{1,2}", ["foo{1,2}"]), + ("bar/foobaz", ["**/foo{ba[!z]*,[0-9]}"]), ], ) def test_mismatch(path, patterns):