From 4efc7cd0bde6c48ed06577ca0b2767911a7e1ea6 Mon Sep 17 00:00:00 2001 From: David Rambo Date: Tue, 30 May 2023 17:59:05 -0700 Subject: [PATCH 1/2] Add support and tests for shell-style alternatives Code review fixes Remove empty line --- src/borg/helpers/shellpattern.py | 81 ++++++++++++++++++++++++++++-- src/borg/testsuite/shellpattern.py | 15 ++++++ 2 files changed, 93 insertions(+), 3 deletions(-) diff --git a/src/borg/helpers/shellpattern.py b/src/borg/helpers/shellpattern.py index af7560ff1..7dc3ee09b 100644 --- a/src/borg/helpers/shellpattern.py +++ b/src/borg/helpers/shellpattern.py @@ -1,5 +1,6 @@ import os import re +from queue import LifoQueue def translate(pat, match_end=r"\Z"): @@ -17,10 +18,9 @@ def translate(pat, match_end=r"\Z"): :copyright: 2001-2016 Python Software Foundation. All rights reserved. :license: PSFLv2 - - TODO: support {alt1,alt2} shell-style alternatives - """ + pat = _translate_alternatives(pat) + sep = os.path.sep n = len(pat) i = 0 @@ -60,7 +60,82 @@ def translate(pat, match_end=r"\Z"): elif stuff[0] == "^": stuff = "\\" + stuff res += "[%s]" % stuff + elif c in "(|)": + if i > 0 and pat[i - 1] != "\\": + res += c else: res += re.escape(c) return "(?ms)" + res + match_end + + +def _parse_braces(pat): + """Returns the index values of paired braces in `pat` as a list of tuples. + + The dict's keys are the indexes corresponding to opening braces. Initially, + they are set to a value of `None`. Once a corresponding closing brace is found, + the value is updated. All dict keys with a positive int value are valid pairs. + + Cannot rely on re.match("[^\\(\\\\)*]?{.*[^\\(\\\\)*]}") because, while it + does handle unpaired braces and nested pairs of braces, it misses sequences + of paired braces. 
E.g.: "{foo,bar}{bar,baz}" would translate, incorrectly, to + "(foo|bar\\}\\{bar|baz)" instead of, correctly, to "(foo|bar)(bar|baz)" + + So this function parses in a left-to-right fashion, tracking pairs with a LIFO + queue: pushing opening braces on and popping them off when finding a closing + brace. + """ + curly_q = LifoQueue() + pairs: dict[int, int] = dict() + + for idx, c in enumerate(pat): + if c == "{": + if idx == 0 or pat[idx - 1] != "\\": + # Opening brace is not escaped. + # Add to dict + pairs[idx] = None + # Add to queue + curly_q.put(idx) + if c == "}" and curly_q.qsize(): + # If queue is empty, then cannot close pair. + if idx > 0 and pat[idx - 1] != "\\": + # Closing brace is not escaped. + # Pop off the index of the corresponding opening brace, which + # provides the key in the dict of pairs, and set its value. + pairs[curly_q.get()] = idx + return [(opening, closing) for opening, closing in pairs.items() if closing is not None] + + +def _translate_alternatives(pat): + """Translates the shell-style alternative portions of the pattern to regular expression groups. + + For example: {alt1,alt2} -> (alt1|alt2) + """ + # Parse pattern for paired braces. + brace_pairs = _parse_braces(pat) + + pat_list = list(pat) # Convert to list in order to subscript characters. + + # Convert non-escaped commas within groups to pipes. + # Passing, e.g. "{a\,b}.txt" to the shell expands to "{a,b}.txt", whereas + # "{a\,,b}.txt" expands to "a,.txt" and "b.txt" + for opening, closing in brace_pairs: + commas = 0 + + for i in range(opening + 1, closing): # Convert non-escaped commas to pipes. + if pat_list[i] == ",": + if i == opening or pat_list[i - 1] != "\\": + pat_list[i] = "|" + commas += 1 + elif pat_list[i] == "|" and (i == opening or pat_list[i - 1] != "\\"): + # Nested groups have their commas converted to pipes when traversing the parent group. 
+ # So in order to confirm the presence of a comma in the original, shell-style pattern, + # we must also check for a pipe. + commas += 1 + + # Convert paired braces into parentheses, but only if at least one comma is present. + if commas > 0: + pat_list[opening] = "(" + pat_list[closing] = ")" + + return "".join(pat_list) diff --git a/src/borg/testsuite/shellpattern.py b/src/borg/testsuite/shellpattern.py index 5da7426e9..e8b1acd1a 100644 --- a/src/borg/testsuite/shellpattern.py +++ b/src/borg/testsuite/shellpattern.py @@ -66,6 +66,16 @@ def check(path, pattern): ("foo3", ["foo[!12]"]), ("foo^", ["foo[^!]"]), ("foo!", ["foo[^!]"]), + # Group + ("foo1", ["{foo1,foo2}"]), + ("foo2", ["foo{1,2}"]), + ("foo", ["foo{,1,2}"]), + ("foo1", ["{foo{1,2},bar}"]), + ("bar", ["{foo{1,2},bar}"]), + ("{foo", ["{foo{,bar}"]), + ("{foobar", ["{foo{,bar}"]), + ("{foo},bar}", ["{foo},bar}"]), + ("bar/foobar", ["**/foo{ba[!z]*,[0-9]}"]), ], ) def test_match(path, patterns): @@ -99,6 +109,11 @@ def test_match(path, patterns): # Inverted set ("foo1", ["foo[!12]"]), ("foo2", ["foo[!12]"]), + # Group + ("foo", ["{foo1,foo2}"]), + ("foo", ["foo{1,2}"]), + ("foo{1,2}", ["foo{1,2}"]), + ("bar/foobaz", ["**/foo{ba[!z]*,[0-9]}"]), ], ) def test_mismatch(path, patterns): From ee0ca13ab51d3f3dc06bf8a6072573c11a134fd8 Mon Sep 17 00:00:00 2001 From: David Rambo Date: Tue, 30 May 2023 18:10:04 -0700 Subject: [PATCH 2/2] Update help docs to reflect support for alternative grouping in shell-style patterns --- src/borg/archiver/help_cmd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/borg/archiver/help_cmd.py b/src/borg/archiver/help_cmd.py index 8bc8d178e..440a87bbf 100644 --- a/src/borg/archiver/help_cmd.py +++ b/src/borg/archiver/help_cmd.py @@ -59,7 +59,8 @@ class HelpMixIn: Like fnmatch patterns these are similar to shell patterns. 
The difference is that the pattern may include ``**/`` for matching zero or more directory levels, ``*`` for matching zero or more arbitrary characters with the - exception of any path separator. A leading path separator is always removed. + exception of any path separator, and ``{}`` containing comma-separated + alternative patterns. A leading path separator is always removed. `Regular expressions <https://docs.python.org/3/library/re.html>`_, selector ``re:`` Unlike shell patterns, regular expressions are not required to match the full