Add support and tests for shell-style alternatives

Code review fixes

Remove empty line
This commit is contained in:
David Rambo 2023-05-30 17:59:05 -07:00
parent 87b74f3b0d
commit 4efc7cd0bd
2 changed files with 93 additions and 3 deletions

View File

@ -1,5 +1,6 @@
import os
import re
from queue import LifoQueue
def translate(pat, match_end=r"\Z"):
@ -17,10 +18,9 @@ def translate(pat, match_end=r"\Z"):
:copyright: 2001-2016 Python Software Foundation. All rights reserved.
:license: PSFLv2
TODO: support {alt1,alt2} shell-style alternatives
"""
pat = _translate_alternatives(pat)
sep = os.path.sep
n = len(pat)
i = 0
@ -60,7 +60,82 @@ def translate(pat, match_end=r"\Z"):
elif stuff[0] == "^":
stuff = "\\" + stuff
res += "[%s]" % stuff
elif c in "(|)":
if i > 0 and pat[i - 1] != "\\":
res += c
else:
res += re.escape(c)
return "(?ms)" + res + match_end
def _parse_braces(pat):
"""Returns the index values of paired braces in `pat` as a list of tuples.
The dict's keys are the indexes corresponding to opening braces. Initially,
they are set to a value of `None`. Once a corresponding closing brace is found,
the value is updated. All dict keys with a positive int value are valid pairs.
Cannot rely on re.match("[^\\(\\\\)*]?{.*[^\\(\\\\)*]}") because, while it
does handle unpaired braces and nested pairs of braces, it misses sequences
of paired braces. E.g.: "{foo,bar}{bar,baz}" would translate, incorrectly, to
"(foo|bar\\}\\{bar|baz)" instead of, correctly, to "(foo|bar)(bar|baz)"
So this function parses in a left-to-right fashion, tracking pairs with a LIFO
queue: pushing opening braces on and popping them off when finding a closing
brace.
"""
curly_q = LifoQueue()
pairs: dict[int, int] = dict()
for idx, c in enumerate(pat):
if c == "{":
if idx == 0 or pat[idx - 1] != "\\":
# Opening brace is not escaped.
# Add to dict
pairs[idx] = None
# Add to queue
curly_q.put(idx)
if c == "}" and curly_q.qsize():
# If queue is empty, then cannot close pair.
if idx > 0 and pat[idx - 1] != "\\":
# Closing brace is not escaped.
# Pop off the index of the corresponding opening brace, which
# provides the key in the dict of pairs, and set its value.
pairs[curly_q.get()] = idx
return [(opening, closing) for opening, closing in pairs.items() if closing is not None]
def _translate_alternatives(pat):
    """Translate the shell-style alternatives in `pat` to regular expression groups.

    For example: {alt1,alt2} -> (alt1|alt2)

    Every unescaped comma lying between a pair of braces reported by
    `_parse_braces` is replaced with a pipe. The braces of a pair are replaced
    with parentheses only if at least one unescaped comma or pipe was found
    between them; otherwise the braces are left as they are.

    Note that an unescaped "|" already present between the braces is counted
    the same way as a comma. This is how commas of nested groups, which were
    already converted while processing the enclosing group, are recognised. It
    relies on `_parse_braces` yielding an enclosing pair before the pairs
    nested inside it.

    :param pat: the shell-style pattern to translate.
    :returns: the pattern as a string, with alternatives rewritten as groups.
    """
    brace_pairs = _parse_braces(pat)
    pat_list = list(pat)  # Strings are immutable; edit the characters in a list.

    # Passing, e.g. "{a\,b}.txt" to the shell expands to "{a,b}.txt", whereas
    # "{a\,,b}.txt" expands to "a,.txt" and "b.txt", so escaped commas must be
    # left alone and must not count towards making the braces a group.
    for opening, closing in brace_pairs:
        separators = 0

        # `i` starts at opening + 1, so pat_list[i - 1] always exists.
        for i in range(opening + 1, closing):
            if pat_list[i - 1] == "\\":
                continue  # Escaped character: keep it literal.
            if pat_list[i] == ",":
                pat_list[i] = "|"
                separators += 1
            elif pat_list[i] == "|":
                # Typically a nested group's comma converted on an earlier pass.
                separators += 1

        # Braces without any separator are not alternatives; leave them as is.
        if separators:
            pat_list[opening] = "("
            pat_list[closing] = ")"

    return "".join(pat_list)

View File

@ -66,6 +66,16 @@ def check(path, pattern):
("foo3", ["foo[!12]"]),
("foo^", ["foo[^!]"]),
("foo!", ["foo[^!]"]),
# Group
("foo1", ["{foo1,foo2}"]),
("foo2", ["foo{1,2}"]),
("foo", ["foo{,1,2}"]),
("foo1", ["{foo{1,2},bar}"]),
("bar", ["{foo{1,2},bar}"]),
("{foo", ["{foo{,bar}"]),
("{foobar", ["{foo{,bar}"]),
("{foo},bar}", ["{foo},bar}"]),
("bar/foobar", ["**/foo{ba[!z]*,[0-9]}"]),
],
)
def test_match(path, patterns):
@ -99,6 +109,11 @@ def test_match(path, patterns):
# Inverted set
("foo1", ["foo[!12]"]),
("foo2", ["foo[!12]"]),
# Group
("foo", ["{foo1,foo2}"]),
("foo", ["foo{1,2}"]),
("foo{1,2}", ["foo{1,2}"]),
("bar/foobaz", ["**/foo{ba[!z]*,[0-9]}"]),
],
)
def test_mismatch(path, patterns):