Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-72904: Simplify implementation of fnmatch.translate() #109879

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 41 additions & 69 deletions Lib/fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,84 +71,56 @@ def fnmatchcase(name, pat):
return match(name) is not None


# Tokenizer for shell patterns.  At every position exactly one of the
# top-level alternatives matches:
#   star           a run of one or more '*'
#   question_mark  a single '?'
#   set            a complete '[...]' expression; 'negated' holds the optional
#                  leading '!' and 'set_inner' holds the contents of the set
#   literal        any other single character (re.DOTALL lets this include
#                  a newline), including a '[' that does not open a set
# The possessive quantifiers ('?+', available since Python 3.11) keep a '!'
# or ']' that directly follows '[' from being re-read as the closing bracket,
# so '[]' and '[!]' are not sets and fall through to the literal branch.
_TRANSLATE_RE = re.compile(
    r'(?P<star>\*+)|'
    r'(?P<question_mark>\?)|'
    r'(?P<set>\[(?P<negated>!?+)(?P<set_inner>\]?+[^\]]*)\])|'
    r'(?P<literal>.)',
    flags=re.DOTALL)


def _translate_iter(pat, star, question_mark):
    """Yield one translated item per token of the shell pattern *pat*.

    *star* is yielded unchanged for every run of '*' and *question_mark* for
    every '?'; both are opaque to this function, so the caller may pass
    sentinel objects or ready-made regex fragments.  A '[...]' set is
    yielded as a regex character class and any other character is yielded
    escaped with re.escape().
    """
    for match in _TRANSLATE_RE.finditer(pat):
        if match.group('star'):
            yield star
        elif match.group('question_mark'):
            yield question_mark
        elif match.group('set'):
            # The translated contents are empty when the set held nothing
            # but inverted ranges such as 'z-a'.
            inner = ''.join(_translate_set_iter(match.group('set_inner')))
            if match.group('negated'):
                # A negated empty set matches any single character.
                yield f'[^{inner}]' if inner else question_mark
            else:
                # An empty set can never match; '(?!)' always fails.
                yield f'[{inner}]' if inner else '(?!)'
        else:
            yield re.escape(match.group())


# Tokenizer for the contents of a '[...]' set: either a three-character
# 'start-end' range or a single literal character.  The range alternative is
# tried first, so a '-' is only literal when it cannot sit between two
# characters (e.g. at the very start or end of the set, or right after a
# completed range).
_TRANSLATE_SET_RE = re.compile(
    r'(?P<range>(?P<start>.)-(?P<end>.))|'
    r'(?P<literal>.)',
    flags=re.DOTALL)


def _translate_set_iter(token):
    """Yield regex fragments for the contents *token* of a shell set.

    Ranges are yielded as 'start-end' with both endpoints escaped; a range
    whose start sorts after its end is dropped entirely, because empty
    ranges are invalid in a regular expression.  Every other character is
    yielded escaped with re.escape().
    """
    for match in _TRANSLATE_SET_RE.finditer(token):
        if match.group('range'):
            start, end = match.group('start'), match.group('end')
            if start <= end:
                yield f'{re.escape(start)}-{re.escape(end)}'
            # else: inverted range -- contributes nothing to the set.
        else:
            yield re.escape(match.group())


def translate(pat):
"""Translate a shell PATTERN to a regular expression.

There is no way to quote meta-characters.
"""

STAR = object()
res = []
add = res.append
i, n = 0, len(pat)
while i < n:
c = pat[i]
i = i+1
if c == '*':
# compress consecutive `*` into one
if (not res) or res[-1] is not STAR:
add(STAR)
elif c == '?':
add('.')
elif c == '[':
j = i
if j < n and pat[j] == '!':
j = j+1
if j < n and pat[j] == ']':
j = j+1
while j < n and pat[j] != ']':
j = j+1
if j >= n:
add('\\[')
else:
stuff = pat[i:j]
if '-' not in stuff:
stuff = stuff.replace('\\', r'\\')
else:
chunks = []
k = i+2 if pat[i] == '!' else i+1
while True:
k = pat.find('-', k, j)
if k < 0:
break
chunks.append(pat[i:k])
i = k+1
k = k+3
chunk = pat[i:j]
if chunk:
chunks.append(chunk)
else:
chunks[-1] += '-'
# Remove empty ranges -- invalid in RE.
for k in range(len(chunks)-1, 0, -1):
if chunks[k-1][-1] > chunks[k][0]:
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
del chunks[k]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
for s in chunks)
# Escape set operations (&&, ~~ and ||).
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
i = j+1
if not stuff:
# Empty range: never match.
add('(?!)')
elif stuff == '!':
# Negated empty range: match any character.
add('.')
else:
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
else:
add(re.escape(c))
assert i == n
inp = list(_translate_iter(pat, STAR, '.'))

# Deal with STARs.
inp = res
res = []
add = res.append
i, n = 0, len(inp)
Expand Down
4 changes: 2 additions & 2 deletions Lib/test/test_fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def test_translate(self):
self.assertEqual(translate('?'), r'(?s:.)\Z')
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z')
self.assertEqual(translate('[]]'), r'(?s:[]])\Z')
self.assertEqual(translate('[]]'), r'(?s:[\]])\Z')
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
Expand All @@ -235,7 +235,7 @@ def test_translate(self):
self.assertEqual(translate('*********'), r'(?s:.*)\Z')
self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[\?].)\Z')
# fancy translation to prevent exponential-time match failure
t = translate('**a*a****a')
self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z')
Expand Down