From 1f58b94e325f99989e39d349fff08939c8d4b8e8 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 3 May 2023 03:20:26 +0100 Subject: [PATCH] GH-104114: Fix `pathlib.WindowsPath.glob()` use of literal pattern segment case We now use `_WildcardSelector` to evaluate literal pattern segments, which allows us to retrieve the real filesystem case. This change is necessary in order to implement a *case_sensitive* argument (see GH-81079) and a *follow_symlinks* argument (see GH-77609). --- Lib/pathlib.py | 51 +++++-------------- Lib/test/test_pathlib.py | 4 +- ...-05-03-03-14-33.gh-issue-104114.RG26RD.rst | 3 ++ 3 files changed, 18 insertions(+), 40 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-05-03-03-14-33.gh-issue-104114.RG26RD.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c69089f4e1bc5d..2e75f7699c7ce5 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -54,11 +54,6 @@ def _ignore_error(exception): getattr(exception, 'winerror', None) in _IGNORED_WINERRORS) -def _is_wildcard_pattern(pat): - # Whether this pattern needs actual matching using fnmatch, or can - # be looked up directly as a file. - return "*" in pat or "?" in pat or "[" in pat - def _is_case_sensitive(flavour): return flavour.normcase('Aa') == 'Aa' @@ -78,10 +73,8 @@ def _make_selector(pattern_parts, flavour): cls = _ParentSelector elif '**' in pat: raise ValueError("Invalid pattern: '**' can only be an entire path component") - elif _is_wildcard_pattern(pat): - cls = _WildcardSelector else: - cls = _PreciseSelector + cls = _WildcardSelector return cls(pat, child_parts, flavour) @@ -102,17 +95,15 @@ def select_from(self, parent_path): """Iterate over all child paths of `parent_path` matched by this selector. This can contain parent_path itself.""" path_cls = type(parent_path) - is_dir = path_cls.is_dir - exists = path_cls.exists scandir = path_cls._scandir - if not is_dir(parent_path): + if not parent_path.is_dir(): return iter([]) - return self._select_from(parent_path, is_dir, exists, scandir) + return self._select_from(parent_path, scandir) class _TerminatingSelector: - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): yield parent_path @@ -120,28 +111,12 @@ class _ParentSelector(_Selector): def __init__(self, name, child_parts, flavour): _Selector.__init__(self, child_parts, flavour) - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): path = parent_path._make_child_relpath('..') - for p in self.successor._select_from(path, is_dir, exists, scandir): + for p in self.successor._select_from(path, scandir): yield p -class _PreciseSelector(_Selector): - - def __init__(self, name, child_parts, flavour): - self.name = name - _Selector.__init__(self, child_parts, flavour) - - def _select_from(self, parent_path, is_dir, exists, scandir): - try: - path = parent_path._make_child_relpath(self.name) - if (is_dir if self.dironly else exists)(path): - for p in self.successor._select_from(path, is_dir, exists, scandir): - yield p - except PermissionError: - return - - class _WildcardSelector(_Selector): def __init__(self, pat, child_parts, flavour): @@ -149,7 +124,7 @@ def __init__(self, pat, child_parts, flavour): self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch _Selector.__init__(self, child_parts, flavour) - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): try: # We must close the scandir() object before proceeding to # avoid exhausting file descriptors when globbing deep trees. @@ -170,7 +145,7 @@ def _select_from(self, parent_path, is_dir, exists, scandir): name = entry.name if self.match(name): path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, is_dir, exists, scandir): + for p in self.successor._select_from(path, scandir): yield p except PermissionError: return @@ -181,7 +156,7 @@ class _RecursiveWildcardSelector(_Selector): def __init__(self, pat, child_parts, flavour): _Selector.__init__(self, child_parts, flavour) - def _iterate_directories(self, parent_path, is_dir, scandir): + def _iterate_directories(self, parent_path, scandir): yield parent_path try: # We must close the scandir() object before proceeding to @@ -197,18 +172,18 @@ def _iterate_directories(self, parent_path, is_dir, scandir): raise if entry_is_dir and not entry.is_symlink(): path = parent_path._make_child_relpath(entry.name) - for p in self._iterate_directories(path, is_dir, scandir): + for p in self._iterate_directories(path, scandir): yield p except PermissionError: return - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): try: yielded = set() try: successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path, is_dir, scandir): - for p in successor_select(starting_point, is_dir, exists, scandir): + for starting_point in self._iterate_directories(parent_path, scandir): + for p in successor_select(starting_point, scandir): if p not in yielded: yield p yielded.add(p) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 9902b7242205f3..0eb7da56180b08 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -3118,7 +3118,7 @@ def test_glob(self): self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") }) self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") }) self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") }) - self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\FILEa"}) + self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) def test_rglob(self): @@ -3126,7 +3126,7 @@ def test_rglob(self): p = P(BASE, "dirC") self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") }) self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") }) - self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\FILEd"}) + self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) def test_expanduser(self): P = self.cls diff --git a/Misc/NEWS.d/next/Library/2023-05-03-03-14-33.gh-issue-104114.RG26RD.rst b/Misc/NEWS.d/next/Library/2023-05-03-03-14-33.gh-issue-104114.RG26RD.rst new file mode 100644 index 00000000000000..e705fea8326e7a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-03-03-14-33.gh-issue-104114.RG26RD.rst @@ -0,0 +1,3 @@ +Fix issue where :meth:`pathlib.Path.glob` returns paths using the case of +non-wildcard segments for corresponding path segments, rather than the real +filesystem case.