From 7ca6efa94f85e6fef79856c6e0c288ef66879971 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 18 Oct 2020 18:06:25 -0700 Subject: [PATCH 1/2] find_sources: find build sources recursively Fixes #8548 This is important to fix because it's a really common source of surprising false negatives. I also lay some groundwork for supporting passing in namespace packages on the command line. The approach towards namespace packages that this anticipates is that if you pass in files to mypy and you want mypy to understand your namespace packages, you'll need to specify explicit package roots. We also make many fewer calls to crawl functions, since we just pass around what we need. --- mypy/find_sources.py | 128 ++++++++++++++++++++++-------------- test-data/unit/cmdline.test | 18 ++++- 2 files changed, 93 insertions(+), 53 deletions(-) diff --git a/mypy/find_sources.py b/mypy/find_sources.py index e9dd9edecec5..d20f0ac9832f 100644 --- a/mypy/find_sources.py +++ b/mypy/find_sources.py @@ -16,7 +16,7 @@ class InvalidSourceList(Exception): """Exception indicating a problem in the list of sources given to mypy.""" -def create_source_list(files: Sequence[str], options: Options, +def create_source_list(paths: Sequence[str], options: Options, fscache: Optional[FileSystemCache] = None, allow_empty_dir: bool = False) -> List[BuildSource]: """From a list of source files/directories, makes a list of BuildSources. @@ -26,22 +26,24 @@ def create_source_list(files: Sequence[str], options: Options, fscache = fscache or FileSystemCache() finder = SourceFinder(fscache) - targets = [] - for f in files: - if f.endswith(PY_EXTENSIONS): + sources = [] + for path in paths: + path = os.path.normpath(path) + if path.endswith(PY_EXTENSIONS): # Can raise InvalidSourceList if a directory doesn't have a valid module name. - name, base_dir = finder.crawl_up(os.path.normpath(f)) - targets.append(BuildSource(f, name, None, base_dir)) - elif fscache.isdir(f): - sub_targets = finder.expand_dir(os.path.normpath(f)) - if not sub_targets and not allow_empty_dir: - raise InvalidSourceList("There are no .py[i] files in directory '{}'" - .format(f)) - targets.extend(sub_targets) + name, base_dir = finder.crawl_up(path) + sources.append(BuildSource(path, name, None, base_dir)) + elif fscache.isdir(path): + sub_sources = finder.find_sources_in_dir(path, explicit_package_roots=None) + if not sub_sources and not allow_empty_dir: + raise InvalidSourceList( + "There are no .py[i] files in directory '{}'".format(path) + ) + sources.extend(sub_sources) else: - mod = os.path.basename(f) if options.scripts_are_modules else None - targets.append(BuildSource(f, mod, None)) - return targets + mod = os.path.basename(path) if options.scripts_are_modules else None + sources.append(BuildSource(path, mod, None)) + return sources def keyfunc(name: str) -> Tuple[int, str]: @@ -62,57 +64,82 @@ def __init__(self, fscache: FileSystemCache) -> None: # A cache for package names, mapping from directory path to module id and base dir self.package_cache = {} # type: Dict[str, Tuple[str, str]] - def expand_dir(self, arg: str, mod_prefix: str = '') -> List[BuildSource]: - """Convert a directory name to a list of sources to build.""" - f = self.get_init_file(arg) - if mod_prefix and not f: - return [] + def find_sources_in_dir( + self, path: str, explicit_package_roots: Optional[List[str]] + ) -> List[BuildSource]: + if explicit_package_roots is None: + mod_prefix, root_dir = self.crawl_up_dir(path) + else: + mod_prefix = os.path.basename(path) + root_dir = os.path.dirname(path) or "." + if mod_prefix: + mod_prefix += "." + return self.find_sources_in_dir_helper(path, mod_prefix, root_dir, explicit_package_roots) + + def find_sources_in_dir_helper( + self, dir_path: str, mod_prefix: str, root_dir: str, + explicit_package_roots: Optional[List[str]] + ) -> List[BuildSource]: + assert not mod_prefix or mod_prefix.endswith(".") + + init_file = self.get_init_file(dir_path) + # If the current directory is an explicit package root, explore it as such. + # Alternatively, if we aren't given explicit package roots and we don't have an __init__ + # file, recursively explore this directory as a new package root. + if ( + (explicit_package_roots is not None and dir_path in explicit_package_roots) + or (explicit_package_roots is None and init_file is None) + ): + mod_prefix = "" + root_dir = dir_path + seen = set() # type: Set[str] sources = [] - top_mod, base_dir = self.crawl_up_dir(arg) - if f and not mod_prefix: - mod_prefix = top_mod + '.' - if mod_prefix: - sources.append(BuildSource(f, mod_prefix.rstrip('.'), None, base_dir)) - names = self.fscache.listdir(arg) + + if init_file: + sources.append(BuildSource(init_file, mod_prefix.rstrip("."), None, root_dir)) + + names = self.fscache.listdir(dir_path) names.sort(key=keyfunc) for name in names: # Skip certain names altogether - if (name == '__pycache__' or name == 'py.typed' - or name.startswith('.') - or name.endswith(('~', '.pyc', '.pyo'))): + if name == '__pycache__' or name.startswith('.') or name.endswith('~'): continue - path = os.path.join(arg, name) + path = os.path.join(dir_path, name) + if self.fscache.isdir(path): - sub_sources = self.expand_dir(path, mod_prefix + name + '.') + sub_sources = self.find_sources_in_dir_helper( + path, mod_prefix + name + '.', root_dir, explicit_package_roots + ) if sub_sources: seen.add(name) sources.extend(sub_sources) else: - base, suffix = os.path.splitext(name) - if base == '__init__': + stem, suffix = os.path.splitext(name) + if stem == '__init__': continue - if base not in seen and '.' not in base and suffix in PY_EXTENSIONS: - seen.add(base) - src = BuildSource(path, mod_prefix + base, None, base_dir) + if stem not in seen and '.' not in stem and suffix in PY_EXTENSIONS: + seen.add(stem) + src = BuildSource(path, mod_prefix + stem, None, root_dir) sources.append(src) + return sources - def crawl_up(self, arg: str) -> Tuple[str, str]: + def crawl_up(self, path: str) -> Tuple[str, str]: """Given a .py[i] filename, return module and base directory We crawl up the path until we find a directory without __init__.py[i], or until we run out of path components. """ - dir, mod = os.path.split(arg) - mod = strip_py(mod) or mod - base, base_dir = self.crawl_up_dir(dir) - if mod == '__init__' or not mod: - mod = base + parent, filename = os.path.split(path) + module_name = strip_py(filename) or os.path.basename(filename) + module_prefix, base_dir = self.crawl_up_dir(parent) + if module_name == '__init__' or not module_name: + module = module_prefix else: - mod = module_join(base, mod) + module = module_join(module_prefix, module_name) - return mod, base_dir + return module, base_dir def crawl_up_dir(self, dir: str) -> Tuple[str, str]: """Given a directory name, return the corresponding module name and base directory @@ -124,7 +151,7 @@ def crawl_up_dir(self, dir: str) -> Tuple[str, str]: parent_dir, base = os.path.split(dir) if not dir or not self.get_init_file(dir) or not base: - res = '' + module = '' base_dir = dir or '.' else: # Ensure that base is a valid python module name @@ -132,17 +159,16 @@ def crawl_up_dir(self, dir: str) -> Tuple[str, str]: base = base[:-6] # PEP-561 stub-only directory if not base.isidentifier(): raise InvalidSourceList('{} is not a valid Python package name'.format(base)) - parent, base_dir = self.crawl_up_dir(parent_dir) - res = module_join(parent, base) + parent_module, base_dir = self.crawl_up_dir(parent_dir) + module = module_join(parent_module, base) - self.package_cache[dir] = res, base_dir - return res, base_dir + self.package_cache[dir] = module, base_dir + return module, base_dir def get_init_file(self, dir: str) -> Optional[str]: """Check whether a directory contains a file named __init__.py[i]. - If so, return the file's name (with dir prefixed). If not, return - None. + If so, return the file's name (with dir prefixed). If not, return None. This prefers .pyi over .py (because of the ordering of PY_EXTENSIONS). """ diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index 9d74bdc9a1be..d19d8abf7f62 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -45,19 +45,32 @@ pkg/subpkg/a.py:1: error: Name 'undef' is not defined # cmd: mypy dir [file dir/a.py] undef -[file dir/subdir/a.py] +[file dir/subdir/b.py] undef [out] dir/a.py:1: error: Name 'undef' is not defined +dir/subdir/b.py:1: error: Name 'undef' is not defined + +[case testCmdlineNonPackageDuplicate] +# cmd: mypy dir +[file dir/a.py] +undef +[file dir/subdir/a.py] +undef +[out] +dir/a.py: error: Duplicate module named 'a' (also at 'dir/subdir/a.py') +dir/a.py: error: Are you missing an __init__.py? +== Return code: 2 [case testCmdlineNonPackageSlash] # cmd: mypy dir/ [file dir/a.py] undef -[file dir/subdir/a.py] +[file dir/subdir/b.py] undef [out] dir/a.py:1: error: Name 'undef' is not defined +dir/subdir/b.py:1: error: Name 'undef' is not defined [case testCmdlinePackageContainingSubdir] # cmd: mypy pkg @@ -68,6 +81,7 @@ undef undef [out] pkg/a.py:1: error: Name 'undef' is not defined +pkg/subdir/a.py:1: error: Name 'undef' is not defined [case testCmdlineNonPackageContainingPackage] # cmd: mypy dir From 333d79e637ef41de5e9270d037e961c010a2d2ff Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Fri, 23 Oct 2020 11:57:12 -0700 Subject: [PATCH 2/2] test_cmdline: import modules found --- test-data/unit/cmdline.test | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index d19d8abf7f62..541f63d10039 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -66,8 +66,10 @@ dir/a.py: error: Are you missing an __init__.py? # cmd: mypy dir/ [file dir/a.py] undef +import b [file dir/subdir/b.py] undef +import a [out] dir/a.py:1: error: Name 'undef' is not defined dir/subdir/b.py:1: error: Name 'undef' is not defined @@ -77,8 +79,10 @@ dir/subdir/b.py:1: error: Name 'undef' is not defined [file pkg/__init__.py] [file pkg/a.py] undef +import a [file pkg/subdir/a.py] undef +import pkg.a [out] pkg/a.py:1: error: Name 'undef' is not defined pkg/subdir/a.py:1: error: Name 'undef' is not defined