Skip to content

Commit

Permalink
find_sources: find build sources recursively (#9614)
Browse files Browse the repository at this point in the history
Fixes #8548

This is important to fix because it's a really common source of
surprising false negatives.

I also lay some groundwork for supporting passing in namespace packages
on the command line. The approach towards namespace packages that this
anticipates is that if you pass in files to mypy and you want mypy to
understand your namespace packages, you'll need to specify explicit
package roots.

We also make many fewer calls to crawl functions, since we just pass
around what we need.

Co-authored-by: hauntsaninja <>
  • Loading branch information
hauntsaninja authored Oct 23, 2020
1 parent af3c8be commit 0fb671c
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 53 deletions.
128 changes: 77 additions & 51 deletions mypy/find_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class InvalidSourceList(Exception):
"""Exception indicating a problem in the list of sources given to mypy."""


def create_source_list(files: Sequence[str], options: Options,
def create_source_list(paths: Sequence[str], options: Options,
fscache: Optional[FileSystemCache] = None,
allow_empty_dir: bool = False) -> List[BuildSource]:
"""From a list of source files/directories, makes a list of BuildSources.
Expand All @@ -26,22 +26,24 @@ def create_source_list(files: Sequence[str], options: Options,
fscache = fscache or FileSystemCache()
finder = SourceFinder(fscache)

targets = []
for f in files:
if f.endswith(PY_EXTENSIONS):
sources = []
for path in paths:
path = os.path.normpath(path)
if path.endswith(PY_EXTENSIONS):
# Can raise InvalidSourceList if a directory doesn't have a valid module name.
name, base_dir = finder.crawl_up(os.path.normpath(f))
targets.append(BuildSource(f, name, None, base_dir))
elif fscache.isdir(f):
sub_targets = finder.expand_dir(os.path.normpath(f))
if not sub_targets and not allow_empty_dir:
raise InvalidSourceList("There are no .py[i] files in directory '{}'"
.format(f))
targets.extend(sub_targets)
name, base_dir = finder.crawl_up(path)
sources.append(BuildSource(path, name, None, base_dir))
elif fscache.isdir(path):
sub_sources = finder.find_sources_in_dir(path, explicit_package_roots=None)
if not sub_sources and not allow_empty_dir:
raise InvalidSourceList(
"There are no .py[i] files in directory '{}'".format(path)
)
sources.extend(sub_sources)
else:
mod = os.path.basename(f) if options.scripts_are_modules else None
targets.append(BuildSource(f, mod, None))
return targets
mod = os.path.basename(path) if options.scripts_are_modules else None
sources.append(BuildSource(path, mod, None))
return sources


def keyfunc(name: str) -> Tuple[int, str]:
Expand All @@ -62,57 +64,82 @@ def __init__(self, fscache: FileSystemCache) -> None:
# A cache for package names, mapping from directory path to module id and base dir
self.package_cache = {} # type: Dict[str, Tuple[str, str]]

def expand_dir(self, arg: str, mod_prefix: str = '') -> List[BuildSource]:
"""Convert a directory name to a list of sources to build."""
f = self.get_init_file(arg)
if mod_prefix and not f:
return []
def find_sources_in_dir(
self, path: str, explicit_package_roots: Optional[List[str]]
) -> List[BuildSource]:
if explicit_package_roots is None:
mod_prefix, root_dir = self.crawl_up_dir(path)
else:
mod_prefix = os.path.basename(path)
root_dir = os.path.dirname(path) or "."
if mod_prefix:
mod_prefix += "."
return self.find_sources_in_dir_helper(path, mod_prefix, root_dir, explicit_package_roots)

def find_sources_in_dir_helper(
self, dir_path: str, mod_prefix: str, root_dir: str,
explicit_package_roots: Optional[List[str]]
) -> List[BuildSource]:
assert not mod_prefix or mod_prefix.endswith(".")

init_file = self.get_init_file(dir_path)
# If the current directory is an explicit package root, explore it as such.
# Alternatively, if we aren't given explicit package roots and we don't have an __init__
# file, recursively explore this directory as a new package root.
if (
(explicit_package_roots is not None and dir_path in explicit_package_roots)
or (explicit_package_roots is None and init_file is None)
):
mod_prefix = ""
root_dir = dir_path

seen = set() # type: Set[str]
sources = []
top_mod, base_dir = self.crawl_up_dir(arg)
if f and not mod_prefix:
mod_prefix = top_mod + '.'
if mod_prefix:
sources.append(BuildSource(f, mod_prefix.rstrip('.'), None, base_dir))
names = self.fscache.listdir(arg)

if init_file:
sources.append(BuildSource(init_file, mod_prefix.rstrip("."), None, root_dir))

names = self.fscache.listdir(dir_path)
names.sort(key=keyfunc)
for name in names:
# Skip certain names altogether
if (name == '__pycache__' or name == 'py.typed'
or name.startswith('.')
or name.endswith(('~', '.pyc', '.pyo'))):
if name == '__pycache__' or name.startswith('.') or name.endswith('~'):
continue
path = os.path.join(arg, name)
path = os.path.join(dir_path, name)

if self.fscache.isdir(path):
sub_sources = self.expand_dir(path, mod_prefix + name + '.')
sub_sources = self.find_sources_in_dir_helper(
path, mod_prefix + name + '.', root_dir, explicit_package_roots
)
if sub_sources:
seen.add(name)
sources.extend(sub_sources)
else:
base, suffix = os.path.splitext(name)
if base == '__init__':
stem, suffix = os.path.splitext(name)
if stem == '__init__':
continue
if base not in seen and '.' not in base and suffix in PY_EXTENSIONS:
seen.add(base)
src = BuildSource(path, mod_prefix + base, None, base_dir)
if stem not in seen and '.' not in stem and suffix in PY_EXTENSIONS:
seen.add(stem)
src = BuildSource(path, mod_prefix + stem, None, root_dir)
sources.append(src)

return sources

def crawl_up(self, arg: str) -> Tuple[str, str]:
def crawl_up(self, path: str) -> Tuple[str, str]:
"""Given a .py[i] filename, return module and base directory
We crawl up the path until we find a directory without
__init__.py[i], or until we run out of path components.
"""
dir, mod = os.path.split(arg)
mod = strip_py(mod) or mod
base, base_dir = self.crawl_up_dir(dir)
if mod == '__init__' or not mod:
mod = base
parent, filename = os.path.split(path)
module_name = strip_py(filename) or os.path.basename(filename)
module_prefix, base_dir = self.crawl_up_dir(parent)
if module_name == '__init__' or not module_name:
module = module_prefix
else:
mod = module_join(base, mod)
module = module_join(module_prefix, module_name)

return mod, base_dir
return module, base_dir

def crawl_up_dir(self, dir: str) -> Tuple[str, str]:
"""Given a directory name, return the corresponding module name and base directory
Expand All @@ -124,25 +151,24 @@ def crawl_up_dir(self, dir: str) -> Tuple[str, str]:

parent_dir, base = os.path.split(dir)
if not dir or not self.get_init_file(dir) or not base:
res = ''
module = ''
base_dir = dir or '.'
else:
# Ensure that base is a valid python module name
if base.endswith('-stubs'):
base = base[:-6] # PEP-561 stub-only directory
if not base.isidentifier():
raise InvalidSourceList('{} is not a valid Python package name'.format(base))
parent, base_dir = self.crawl_up_dir(parent_dir)
res = module_join(parent, base)
parent_module, base_dir = self.crawl_up_dir(parent_dir)
module = module_join(parent_module, base)

self.package_cache[dir] = res, base_dir
return res, base_dir
self.package_cache[dir] = module, base_dir
return module, base_dir

def get_init_file(self, dir: str) -> Optional[str]:
"""Check whether a directory contains a file named __init__.py[i].
If so, return the file's name (with dir prefixed). If not, return
None.
If so, return the file's name (with dir prefixed). If not, return None.
This prefers .pyi over .py (because of the ordering of PY_EXTENSIONS).
"""
Expand Down
22 changes: 20 additions & 2 deletions test-data/unit/cmdline.test
Original file line number Diff line number Diff line change
Expand Up @@ -45,29 +45,47 @@ pkg/subpkg/a.py:1: error: Name 'undef' is not defined
# cmd: mypy dir
[file dir/a.py]
undef
[file dir/subdir/a.py]
[file dir/subdir/b.py]
undef
[out]
dir/a.py:1: error: Name 'undef' is not defined
dir/subdir/b.py:1: error: Name 'undef' is not defined

[case testCmdlineNonPackageDuplicate]
# cmd: mypy dir
[file dir/a.py]
undef
[file dir/subdir/a.py]
undef
[out]
dir/a.py: error: Duplicate module named 'a' (also at 'dir/subdir/a.py')
dir/a.py: error: Are you missing an __init__.py?
== Return code: 2

[case testCmdlineNonPackageSlash]
# cmd: mypy dir/
[file dir/a.py]
undef
[file dir/subdir/a.py]
import b
[file dir/subdir/b.py]
undef
import a
[out]
dir/a.py:1: error: Name 'undef' is not defined
dir/subdir/b.py:1: error: Name 'undef' is not defined

[case testCmdlinePackageContainingSubdir]
# cmd: mypy pkg
[file pkg/__init__.py]
[file pkg/a.py]
undef
import a
[file pkg/subdir/a.py]
undef
import pkg.a
[out]
pkg/a.py:1: error: Name 'undef' is not defined
pkg/subdir/a.py:1: error: Name 'undef' is not defined

[case testCmdlineNonPackageContainingPackage]
# cmd: mypy dir
Expand Down

0 comments on commit 0fb671c

Please sign in to comment.