Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

find_sources: find build sources recursively #9614

Merged
merged 2 commits into from
Oct 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 77 additions & 51 deletions mypy/find_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class InvalidSourceList(Exception):
"""Exception indicating a problem in the list of sources given to mypy."""


def create_source_list(files: Sequence[str], options: Options,
def create_source_list(paths: Sequence[str], options: Options,
fscache: Optional[FileSystemCache] = None,
allow_empty_dir: bool = False) -> List[BuildSource]:
"""From a list of source files/directories, makes a list of BuildSources.
Expand All @@ -26,22 +26,24 @@ def create_source_list(files: Sequence[str], options: Options,
fscache = fscache or FileSystemCache()
finder = SourceFinder(fscache)

targets = []
for f in files:
if f.endswith(PY_EXTENSIONS):
sources = []
for path in paths:
path = os.path.normpath(path)
if path.endswith(PY_EXTENSIONS):
# Can raise InvalidSourceList if a directory doesn't have a valid module name.
name, base_dir = finder.crawl_up(os.path.normpath(f))
targets.append(BuildSource(f, name, None, base_dir))
elif fscache.isdir(f):
sub_targets = finder.expand_dir(os.path.normpath(f))
if not sub_targets and not allow_empty_dir:
raise InvalidSourceList("There are no .py[i] files in directory '{}'"
.format(f))
targets.extend(sub_targets)
name, base_dir = finder.crawl_up(path)
sources.append(BuildSource(path, name, None, base_dir))
elif fscache.isdir(path):
sub_sources = finder.find_sources_in_dir(path, explicit_package_roots=None)
if not sub_sources and not allow_empty_dir:
raise InvalidSourceList(
"There are no .py[i] files in directory '{}'".format(path)
)
sources.extend(sub_sources)
else:
mod = os.path.basename(f) if options.scripts_are_modules else None
targets.append(BuildSource(f, mod, None))
return targets
mod = os.path.basename(path) if options.scripts_are_modules else None
sources.append(BuildSource(path, mod, None))
return sources


def keyfunc(name: str) -> Tuple[int, str]:
Expand All @@ -62,57 +64,82 @@ def __init__(self, fscache: FileSystemCache) -> None:
# A cache for package names, mapping from directory path to module id and base dir
self.package_cache = {} # type: Dict[str, Tuple[str, str]]

def expand_dir(self, arg: str, mod_prefix: str = '') -> List[BuildSource]:
"""Convert a directory name to a list of sources to build."""
f = self.get_init_file(arg)
if mod_prefix and not f:
return []
def find_sources_in_dir(
self, path: str, explicit_package_roots: Optional[List[str]]
) -> List[BuildSource]:
if explicit_package_roots is None:
mod_prefix, root_dir = self.crawl_up_dir(path)
else:
mod_prefix = os.path.basename(path)
root_dir = os.path.dirname(path) or "."
if mod_prefix:
mod_prefix += "."
return self.find_sources_in_dir_helper(path, mod_prefix, root_dir, explicit_package_roots)

def find_sources_in_dir_helper(
self, dir_path: str, mod_prefix: str, root_dir: str,
explicit_package_roots: Optional[List[str]]
) -> List[BuildSource]:
assert not mod_prefix or mod_prefix.endswith(".")

init_file = self.get_init_file(dir_path)
# If the current directory is an explicit package root, explore it as such.
# Alternatively, if we aren't given explicit package roots and we don't have an __init__
# file, recursively explore this directory as a new package root.
if (
(explicit_package_roots is not None and dir_path in explicit_package_roots)
or (explicit_package_roots is None and init_file is None)
):
mod_prefix = ""
root_dir = dir_path

seen = set() # type: Set[str]
sources = []
top_mod, base_dir = self.crawl_up_dir(arg)
if f and not mod_prefix:
mod_prefix = top_mod + '.'
if mod_prefix:
sources.append(BuildSource(f, mod_prefix.rstrip('.'), None, base_dir))
names = self.fscache.listdir(arg)

if init_file:
sources.append(BuildSource(init_file, mod_prefix.rstrip("."), None, root_dir))

names = self.fscache.listdir(dir_path)
names.sort(key=keyfunc)
for name in names:
# Skip certain names altogether
if (name == '__pycache__' or name == 'py.typed'
or name.startswith('.')
or name.endswith(('~', '.pyc', '.pyo'))):
if name == '__pycache__' or name.startswith('.') or name.endswith('~'):
continue
path = os.path.join(arg, name)
path = os.path.join(dir_path, name)

if self.fscache.isdir(path):
sub_sources = self.expand_dir(path, mod_prefix + name + '.')
sub_sources = self.find_sources_in_dir_helper(
path, mod_prefix + name + '.', root_dir, explicit_package_roots
)
if sub_sources:
seen.add(name)
sources.extend(sub_sources)
else:
base, suffix = os.path.splitext(name)
if base == '__init__':
stem, suffix = os.path.splitext(name)
if stem == '__init__':
continue
if base not in seen and '.' not in base and suffix in PY_EXTENSIONS:
seen.add(base)
src = BuildSource(path, mod_prefix + base, None, base_dir)
if stem not in seen and '.' not in stem and suffix in PY_EXTENSIONS:
seen.add(stem)
src = BuildSource(path, mod_prefix + stem, None, root_dir)
sources.append(src)

return sources

def crawl_up(self, arg: str) -> Tuple[str, str]:
def crawl_up(self, path: str) -> Tuple[str, str]:
"""Given a .py[i] filename, return module and base directory

We crawl up the path until we find a directory without
__init__.py[i], or until we run out of path components.
"""
dir, mod = os.path.split(arg)
mod = strip_py(mod) or mod
base, base_dir = self.crawl_up_dir(dir)
if mod == '__init__' or not mod:
mod = base
parent, filename = os.path.split(path)
module_name = strip_py(filename) or os.path.basename(filename)
module_prefix, base_dir = self.crawl_up_dir(parent)
if module_name == '__init__' or not module_name:
module = module_prefix
else:
mod = module_join(base, mod)
module = module_join(module_prefix, module_name)

return mod, base_dir
return module, base_dir

def crawl_up_dir(self, dir: str) -> Tuple[str, str]:
"""Given a directory name, return the corresponding module name and base directory
Expand All @@ -124,25 +151,24 @@ def crawl_up_dir(self, dir: str) -> Tuple[str, str]:

parent_dir, base = os.path.split(dir)
if not dir or not self.get_init_file(dir) or not base:
res = ''
module = ''
base_dir = dir or '.'
else:
# Ensure that base is a valid python module name
if base.endswith('-stubs'):
base = base[:-6] # PEP-561 stub-only directory
if not base.isidentifier():
raise InvalidSourceList('{} is not a valid Python package name'.format(base))
parent, base_dir = self.crawl_up_dir(parent_dir)
res = module_join(parent, base)
parent_module, base_dir = self.crawl_up_dir(parent_dir)
module = module_join(parent_module, base)

self.package_cache[dir] = res, base_dir
return res, base_dir
self.package_cache[dir] = module, base_dir
return module, base_dir

def get_init_file(self, dir: str) -> Optional[str]:
"""Check whether a directory contains a file named __init__.py[i].

If so, return the file's name (with dir prefixed). If not, return
None.
If so, return the file's name (with dir prefixed). If not, return None.

This prefers .pyi over .py (because of the ordering of PY_EXTENSIONS).
"""
Expand Down
22 changes: 20 additions & 2 deletions test-data/unit/cmdline.test
Original file line number Diff line number Diff line change
Expand Up @@ -45,29 +45,47 @@ pkg/subpkg/a.py:1: error: Name 'undef' is not defined
# cmd: mypy dir
[file dir/a.py]
undef
[file dir/subdir/a.py]
[file dir/subdir/b.py]
undef
[out]
dir/a.py:1: error: Name 'undef' is not defined
dir/subdir/b.py:1: error: Name 'undef' is not defined

[case testCmdlineNonPackageDuplicate]
# cmd: mypy dir
[file dir/a.py]
undef
[file dir/subdir/a.py]
undef
[out]
dir/a.py: error: Duplicate module named 'a' (also at 'dir/subdir/a.py')
dir/a.py: error: Are you missing an __init__.py?
== Return code: 2

[case testCmdlineNonPackageSlash]
# cmd: mypy dir/
[file dir/a.py]
undef
[file dir/subdir/a.py]
import b
[file dir/subdir/b.py]
undef
import a
[out]
dir/a.py:1: error: Name 'undef' is not defined
dir/subdir/b.py:1: error: Name 'undef' is not defined

[case testCmdlinePackageContainingSubdir]
# cmd: mypy pkg
[file pkg/__init__.py]
[file pkg/a.py]
undef
import a
[file pkg/subdir/a.py]
undef
import pkg.a
[out]
pkg/a.py:1: error: Name 'undef' is not defined
pkg/subdir/a.py:1: error: Name 'undef' is not defined
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test importing from modules found via recursive directory traversal. Test both the case where the directory contains `__init__.py` and the case where it doesn't.

Maybe also test this with `--namespace-packages`?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

testCmdlinePackageSlash tests the case where the directory contains `__init__.py`, and testCmdlinePackageContainingSubdir tests the case where it doesn't.
I also added imports of the modules to the tests.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There currently isn't any interaction with `--namespace-packages`, and cmdline tests are slow, so I won't add that unless asked.
That said, I have another diff coming soon where I flesh out the package root code and make `--namespace-packages` the default. All these tests pass with that.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll also update the docs once all these changes (essentially important bug fixes + first three points of #8584) are shipped :-)


[case testCmdlineNonPackageContainingPackage]
# cmd: mypy dir
Expand Down