Optimize fine-grained update by using Graph as the cache #4622

Merged (10 commits, Mar 1, 2018)
72 changes: 40 additions & 32 deletions mypy/build.py
@@ -389,7 +389,6 @@ def default_lib_path(data_dir: str,
CacheMeta = NamedTuple('CacheMeta',
[('id', str),
('path', str),
('memory_only', bool), # no corresponding json files (fine-grained only)
('mtime', int),
('size', int),
('hash', str),
@@ -415,7 +414,6 @@ def cache_meta_from_dict(meta: Dict[str, Any], data_json: str) -> CacheMeta:
return CacheMeta(
meta.get('id', sentinel),
meta.get('path', sentinel),
meta.get('memory_only', False),
int(meta['mtime']) if 'mtime' in meta else sentinel,
meta.get('size', sentinel),
meta.get('hash', sentinel),
@@ -569,7 +567,7 @@ class BuildManager:
plugin: Active mypy plugin(s)
errors: Used for reporting all errors
flush_errors: A function for processing errors after each SCC
saved_cache: Dict with saved cache state for dmypy and fine-grained incremental mode
saved_cache: Dict with saved cache state for coarse-grained dmypy
(read-write!)
stats: Dict with various instrumentation numbers
"""
@@ -626,6 +624,8 @@ def all_imported_modules_in_file(self,

Return list of tuples (priority, module id, import line number)
for all modules imported in file; lower numbers == higher priority.

Can generate blocking errors on bogus relative imports.
"""

def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str:
@@ -640,6 +640,12 @@ def correct_rel_imp(imp: Union[ImportFrom, ImportAll]) -> str:
file_id = ".".join(file_id.split(".")[:-rel])
new_id = file_id + "." + imp.id if imp.id else file_id

if not new_id:
self.errors.set_file(file.path, file.name())
self.errors.report(imp.line, 0,
"No parent module -- cannot perform relative import",
blocker=True)

return new_id

res = [] # type: List[Tuple[int, str, int]]
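
For illustration (simplified, not mypy's actual code): the blocking error added above fires when a relative import climbs past the top-level package, which leaves correct_rel_imp with an empty module id. A rough sketch of that id arithmetic:

def relative_import_target(file_id: str, rel: int, imported: str) -> str:
    """Simplified sketch of the id arithmetic in correct_rel_imp (not mypy's code).

    file_id:  id of the importing module, e.g. 'pkg.sub.mod'
    rel:      number of package levels the relative import climbs
    imported: the 'x' in 'from <dots>x import ...', or '' for a bare 'from <dots> import ...'
    """
    # Drop one trailing component per level climbed, then append the target name.
    base = ".".join(file_id.split(".")[:-rel]) if rel > 0 else file_id
    new_id = base + "." + imported if imported else base
    # An empty new_id means the import climbed above the top level: that is the
    # case that now triggers "No parent module -- cannot perform relative import".
    return new_id

assert relative_import_target("pkg.sub.mod", 1, "util") == "pkg.sub.util"
assert relative_import_target("mod", 1, "") == ""  # bogus relative import
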
@@ -1129,12 +1135,6 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
manager.log('Metadata abandoned for {}: errors were previously ignored'.format(id))
return None

if meta.memory_only:
# Special case for fine-grained incremental mode when the JSON file is missing but
# we want to cache the module anyway.
manager.log('Memory-only metadata for {}'.format(id))
return meta

assert path is not None, "Internal error: meta was provided without a path"
# Check data_json; assume if its mtime matches it's good.
# TODO: stat() errors
@@ -1623,7 +1623,8 @@ def __init__(self,
self.ignore_all = True
else:
# In 'error' mode, produce special error messages.
manager.log("Skipping %s (%s)" % (path, id))
if id not in manager.missing_modules:
manager.log("Skipping %s (%s)" % (path, id))
if follow_imports == 'error':
if ancestor_for:
self.skipping_ancestor(id, path, ancestor_for)
@@ -1675,7 +1676,7 @@ def __init__(self,
else:
# Parse the file (and then some) to get the dependencies.
self.parse_file()
self.suppressed = []
self.compute_dependencies()
self.child_modules = set()

def skipping_ancestor(self, id: str, path: str, ancestor_for: 'State') -> None:
@@ -1830,6 +1831,8 @@ def fix_suppressed_dependencies(self, graph: Graph) -> None:
"""
# TODO: See if it's possible to move this check directly into parse_file in some way.
# TODO: Find a way to write a test case for this fix.
# TODO: I suspect that splitting compute_dependencies() out from parse_file
# obviates the need for this but lacking a test case for the problem this fixed...
silent_mode = (self.options.ignore_missing_imports or
self.options.follow_imports == 'skip')
if not silent_mode:
@@ -1896,49 +1899,48 @@ def parse_file(self) -> None:
# TODO: Why can't SemanticAnalyzerPass1 .analyze() do this?
self.tree.names = manager.semantic_analyzer.globals

self.check_blockers()

def compute_dependencies(self) -> None:
"""Compute a module's dependencies after parsing it.

This is used when we parse a file that we didn't have
up-to-date cache information for. When we have an up-to-date
cache, we just use the cached info.
"""
manager = self.manager
assert self.tree is not None

# Compute (direct) dependencies.
# Add all direct imports (this is why we needed the first pass).
# Also keep track of each dependency's source line.
dependencies = []
suppressed = []
priorities = {} # type: Dict[str, int] # id -> priority
dep_line_map = {} # type: Dict[str, int] # id -> line
for pri, id, line in manager.all_imported_modules_in_file(self.tree):
priorities[id] = min(pri, priorities.get(id, PRI_ALL))
if id == self.id:
continue
# Omit missing modules, as otherwise we could not type-check
# programs with missing modules.
if id in manager.missing_modules:
Member:

I hope you know what you're doing by removing this block of code. I can't assess the effect easily by just reading the code (though it's plausible that the code around line 1628 takes care of it).

Collaborator (Author):

The graph loading code is pretty subtle, so I wouldn't say I am 100% confident, but I am reasonably so---I discussed my reasoning some in my comment above.

if id not in dep_line_map:
suppressed.append(id)
dep_line_map[id] = line
continue
if id == '':
# Must be from a relative import.
manager.errors.set_file(self.xpath, self.id)
manager.errors.report(line, 0,
"No parent module -- cannot perform relative import",
blocker=True)
continue
if id not in dep_line_map:
dependencies.append(id)
dep_line_map[id] = line
# Every module implicitly depends on builtins.
if self.id != 'builtins' and 'builtins' not in dep_line_map:
dependencies.append('builtins')

# If self.dependencies is already set, it was read from the
# cache, but for some reason we're re-parsing the file.
# NOTE: What to do about race conditions (like editing the
# file while mypy runs)? A previous version of this code
# explicitly checked for this, but ran afoul of other reasons
# for differences (e.g. silent mode).

# Missing dependencies will be moved from dependencies to
# suppressed when they fail to be loaded in load_graph.
self.dependencies = dependencies
self.suppressed = suppressed
self.suppressed = []
self.priorities = priorities
self.dep_line_map = dep_line_map
self.check_blockers()

self.check_blockers() # Can fail due to bogus relative imports

def semantic_analysis(self) -> None:
assert self.tree is not None, "Internal error: method must be called on parsed file only"
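
A rough sketch, under assumptions, of the division of labour this hunk sets up: compute_dependencies() records every import it sees, and missing modules are only demoted to suppressed later, when load_graph fails to find them. This is not mypy's actual code; the helpers parse_state and find_module are hypothetical.

def load_graph_sketch(source_ids, parse_state, find_module):
    """Hypothetical outline of the dependency handling described above."""
    graph = {}
    worklist = list(source_ids)
    while worklist:
        mod_id = worklist.pop()
        if mod_id in graph:
            continue
        state = parse_state(mod_id)        # parse_file() + compute_dependencies()
        graph[mod_id] = state
        for dep in list(state.dependencies):
            if find_module(dep) is None:
                # The demotion from dependencies to suppressed now happens here,
                # during graph loading, rather than inside parse_file().
                state.dependencies.remove(dep)
                state.suppressed.append(dep)
            else:
                worklist.append(dep)
    return graph
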
@@ -2193,13 +2195,19 @@ def dump_graph(graph: Graph) -> None:
print("[" + ",\n ".join(node.dumps() for node in nodes) + "\n]")


def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph:
def load_graph(sources: List[BuildSource], manager: BuildManager,
old_graph: Optional[Graph] = None) -> Graph:
"""Given some source files, load the full dependency graph.

If an old_graph is passed in, it is used as the starting point and
modified during graph loading.

As this may need to parse files, this can raise CompileError in case
there are syntax errors.
"""
graph = {} # type: Graph

graph = old_graph or {} # type: Graph
Member:

Does it matter that this may mutate the old_graph argument in place? (Except when called with an empty dict.)

Collaborator (Author):

Yep, that's the goal.


# The deque is used to implement breadth-first traversal.
# TODO: Consider whether to go depth-first instead. This may
# affect the order in which we process files within import cycles.
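
A minimal sketch of the reuse pattern the new old_graph parameter enables. This is illustrative only; the real calls are driven by the fine-grained update machinery, and `sources`, `changed_sources`, and `manager` are assumed to exist already.

graph = load_graph(sources, manager)                      # fresh build
# ... some files are edited ...
graph = load_graph(changed_sources, manager, old_graph=graph)
# old_graph is mutated in place, so State objects for unchanged modules are
# reused as the cache instead of being rebuilt from JSON metadata.
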
18 changes: 18 additions & 0 deletions mypy/checker.py
@@ -216,6 +216,24 @@ def __init__(self, errors: Errors, modules: Dict[str, MypyFile], options: Option
# for processing module top levels in fine-grained incremental mode.
self.recurse_into_functions = True

def reset(self) -> None:
"""Cleanup stale state that might be left over from a typechecking run.

This allows us to reuse TypeChecker objects in fine-grained
incremental mode.
"""
# TODO: verify this is still actually worth it over creating new checkers
self.partial_reported.clear()
self.module_refs.clear()
self.binder = ConditionalTypeBinder()
self.type_map.clear()

assert self.inferred_attribute_types is None
assert self.partial_types == []
assert self.deferred_nodes == []
assert len(self.scope.stack) == 1
assert self.partial_types == []

def check_first_pass(self) -> None:
"""Type check the entire file, but defer functions with unresolved references.

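A sketch, under assumptions, of the reuse pattern reset() is meant to support; the actual driver lives in mypy's fine-grained update code, and `checker` here is an already-constructed TypeChecker.

checker.check_first_pass()      # a full run; per-run state accumulates

# ... later, before driving the same TypeChecker again on updated code ...
checker.reset()                 # drops binder, type_map, partial_reported, module_refs
# from here the checker can be used as if freshly constructed
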
8 changes: 5 additions & 3 deletions mypy/dmypy_server.py
@@ -282,10 +282,12 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
if self.options.use_fine_grained_cache:
# Pull times and hashes out of the saved_cache and stick them into
# the fswatcher, so we pick up the changes.
for meta, mypyfile, type_map in manager.saved_cache.values():
if meta.mtime is None: continue
for state in self.fine_grained_manager.graph.values():
meta = state.meta
if meta is None: continue
assert state.path is not None
self.fswatcher.set_file_data(
mypyfile.path,
state.path,
FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash))

# Run an update
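
Why the watcher is pre-seeded: it treats whatever FileData it holds as the last-seen snapshot, so seeding it from cached metadata means the first fine-grained update only reports files that actually changed after the cache was written. A conceptual sketch of that comparison, with hypothetical names (not mypy's fswatcher code):

def find_changed(snapshot, stat_file):
    """snapshot maps path -> (mtime, size, md5); stat_file(path) returns the same tuple now."""
    changed = []
    for path, old in snapshot.items():
        if stat_file(path) != old:   # any of mtime, size, or hash differs
            changed.append(path)
    return changed
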