Skip to content

Commit

Permalink
Further --changed optimization (#5579)
Browse files Browse the repository at this point in the history
### Problem

A few more performance issues were discovered in `./pants --changed=.. list` due to the removal of the v1 engine:

* In order to calculate `direct` or `transitive` dependents, `ChangeCalculator` needs to compute the dependents graph. But it was doing this using `HydratedTargets` (ie, with sources globs and other fields expanded).
* After the `ChangeCalculator` was used to compute the new set of `TargetRoots`, we were converting the matched `Address` objects back into `Spec`s, triggering #4533.
* When locating candidate owning targets for some sources, `SourceMapper` was using `HydratedTargets`, which are (implicitly/unnecessarily) transitive.
* When matching source files against candidate targets, `SourceMapper` was using un-batched regex-based spec matching calls.

### Solution

* Add and switch to computing `HydratedStructs` in `ChangeCalculator`, which do not require expanding sources globs.
* Rename `HydratedTargets` to `TransitiveHydratedTargets`, and add a non-transitive rule to compute `HydratedTargets`. Use this in `SourceMapper`.
* Preserve `Address` objects in `ChangedTargetRoots`, which allows for a single deduped `TransitiveHydratedTarget` walk to warm and populate the graph.
* Add and used batched forms of the `filespec` matching methods to avoid re-parsing/compiling regexes. These matches should be ported to rust at some point. 

### Result

Transitive runs that touch more of the graph take time closer to the `./pants list ::` time, as expected.
 
Before:
```
$ time ./pants --changed-diffspec=22ca0604b1c6ce8de019214b821e922aac66b026^..22ca060 --changed-include-dependees=transitive list | wc -l

     562

real	0m15.945s
user	0m15.180s
sys	0m1.731s
```
After:
```
$ time ./pants --changed-diffspec=22ca0604b1c6ce8de019214b821e922aac66b026^..22ca060 --changed-include-dependees=transitive list | wc -l

     563

real	0m5.402s
user	0m4.580s
sys	0m1.319s
```

Larger runs (more files/targets) see an even more significant speedup: on the order of 5-10x.
  • Loading branch information
Stu Hood authored Mar 14, 2018
1 parent 68ce613 commit c981c39
Show file tree
Hide file tree
Showing 19 changed files with 246 additions and 110 deletions.
8 changes: 8 additions & 0 deletions src/python/pants/base/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ python_library(
],
)

python_library(
name = 'target_roots',
sources = ['target_roots.py'],
dependencies = [
'src/python/pants/util:objects',
],
)

python_library(
name = 'worker_pool',
sources = ['worker_pool.py'],
Expand Down
28 changes: 28 additions & 0 deletions src/python/pants/base/target_roots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# coding=utf-8
# Copyright 2016 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

from pants.util.objects import datatype


class InvalidSpecConstraint(Exception):
"""Raised when invalid constraints are given via target specs and arguments like --changed*."""


class TargetRoots(object):
"""Determines the target roots for a given pants run."""


class ChangedTargetRoots(datatype('ChangedTargetRoots', ['addresses']), TargetRoots):
"""Target roots that have been altered by `--changed` functionality.
Contains a list of `Address`es rather than `Spec`s, because all inputs have already been
resolved, and are known to exist.
"""


class LiteralTargetRoots(datatype('LiteralTargetRoots', ['specs']), TargetRoots):
"""User defined target roots, as pants.base.specs.Spec objects."""
18 changes: 13 additions & 5 deletions src/python/pants/bin/engine_initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@

from pants.base.build_environment import get_buildroot, get_scm
from pants.base.file_system_project_tree import FileSystemProjectTree
from pants.engine.build_files import create_graph_rules
from pants.base.target_roots import ChangedTargetRoots, LiteralTargetRoots
from pants.engine.build_files import BuildFileAddresses, create_graph_rules
from pants.engine.fs import create_fs_rules
from pants.engine.isolated_process import create_process_rules
from pants.engine.legacy.address_mapper import LegacyAddressMapper
from pants.engine.legacy.graph import HydratedTargets, LegacyBuildGraph, create_legacy_graph_tasks
from pants.engine.legacy.graph import (LegacyBuildGraph, TransitiveHydratedTargets,
create_legacy_graph_tasks)
from pants.engine.legacy.parser import LegacyPythonCallbacksParser
from pants.engine.legacy.structs import (GoTargetAdaptor, JavaLibraryAdaptor, JunitTestsAdaptor,
JvmAppAdaptor, PythonLibraryAdaptor, PythonTargetAdaptor,
Expand Down Expand Up @@ -74,12 +76,18 @@ class LegacyGraphHelper(namedtuple('LegacyGraphHelper', ['scheduler', 'symbol_ta
"""A container for the components necessary to construct a legacy BuildGraph facade."""

def warm_product_graph(self, target_roots):
"""Warm the scheduler's `ProductGraph` with `HydratedTargets` products.
"""Warm the scheduler's `ProductGraph` with `TransitiveHydratedTargets` products.
:param TargetRoots target_roots: The targets root of the request.
"""
logger.debug('warming target_roots for: %r', target_roots)
request = self.scheduler.execution_request([HydratedTargets], target_roots.as_specs())
if type(target_roots) is ChangedTargetRoots:
subjects = [BuildFileAddresses(target_roots.addresses)]
elif type(target_roots) is LiteralTargetRoots:
subjects = target_roots.specs
else:
raise ValueError('Unexpected TargetRoots type: `{}`.'.format(target_roots))
request = self.scheduler.execution_request([TransitiveHydratedTargets], subjects)
result = self.scheduler.execute(request)
if result.error:
raise result.error
Expand All @@ -95,7 +103,7 @@ def create_build_graph(self, target_roots, build_root=None):
graph = LegacyBuildGraph.create(self.scheduler, self.symbol_table)
logger.debug('build_graph is: %s', graph)
# Ensure the entire generator is unrolled.
for _ in graph.inject_specs_closure(target_roots.as_specs()):
for _ in graph.inject_roots_closure(target_roots):
pass

address_mapper = LegacyAddressMapper(self.scheduler, build_root or get_buildroot())
Expand Down
18 changes: 9 additions & 9 deletions src/python/pants/bin/goal_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from pants.goal.run_tracker import RunTracker
from pants.help.help_printer import HelpPrinter
from pants.init.subprocess import Subprocess
from pants.init.target_roots import TargetRoots
from pants.init.target_roots_calculator import TargetRootsCalculator
from pants.java.nailgun_executor import NailgunProcessGroup
from pants.option.ranked_value import RankedValue
from pants.reporting.reporting import Reporting
Expand Down Expand Up @@ -113,7 +113,7 @@ def _init_graph(self,
include_trace_on_error=self._options.for_global_scope().print_exception_stacktrace
)

target_roots = target_roots or TargetRoots.create(
target_roots = target_roots or TargetRootsCalculator.create(
options=self._options,
build_root=self._root_dir,
change_calculator=graph_helper.change_calculator
Expand All @@ -133,21 +133,21 @@ def _determine_goals(self, requested_goals):
goals = [Goal.by_name(goal) for goal in requested_goals]
return goals

def _specs_to_targets(self, specs):
"""Populate the BuildGraph and target list from a set of input specs."""
def _roots_to_targets(self, target_roots):
"""Populate the BuildGraph and target list from a set of input TargetRoots."""
with self._run_tracker.new_workunit(name='parse', labels=[WorkUnitLabel.SETUP]):
def filter_for_tag(tag):
return lambda target: tag in map(str, target.tags)

tag_filter = wrap_filters(create_filters(self._tag, filter_for_tag))

def generate_targets(specs):
for address in self._build_graph.inject_specs_closure(specs, self._fail_fast):
def generate_targets():
for address in self._build_graph.inject_roots_closure(target_roots, self._fail_fast):
target = self._build_graph.get_target(address)
if tag_filter(target):
yield target

return list(generate_targets(specs))
return list(generate_targets())

def _should_be_quiet(self, goals):
if self._explain:
Expand All @@ -174,7 +174,7 @@ def _setup_context(self):
goals = self._determine_goals(self._requested_goals)
is_quiet = self._should_be_quiet(goals)

literal_target_roots = self._specs_to_targets(target_roots.as_specs())
target_root_instances = self._roots_to_targets(target_roots)

# Now that we've parsed the bootstrap BUILD files, and know about the SCM system.
self._run_tracker.run_info.add_scm_info()
Expand All @@ -186,7 +186,7 @@ def _setup_context(self):

context = Context(options=self._options,
run_tracker=self._run_tracker,
target_roots=literal_target_roots,
target_roots=target_root_instances,
requested_goals=self._requested_goals,
build_graph=self._build_graph,
build_file_parser=self._build_file_parser,
Expand Down
1 change: 1 addition & 0 deletions src/python/pants/build_graph/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ python_library(
'src/python/pants/base:payload',
'src/python/pants/base:payload_field',
'src/python/pants/base:specs',
'src/python/pants/base:target_roots',
'src/python/pants/base:validation',
'src/python/pants/option',
'src/python/pants/source',
Expand Down
14 changes: 14 additions & 0 deletions src/python/pants/build_graph/mutable_build_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import logging
import traceback

from pants.base.target_roots import ChangedTargetRoots, LiteralTargetRoots
from pants.build_graph.address import Address
from pants.build_graph.address_lookup_error import AddressLookupError
from pants.build_graph.build_graph import BuildGraph
Expand Down Expand Up @@ -99,6 +100,19 @@ def inject_address_closure(self, address):
raise self.TransitiveLookupError("{message}\n referenced from {spec}"
.format(message=e, spec=target_address.spec))

def inject_roots_closure(self, target_roots, fail_fast=None):
if type(target_roots) is ChangedTargetRoots:
for address in target_roots.addresses:
self.inject_address_closure(address)
yield address
elif type(target_roots) is LiteralTargetRoots:
for address in self._address_mapper.scan_specs(target_roots.specs,
fail_fast=fail_fast):
self.inject_address_closure(address)
yield address
else:
raise ValueError('Unrecognized TargetRoots type: `{}`.'.format(target_roots))

def inject_specs_closure(self, specs, fail_fast=None):
for address in self._address_mapper.scan_specs(specs,
fail_fast=fail_fast):
Expand Down
15 changes: 15 additions & 0 deletions src/python/pants/engine/build_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,12 @@ def _recursive_dirname(f):
yield ''


# TODO: This is a bit of a lie: `Struct` is effectively abstract, so this collection
# will contain subclasses of `Struct` for the symbol table types. These APIs need more
# polish before we make them public: see #4535 in particular.
HydratedStructs = Collection.of(Struct)


BuildFilesCollection = Collection.of(BuildFiles)


Expand All @@ -343,6 +349,14 @@ def create_graph_rules(address_mapper, symbol_table):
hydrate_struct
),
resolve_unhydrated_struct,
TaskRule(
HydratedStructs,
[SelectDependencies(symbol_table_constraint,
BuildFileAddresses,
field_types=(Address,),
field='addresses')],
HydratedStructs
),
# BUILD file parsing.
parse_address_family,
build_files,
Expand All @@ -355,6 +369,7 @@ def create_graph_rules(address_mapper, symbol_table):
# Root rules representing parameters that might be provided via root subjects.
RootRule(Address),
RootRule(BuildFileAddress),
RootRule(BuildFileAddresses),
RootRule(AscendantAddresses),
RootRule(DescendantAddresses),
RootRule(SiblingAddresses),
Expand Down
23 changes: 12 additions & 11 deletions src/python/pants/engine/legacy/address_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,21 @@ def scan_build_files(self, base_path):
return build_files_set

@staticmethod
def is_declaring_file(address, file_path):
if not BuildFile._is_buildfile_name(os.path.basename(file_path)):
return False

def any_is_declaring_file(address, file_paths):
try:
# A precise check for BuildFileAddress
return address.rel_path == file_path
return address.rel_path in file_paths
except AttributeError:
# NB: this will cause any BUILD file, whether it contains the address declaration or not to be
# considered the one that declared it. That's ok though, because the spec path should be enough
# information for debugging most of the time.
#
# TODO: remove this after https://github.com/pantsbuild/pants/issues/3925 lands
return os.path.dirname(file_path) == address.spec_path
pass
# NB: this will cause any BUILD file, whether it contains the address declaration or not to be
# considered the one that declared it. That's ok though, because the spec path should be enough
# information for debugging most of the time.
return any(address.spec_path == os.path.dirname(fp)
for fp in file_paths if BuildFile._is_buildfile_name(os.path.basename(fp)))

@staticmethod
def is_declaring_file(address, file_path):
return LegacyAddressMapper.any_is_declaring_file(address, [file_path])

def addresses_in_spec_path(self, spec_path):
return self.scan_specs([SiblingAddresses(spec_path)])
Expand Down
Loading

0 comments on commit c981c39

Please sign in to comment.