From c981c392690f73bdc18e0d8184ee41920e6a273f Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Wed, 14 Mar 2018 10:12:54 -0700 Subject: [PATCH] Further --changed optimization (#5579) ### Problem A few more performance issues were discovered in `./pants --changed=.. list` due to the removal of the v1 engine: * In order to calculate `direct` or `transitive` dependents, `ChangeCalculator` needs to compute the dependents graph. But it was doing this using `HydratedTargets` (ie, with sources globs and other fields expanded). * After the `ChangeCalculator` was used to compute the new set of `TargetRoots`, we were converting the matched `Address` objects back into `Spec`s, triggering #4533. * When locating candidate owning targets for some sources, `SourceMapper` was using `HydratedTargets`, which are (implicitly/unnecessarily) transitive. * When matching source files against candidate targets, `SourceMapper` was using un-batched regex-based spec matching calls. ### Solution * Add and switch to computing `HydratedStructs` in `ChangeCalculator`, which do not require expanding sources globs. * Rename `HydratedTargets` to `TransitiveHydratedTargets`, and add a non-transitive rule to compute `HydratedTargets`. Use this in `SourceMapper`. * Preserve `Address` objects in `ChangedTargetRoots`, which allows for a single deduped `TransitiveHydratedTarget` walk to warm and populate the graph. * Add and used batched forms of the `filespec` matching methods to avoid re-parsing/compiling regexes. These matches should be ported to rust at some point. ### Result Transitive runs that touch more of the graph take time closer to the `./pants list ::` time, as expected. Before: ``` $ time ./pants --changed-diffspec=22ca0604b1c6ce8de019214b821e922aac66b026^..22ca0604b1c6ce8de019214b821e922aac66b026 --changed-include-dependees=transitive list | wc -l 562 real 0m15.945s user 0m15.180s sys 0m1.731s ``` After: ``` $ time ./pants --changed-diffspec=22ca0604b1c6ce8de019214b821e922aac66b026^..22ca0604b1c6ce8de019214b821e922aac66b026 --changed-include-dependees=transitive list | wc -l 563 real 0m5.402s user 0m4.580s sys 0m1.319s ``` Larger runs (more files/targets) see an even more significant speedup: on the order of 5-10x. --- src/python/pants/base/BUILD | 8 ++ src/python/pants/base/target_roots.py | 28 ++++++ src/python/pants/bin/engine_initializer.py | 18 ++-- src/python/pants/bin/goal_runner.py | 18 ++-- src/python/pants/build_graph/BUILD | 1 + .../pants/build_graph/mutable_build_graph.py | 14 +++ src/python/pants/engine/build_files.py | 15 ++++ .../pants/engine/legacy/address_mapper.py | 23 ++--- src/python/pants/engine/legacy/graph.py | 89 +++++++++++++++---- .../pants/engine/legacy/source_mapper.py | 20 +++-- src/python/pants/init/BUILD | 1 + ...et_roots.py => target_roots_calculator.py} | 25 +----- src/python/pants/pantsd/pants_daemon.py | 4 +- .../pants/pantsd/service/pailgun_service.py | 9 +- src/python/pants/scm/change_calculator.py | 39 ++++---- src/python/pants/source/filespec.py | 30 +++++-- .../pants_test/engine/legacy/test_graph.py | 6 +- tests/python/pants_test/engine/test_rules.py | 4 +- .../pantsd/service/test_pailgun_service.py | 4 +- 19 files changed, 246 insertions(+), 110 deletions(-) create mode 100644 src/python/pants/base/target_roots.py rename src/python/pants/init/{target_roots.py => target_roots_calculator.py} (79%) diff --git a/src/python/pants/base/BUILD b/src/python/pants/base/BUILD index 6ca8afa53eb..40fcc718f77 100644 --- a/src/python/pants/base/BUILD +++ b/src/python/pants/base/BUILD @@ -159,6 +159,14 @@ python_library( ], ) +python_library( + name = 'target_roots', + sources = ['target_roots.py'], + dependencies = [ + 'src/python/pants/util:objects', + ], +) + python_library( name = 'worker_pool', sources = ['worker_pool.py'], diff --git a/src/python/pants/base/target_roots.py b/src/python/pants/base/target_roots.py new file mode 100644 index 00000000000..b5983fcd87b --- /dev/null +++ b/src/python/pants/base/target_roots.py @@ -0,0 +1,28 @@ +# coding=utf-8 +# Copyright 2016 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +from __future__ import (absolute_import, division, generators, nested_scopes, print_function, + unicode_literals, with_statement) + +from pants.util.objects import datatype + + +class InvalidSpecConstraint(Exception): + """Raised when invalid constraints are given via target specs and arguments like --changed*.""" + + +class TargetRoots(object): + """Determines the target roots for a given pants run.""" + + +class ChangedTargetRoots(datatype('ChangedTargetRoots', ['addresses']), TargetRoots): + """Target roots that have been altered by `--changed` functionality. + + Contains a list of `Address`es rather than `Spec`s, because all inputs have already been + resolved, and are known to exist. + """ + + +class LiteralTargetRoots(datatype('LiteralTargetRoots', ['specs']), TargetRoots): + """User defined target roots, as pants.base.specs.Spec objects.""" diff --git a/src/python/pants/bin/engine_initializer.py b/src/python/pants/bin/engine_initializer.py index b2ae9427835..f304d3e3546 100644 --- a/src/python/pants/bin/engine_initializer.py +++ b/src/python/pants/bin/engine_initializer.py @@ -10,11 +10,13 @@ from pants.base.build_environment import get_buildroot, get_scm from pants.base.file_system_project_tree import FileSystemProjectTree -from pants.engine.build_files import create_graph_rules +from pants.base.target_roots import ChangedTargetRoots, LiteralTargetRoots +from pants.engine.build_files import BuildFileAddresses, create_graph_rules from pants.engine.fs import create_fs_rules from pants.engine.isolated_process import create_process_rules from pants.engine.legacy.address_mapper import LegacyAddressMapper -from pants.engine.legacy.graph import HydratedTargets, LegacyBuildGraph, create_legacy_graph_tasks +from pants.engine.legacy.graph import (LegacyBuildGraph, TransitiveHydratedTargets, + create_legacy_graph_tasks) from pants.engine.legacy.parser import LegacyPythonCallbacksParser from pants.engine.legacy.structs import (GoTargetAdaptor, JavaLibraryAdaptor, JunitTestsAdaptor, JvmAppAdaptor, PythonLibraryAdaptor, PythonTargetAdaptor, @@ -74,12 +76,18 @@ class LegacyGraphHelper(namedtuple('LegacyGraphHelper', ['scheduler', 'symbol_ta """A container for the components necessary to construct a legacy BuildGraph facade.""" def warm_product_graph(self, target_roots): - """Warm the scheduler's `ProductGraph` with `HydratedTargets` products. + """Warm the scheduler's `ProductGraph` with `TransitiveHydratedTargets` products. :param TargetRoots target_roots: The targets root of the request. """ logger.debug('warming target_roots for: %r', target_roots) - request = self.scheduler.execution_request([HydratedTargets], target_roots.as_specs()) + if type(target_roots) is ChangedTargetRoots: + subjects = [BuildFileAddresses(target_roots.addresses)] + elif type(target_roots) is LiteralTargetRoots: + subjects = target_roots.specs + else: + raise ValueError('Unexpected TargetRoots type: `{}`.'.format(target_roots)) + request = self.scheduler.execution_request([TransitiveHydratedTargets], subjects) result = self.scheduler.execute(request) if result.error: raise result.error @@ -95,7 +103,7 @@ def create_build_graph(self, target_roots, build_root=None): graph = LegacyBuildGraph.create(self.scheduler, self.symbol_table) logger.debug('build_graph is: %s', graph) # Ensure the entire generator is unrolled. - for _ in graph.inject_specs_closure(target_roots.as_specs()): + for _ in graph.inject_roots_closure(target_roots): pass address_mapper = LegacyAddressMapper(self.scheduler, build_root or get_buildroot()) diff --git a/src/python/pants/bin/goal_runner.py b/src/python/pants/bin/goal_runner.py index c640f0d837d..b24f2d5f8c6 100644 --- a/src/python/pants/bin/goal_runner.py +++ b/src/python/pants/bin/goal_runner.py @@ -21,7 +21,7 @@ from pants.goal.run_tracker import RunTracker from pants.help.help_printer import HelpPrinter from pants.init.subprocess import Subprocess -from pants.init.target_roots import TargetRoots +from pants.init.target_roots_calculator import TargetRootsCalculator from pants.java.nailgun_executor import NailgunProcessGroup from pants.option.ranked_value import RankedValue from pants.reporting.reporting import Reporting @@ -113,7 +113,7 @@ def _init_graph(self, include_trace_on_error=self._options.for_global_scope().print_exception_stacktrace ) - target_roots = target_roots or TargetRoots.create( + target_roots = target_roots or TargetRootsCalculator.create( options=self._options, build_root=self._root_dir, change_calculator=graph_helper.change_calculator @@ -133,21 +133,21 @@ def _determine_goals(self, requested_goals): goals = [Goal.by_name(goal) for goal in requested_goals] return goals - def _specs_to_targets(self, specs): - """Populate the BuildGraph and target list from a set of input specs.""" + def _roots_to_targets(self, target_roots): + """Populate the BuildGraph and target list from a set of input TargetRoots.""" with self._run_tracker.new_workunit(name='parse', labels=[WorkUnitLabel.SETUP]): def filter_for_tag(tag): return lambda target: tag in map(str, target.tags) tag_filter = wrap_filters(create_filters(self._tag, filter_for_tag)) - def generate_targets(specs): - for address in self._build_graph.inject_specs_closure(specs, self._fail_fast): + def generate_targets(): + for address in self._build_graph.inject_roots_closure(target_roots, self._fail_fast): target = self._build_graph.get_target(address) if tag_filter(target): yield target - return list(generate_targets(specs)) + return list(generate_targets()) def _should_be_quiet(self, goals): if self._explain: @@ -174,7 +174,7 @@ def _setup_context(self): goals = self._determine_goals(self._requested_goals) is_quiet = self._should_be_quiet(goals) - literal_target_roots = self._specs_to_targets(target_roots.as_specs()) + target_root_instances = self._roots_to_targets(target_roots) # Now that we've parsed the bootstrap BUILD files, and know about the SCM system. self._run_tracker.run_info.add_scm_info() @@ -186,7 +186,7 @@ def _setup_context(self): context = Context(options=self._options, run_tracker=self._run_tracker, - target_roots=literal_target_roots, + target_roots=target_root_instances, requested_goals=self._requested_goals, build_graph=self._build_graph, build_file_parser=self._build_file_parser, diff --git a/src/python/pants/build_graph/BUILD b/src/python/pants/build_graph/BUILD index afe064480e7..f4dd17ee70d 100644 --- a/src/python/pants/build_graph/BUILD +++ b/src/python/pants/build_graph/BUILD @@ -17,6 +17,7 @@ python_library( 'src/python/pants/base:payload', 'src/python/pants/base:payload_field', 'src/python/pants/base:specs', + 'src/python/pants/base:target_roots', 'src/python/pants/base:validation', 'src/python/pants/option', 'src/python/pants/source', diff --git a/src/python/pants/build_graph/mutable_build_graph.py b/src/python/pants/build_graph/mutable_build_graph.py index 97142cd5eea..2a55d3acb18 100644 --- a/src/python/pants/build_graph/mutable_build_graph.py +++ b/src/python/pants/build_graph/mutable_build_graph.py @@ -8,6 +8,7 @@ import logging import traceback +from pants.base.target_roots import ChangedTargetRoots, LiteralTargetRoots from pants.build_graph.address import Address from pants.build_graph.address_lookup_error import AddressLookupError from pants.build_graph.build_graph import BuildGraph @@ -99,6 +100,19 @@ def inject_address_closure(self, address): raise self.TransitiveLookupError("{message}\n referenced from {spec}" .format(message=e, spec=target_address.spec)) + def inject_roots_closure(self, target_roots, fail_fast=None): + if type(target_roots) is ChangedTargetRoots: + for address in target_roots.addresses: + self.inject_address_closure(address) + yield address + elif type(target_roots) is LiteralTargetRoots: + for address in self._address_mapper.scan_specs(target_roots.specs, + fail_fast=fail_fast): + self.inject_address_closure(address) + yield address + else: + raise ValueError('Unrecognized TargetRoots type: `{}`.'.format(target_roots)) + def inject_specs_closure(self, specs, fail_fast=None): for address in self._address_mapper.scan_specs(specs, fail_fast=fail_fast): diff --git a/src/python/pants/engine/build_files.py b/src/python/pants/engine/build_files.py index 6bdbc6e6e96..268a3c42721 100644 --- a/src/python/pants/engine/build_files.py +++ b/src/python/pants/engine/build_files.py @@ -318,6 +318,12 @@ def _recursive_dirname(f): yield '' +# TODO: This is a bit of a lie: `Struct` is effectively abstract, so this collection +# will contain subclasses of `Struct` for the symbol table types. These APIs need more +# polish before we make them public: see #4535 in particular. +HydratedStructs = Collection.of(Struct) + + BuildFilesCollection = Collection.of(BuildFiles) @@ -343,6 +349,14 @@ def create_graph_rules(address_mapper, symbol_table): hydrate_struct ), resolve_unhydrated_struct, + TaskRule( + HydratedStructs, + [SelectDependencies(symbol_table_constraint, + BuildFileAddresses, + field_types=(Address,), + field='addresses')], + HydratedStructs + ), # BUILD file parsing. parse_address_family, build_files, @@ -355,6 +369,7 @@ def create_graph_rules(address_mapper, symbol_table): # Root rules representing parameters that might be provided via root subjects. RootRule(Address), RootRule(BuildFileAddress), + RootRule(BuildFileAddresses), RootRule(AscendantAddresses), RootRule(DescendantAddresses), RootRule(SiblingAddresses), diff --git a/src/python/pants/engine/legacy/address_mapper.py b/src/python/pants/engine/legacy/address_mapper.py index 8bf72db9caf..06487ce9d58 100644 --- a/src/python/pants/engine/legacy/address_mapper.py +++ b/src/python/pants/engine/legacy/address_mapper.py @@ -46,20 +46,21 @@ def scan_build_files(self, base_path): return build_files_set @staticmethod - def is_declaring_file(address, file_path): - if not BuildFile._is_buildfile_name(os.path.basename(file_path)): - return False - + def any_is_declaring_file(address, file_paths): try: # A precise check for BuildFileAddress - return address.rel_path == file_path + return address.rel_path in file_paths except AttributeError: - # NB: this will cause any BUILD file, whether it contains the address declaration or not to be - # considered the one that declared it. That's ok though, because the spec path should be enough - # information for debugging most of the time. - # - # TODO: remove this after https://github.com/pantsbuild/pants/issues/3925 lands - return os.path.dirname(file_path) == address.spec_path + pass + # NB: this will cause any BUILD file, whether it contains the address declaration or not to be + # considered the one that declared it. That's ok though, because the spec path should be enough + # information for debugging most of the time. + return any(address.spec_path == os.path.dirname(fp) + for fp in file_paths if BuildFile._is_buildfile_name(os.path.basename(fp))) + + @staticmethod + def is_declaring_file(address, file_path): + return LegacyAddressMapper.any_is_declaring_file(address, [file_path]) def addresses_in_spec_path(self, spec_path): return self.scan_specs([SiblingAddresses(spec_path)]) diff --git a/src/python/pants/engine/legacy/graph.py b/src/python/pants/engine/legacy/graph.py index 99f3f2ba6b7..1491ce2bfc9 100644 --- a/src/python/pants/engine/legacy/graph.py +++ b/src/python/pants/engine/legacy/graph.py @@ -6,6 +6,7 @@ unicode_literals, with_statement) import logging +from contextlib import contextmanager from twitter.common.collections import OrderedSet @@ -13,6 +14,7 @@ from pants.base.exceptions import TargetDefinitionException from pants.base.parse_context import ParseContext from pants.base.specs import SingleAddress +from pants.base.target_roots import ChangedTargetRoots, LiteralTargetRoots from pants.build_graph.address import Address from pants.build_graph.address_lookup_error import AddressLookupError from pants.build_graph.build_graph import BuildGraph @@ -51,7 +53,7 @@ class _DestWrapper(datatype('DestWrapper', ['target_types'])): class LegacyBuildGraph(BuildGraph): """A directed acyclic graph of Targets and dependencies. Not necessarily connected. - This implementation is backed by a Scheduler that is able to resolve HydratedTargets. + This implementation is backed by a Scheduler that is able to resolve TransitiveHydratedTargets. """ class InvalidCommandLineSpecError(AddressLookupError): @@ -65,7 +67,7 @@ def create(cls, scheduler, symbol_table): def __init__(self, scheduler, target_types): """Construct a graph given a Scheduler, Engine, and a SymbolTable class. - :param scheduler: A Scheduler that is configured to be able to resolve HydratedTargets. + :param scheduler: A Scheduler that is configured to be able to resolve TransitiveHydratedTargets. :param symbol_table: A SymbolTable instance used to instantiate Target objects. Must match the symbol table installed in the scheduler (TODO: see comment in `_instantiate_target`). """ @@ -87,7 +89,7 @@ def _index(self, roots): # Index the ProductGraph. for product in roots: - # We have a successful HydratedTargets value (for a particular input Spec). + # We have a successful TransitiveHydratedTargets value (for a particular input Spec). for hydrated_target in product.dependencies: target_adaptor = hydrated_target.adaptor address = target_adaptor.address @@ -206,16 +208,26 @@ def inject_addresses_closure(self, addresses): addresses = set(addresses) - set(self._target_by_address.keys()) if not addresses: return - matched = set(self._inject([SingleAddress(a.spec_path, a.target_name) for a in addresses])) + matched = set(self._inject_specs([SingleAddress(a.spec_path, a.target_name) for a in addresses])) missing = addresses - matched if missing: # TODO: When SingleAddress resolution converted from projection of a directory # and name to a match for PathGlobs, we lost our useful AddressLookupError formatting. raise AddressLookupError('Addresses were not matched: {}'.format(missing)) + def inject_roots_closure(self, target_roots, fail_fast=None): + if type(target_roots) is ChangedTargetRoots: + for address in self._inject_addresses(target_roots.addresses): + yield address + elif type(target_roots) is LiteralTargetRoots: + for address in self._inject_specs(target_roots.specs): + yield address + else: + raise ValueError('Unrecognized TargetRoots type: `{}`.'.format(target_roots)) + def inject_specs_closure(self, specs, fail_fast=None): # Request loading of these specs. - for address in self._inject(specs): + for address in self._inject_specs(specs): yield address def resolve_address(self, address): @@ -223,12 +235,10 @@ def resolve_address(self, address): self.inject_address_closure(address) return self.get_target(address) - def _inject(self, subjects): - """Inject Targets into the graph for each of the subjects and yield the resulting addresses.""" - logger.debug('Injecting to %s: %s', self, subjects) + @contextmanager + def _resolve_context(self): try: - product_results = self._scheduler.products_request([HydratedTargets, BuildFileAddresses], - subjects) + yield except ResolveError as e: # NB: ResolveError means that a target was not found, which is a common user facing error. raise AddressLookupError(str(e)) @@ -237,8 +247,37 @@ def _inject(self, subjects): 'Build graph construction failed: {} {}'.format(type(e).__name__, str(e)) ) - # Update the base class indexes for this request. - self._index(product_results[HydratedTargets]) + def _inject_addresses(self, subjects): + """Injects targets into the graph for each of the given `Address` objects, and then yields them. + + TODO: See #4533 about unifying "collection of literal Addresses" with the `Spec` types, which + would avoid the need for the independent `_inject_addresses` and `_inject_specs` codepaths. + """ + logger.debug('Injecting addresses to %s: %s', self, subjects) + with self._resolve_context(): + addresses = tuple(subjects) + hydrated_targets = self._scheduler.product_request(TransitiveHydratedTargets, + [BuildFileAddresses(addresses)]) + + self._index(hydrated_targets) + + yielded_addresses = set() + for address in subjects: + if address not in yielded_addresses: + yielded_addresses.add(address) + yield address + + def _inject_specs(self, subjects): + """Injects targets into the graph for each of the given `Spec` objects. + + Yields the resulting addresses. + """ + logger.debug('Injecting specs to %s: %s', self, subjects) + with self._resolve_context(): + product_results = self._scheduler.products_request([TransitiveHydratedTargets, BuildFileAddresses], + subjects) + + self._index(product_results[TransitiveHydratedTargets]) yielded_addresses = set() for subject, product in zip(subjects, product_results[BuildFileAddresses]): @@ -254,7 +293,7 @@ def _inject(self, subjects): class HydratedTarget(datatype('HydratedTarget', ['address', 'adaptor', 'dependencies'])): """A wrapper for a fully hydrated TargetAdaptor object. - Transitive graph walks collect ordered sets of HydratedTargets which involve a huge amount + Transitive graph walks collect ordered sets of TransitiveHydratedTargets which involve a huge amount of hashing: we implement eq/hash via direct usage of an Address field to speed that up. """ @@ -274,12 +313,29 @@ def __hash__(self): return hash(self.address) -HydratedTargets = Collection.of(HydratedTarget) +class TransitiveHydratedTargets(Collection.of(HydratedTarget)): + """A transitive set of HydratedTarget objects.""" -@rule(HydratedTargets, [SelectTransitive(HydratedTarget, BuildFileAddresses, field_types=(Address,), field='addresses')]) +class HydratedTargets(Collection.of(HydratedTarget)): + """An intransitive set of HydratedTarget objects.""" + + +@rule(TransitiveHydratedTargets, [SelectTransitive(HydratedTarget, + BuildFileAddresses, + field_types=(Address,), + field='addresses')]) def transitive_hydrated_targets(targets): - """Recursively requests HydratedTargets, which will result in an eager, transitive graph walk.""" + """Recursively requests HydratedTarget instances, which will result in an eager, transitive graph walk.""" + return TransitiveHydratedTargets(targets) + + +@rule(HydratedTargets, [SelectDependencies(HydratedTarget, + BuildFileAddresses, + field_types=(Address,), + field='addresses')]) +def hydrated_targets(targets): + """Requests HydratedTarget instances.""" return HydratedTargets(targets) @@ -352,6 +408,7 @@ def create_legacy_graph_tasks(symbol_table): symbol_table_constraint = symbol_table.constraint() return [ transitive_hydrated_targets, + hydrated_targets, TaskRule( HydratedTarget, [Select(symbol_table_constraint), diff --git a/src/python/pants/engine/legacy/source_mapper.py b/src/python/pants/engine/legacy/source_mapper.py index 94c93968c5f..38d75c80c85 100644 --- a/src/python/pants/engine/legacy/source_mapper.py +++ b/src/python/pants/engine/legacy/source_mapper.py @@ -14,7 +14,7 @@ from pants.build_graph.source_mapper import SourceMapper from pants.engine.legacy.address_mapper import LegacyAddressMapper from pants.engine.legacy.graph import HydratedTargets -from pants.source.filespec import matches_filespec +from pants.source.filespec import any_matches_filespec def iter_resolve_and_parse_specs(rel_path, specs): @@ -50,10 +50,14 @@ def _unique_dirs_for_sources(self, sources): def target_addresses_for_source(self, source): return list(self.iter_target_addresses_for_sources([source])) - def _match_source(self, source, fileset): - return fileset.matches(source) or matches_filespec(source, fileset.filespec) + def _match_sources(self, sources_set, fileset): + # NB: Deleted files can only be matched against the 'filespec' (ie, `PathGlobs`) for a target, + # so we don't actually call `fileset.matches` here. + # TODO: This call should be pushed down into the engine to match directly against + # `PathGlobs` as we erode the `AddressMapper`/`SourceMapper` split. + return any_matches_filespec(sources_set, fileset.filespec) - def _owns_source(self, source, legacy_target): + def _owns_any_source(self, sources_set, legacy_target): """Given a `HydratedTarget` instance, check if it owns the given source file.""" target_kwargs = legacy_target.adaptor.kwargs() @@ -61,12 +65,12 @@ def _owns_source(self, source, legacy_target): target_source = target_kwargs.get('source') if target_source: path_from_build_root = os.path.join(legacy_target.adaptor.address.spec_path, target_source) - if path_from_build_root == source: + if path_from_build_root in sources_set: return True # Handle `sources`-declaring targets. target_sources = target_kwargs.get('sources', []) - if target_sources and self._match_source(source, target_sources): + if target_sources and self._match_sources(sources_set, target_sources): return True return False @@ -86,6 +90,6 @@ def iter_target_addresses_for_sources(self, sources): for hydrated_target, legacy_address in six.iteritems(hydrated_target_to_address): # Handle BUILD files. - if (any(LegacyAddressMapper.is_declaring_file(legacy_address, f) for f in sources_set) or - any(self._owns_source(source, hydrated_target) for source in sources_set)): + if (LegacyAddressMapper.any_is_declaring_file(legacy_address, sources_set) or + self._owns_any_source(sources_set, hydrated_target)): yield legacy_address diff --git a/src/python/pants/init/BUILD b/src/python/pants/init/BUILD index 32c1f0243c7..5ad6d82a30c 100644 --- a/src/python/pants/init/BUILD +++ b/src/python/pants/init/BUILD @@ -11,6 +11,7 @@ python_library( 'src/python/pants:version', 'src/python/pants/base:build_environment', 'src/python/pants/base:exceptions', + 'src/python/pants/base:target_roots', 'src/python/pants/binaries:binary_util', 'src/python/pants/build_graph', 'src/python/pants/core_tasks', diff --git a/src/python/pants/init/target_roots.py b/src/python/pants/init/target_roots_calculator.py similarity index 79% rename from src/python/pants/init/target_roots.py rename to src/python/pants/init/target_roots_calculator.py index 6bf68fdc988..afe9d4c7890 100644 --- a/src/python/pants/init/target_roots.py +++ b/src/python/pants/init/target_roots_calculator.py @@ -11,7 +11,7 @@ from pants.base.build_environment import get_buildroot from pants.base.cmd_line_spec_parser import CmdLineSpecParser -from pants.base.specs import SingleAddress +from pants.base.target_roots import ChangedTargetRoots, LiteralTargetRoots from pants.scm.subsystems.changed import ChangedRequest @@ -22,7 +22,7 @@ class InvalidSpecConstraint(Exception): """Raised when invalid constraints are given via target specs and arguments like --changed*.""" -class TargetRoots(object): +class TargetRootsCalculator(object): """Determines the target roots for a given pants run.""" @classmethod @@ -64,25 +64,6 @@ def create(cls, options, build_root=None, change_calculator=None): # alternate target roots. changed_addresses = change_calculator.changed_target_addresses(changed_request) logger.debug('changed addresses: %s', changed_addresses) - return ChangedTargetRoots(tuple(SingleAddress(a.spec_path, a.target_name) - for a in changed_addresses)) + return ChangedTargetRoots(tuple(changed_addresses)) return LiteralTargetRoots(spec_roots) - - def __init__(self, spec_roots): - self._spec_roots = spec_roots - - def as_specs(self): - """Returns the current target roots as Specs.""" - return self._spec_roots - - def __repr__(self): - return '{}({!r})'.format(self.__class__.__name__, self.as_specs()) - - -class ChangedTargetRoots(TargetRoots): - """Target roots that have been altered by `--changed` functionality.""" - - -class LiteralTargetRoots(TargetRoots): - """User defined target roots.""" diff --git a/src/python/pants/pantsd/pants_daemon.py b/src/python/pants/pantsd/pants_daemon.py index 513f6c18b6a..d2e9f09838d 100644 --- a/src/python/pants/pantsd/pants_daemon.py +++ b/src/python/pants/pantsd/pants_daemon.py @@ -18,7 +18,7 @@ from pants.bin.daemon_pants_runner import DaemonExiter, DaemonPantsRunner from pants.bin.engine_initializer import EngineInitializer from pants.engine.native import Native -from pants.init.target_roots import TargetRoots +from pants.init.target_roots_calculator import TargetRootsCalculator from pants.logging.setup import setup_logging from pants.option.arg_splitter import GLOBAL_SCOPE from pants.option.options_bootstrapper import OptionsBootstrapper @@ -146,7 +146,7 @@ def _setup_services(build_root, bootstrap_options, legacy_graph_helper, watchman bind_addr=(bootstrap_options.pantsd_pailgun_host, bootstrap_options.pantsd_pailgun_port), exiter_class=DaemonExiter, runner_class=DaemonPantsRunner, - target_roots_class=TargetRoots, + target_roots_calculator=TargetRootsCalculator, scheduler_service=scheduler_service ) diff --git a/src/python/pants/pantsd/service/pailgun_service.py b/src/python/pants/pantsd/service/pailgun_service.py index 8ea135f4eba..b315a2c16db 100644 --- a/src/python/pants/pantsd/service/pailgun_service.py +++ b/src/python/pants/pantsd/service/pailgun_service.py @@ -20,12 +20,13 @@ class PailgunService(PantsService): """A service that runs the Pailgun server.""" - def __init__(self, bind_addr, exiter_class, runner_class, target_roots_class, scheduler_service): + def __init__(self, bind_addr, exiter_class, runner_class, target_roots_calculator, scheduler_service): """ :param tuple bind_addr: The (hostname, port) tuple to bind the Pailgun server to. :param class exiter_class: The `Exiter` class to be used for Pailgun runs. :param class runner_class: The `PantsRunner` class to be used for Pailgun runs. - :param class target_roots_class: The `TargetRoots` class to be used for target root parsing. + :param class target_roots_calculator: The `TargetRootsCalculator` class to be used for target + root parsing. :param SchedulerService scheduler_service: The SchedulerService instance for access to the resident scheduler. """ @@ -33,7 +34,7 @@ def __init__(self, bind_addr, exiter_class, runner_class, target_roots_class, sc self._bind_addr = bind_addr self._exiter_class = exiter_class self._runner_class = runner_class - self._target_roots_class = target_roots_class + self._target_roots_calculator = target_roots_calculator self._scheduler_service = scheduler_service self._logger = logging.getLogger(__name__) @@ -63,7 +64,7 @@ def runner_factory(sock, arguments, environment): self._logger.debug('execution commandline: %s', arguments) options, _ = OptionsInitializer(OptionsBootstrapper(args=arguments)).setup(init_logging=False) - target_roots = self._target_roots_class.create( + target_roots = self._target_roots_calculator.create( options, change_calculator=self._scheduler_service.change_calculator ) diff --git a/src/python/pants/scm/change_calculator.py b/src/python/pants/scm/change_calculator.py index 5f64ab901e4..6aa3c7bea79 100644 --- a/src/python/pants/scm/change_calculator.py +++ b/src/python/pants/scm/change_calculator.py @@ -13,7 +13,8 @@ from pants.base.build_environment import get_scm from pants.base.specs import DescendantAddresses from pants.build_graph.address import Address -from pants.engine.legacy.graph import HydratedTargets, target_types_from_symbol_table +from pants.engine.build_files import HydratedStructs +from pants.engine.legacy.graph import target_types_from_symbol_table from pants.engine.legacy.source_mapper import EngineSourceMapper from pants.goal.workspace import ScmWorkspace from pants.util.meta import AbstractClass @@ -22,8 +23,8 @@ logger = logging.getLogger(__name__) -class _HydratedTargetDependentGraph(object): - """A graph for walking dependent addresses of HydratedTarget objects. +class _DependentGraph(object): + """A graph for walking dependent addresses of TargetAdaptor objects. This avoids/imitates constructing a v1 BuildGraph object, because that codepath results in many references held in mutable global state (ie, memory leaks). @@ -41,26 +42,27 @@ class _HydratedTargetDependentGraph(object): """ @classmethod - def from_iterable(cls, target_types, iterable): - """Create a new HydratedTargetDependentGraph from an iterable of HydratedTarget instances.""" + def from_iterable(cls, target_types, adaptor_iter): + """Create a new DependentGraph from an iterable of TargetAdaptor subclasses.""" inst = cls(target_types) - for hydrated_target in iterable: - inst.inject_target(hydrated_target) + for target_adaptor in adaptor_iter: + inst.inject_target(target_adaptor) return inst def __init__(self, target_types): self._dependent_address_map = defaultdict(set) self._target_types = target_types - def inject_target(self, hydrated_target): + def inject_target(self, target_adaptor): """Inject a target, respecting all sources of dependencies.""" - target_cls = self._target_types[hydrated_target.adaptor.type_alias] + target_cls = self._target_types[target_adaptor.type_alias] - declared_deps = hydrated_target.dependencies - implicit_deps = (Address.parse(s) for s in target_cls.compute_dependency_specs(kwargs=hydrated_target.adaptor.kwargs())) + declared_deps = target_adaptor.dependencies + implicit_deps = (Address.parse(s) + for s in target_cls.compute_dependency_specs(kwargs=target_adaptor.kwargs())) for dep in itertools.chain(declared_deps, implicit_deps): - self._dependent_address_map[dep].add(hydrated_target.adaptor.address) + self._dependent_address_map[dep].add(target_adaptor.address) def dependents_of_addresses(self, addresses): """Given an iterable of addresses, yield all of those addresses dependents.""" @@ -142,12 +144,15 @@ def iter_changed_target_addresses(self, changed_request): if changed_request.include_dependees not in ('direct', 'transitive'): return - # For dependee finding, we need to parse all build files. - product_iter = (t - for targets in self._scheduler.product_request(HydratedTargets, [DescendantAddresses('')]) + # For dependee finding, we need to parse all build files to collect all structs. But we + # don't need to fully hydrate targets (ie, expand their source globs), and so we use + # the `HydratedStructs` product. See #4535 for more info. + adaptor_iter = (t + for targets in self._scheduler.product_request(HydratedStructs, + [DescendantAddresses('')]) for t in targets.dependencies) - graph = _HydratedTargetDependentGraph.from_iterable(target_types_from_symbol_table(self._symbol_table), - product_iter) + graph = _DependentGraph.from_iterable(target_types_from_symbol_table(self._symbol_table), + adaptor_iter) if changed_request.include_dependees == 'direct': for address in graph.dependents_of_addresses(changed_addresses): diff --git a/src/python/pants/source/filespec.py b/src/python/pants/source/filespec.py index 9eb30f9f360..882d54dd0be 100644 --- a/src/python/pants/source/filespec.py +++ b/src/python/pants/source/filespec.py @@ -44,16 +44,28 @@ def glob_to_regex(pattern): return ''.join(out) -def globs_matches(path, patterns): - return any(re.match(glob_to_regex(pattern), path) for pattern in patterns) +def globs_matches(paths, patterns, exclude_patterns): + def excluded(path): + if excluded.regexes is None: + excluded.regexes = [re.compile(glob_to_regex(ex)) for ex in exclude_patterns] + return any(ex.match(path) for ex in excluded.regexes) + excluded.regexes = None + for pattern in patterns: + regex = re.compile(glob_to_regex(pattern)) + for path in paths: + if regex.match(path) and not excluded(path): + return True + return False def matches_filespec(path, spec): - if spec is None: - return False - if not globs_matches(path, spec.get('globs', [])): + return any_matches_filespec([path], spec) + + +def any_matches_filespec(paths, spec): + if not paths or not spec: return False - for spec in spec.get('exclude', []): - if matches_filespec(path, spec): - return False - return True + exclude_patterns = [] + for exclude_spec in spec.get('exclude', []): + exclude_patterns.extend(exclude_spec.get('globs', [])) + return globs_matches(paths, spec.get('globs', []), exclude_patterns) diff --git a/tests/python/pants_test/engine/legacy/test_graph.py b/tests/python/pants_test/engine/legacy/test_graph.py index a70eb257110..ee85ec89d60 100644 --- a/tests/python/pants_test/engine/legacy/test_graph.py +++ b/tests/python/pants_test/engine/legacy/test_graph.py @@ -18,7 +18,7 @@ from pants.build_graph.build_file_aliases import BuildFileAliases, TargetMacro from pants.build_graph.target import Target from pants.init.options_initializer import OptionsInitializer -from pants.init.target_roots import TargetRoots +from pants.init.target_roots_calculator import TargetRootsCalculator from pants.option.options_bootstrapper import OptionsBootstrapper from pants.subsystem.subsystem import Subsystem from pants.util.contextutil import temporary_dir @@ -57,7 +57,7 @@ def graph_helper(self, build_file_aliases=None, build_file_imports_behavior='all def open_scheduler(self, specs, build_file_aliases=None): with self.graph_helper(build_file_aliases=build_file_aliases) as graph_helper: graph, target_roots = self.create_graph_from_specs(graph_helper, specs) - addresses = tuple(graph.inject_specs_closure(target_roots.as_specs())) + addresses = tuple(graph.inject_roots_closure(target_roots)) yield graph, addresses, graph_helper.scheduler def create_graph_from_specs(self, graph_helper, specs): @@ -67,7 +67,7 @@ def create_graph_from_specs(self, graph_helper, specs): return graph, target_roots def create_target_roots(self, specs): - return TargetRoots.create(self._make_setup_args(specs)) + return TargetRootsCalculator.create(self._make_setup_args(specs)) class GraphTargetScanFailureTests(GraphTestBase): diff --git a/tests/python/pants_test/engine/test_rules.py b/tests/python/pants_test/engine/test_rules.py index 7dd3358da97..ff8faf4c545 100644 --- a/tests/python/pants_test/engine/test_rules.py +++ b/tests/python/pants_test/engine/test_rules.py @@ -303,8 +303,8 @@ def test_full_graph_for_planner_example(self): else: pass - self.assertEquals(36, len(all_rules)) - self.assertEquals(66, len(root_rule_lines)) # 2 lines per entry + self.assertEquals(41, len(all_rules)) + self.assertEquals(78, len(root_rule_lines)) # 2 lines per entry def test_smallest_full_test_multiple_root_subject_types(self): rules = [ diff --git a/tests/python/pants_test/pantsd/service/test_pailgun_service.py b/tests/python/pants_test/pantsd/service/test_pailgun_service.py index 283dbf236c7..7e5363bc6d7 100644 --- a/tests/python/pants_test/pantsd/service/test_pailgun_service.py +++ b/tests/python/pants_test/pantsd/service/test_pailgun_service.py @@ -24,11 +24,11 @@ def setUp(self): self.mock_exiter_class = mock.Mock(side_effect=Exception('should not be called')) self.mock_runner_class = mock.Mock(side_effect=Exception('should not be called')) self.mock_scheduler_service = mock.Mock(side_effect=Exception('should not be called')) - self.mock_target_roots_class = mock.Mock(side_effect=Exception('should not be called')) + self.mock_target_roots_calculator = mock.Mock(side_effect=Exception('should not be called')) self.service = PailgunService(bind_addr=(None, None), exiter_class=self.mock_exiter_class, runner_class=self.mock_runner_class, - target_roots_class=self.mock_target_roots_class, + target_roots_calculator=self.mock_target_roots_calculator, scheduler_service=self.mock_scheduler_service) @mock.patch.object(PailgunService, '_setup_pailgun', **PATCH_OPTS)