From cead01ee8a1c9c10020f8cec290d9e9439f0517c Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Mon, 19 Nov 2018 17:42:49 -0800 Subject: [PATCH] Move file owners computation into the engine and make lighter (#6790) The solution used in #5636 though, was heavy-handed. By requesting `TransitiveHydratedTargets`, we fully constructed the graph of dependencies and then later _also_ constructed the dependent graph (in `DependentGraph`). Moreover, fully hydrating the targets meant unnecessarily fully expanding their source globs. Create a `find_owners` rule that consumes an `OwnersRequest` to compute the owning targets of some sources (and, optionally, their transitive dependents). Moving this logic into an `@rule` allows us to directly consume the symbol_table_contraint, without a wrapping collection (as in #5579). Additionally, switch back to requesting target adaptor subclasses while building the `DependentGraph`. The graph-validation that use to occur via requesting `TransitiveHydratedTargets` now occurs as an explicit step during construction of a `DependentGraph`. A TODO there explains an existing weakness of change-detection in the engine. ``` ./pants --changed-diffspec='49cc12e5cccc6164a2c4aa83c23300ad7254836f^..49cc12e5cccc6164a2c4aa83c23300ad7254836f' --changed-include-dependees=direct --glob-expansion-failure=ignore list ``` is 30 percent faster in the pantsbuild pants repo, primarily due to not expanding source globs. --- src/python/pants/bin/BUILD | 1 - src/python/pants/build_graph/source_mapper.py | 158 ------------- src/python/pants/engine/build_files.py | 2 +- src/python/pants/engine/legacy/BUILD | 13 -- src/python/pants/engine/legacy/graph.py | 177 ++++++++++++++- .../pants/engine/legacy/source_mapper.py | 94 -------- src/python/pants/init/BUILD | 1 - .../pants/init/target_roots_calculator.py | 210 +++--------------- .../build_graph/test_source_mapper.py | 55 +---- .../engine/legacy/test_changed_integration.py | 2 +- 10 files changed, 217 insertions(+), 496 deletions(-) delete mode 100644 src/python/pants/build_graph/source_mapper.py delete mode 100644 src/python/pants/engine/legacy/source_mapper.py diff --git a/src/python/pants/bin/BUILD b/src/python/pants/bin/BUILD index 6b090ef0afe..9639c51620b 100644 --- a/src/python/pants/bin/BUILD +++ b/src/python/pants/bin/BUILD @@ -21,7 +21,6 @@ python_library( 'src/python/pants/engine/legacy:address_mapper', 'src/python/pants/engine/legacy:graph', 'src/python/pants/engine/legacy:parser', - 'src/python/pants/engine/legacy:source_mapper', 'src/python/pants/engine:build_files', 'src/python/pants/engine:fs', 'src/python/pants/engine:legacy_engine', diff --git a/src/python/pants/build_graph/source_mapper.py b/src/python/pants/build_graph/source_mapper.py deleted file mode 100644 index 38afa225e16..00000000000 --- a/src/python/pants/build_graph/source_mapper.py +++ /dev/null @@ -1,158 +0,0 @@ -# coding=utf-8 -# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -from __future__ import absolute_import, division, print_function, unicode_literals - -import os -from builtins import object -from collections import defaultdict - -from pants.build_graph.build_file_address_mapper import BuildFileAddressMapper - - -class SourceMapper(object): - """A utility for making a mapping of source files to targets that own them.""" - - def target_addresses_for_source(self, source): - raise NotImplementedError - - -# TODO: Kill this in favor of `EngineSourceMapper` once pants/backend/graph_info/tasks/list_owners.py -# (which consumes LazySourceMapper) is ported to the v2 engine. -class SpecSourceMapper(SourceMapper): - """ - Uses sources specs to identify an owner target of a source. - - Note: it doesn't check if a file exists. - """ - - def __init__(self, address_mapper, build_graph, stop_after_match=False): - """ - :param AddressMapper address_mapper: An address mapper that can be used to populate the - `build_graph` with targets source mappings are needed for. - :param BuildGraph build_graph: The build graph to map sources from. - :param bool stop_after_match: If `True` a search will not traverse into parent directories once - an owner is identified. - """ - self._stop_after_match = stop_after_match - self._build_graph = build_graph - self._address_mapper = address_mapper - - def target_addresses_for_source(self, source): - result = [] - path = source - - # a top-level source has empty dirname, so do/while instead of straight while loop. - while path: - path = os.path.dirname(path) - try: - result.extend(self._find_targets_for_source(source, path)) - except BuildFileAddressMapper.BuildFileScanError: - pass - if self._stop_after_match and len(result) > 0: - break - - return result - - def _find_targets_for_source(self, source, spec_path): - for address in self._address_mapper.addresses_in_spec_path(spec_path): - self._build_graph.inject_address_closure(address) - target = self._build_graph.get_target(address) - sources_field = target.payload.get_field('sources') - if sources_field and sources_field.matches(source): - yield address - elif self._address_mapper.is_declaring_file(address, source): - yield address - - -class LazySourceMapper(SourceMapper): - """ - This attempts to avoid loading more than needed by lazily searching for and loading BUILD files - adjacent to or in parent directories (up to the buildroot) of a source to construct a (partial) - mapping of sources to owning targets. - - If in stop-after-match mode, a search will not traverse into parent directories once an owner - is identified. THIS MAY FAIL TO FIND ADDITIONAL OWNERS in parent directories, or only find them - when other sources are also mapped first, which cause those owners to be loaded. Some repositories - may be able to use this to avoid expensive walks, but others may need to prefer correctness. - - A LazySourceMapper reuses computed mappings and only searches a given path once as - populating the BuildGraph is expensive, so in general there should only be one instance of it. - """ - - def __init__(self, address_mapper, build_graph, stop_after_match=False): - """Initialize LazySourceMapper. - - :param AddressMapper address_mapper: An address mapper that can be used to populate the - `build_graph` with targets source mappings are needed for. - :param BuildGraph build_graph: The build graph to map sources from. - :param bool stop_after_match: If `True` a search will not traverse into parent directories once - an owner is identified. - """ - self._stop_after_match = stop_after_match - self._build_graph = build_graph - self._address_mapper = address_mapper - self._source_to_address = defaultdict(set) - self._mapped_paths = set() - self._searched_sources = set() - - def _find_owners(self, source): - """Searches for BUILD files adjacent or above a source in the file hierarchy. - - Walks up the directory tree until it reaches a previously searched path. - - Stops after looking in the buildroot. - - If self._stop_after_match is set, stops searching once a source is mapped, even if the parent - has yet to be searched. See class docstring for discussion. - - :param str source: The source at which to start the search. - """ - # Bail instantly if a source has already been searched - if source in self._searched_sources: - return - self._searched_sources.add(source) - - path = os.path.dirname(source) - - # a top-level source has empty dirname, so do/while instead of straight while loop. - walking = True - while walking: - # It is possible - if path not in self._mapped_paths: - try: - self._map_sources_from_spec_path(path) - except BuildFileAddressMapper.BuildFileScanError: - pass - self._mapped_paths.add(path) - elif not self._stop_after_match: - # If not in stop-after-match mode, once a path is seen visited, all parents can be assumed. - return - - # See class docstring - if self._stop_after_match and source in self._source_to_address: - return - - walking = bool(path) - path = os.path.dirname(path) - - def _map_sources_from_spec_path(self, spec_path): - """Populate mapping of source to owning addresses with targets from given BUILD files. - - :param spec_path: a spec_path of targets from which to map sources. - """ - for address in self._address_mapper.addresses_in_spec_path(spec_path): - self._build_graph.inject_address_closure(address) - target = self._build_graph.get_target(address) - - for target_source in target.sources_relative_to_buildroot(): - self._source_to_address[target_source].add(address) - if not target.is_synthetic: - self._source_to_address[address.rel_path].add(address) - - def target_addresses_for_source(self, source): - """Attempt to find targets which own a source by searching up directory structure to buildroot. - - :param string source: The source to look up. - """ - self._find_owners(source) - return self._source_to_address[source] diff --git a/src/python/pants/engine/build_files.py b/src/python/pants/engine/build_files.py index 9db6ffc29d7..300764b908b 100644 --- a/src/python/pants/engine/build_files.py +++ b/src/python/pants/engine/build_files.py @@ -264,7 +264,7 @@ def create_graph_rules(address_mapper, symbol_table): symbol_table_constraint = symbol_table.constraint() partial_hydrate_struct = functools.partial(hydrate_struct, symbol_table_constraint) - partial_hydrate_struct.__name__ = 'hydrate_struct' + functools.update_wrapper(partial_hydrate_struct, hydrate_struct) return [ # A singleton to provide the AddressMapper. diff --git a/src/python/pants/engine/legacy/BUILD b/src/python/pants/engine/legacy/BUILD index 82f93f857b0..ae02f2c6dc1 100644 --- a/src/python/pants/engine/legacy/BUILD +++ b/src/python/pants/engine/legacy/BUILD @@ -82,16 +82,3 @@ python_library( 'src/python/pants/util:objects', ], ) - -python_library( - name='source_mapper', - sources=['source_mapper.py'], - dependencies=[ - ':address_mapper', - ':graph', - 'src/python/pants/base:specs', - 'src/python/pants/build_graph', - 'src/python/pants/source', - '3rdparty/python:six' - ] -) diff --git a/src/python/pants/engine/legacy/graph.py b/src/python/pants/engine/legacy/graph.py index 7d7614f67b4..69e4ef406cb 100644 --- a/src/python/pants/engine/legacy/graph.py +++ b/src/python/pants/engine/legacy/graph.py @@ -4,16 +4,19 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import functools import logging from builtins import str, zip -from collections import deque +from collections import defaultdict, deque from contextlib import contextmanager +from os.path import dirname +from future.utils import iteritems from twitter.common.collections import OrderedSet from pants.base.exceptions import TargetDefinitionException from pants.base.parse_context import ParseContext -from pants.base.specs import SingleAddress, Specs +from pants.base.specs import AscendantAddresses, DescendantAddresses, SingleAddress, Specs from pants.build_graph.address import Address from pants.build_graph.address_lookup_error import AddressLookupError from pants.build_graph.app_base import AppBase, Bundle @@ -21,10 +24,13 @@ from pants.build_graph.remote_sources import RemoteSources from pants.engine.addressable import BuildFileAddresses from pants.engine.fs import PathGlobs, Snapshot +from pants.engine.legacy.address_mapper import LegacyAddressMapper from pants.engine.legacy.structs import BundleAdaptor, BundlesField, SourcesField, TargetAdaptor -from pants.engine.rules import TaskRule, rule +from pants.engine.mapper import AddressMapper +from pants.engine.rules import RootRule, TaskRule, rule from pants.engine.selectors import Get, Select from pants.option.global_options import GlobMatchErrorBehavior +from pants.source.filespec import any_matches_filespec from pants.source.wrapped_globs import EagerFilesetWithSpec, FilesetRelPathWrapper from pants.util.objects import Collection, datatype @@ -269,6 +275,98 @@ def _inject_specs(self, subjects): yield hydrated_target.address +class _DependentGraph(object): + """A graph for walking dependent addresses of TargetAdaptor objects. + + This avoids/imitates constructing a v1 BuildGraph object, because that codepath results + in many references held in mutable global state (ie, memory leaks). + + The long term goal is to deprecate the `changed` goal in favor of sufficiently good cache + hit rates, such that rather than running: + + ./pants --changed-parent=master test + + ...you would always be able to run: + + ./pants test :: + + ...and have it complete in a similar amount of time by hitting relevant caches. + """ + + @classmethod + def from_iterable(cls, target_types, address_mapper, adaptor_iter): + """Create a new DependentGraph from an iterable of TargetAdaptor subclasses.""" + inst = cls(target_types, address_mapper) + all_valid_addresses = set() + for target_adaptor in adaptor_iter: + inst._inject_target(target_adaptor) + all_valid_addresses.add(target_adaptor.address) + inst._validate(all_valid_addresses) + return inst + + def __init__(self, target_types, address_mapper): + # TODO: Dependencies and implicit dependencies are mapped independently, because the latter + # cannot be validated until: + # 1) Subsystems are computed in engine: #5869. Currently instantiating a subsystem to find + # its injectable specs would require options parsing. + # 2) Targets-class Subsystem deps can be expanded in-engine (similar to Fields): #4535, + self._dependent_address_map = defaultdict(set) + self._implicit_dependent_address_map = defaultdict(set) + self._target_types = target_types + self._address_mapper = address_mapper + + def _validate(self, all_valid_addresses): + """Validate that all of the dependencies in the graph exist in the given addresses set.""" + for dependency, dependents in iteritems(self._dependent_address_map): + if dependency not in all_valid_addresses: + raise AddressLookupError( + 'Dependent graph construction failed: {} did not exist. Was depended on by:\n {}'.format( + dependency.spec, + '\n '.join(d.spec for d in dependents) + ) + ) + + def _inject_target(self, target_adaptor): + """Inject a target, respecting all sources of dependencies.""" + target_cls = self._target_types[target_adaptor.type_alias] + + declared_deps = target_adaptor.dependencies + implicit_deps = (Address.parse(s, + relative_to=target_adaptor.address.spec_path, + subproject_roots=self._address_mapper.subproject_roots) + for s in target_cls.compute_dependency_specs(kwargs=target_adaptor.kwargs())) + for dep in declared_deps: + self._dependent_address_map[dep].add(target_adaptor.address) + for dep in implicit_deps: + self._implicit_dependent_address_map[dep].add(target_adaptor.address) + + def dependents_of_addresses(self, addresses): + """Given an iterable of addresses, yield all of those addresses dependents.""" + seen = OrderedSet(addresses) + for address in addresses: + seen.update(self._dependent_address_map[address]) + seen.update(self._implicit_dependent_address_map[address]) + return seen + + def transitive_dependents_of_addresses(self, addresses): + """Given an iterable of addresses, yield all of those addresses dependents, transitively.""" + closure = set() + result = [] + to_visit = deque(addresses) + + while to_visit: + address = to_visit.popleft() + if address in closure: + continue + + closure.add(address) + result.append(address) + to_visit.extend(self._dependent_address_map[address]) + to_visit.extend(self._implicit_dependent_address_map[address]) + + return result + + class HydratedTarget(datatype(['address', 'adaptor', 'dependencies'])): """A wrapper for a fully hydrated TargetAdaptor object. @@ -296,6 +394,65 @@ class HydratedTargets(Collection.of(HydratedTarget)): """An intransitive set of HydratedTarget objects.""" +class OwnersRequest(datatype([ + ('sources', tuple), + ('include_dependees', str), +])): + """A request for the owners (and optionally, transitive dependees) of a set of file paths. + + TODO: `include_dependees` should become an `enum` of the choices from the + `--changed-include-dependees` global option. + """ + + +def find_owners(symbol_table, address_mapper, owners_request): + sources_set = OrderedSet(owners_request.sources) + dirs_set = OrderedSet(dirname(source) for source in sources_set) + + # Walk up the buildroot looking for targets that would conceivably claim changed sources. + candidate_specs = tuple(AscendantAddresses(directory=d) for d in dirs_set) + candidate_targets = yield Get(HydratedTargets, Specs(candidate_specs)) + + # Match the source globs against the expanded candidate targets. + def owns_any_source(legacy_target): + """Given a `HydratedTarget` instance, check if it owns the given source file.""" + target_kwargs = legacy_target.adaptor.kwargs() + + # Handle `sources`-declaring targets. + # NB: Deleted files can only be matched against the 'filespec' (ie, `PathGlobs`) for a target, + # so we don't actually call `fileset.matches` here. + # TODO: This matching logic should be implemented using the rust `fs` crate for two reasons: + # 1) having two implementations isn't great + # 2) we're expanding sources via HydratedTarget, but it isn't necessary to do that to match + target_sources = target_kwargs.get('sources', None) + if target_sources and any_matches_filespec(sources_set, target_sources.filespec): + return True + + return False + + direct_owners = tuple(ht.adaptor.address + for ht in candidate_targets.dependencies + if LegacyAddressMapper.any_is_declaring_file(ht.adaptor.address, sources_set) or + owns_any_source(ht)) + + # If the OwnersRequest does not require dependees, then we're done. + if owners_request.include_dependees == 'none': + yield BuildFileAddresses(direct_owners) + else: + # Otherwise: find dependees. + all_addresses = yield Get(BuildFileAddresses, Specs((DescendantAddresses(''),))) + all_structs = yield [Get(symbol_table.constraint(), Address, a.to_address()) for a in all_addresses.dependencies] + + graph = _DependentGraph.from_iterable(target_types_from_symbol_table(symbol_table), + address_mapper, + all_structs) + if owners_request.include_dependees == 'direct': + yield BuildFileAddresses(tuple(graph.dependents_of_addresses(direct_owners))) + else: + assert owners_request.include_dependees == 'transitive' + yield BuildFileAddresses(tuple(graph.transitive_dependents_of_addresses(direct_owners))) + + @rule(TransitiveHydratedTargets, [Select(BuildFileAddresses)]) def transitive_hydrated_targets(build_file_addresses): """Given BuildFileAddresses, kicks off recursion on expansion of TransitiveHydratedTargets. @@ -417,6 +574,9 @@ def create_legacy_graph_tasks(symbol_table): """Create tasks to recursively parse the legacy graph.""" symbol_table_constraint = symbol_table.constraint() + partial_find_owners = functools.partial(find_owners, symbol_table) + functools.update_wrapper(partial_find_owners, find_owners) + return [ transitive_hydrated_targets, transitive_hydrated_target, @@ -430,6 +590,17 @@ def create_legacy_graph_tasks(symbol_table): Get(HydratedField, BundlesField), ] ), + TaskRule( + BuildFileAddresses, + [Select(AddressMapper), Select(OwnersRequest)], + partial_find_owners, + input_gets=[ + Get(HydratedTargets, Specs), + Get(BuildFileAddresses, Specs), + Get(symbol_table_constraint, Address), + ] + ), hydrate_sources, hydrate_bundles, + RootRule(OwnersRequest), ] diff --git a/src/python/pants/engine/legacy/source_mapper.py b/src/python/pants/engine/legacy/source_mapper.py deleted file mode 100644 index 73fd6ab8221..00000000000 --- a/src/python/pants/engine/legacy/source_mapper.py +++ /dev/null @@ -1,94 +0,0 @@ -# coding=utf-8 -# Copyright 2016 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -from __future__ import absolute_import, division, print_function, unicode_literals - -import os - -import six - -from pants.base.specs import AscendantAddresses, SingleAddress, Specs -from pants.build_graph.address import parse_spec -from pants.build_graph.source_mapper import SourceMapper -from pants.engine.legacy.address_mapper import LegacyAddressMapper -from pants.engine.legacy.graph import HydratedTargets -from pants.source.filespec import any_matches_filespec - - -def iter_resolve_and_parse_specs(rel_path, specs): - """Given a relative path and set of input specs, produce a list of proper `Spec` objects. - - :param string rel_path: The relative path to the input specs from the build root. - :param iterable specs: An iterable of specs. - """ - for spec in specs: - spec_path, target_name = parse_spec(spec, rel_path) - yield SingleAddress(spec_path, target_name) - - -def resolve_and_parse_specs(*args, **kwargs): - return list(iter_resolve_and_parse_specs(*args, **kwargs)) - - -class EngineSourceMapper(SourceMapper): - """A v2 engine backed SourceMapper that supports pre-`BuildGraph` cache warming in the daemon.""" - - def __init__(self, scheduler): - self._scheduler = scheduler - - def _unique_dirs_for_sources(self, sources): - """Given an iterable of sources, yield unique dirname'd paths.""" - seen = set() - for source in sources: - source_dir = os.path.dirname(source) - if source_dir not in seen: - seen.add(source_dir) - yield source_dir - - def target_addresses_for_source(self, source): - return list(self.iter_target_addresses_for_sources([source])) - - def _match_sources(self, sources_set, fileset): - # NB: Deleted files can only be matched against the 'filespec' (ie, `PathGlobs`) for a target, - # so we don't actually call `fileset.matches` here. - # TODO: This call should be pushed down into the engine to match directly against - # `PathGlobs` as we erode the `AddressMapper`/`SourceMapper` split. - return any_matches_filespec(sources_set, fileset.filespec) - - def _owns_any_source(self, sources_set, legacy_target): - """Given a `HydratedTarget` instance, check if it owns the given source file.""" - target_kwargs = legacy_target.adaptor.kwargs() - - # Handle targets like `python_binary` which have a singular `source='main.py'` declaration. - target_source = target_kwargs.get('source') - if target_source: - path_from_build_root = os.path.join(legacy_target.adaptor.address.spec_path, target_source) - if path_from_build_root in sources_set: - return True - - # Handle `sources`-declaring targets. - target_sources = target_kwargs.get('sources', []) - if target_sources and self._match_sources(sources_set, target_sources): - return True - - return False - - def iter_target_addresses_for_sources(self, sources): - """Bulk, iterable form of `target_addresses_for_source`.""" - # Walk up the buildroot looking for targets that would conceivably claim changed sources. - sources_set = set(sources) - dependencies = tuple(AscendantAddresses(directory=d) for d in self._unique_dirs_for_sources(sources_set)) - - # Uniqify all transitive hydrated targets. - hydrated_target_to_address = {} - hydrated_targets, = self._scheduler.product_request(HydratedTargets, [Specs(dependencies=dependencies)]) - for hydrated_target in hydrated_targets.dependencies: - if hydrated_target not in hydrated_target_to_address: - hydrated_target_to_address[hydrated_target] = hydrated_target.adaptor.address - - for hydrated_target, legacy_address in six.iteritems(hydrated_target_to_address): - # Handle BUILD files. - if (LegacyAddressMapper.any_is_declaring_file(legacy_address, sources_set) or - self._owns_any_source(sources_set, hydrated_target)): - yield legacy_address diff --git a/src/python/pants/init/BUILD b/src/python/pants/init/BUILD index 3fc80744820..2111b1a2096 100644 --- a/src/python/pants/init/BUILD +++ b/src/python/pants/init/BUILD @@ -20,7 +20,6 @@ python_library( 'src/python/pants/engine/legacy:graph', 'src/python/pants/engine/legacy:options_parsing', 'src/python/pants/engine/legacy:parser', - 'src/python/pants/engine/legacy:source_mapper', 'src/python/pants/engine/legacy:structs', 'src/python/pants/engine:build_files', 'src/python/pants/engine:console', diff --git a/src/python/pants/init/target_roots_calculator.py b/src/python/pants/init/target_roots_calculator.py index 31c33606d32..04a622897c6 100644 --- a/src/python/pants/init/target_roots_calculator.py +++ b/src/python/pants/init/target_roots_calculator.py @@ -4,20 +4,17 @@ from __future__ import absolute_import, division, print_function, unicode_literals -import itertools import logging -from builtins import object -from collections import defaultdict +from builtins import object, str from twitter.common.collections import OrderedSet from pants.base.build_environment import get_buildroot, get_scm from pants.base.cmd_line_spec_parser import CmdLineSpecParser -from pants.base.specs import DescendantAddresses, SingleAddress, Specs +from pants.base.specs import SingleAddress, Specs from pants.base.target_roots import TargetRoots -from pants.build_graph.address import Address -from pants.engine.legacy.graph import TransitiveHydratedTargets, target_types_from_symbol_table -from pants.engine.legacy.source_mapper import EngineSourceMapper +from pants.engine.addressable import BuildFileAddresses +from pants.engine.legacy.graph import OwnersRequest from pants.goal.workspace import ScmWorkspace from pants.scm.subsystems.changed import ChangedRequest @@ -25,74 +22,6 @@ logger = logging.getLogger(__name__) -class _DependentGraph(object): - """A graph for walking dependent addresses of TargetAdaptor objects. - - This avoids/imitates constructing a v1 BuildGraph object, because that codepath results - in many references held in mutable global state (ie, memory leaks). - - The long term goal is to deprecate the `changed` goal in favor of sufficiently good cache - hit rates, such that rather than running: - - ./pants --changed-parent=master test - - ...you would always be able to run: - - ./pants test :: - - ...and have it complete in a similar amount of time by hitting relevant caches. - """ - - @classmethod - def from_iterable(cls, target_types, adaptor_iter): - """Create a new DependentGraph from an iterable of TargetAdaptor subclasses.""" - inst = cls(target_types) - for target_adaptor in adaptor_iter: - inst.inject_target(target_adaptor) - return inst - - def __init__(self, target_types): - self._dependent_address_map = defaultdict(set) - self._target_types = target_types - - def inject_target(self, target_adaptor): - """Inject a target, respecting all sources of dependencies.""" - target_cls = self._target_types[target_adaptor.type_alias] - - declared_deps = target_adaptor.dependencies - implicit_deps = (Address.parse(s) - for s in target_cls.compute_dependency_specs(kwargs=target_adaptor.kwargs())) - - for dep in itertools.chain(declared_deps, implicit_deps): - self._dependent_address_map[dep].add(target_adaptor.address) - - def dependents_of_addresses(self, addresses): - """Given an iterable of addresses, yield all of those addresses dependents.""" - seen = set(addresses) - for address in addresses: - for dependent_address in self._dependent_address_map[address]: - if dependent_address not in seen: - seen.add(dependent_address) - yield dependent_address - - def transitive_dependents_of_addresses(self, addresses): - """Given an iterable of addresses, yield all of those addresses dependents, transitively.""" - addresses_to_visit = set(addresses) - while 1: - dependents = set(self.dependents_of_addresses(addresses)) - # If we've exhausted all dependencies or visited all remaining nodes, break. - if (not dependents) or dependents.issubset(addresses_to_visit): - break - addresses = dependents.difference(addresses_to_visit) - addresses_to_visit.update(dependents) - - transitive_set = itertools.chain( - *(self._dependent_address_map[address] for address in addresses_to_visit) - ) - for dep in transitive_set: - yield dep - - class InvalidSpecConstraint(Exception): """Raised when invalid constraints are given via target specs and arguments like --changed*.""" @@ -120,6 +49,16 @@ def parse_specs(cls, target_specs, build_root=None, exclude_patterns=None, tags= tags=tags) ] + @classmethod + def changed_files(cls, scm, changes_since=None, diffspec=None): + """Determines the files changed according to SCM/workspace and options.""" + workspace = ScmWorkspace(scm) + if diffspec: + return workspace.changes_in(diffspec) + + changes_since = changes_since or scm.current_rev_identifier() + return workspace.touched_files(changes_since) + @classmethod def create(cls, options, session, symbol_table, build_root=None, exclude_patterns=None, tags=None): """ @@ -146,126 +85,43 @@ def create(cls, options, session, symbol_table, build_root=None, exclude_pattern logger.debug('spec_roots are: %s', spec_roots) logger.debug('changed_request is: %s', changed_request) logger.debug('owned_files are: %s', owned_files) - scm = get_scm() - change_calculator = ChangeCalculator(scheduler=session, symbol_table=symbol_table, scm=scm) if scm else None - owner_calculator = OwnerCalculator(scheduler=session, symbol_table=symbol_table) if owned_files else None targets_specified = sum(1 for item in (changed_request.is_actionable(), owned_files, spec_roots) if item) if targets_specified > 1: - # We've been provided a more than one of: a change request, an owner request, or spec roots. + # We've been provided more than one of: a change request, an owner request, or spec roots. raise InvalidSpecConstraint( 'Multiple target selection methods provided. Please use only one of ' '--changed-*, --owner-of, or target specs' ) - if change_calculator and changed_request.is_actionable(): + if changed_request.is_actionable(): + scm = get_scm() + if not scm: + raise InvalidSpecConstraint( + 'The --changed-* options are not available without a recognized SCM (usually git).' + ) + changed_files = cls.changed_files( + scm, + changes_since=changed_request.changes_since, + diffspec=changed_request.diffspec) # We've been provided no spec roots (e.g. `./pants list`) AND a changed request. Compute # alternate target roots. - changed_addresses = change_calculator.changed_target_addresses(changed_request) + request = OwnersRequest(sources=tuple(changed_files), + include_dependees=str(changed_request.include_dependees)) + changed_addresses, = session.product_request(BuildFileAddresses, [request]) logger.debug('changed addresses: %s', changed_addresses) - dependencies = tuple(SingleAddress(a.spec_path, a.target_name) for a in changed_addresses) + dependencies = tuple(SingleAddress(a.spec_path, a.target_name) for a in changed_addresses.dependencies) return TargetRoots([Specs(dependencies=dependencies, exclude_patterns=exclude_patterns, tags=tags)]) - if owner_calculator and owned_files: + if owned_files: # We've been provided no spec roots (e.g. `./pants list`) AND a owner request. Compute # alternate target roots. - owner_addresses = owner_calculator.owner_target_addresses(owned_files) + request = OwnersRequest(sources=tuple(owned_files), include_dependees=str('none')) + owner_addresses, = session.product_request(BuildFileAddresses, [request]) logger.debug('owner addresses: %s', owner_addresses) - dependencies = tuple(SingleAddress(a.spec_path, a.target_name) for a in owner_addresses) + dependencies = tuple(SingleAddress(a.spec_path, a.target_name) for a in owner_addresses.dependencies) return TargetRoots([Specs(dependencies=dependencies, exclude_patterns=exclude_patterns, tags=tags)]) return TargetRoots(spec_roots) - - -class ChangeCalculator(object): - """A ChangeCalculator that finds the target addresses of changed files based on scm.""" - - def __init__(self, scheduler, symbol_table, scm, workspace=None, changes_since=None, - diffspec=None): - """ - :param scheduler: The `Scheduler` instance to use for computing file to target mappings. - :param symbol_table: The symbol table. - :param scm: The `Scm` instance to use for change determination. - """ - self._scm = scm or get_scm() - self._scheduler = scheduler - self._symbol_table = symbol_table - self._mapper = EngineSourceMapper(self._scheduler) - self._workspace = workspace or ScmWorkspace(scm) - self._changes_since = changes_since - self._diffspec = diffspec - - def changed_files(self, changes_since=None, diffspec=None): - """Determines the files changed according to SCM/workspace and options.""" - diffspec = diffspec or self._diffspec - if diffspec: - return self._workspace.changes_in(diffspec) - - changes_since = changes_since or self._changes_since or self._scm.current_rev_identifier() - return self._workspace.touched_files(changes_since) - - def iter_changed_target_addresses(self, changed_request): - """Given a `ChangedRequest`, compute and yield all affected target addresses.""" - changed_files = self.changed_files(changed_request.changes_since, changed_request.diffspec) - logger.debug('changed files: %s', changed_files) - if not changed_files: - return - - changed_addresses = set(address - for address - in self._mapper.iter_target_addresses_for_sources(changed_files)) - for address in changed_addresses: - yield address - - if changed_request.include_dependees not in ('direct', 'transitive'): - return - - # TODO: For dependee finding, we technically only need to parse all build files to collect target - # dependencies. But in order to fully validate the graph and account for the fact that deleted - # targets do not show up as changed roots, we use the `TransitiveHydratedTargets` product. - # see https://github.com/pantsbuild/pants/issues/382 - specs = (DescendantAddresses(''),) - adaptor_iter = (t.adaptor - for targets in self._scheduler.product_request(TransitiveHydratedTargets, - [Specs(specs)]) - for t in targets.roots) - graph = _DependentGraph.from_iterable(target_types_from_symbol_table(self._symbol_table), - adaptor_iter) - - if changed_request.include_dependees == 'direct': - for address in graph.dependents_of_addresses(changed_addresses): - yield address - elif changed_request.include_dependees == 'transitive': - for address in graph.transitive_dependents_of_addresses(changed_addresses): - yield address - - def changed_target_addresses(self, changed_request): - return list(self.iter_changed_target_addresses(changed_request)) - - -class OwnerCalculator(object): - """An OwnerCalculator that finds the target addresses of the files passed down as arguments - to --owner-of - """ - - def __init__(self, scheduler, symbol_table): - """ - :param scheduler: The `Scheduler` instance to use for computing file to target mapping - :param symbol_table: The symbol table. - """ - self._scheduler = scheduler - self._symbol_table = symbol_table - self._mapper = EngineSourceMapper(self._scheduler) - - def iter_owner_target_addresses(self, owned_files): - """Given an list of owned files, compute and yield all affected target addresses""" - owner_addresses = set(address - for address - in self._mapper.iter_target_addresses_for_sources(owned_files)) - for address in owner_addresses: - yield address - - def owner_target_addresses(self, owner_request): - return list(self.iter_owner_target_addresses(owner_request)) diff --git a/tests/python/pants_test/build_graph/test_source_mapper.py b/tests/python/pants_test/build_graph/test_source_mapper.py index 219a41c5943..70f1e6cd276 100644 --- a/tests/python/pants_test/build_graph/test_source_mapper.py +++ b/tests/python/pants_test/build_graph/test_source_mapper.py @@ -4,24 +4,18 @@ from __future__ import absolute_import, division, print_function, unicode_literals -from builtins import object +from builtins import str from textwrap import dedent # TODO: Create a dummy target type in this test and remove this dep. from pants.backend.jvm.targets.java_library import JavaLibrary from pants.build_graph.build_file_aliases import BuildFileAliases -from pants.build_graph.source_mapper import LazySourceMapper, SpecSourceMapper +from pants.engine.addressable import BuildFileAddresses +from pants.engine.legacy.graph import OwnersRequest from pants_test.test_base import TestBase -class SourceMapperTest(object): - def __init__(self, methodName): - super(SourceMapperTest, self).__init__(methodName) - self._mapper = None - - def get_mapper(self): - return self._mapper - +class SourceMapperTest(TestBase): @classmethod def alias_groups(cls): return BuildFileAliases( @@ -30,15 +24,10 @@ def alias_groups(cls): }, ) - def setUp(self): - super(SourceMapperTest, self).setUp() - self.set_mapper() - - def set_mapper(self, fast=False): - raise NotImplementedError - def owner(self, owner, f): - self.assertEqual(set(owner), {i.spec for i in self.get_mapper().target_addresses_for_source(f)}) + request = OwnersRequest(sources=(f,), include_dependees=str('none')) + addresses, = self.scheduler.product_request(BuildFileAddresses, [request]) + self.assertEqual(set(owner), {i.spec for i in addresses}) def test_target_address_for_source_yields_unique_addresses(self): # NB If the mapper returns more than one copy of an address, it may cause other code to do @@ -53,8 +42,7 @@ def test_target_address_for_source_yields_unique_addresses(self): ) ''')) - self.assertEqual({'path:target', 'path:buildholder'}, - {a.spec for a in self.get_mapper().target_addresses_for_source('path/BUILD')}) + self.owner(['path:target', 'path:buildholder'], 'path/BUILD') def test_joint_ownership(self): # A simple target with two sources. @@ -95,30 +83,3 @@ def test_with_root_level_build(self): self.owner(['//:top', 'text/common/const:const'], 'text/common/const/emoji.py') self.owner(['//:top'], 'foo.py') self.owner([], 'bar.py') - - def test_fast_vs_correct(self): - self.create_library('', 'java_library', 'top', ['foo.py', 'a/b/bar.py']) - self.create_library('a/b', 'java_library', 'b', ['bar.py', 'baz.py']) - - # With fast=False, order doesn't matter. - self.set_mapper(fast=False) - self.owner(['//:top', 'a/b:b'], 'a/b/bar.py') - self.owner(['//:top'], 'foo.py') - self.set_mapper(fast=False) - self.owner(['//:top'], 'foo.py') - self.owner(['//:top', 'a/b:b'], 'a/b/bar.py') - - # With fast=False, loading a parent first vs after now affects results. - self.set_mapper(fast=True) - self.owner(['a/b:b'], 'a/b/bar.py') - self.owner(['//:top'], 'foo.py') - - -class LazySourceMapperTest(SourceMapperTest, TestBase): - def set_mapper(self, fast=False): - self._mapper = LazySourceMapper(self.address_mapper, self.build_graph, fast) - - -class SpecSourceMapperTest(SourceMapperTest, TestBase): - def set_mapper(self, fast=False): - self._mapper = SpecSourceMapper(self.address_mapper, self.build_graph, fast) diff --git a/tests/python/pants_test/engine/legacy/test_changed_integration.py b/tests/python/pants_test/engine/legacy/test_changed_integration.py index 821c5cfe19e..8bbcbce98bb 100644 --- a/tests/python/pants_test/engine/legacy/test_changed_integration.py +++ b/tests/python/pants_test/engine/legacy/test_changed_integration.py @@ -368,7 +368,7 @@ def test_changed_with_deleted_target_transitive(self): safe_delete(os.path.join(worktree, 'src/resources/org/pantsbuild/resourceonly/BUILD')) pants_run = self.run_pants(['list', '--changed-parent=HEAD', '--changed-include-dependees=transitive']) self.assert_failure(pants_run) - self.assertIn('src/resources/org/pantsbuild/resourceonly', pants_run.stderr_data) + self.assertRegexpMatches(pants_run.stderr_data, 'src/resources/org/pantsbuild/resourceonly:.*did not exist') def test_changed_in_directory_without_build_file(self): with create_isolated_git_repo() as worktree: