-
-
Notifications
You must be signed in to change notification settings - Fork 652
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add core abstractions for the V2 Target API (#9284)
## Problem See #4535 and the [recent design doc](https://docs.google.com/document/d/1nxPdvuzgCPKhTabhfYBN2tbcr85enSR83OpcvdfWXfY/edit). This design implements the main goals of the Target API: 1. Extensibility - add new target types. 2. Extensibility - add new fields to pre-existing target types. 3. Typed fields. * See [Typed Fields](https://docs.google.com/document/d/1nxPdvuzgCPKhTabhfYBN2tbcr85enSR83OpcvdfWXfY/edit#heading=h.ctjckb8e5t03) for a justification of this. 4. All fields are lazily hydrated/validated. 5. Has a utility for filtering based on required fields (see `test_has_fields()`) * This is important to how rules will consume the Target API. `python_test_runner.py` might say something like `if my_tgt.has_fields([PythonSources, Timeout])`. * See [Proposed Design](https://docs.google.com/document/d/1nxPdvuzgCPKhTabhfYBN2tbcr85enSR83OpcvdfWXfY/edit#heading=h.z97og7gj9rvs) for the importance of using Pure Python for this filtering, rather than engine code. 6. Works 100% with MyPy. 7. Nice `repr` and `dir` functions to allow for easy debugging. ## Solution Add `Target` and `Field` types. A `Target` type is a combination of several `Field`s that are valid _together_. Given a target, the two main expected API calls will be: ```python my_tgt.has_fields([PythonSources, Compatibility]) compatibility: Compatibility = my_tgt.get(Compatibility) ``` MyPy understands both of these calls, including `Target.get` thanks to generics. ### Lazy hydration - primitive vs. async fields About 5% of fields require the engine for hydration, e.g. `sources` and `bundles`. The other 95% of fields, though, are nothing more than a `str`, `List[str]`, `bool`, etc. We do not want to complicate accessing the majority of fields. While some field values will need to be accessed via `await Get`, the majority should not be this complex. So, we introduce `PrimitiveField` and `AsyncField` to distinguish between the two. Thanks to MyPy and autocomplete, rule authors will quickly know when working with an `AsyncField` that they cannot call `my_field.value` like they normally could. ### Extensibility - add new fields on pre-existing targets This is a new feature that we want as a better version of `tags`. To add a new field, plugin authors simply need to create the new field then register, for example, `UnionRule(PythonLibrary, TypeChecked)`. The Target constructor will recognize this new field and treat it as first-class. Core Pants code can safely ignore the field, while plugins can now use the new field. ## Followups 1. Add a way to go from `HydratedStruct -> Target`, so that we can actually use this with production code. 2. Add syntactic sugar for declaring new fields, e.g. add `BoolField` and `StringField`. 3. Add a mechanism to actually register new target types and their aliases, e.g. a new backend registration hook ```python def targets() -> Sequence[Type[Target]]: return (PythonLibrary, PythonTests) ``` 4. Add implementations for Python targets as a real-world use case. 5. Use the new Python target types in some rules like Black and Isort. 6. Allow going from a V2 `Target` to a V1 `Target`. 7. Improve error messages for invalid fields and targets, e.g. point to the actual target upon failure. 8. Remove `TargetAdaptor`.
- Loading branch information
1 parent
23a14d7
commit 8ed6795
Showing
3 changed files
with
352 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). | ||
# Licensed under the Apache License, Version 2.0 (see LICENSE). | ||
|
||
from abc import ABC, ABCMeta, abstractmethod | ||
from dataclasses import dataclass | ||
from typing import Any, ClassVar, Dict, Iterable, Optional, Tuple, Type, TypeVar, cast | ||
|
||
from pants.engine.rules import UnionMembership | ||
from pants.util.memo import memoized_property | ||
from pants.util.meta import frozen_after_init | ||
|
||
|
||
@dataclass(frozen=True) | ||
class Field(ABC): | ||
alias: ClassVar[str] | ||
raw_value: Optional[Any] # None indicates that the field was not explicitly defined | ||
|
||
def __repr__(self) -> str: | ||
return f"{self.__class__}(alias={repr(self.alias)}, raw_value={self.raw_value})" | ||
|
||
|
||
class PrimitiveField(Field, metaclass=ABCMeta): | ||
"""A Field that does not need the engine in order to be hydrated. | ||
This should be subclassed by the majority of fields. | ||
""" | ||
|
||
def __str__(self) -> str: | ||
return f"{self.alias}={self.value}" | ||
|
||
@memoized_property | ||
@abstractmethod | ||
def value(self) -> Any: | ||
"""Get the field's value. | ||
The value will possibly be first hydrated and/or validated, such as using a default value | ||
if the field was not defined or ensuring that an int value is positive. | ||
This property is memoized because hydration and validation can often be costly. This | ||
hydration is lazy, i.e. it will only happen when a downstream rule explicitly requests this | ||
field. | ||
""" | ||
|
||
|
||
class AsyncField(Field, metaclass=ABCMeta): | ||
"""A field that needs the engine in order to be hydrated. | ||
You should create a corresponding Result class and define a rule to go from this AsyncField to | ||
the Result. For example: | ||
class Sources(AsyncField): | ||
alias: ClassVar = "sources" | ||
raw_value: Optional[List[str]] | ||
@dataclass(frozen=True) | ||
class SourcesResult: | ||
snapshot: Snapshot | ||
@rule | ||
def hydrate_sources(sources: Sources) -> SourcesResult: | ||
# possibly validate `sources.raw_value` | ||
... | ||
result = await Get[Snapshot](PathGlobs(sources.raw_value)) | ||
# possibly validate `result` | ||
... | ||
return SourcesResult(result) | ||
def rules(): | ||
return [hydrate_sources] | ||
Then, call sites can `await Get` if they need to hydrate the field: | ||
sources = await Get[SourcesResult](Sources, my_tgt.get(Sources)) | ||
""" | ||
|
||
|
||
_F = TypeVar("_F", bound=Field) | ||
|
||
|
||
@frozen_after_init | ||
@dataclass(unsafe_hash=True) | ||
class Target(ABC): | ||
"""A Target represents a combination of fields that are valid _together_. | ||
Plugin authors may add additional fields to pre-existing target types by simply registering | ||
UnionRules between the `Target` type and the custom field, e.g. `UnionRule(PythonLibrary, | ||
TypeChecked)`. The `Target` will then treat `TypeChecked` as a first-class citizen and plugins | ||
can use that Field like any other Field. | ||
""" | ||
|
||
# Subclasses must define these | ||
alias: ClassVar[str] | ||
core_fields: ClassVar[Tuple[Type[Field], ...]] | ||
|
||
# These get calculated in the constructor | ||
plugin_fields: Tuple[Type[Field], ...] | ||
field_values: Dict[Type[Field], Any] | ||
|
||
def __init__( | ||
self, | ||
unhydrated_values: Dict[str, Any], | ||
*, | ||
union_membership: Optional[UnionMembership] = None, | ||
) -> None: | ||
self.plugin_fields = cast( | ||
Tuple[Type[Field], ...], | ||
( | ||
() | ||
if union_membership is None | ||
else tuple(union_membership.union_rules.get(self.__class__, ())) | ||
), | ||
) | ||
|
||
self.field_values = {} | ||
aliases_to_field_types = {field_type.alias: field_type for field_type in self.field_types} | ||
for alias, value in unhydrated_values.items(): | ||
if alias not in aliases_to_field_types: | ||
raise ValueError( | ||
f"Unrecognized field `{alias}={value}` for target type `{self.alias}`." | ||
) | ||
field_type = aliases_to_field_types[alias] | ||
self.field_values[field_type] = field_type(value) | ||
# For undefined fields, mark the raw value as None. | ||
for field_type in set(self.field_types) - set(self.field_values.keys()): | ||
self.field_values[field_type] = field_type(raw_value=None) | ||
|
||
@property | ||
def field_types(self) -> Tuple[Type[Field], ...]: | ||
return (*self.core_fields, *self.plugin_fields) | ||
|
||
def __repr__(self) -> str: | ||
return ( | ||
f"{self.__class__}(" | ||
f"alias={repr(self.alias)}, " | ||
f"core_fields={list(self.core_fields)}, " | ||
f"plugin_fields={list(self.plugin_fields)}, " | ||
f"raw_field_values={list(self.field_values.values())}" | ||
f")" | ||
) | ||
|
||
def __str__(self) -> str: | ||
fields = ", ".join(str(field) for field in self.field_values.values()) | ||
return f"{self.alias}({fields})" | ||
|
||
def get(self, field: Type[_F]) -> _F: | ||
return cast(_F, self.field_values[field]) | ||
|
||
def has_fields(self, fields: Iterable[Type[Field]]) -> bool: | ||
# TODO: consider if this should support subclasses. For example, if a target has a | ||
# field PythonSources(Sources), then .has_fields(Sources) should still return True. Why? | ||
# This allows overriding how fields behave for custom target types, e.g. a `python3_library` | ||
# subclassing the Field Compatibility with its own custom Python3Compatibility field. | ||
# When adding this, be sure to update `.get()` to allow looking up by subclass, too. | ||
# (Is it possible to do that in a performant way, i.e. w/o having to iterate over every | ||
# self.field_type (O(n) vs the current O(1))?) | ||
return all(field in self.field_types for field in fields) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). | ||
# Licensed under the Apache License, Version 2.0 (see LICENSE). | ||
|
||
from collections import OrderedDict | ||
from dataclasses import dataclass | ||
from pathlib import PurePath | ||
from typing import ClassVar, List, Optional, Tuple | ||
|
||
import pytest | ||
|
||
from pants.engine.fs import EMPTY_DIRECTORY_DIGEST, PathGlobs, Snapshot | ||
from pants.engine.rules import UnionMembership, rule | ||
from pants.engine.selectors import Get | ||
from pants.engine.target import AsyncField, PrimitiveField, Target | ||
from pants.testutil.engine.util import MockGet, run_rule | ||
from pants.util.collections import ensure_str_list | ||
from pants.util.memo import memoized_property | ||
from pants.util.ordered_set import OrderedSet | ||
|
||
|
||
class HaskellGhcExtensions(PrimitiveField): | ||
alias: ClassVar = "ghc_extensions" | ||
raw_value: Optional[List[str]] | ||
|
||
@memoized_property | ||
def value(self) -> List[str]: | ||
if self.raw_value is None: | ||
return [] | ||
# Add some arbitrary validation to test that hydration works properly. | ||
bad_extensions = [ | ||
extension for extension in self.raw_value if not extension.startswith("Ghc") | ||
] | ||
if bad_extensions: | ||
raise ValueError( | ||
f"All elements of `{self.alias}` must be prefixed by `Ghc`. Received " | ||
f"{bad_extensions}." | ||
) | ||
return self.raw_value | ||
|
||
|
||
class HaskellSources(AsyncField): | ||
alias: ClassVar = "sources" | ||
raw_value: Optional[List[str]] | ||
|
||
|
||
@dataclass(frozen=True) | ||
class HaskellSourcesResult: | ||
snapshot: Snapshot | ||
|
||
|
||
@rule | ||
async def hydrate_haskell_sources(sources: HaskellSources) -> HaskellSourcesResult: | ||
# Validate before hydration | ||
ensure_str_list(sources.raw_value) | ||
result = await Get[Snapshot](PathGlobs(sources.raw_value)) | ||
# Validate after hydration | ||
non_haskell_sources = [fp for fp in result.files if PurePath(fp).suffix != ".hs"] | ||
if non_haskell_sources: | ||
raise ValueError(f"Received non-Haskell sources in {sources.alias}: {non_haskell_sources}.") | ||
return HaskellSourcesResult(result) | ||
|
||
|
||
class HaskellTarget(Target): | ||
alias: ClassVar = "haskell" | ||
core_fields: ClassVar = (HaskellGhcExtensions, HaskellSources) | ||
|
||
|
||
def test_invalid_fields_rejected() -> None: | ||
with pytest.raises(ValueError) as exc: | ||
HaskellTarget({"invalid_field": True}) | ||
assert "Unrecognized field `invalid_field=True` for target type `haskell`." in str(exc) | ||
|
||
|
||
def test_get_primitive_field() -> None: | ||
extensions = ["GhcExistentialQuantification"] | ||
extensions_field = HaskellTarget({HaskellGhcExtensions.alias: extensions}).get( | ||
HaskellGhcExtensions | ||
) | ||
assert extensions_field.raw_value == extensions | ||
assert extensions_field.value == extensions | ||
|
||
default_extensions_field = HaskellTarget({}).get(HaskellGhcExtensions) | ||
assert default_extensions_field.raw_value is None | ||
assert default_extensions_field.value == [] | ||
|
||
|
||
def test_get_async_field() -> None: | ||
def hydrate_field( | ||
*, raw_source_files: List[str], hydrated_source_files: Tuple[str, ...] | ||
) -> HaskellSourcesResult: | ||
sources_field = HaskellTarget({HaskellSources.alias: raw_source_files}).get(HaskellSources) | ||
assert sources_field.raw_value == raw_source_files | ||
result: HaskellSourcesResult = run_rule( | ||
hydrate_haskell_sources, | ||
rule_args=[sources_field], | ||
mock_gets=[ | ||
MockGet( | ||
product_type=Snapshot, | ||
subject_type=PathGlobs, | ||
mock=lambda _: Snapshot( | ||
directory_digest=EMPTY_DIRECTORY_DIGEST, | ||
files=hydrated_source_files, | ||
dirs=(), | ||
), | ||
) | ||
], | ||
) | ||
return result | ||
|
||
# Normal field | ||
expected_files = ("monad.hs", "abstract_art.hs", "abstract_algebra.hs") | ||
assert ( | ||
hydrate_field( | ||
raw_source_files=["monad.hs", "abstract_*.hs"], hydrated_source_files=expected_files | ||
).snapshot.files | ||
== expected_files | ||
) | ||
|
||
# Test pre-hydration validation | ||
with pytest.raises(ValueError) as exc: | ||
hydrate_field(raw_source_files=[0, 1, 2], hydrated_source_files=()) # type: ignore[call-arg] | ||
assert "Not all elements of the iterable have type" in str(exc) | ||
|
||
# Test post-hydration validation | ||
with pytest.raises(ValueError) as exc: | ||
hydrate_field(raw_source_files=["*.js"], hydrated_source_files=("not_haskell.js",)) | ||
assert "Received non-Haskell sources" in str(exc) | ||
|
||
|
||
def test_has_fields() -> None: | ||
class UnrelatedField(PrimitiveField): | ||
alias: ClassVar = "unrelated" | ||
raw_value: Optional[bool] | ||
|
||
@memoized_property | ||
def value(self) -> bool: | ||
if self.raw_value is None: | ||
return False | ||
return self.raw_value | ||
|
||
tgt = HaskellTarget({}) | ||
assert tgt.has_fields([]) is True | ||
assert tgt.has_fields([HaskellGhcExtensions]) is True | ||
assert tgt.has_fields([UnrelatedField]) is False | ||
assert tgt.has_fields([HaskellGhcExtensions, UnrelatedField]) is False | ||
|
||
|
||
def test_field_hydration_is_lazy() -> None: | ||
# No error upon creating the Target because validation does not happen until a call site | ||
# hydrates the specific field. | ||
tgt = HaskellTarget( | ||
{HaskellGhcExtensions.alias: ["GhcExistentialQuantification", "DoesNotStartWithGhc"]} | ||
) | ||
# When hydrating, we expect a failure. | ||
with pytest.raises(ValueError) as exc: | ||
tgt.get(HaskellGhcExtensions).value | ||
assert "must be prefixed by `Ghc`" in str(exc) | ||
|
||
|
||
def test_add_custom_fields() -> None: | ||
class CustomField(PrimitiveField): | ||
alias: ClassVar = "custom_field" | ||
raw_value: Optional[bool] | ||
|
||
@memoized_property | ||
def value(self) -> bool: | ||
if self.raw_value is None: | ||
return False | ||
return self.raw_value | ||
|
||
union_membership = UnionMembership(OrderedDict({HaskellTarget: OrderedSet([CustomField])})) | ||
tgt_values = {CustomField.alias: True} | ||
tgt = HaskellTarget(tgt_values, union_membership=union_membership) | ||
assert tgt.field_types == (HaskellGhcExtensions, HaskellSources, CustomField) | ||
assert tgt.core_fields == (HaskellGhcExtensions, HaskellSources) | ||
assert tgt.plugin_fields == (CustomField,) | ||
assert tgt.get(CustomField).value is True | ||
|
||
default_tgt = HaskellTarget({}, union_membership=union_membership) | ||
assert default_tgt.get(CustomField).value is False |