Skip to content

Commit

Permalink
Remove datatype() and TypedCollection (#8576)
Browse files Browse the repository at this point in the history
Resolves #7074. Now that we always use `@dataclass` instead of `datatype`, and replaced `Collection.of(T)` with `Collection[T]`, it is safe to remove `datatype()`.

We also remove `TypedCollection` as it is no longer in use. Instead, we use explicit type hints like `Tuple[str, ...]` or `Collection[T]`.

We avoid a deprecation cycle because V2 engine code has always been experimental.

### Keeps `util/objects.py`
We still keep `TypeConstraintError`, `Exactly`, `SuperclassesOf`, and `SubclassesOf`. These are used widely in the codebase and have no standard library equivalent. They also do not seem to cause problems for MyPy.
  • Loading branch information
Eric-Arellano authored Nov 5, 2019
1 parent 8c9ba48 commit 08e76b3
Show file tree
Hide file tree
Showing 7 changed files with 2 additions and 860 deletions.
1 change: 0 additions & 1 deletion src/python/pants/base/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ python_library(
sources = ['hash_utils.py'],
dependencies = [
'3rdparty/python:typing-extensions',
'src/python/pants/util:objects',
'src/python/pants/util:strutil',
],
)
Expand Down
8 changes: 0 additions & 8 deletions src/python/pants/base/hash_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from twitter.common.collections import OrderedSet
from typing_extensions import Protocol

from pants.util.objects import DatatypeMixin
from pants.util.strutil import ensure_binary


Expand Down Expand Up @@ -136,13 +135,6 @@ def default(self, o):
.format(cls=type(self).__name__, val=o))
# Set order is arbitrary in python 3.6 and 3.7, so we need to keep this sorted() call.
return sorted(self.default(i) for i in o)
if isinstance(o, DatatypeMixin):
# datatype objects will intentionally raise in the __iter__ method, but the Iterable abstract
# base class will match any class with any superclass that has the attribute __iter__ in the
# __dict__ (see https://docs.python.org/2/library/abc.html), so we need to check for it
# specially here.
# TODO: determine if the __repr__ should be some abstractmethod on DatatypeMixin!
return self.default(repr(o))
if isinstance(o, Iterable) and not isinstance(o, (bytes, list, str)):
return list(self.default(i) for i in o)
logger.debug("Our custom json encoder {} is trying to hash a primitive type, but has gone through"
Expand Down
7 changes: 1 addition & 6 deletions src/python/pants/util/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,7 @@ python_library(
python_library(
name = 'objects',
sources = ['objects.py'],
dependencies = [
'3rdparty/python/twitter/commons:twitter.common.collections',
':strutil',
':memo',
':meta',
],
tags = {'partially_type_checked'},
)

python_library(
Expand Down
322 changes: 0 additions & 322 deletions src/python/pants/util/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,233 +2,6 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from abc import ABC, abstractmethod
from collections import OrderedDict, namedtuple
from collections.abc import Iterable

from pants.util.memo import memoized_classproperty
from pants.util.meta import classproperty
from pants.util.strutil import pluralize


class TypeCheckError(TypeError):
  """A TypeError whose message is prefixed with the name of the offending class."""

  # TODO: make some wrapper exception class to make this kind of
  # prefixing easy (maybe using a class field format string?).
  def __init__(self, type_name, msg, *args, **kwargs):
    super().__init__(f"type check error in class {type_name}: {msg}", *args, **kwargs)


# TODO: remove the `.type_check_error_type` property in `DatatypeMixin` and just have mixers
# override a class object!
class TypedDatatypeInstanceConstructionError(TypeCheckError):
  """Raised when one or more fields of a datatype() fail type checking at construction time."""


class DatatypeMixin(ABC):
  """Decouple datatype logic from the way it's created to ease migration to python 3 dataclasses."""

  # NOTE(review): `classproperty` comes from pants.util.meta (project-local) — this reads as a
  # class-level attribute on subclasses rather than an instance property.
  @classproperty
  @abstractmethod
  def type_check_error_type(cls):
    """The exception type to use in make_type_error()."""

  @classmethod
  def make_type_error(cls, msg, *args, **kwargs):
    """A helper method to generate an exception type for type checking errors.

    This method uses `cls.type_check_error_type` to ensure that type checking errors can be caught
    with a reliable exception type. The type returned by `cls.type_check_error_type` should ensure
    that the exception messages are prefixed with enough context to be useful and *not* confusing.
    """
    return cls.type_check_error_type(cls.__name__, msg, *args, **kwargs)

  @abstractmethod
  def copy(self, **kwargs):
    """Return a new object of the same type, replacing specified fields with new values"""


# TODO(#7074): Migrate to python 3 dataclasses!
def datatype(field_decls, superclass_name=None, **kwargs):
  """A wrapper for `namedtuple` that accounts for the type of the object in equality.

  Field declarations can be a string, which declares a field with that name and
  no type checking. Field declarations can also be a tuple `('field_name',
  field_type)`, which declares a field named `field_name` which is type-checked
  at construction. If a type is given, the value provided to the constructor for
  that field must be exactly that type (i.e. `type(x) == field_type`), and not
  e.g. a subclass.

  :param field_decls: Iterable of field declarations.
  :return: A type object which can then be subclassed.
  :raises: :class:`TypeError`
  """
  field_names = []
  fields_with_constraints = OrderedDict()
  for maybe_decl in field_decls:
    # ('field_name', type)
    if isinstance(maybe_decl, tuple):
      field_name, type_spec = maybe_decl
      if isinstance(type_spec, type):
        # A bare type is shorthand for an exact-type constraint (no subclasses allowed).
        type_constraint = Exactly(type_spec)
      elif isinstance(type_spec, TypeConstraint):
        type_constraint = type_spec
      else:
        raise TypeError(
          "type spec for field '{}' was not a type or TypeConstraint: was {!r} (type {!r})."
          .format(field_name, type_spec, type(type_spec).__name__))
      fields_with_constraints[field_name] = type_constraint
    else:
      # interpret it as a field name without a type to check
      field_name = maybe_decl
    # namedtuple() already checks field uniqueness
    field_names.append(field_name)

  if not superclass_name:
    superclass_name = '_anonymous_namedtuple_subclass'

  namedtuple_cls = namedtuple(superclass_name, field_names, **kwargs)

  class DataType(namedtuple_cls, DatatypeMixin):
    # Exception type used by DatatypeMixin.make_type_error().
    type_check_error_type = TypedDatatypeInstanceConstructionError

    def __new__(cls, *args, **kwargs):
      # TODO: Ideally we could execute this exactly once per `cls` but it should be a
      # relatively cheap check.
      if not hasattr(cls.__eq__, '_eq_override_canary'):
        raise cls.make_type_error('Should not override __eq__.')

      try:
        this_object = super().__new__(cls, *args, **kwargs)
      except TypeError as e:
        raise cls.make_type_error(
          f"error in namedtuple() base constructor: {e}")

      # TODO: Make this kind of exception pattern (filter for errors then display them all at once)
      # more ergonomic.
      type_failure_msgs = []
      for field_name, field_constraint in fields_with_constraints.items():
        # TODO: figure out how to disallow users from accessing datatype fields by index!
        # TODO: getattr() with a specific `field_name` against a `namedtuple` is apparently
        # converted into a __getitem__() call with the argument being the integer index of the field
        # with that name -- this indirection is not shown in the stack trace when overriding
        # __getitem__() to raise on `int` inputs. See https://stackoverflow.com/a/6738724 for the
        # greater context of how `namedtuple` differs from other "normal" python classes.
        field_value = getattr(this_object, field_name)
        try:
          field_constraint.validate_satisfied_by(field_value)
        except TypeConstraintError as e:
          type_failure_msgs.append(
            f"field '{field_name}' was invalid: {e}")
      if type_failure_msgs:
        # Aggregate every failing field into one error rather than stopping at the first.
        raise cls.make_type_error(
          '{} type checking constructor arguments:\n{}'
          .format(pluralize(len(type_failure_msgs), 'error'),
                  '\n'.join(type_failure_msgs)))

      return this_object

    def __eq__(self, other):
      if self is other:
        return True

      # Compare types and fields.
      if type(self) != type(other):
        return False
      # Explicitly return super.__eq__'s value in case super returns NotImplemented
      return super().__eq__(other)
    # We define an attribute on the `cls` level definition of `__eq__` that will allow us to detect
    # that it has been overridden.
    __eq__._eq_override_canary = None

    def __ne__(self, other):
      return not (self == other)

    # NB: in Python 3, whenever __eq__ is overridden, __hash__() must also be
    # explicitly implemented, otherwise Python will raise "unhashable type". See
    # https://docs.python.org/3/reference/datamodel.html#object.__hash__.
    def __hash__(self):
      try:
        return super().__hash__()
      except TypeError:
        # If any fields are unhashable, we want to be able to specify which ones in the error
        # message, but we don't want to slow down the normal __hash__ code path, so we take the time
        # to break it down by field if we know the __hash__ fails for some reason.
        for field_name, value in self._asdict().items():
          try:
            hash(value)
          except TypeError as e:
            raise TypeError("For datatype object {} (type '{}'): in field '{}': {}"
                            .format(self, type(self).__name__, field_name, e))
        # If the error doesn't seem to be with hashing any of the fields, just re-raise the
        # original error.
        raise

    # NB: As datatype is not iterable, we need to override both __iter__ and all of the
    # namedtuple methods that expect self to be iterable.
    def __iter__(self):
      raise self.make_type_error("datatype object is not iterable")

    def _super_iter(self):
      # Private escape hatch for the overrides below that genuinely need tuple iteration.
      return super().__iter__()

    def _asdict(self):
      """Return a new OrderedDict which maps field names to their values.

      Overrides a namedtuple() method which calls __iter__.
      """
      return OrderedDict(zip(self._fields, self._super_iter()))

    def _replace(self, **kwargs):
      """Return a new datatype object replacing specified fields with new values.

      Overrides a namedtuple() method which calls __iter__.
      """
      field_dict = self._asdict()
      field_dict.update(**kwargs)
      return type(self)(**field_dict)

    def copy(self, **kwargs):
      # Public alias for _replace(); required by DatatypeMixin.
      return self._replace(**kwargs)

    # NB: it is *not* recommended to rely on the ordering of the tuple returned by this method.
    def __getnewargs__(self):
      """Return self as a plain tuple. Used by copy and pickle."""
      return tuple(self._super_iter())

    def __repr__(self):
      args_formatted = []
      for field_name in field_names:
        field_value = getattr(self, field_name)
        args_formatted.append(f"{field_name}={field_value!r}")
      return '{class_name}({args_joined})'.format(
        class_name=type(self).__name__,
        args_joined=', '.join(args_formatted))

    def __str__(self):
      # Like __repr__, but also shows each field's type constraint (if any) and uses str() of values.
      elements_formatted = []
      for field_name in field_names:
        constraint_for_field = fields_with_constraints.get(field_name, None)
        field_value = getattr(self, field_name)
        if not constraint_for_field:
          elements_formatted.append(
            # TODO: consider using the repr of arguments in this method.
            "{field_name}={field_value}"
            .format(field_name=field_name,
                    field_value=field_value))
        else:
          elements_formatted.append(
            "{field_name}<{type_constraint}>={field_value}"
            .format(field_name=field_name,
                    type_constraint=constraint_for_field,
                    field_value=field_value))
      return '{class_name}({typed_tagged_elements})'.format(
        class_name=type(self).__name__,
        typed_tagged_elements=', '.join(elements_formatted))

  # Return a new type with the given name, inheriting from the DataType class
  # just defined, with an empty class body.
  return type(superclass_name, (DataType,), {})


# TODO: make this error into an attribute on the `TypeConstraint` class object!
Expand Down Expand Up @@ -373,98 +146,3 @@ class SubclassesOf(TypeOnlyConstraint):

  def satisfied_by_type(self, obj_type):
    """Return True if `obj_type` is one of `self._types` or a subclass of one of them."""
    return issubclass(obj_type, self._types)


class TypedCollection(TypeConstraint):
  """A `TypeConstraint` which accepts a TypeOnlyConstraint and validates a collection."""

  @memoized_classproperty
  def iterable_constraint(cls):
    """Define what kind of collection inputs are accepted by this type constraint.

    :rtype: TypeConstraint
    """
    return SubclassesOf(Iterable)

  # TODO: extend TypeConstraint to specify includes and excludes in a single constraint!
  @classproperty
  def exclude_iterable_constraint(cls):
    """Define what collection inputs are *not* accepted by this type constraint.

    Strings (unicode and byte) in Python are considered iterables of substrings, but we only want
    to allow explicit collection types.

    :rtype: TypeConstraint
    """
    return SubclassesOf(str, bytes)

  def __init__(self, constraint):
    """Create a `TypeConstraint` which validates each member of a collection with `constraint`.

    :param TypeOnlyConstraint constraint: the `TypeConstraint` to apply to each element. This is
                                          currently required to be a `TypeOnlyConstraint` to avoid
                                          complex prototypal type relationships.
    """
    if not isinstance(constraint, TypeOnlyConstraint):
      raise TypeError("constraint for collection must be a {}! was: {}"
                      .format(TypeOnlyConstraint.__name__, constraint))

    self._constraint = constraint

    super().__init__(description='{}({})'.format(type(self).__name__, constraint))

  def _is_iterable(self, obj):
    # Accept only objects matching the iterable constraint that are NOT string-like.
    if not self.iterable_constraint.satisfied_by(obj):
      return False
    return not self.exclude_iterable_constraint.satisfied_by(obj)

  # TODO: consider making this a private method of TypeConstraint, as it now duplicates the logic in
  # self.validate_satisfied_by()!
  def satisfied_by(self, obj):
    if not self._is_iterable(obj):
      return False
    return all(self._constraint.satisfied_by(member) for member in obj)

  def make_collection_type_constraint_error(self, base_obj, el):
    # Wrap the per-element failure with the context of the containing collection.
    inner_error = self.make_type_constraint_error(el, self._constraint)
    return TypeConstraintError("in wrapped constraint {} matching iterable object {}: {}"
                               .format(self, base_obj, inner_error))

  def validate_satisfied_by(self, obj):
    """Validate `obj` and each of its members, raising TypeConstraintError on failure.

    Returns `obj` unchanged when every member satisfies the wrapped constraint.
    """
    if not self._is_iterable(obj):
      iterable_error = self.make_type_constraint_error(obj, self.iterable_constraint)
      raise TypeConstraintError(
        "in wrapped constraint {}: {}\nNote that objects matching {} are not considered iterable."
        .format(self, iterable_error, self.exclude_iterable_constraint))
    for member in obj:
      if not self._constraint.satisfied_by(member):
        raise self.make_collection_type_constraint_error(obj, member)
    return obj

  def __hash__(self):
    return hash((type(self), self._constraint))

  def __eq__(self, other):
    return type(self) == type(other) and self._constraint == other._constraint

  def __repr__(self):
    return f"{type(self).__name__}({self._constraint!r})"


# TODO(#6742): Useful type constraints for datatype fields before we start using mypy type hints!
# Only tuples count as hashable collections here (exact type match, not subclasses).
hashable_collection_constraint = Exactly(tuple)


class HashableTypedCollection(TypedCollection):
  # Narrow the accepted iterables from any Iterable down to hashable ones (tuples only).
  iterable_constraint = hashable_collection_constraint


# Convenience constraints for common field shapes.
string_type = Exactly(str)
string_list = TypedCollection(string_type)
string_optional = Exactly(str, type(None))


hashable_string_list = HashableTypedCollection(string_type)
1 change: 0 additions & 1 deletion tests/python/pants_test/help/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ python_tests(
dependencies=[
'src/python/pants/build_graph',
'src/python/pants/help',
'src/python/pants/util:objects',
]
)

Expand Down
Loading

0 comments on commit 08e76b3

Please sign in to comment.