Skip to content

Commit

Permalink
Remove datatype() and TypedCollection (#8576)
Browse files Browse the repository at this point in the history
Resolves #7074. Now that we always use `@dataclass` instead of `datatype`, and replaced `Collection.of(T)` with `Collection[T]`, it is safe to remove `datatype()`.

We also remove `TypedCollection` as it is no longer in use. Instead, we use explicit type hints like `Tuple[str, ...]` or `Collection[T]`.

We avoid a deprecation cycle because V2 engine code has always been experimental.

### Keeps `util/objects.py`
We still keep `TypeConstraintError`, `Exactly`, `SuperclassesOf`, and `SubclassesOf`. These are used widely in the codebase and have no standard library equivalent. They also do not seem to cause problems for MyPy.
  • Loading branch information
Eric-Arellano authored Nov 5, 2019
1 parent 8c9ba48 commit 08e76b3
Show file tree
Hide file tree
Showing 7 changed files with 2 additions and 860 deletions.
1 change: 0 additions & 1 deletion src/python/pants/base/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ python_library(
sources = ['hash_utils.py'],
dependencies = [
'3rdparty/python:typing-extensions',
'src/python/pants/util:objects',
'src/python/pants/util:strutil',
],
)
Expand Down
8 changes: 0 additions & 8 deletions src/python/pants/base/hash_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from twitter.common.collections import OrderedSet
from typing_extensions import Protocol

from pants.util.objects import DatatypeMixin
from pants.util.strutil import ensure_binary


Expand Down Expand Up @@ -136,13 +135,6 @@ def default(self, o):
.format(cls=type(self).__name__, val=o))
# Set order is arbitrary in python 3.6 and 3.7, so we need to keep this sorted() call.
return sorted(self.default(i) for i in o)
if isinstance(o, DatatypeMixin):
# datatype objects will intentionally raise in the __iter__ method, but the Iterable abstract
# base class will match any class with any superclass that has the attribute __iter__ in the
# __dict__ (see https://docs.python.org/2/library/abc.html), so we need to check for it
# specially here.
# TODO: determine if the __repr__ should be some abstractmethod on DatatypeMixin!
return self.default(repr(o))
if isinstance(o, Iterable) and not isinstance(o, (bytes, list, str)):
return list(self.default(i) for i in o)
logger.debug("Our custom json encoder {} is trying to hash a primitive type, but has gone through"
Expand Down
7 changes: 1 addition & 6 deletions src/python/pants/util/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,7 @@ python_library(
python_library(
name = 'objects',
sources = ['objects.py'],
dependencies = [
'3rdparty/python/twitter/commons:twitter.common.collections',
':strutil',
':memo',
':meta',
],
tags = {'partially_type_checked'},
)

python_library(
Expand Down
322 changes: 0 additions & 322 deletions src/python/pants/util/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,233 +2,6 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from abc import ABC, abstractmethod
from collections import OrderedDict, namedtuple
from collections.abc import Iterable

from pants.util.memo import memoized_classproperty
from pants.util.meta import classproperty
from pants.util.strutil import pluralize


class TypeCheckError(TypeError):
  """A TypeError whose message is prefixed with the name of the offending class."""

  # TODO: make some wrapper exception class to make this kind of
  # prefixing easy (maybe using a class field format string?).
  def __init__(self, type_name, msg, *args, **kwargs):
    super().__init__(f"type check error in class {type_name}: {msg}", *args, **kwargs)


# TODO: remove the `.type_check_error_type` property in `DatatypeMixin` and just have mixers
# override a class object!
class TypedDatatypeInstanceConstructionError(TypeCheckError):
  """Raised when one or more fields of a datatype() fail type checking at construction time."""


class DatatypeMixin(ABC):
  """Decouple datatype logic from the way it's created to ease migration to python 3 dataclasses."""

  # NOTE(review): `classproperty` comes from pants.util.meta (project-local) — this reads as a
  # class-level attribute on subclasses rather than an instance property.
  @classproperty
  @abstractmethod
  def type_check_error_type(cls):
    """The exception type to use in make_type_error()."""

  @classmethod
  def make_type_error(cls, msg, *args, **kwargs):
    """A helper method to generate an exception type for type checking errors.

    This method uses `cls.type_check_error_type` to ensure that type checking errors can be caught
    with a reliable exception type. The type returned by `cls.type_check_error_type` should ensure
    that the exception messages are prefixed with enough context to be useful and *not* confusing.
    """
    return cls.type_check_error_type(cls.__name__, msg, *args, **kwargs)

  @abstractmethod
  def copy(self, **kwargs):
    """Return a new object of the same type, replacing specified fields with new values"""


# TODO(#7074): Migrate to python 3 dataclasses!
def datatype(field_decls, superclass_name=None, **kwargs):
  """A wrapper for `namedtuple` that accounts for the type of the object in equality.

  Field declarations can be a string, which declares a field with that name and
  no type checking. Field declarations can also be a tuple `('field_name',
  field_type)`, which declares a field named `field_name` which is type-checked
  at construction. If a type is given, the value provided to the constructor for
  that field must be exactly that type (i.e. `type(x) == field_type`), and not
  e.g. a subclass.

  :param field_decls: Iterable of field declarations.
  :return: A type object which can then be subclassed.
  :raises: :class:`TypeError`
  """
  field_names = []
  fields_with_constraints = OrderedDict()
  for maybe_decl in field_decls:
    # ('field_name', type)
    if isinstance(maybe_decl, tuple):
      field_name, type_spec = maybe_decl
      if isinstance(type_spec, type):
        # A bare type is shorthand for an exact-type constraint (no subclasses allowed).
        type_constraint = Exactly(type_spec)
      elif isinstance(type_spec, TypeConstraint):
        type_constraint = type_spec
      else:
        raise TypeError(
          "type spec for field '{}' was not a type or TypeConstraint: was {!r} (type {!r})."
          .format(field_name, type_spec, type(type_spec).__name__))
      fields_with_constraints[field_name] = type_constraint
    else:
      # interpret it as a field name without a type to check
      field_name = maybe_decl
    # namedtuple() already checks field uniqueness
    field_names.append(field_name)

  if not superclass_name:
    superclass_name = '_anonymous_namedtuple_subclass'

  namedtuple_cls = namedtuple(superclass_name, field_names, **kwargs)

  class DataType(namedtuple_cls, DatatypeMixin):
    # Exception type used by DatatypeMixin.make_type_error().
    type_check_error_type = TypedDatatypeInstanceConstructionError

    def __new__(cls, *args, **kwargs):
      # TODO: Ideally we could execute this exactly once per `cls` but it should be a
      # relatively cheap check.
      if not hasattr(cls.__eq__, '_eq_override_canary'):
        raise cls.make_type_error('Should not override __eq__.')

      try:
        this_object = super().__new__(cls, *args, **kwargs)
      except TypeError as e:
        raise cls.make_type_error(
          f"error in namedtuple() base constructor: {e}")

      # TODO: Make this kind of exception pattern (filter for errors then display them all at once)
      # more ergonomic.
      type_failure_msgs = []
      for field_name, field_constraint in fields_with_constraints.items():
        # TODO: figure out how to disallow users from accessing datatype fields by index!
        # TODO: getattr() with a specific `field_name` against a `namedtuple` is apparently
        # converted into a __getitem__() call with the argument being the integer index of the field
        # with that name -- this indirection is not shown in the stack trace when overriding
        # __getitem__() to raise on `int` inputs. See https://stackoverflow.com/a/6738724 for the
        # greater context of how `namedtuple` differs from other "normal" python classes.
        field_value = getattr(this_object, field_name)
        try:
          field_constraint.validate_satisfied_by(field_value)
        except TypeConstraintError as e:
          type_failure_msgs.append(
            f"field '{field_name}' was invalid: {e}")
      if type_failure_msgs:
        # Aggregate every failing field into one error rather than stopping at the first.
        raise cls.make_type_error(
          '{} type checking constructor arguments:\n{}'
          .format(pluralize(len(type_failure_msgs), 'error'),
                  '\n'.join(type_failure_msgs)))

      return this_object

    def __eq__(self, other):
      if self is other:
        return True

      # Compare types and fields.
      if type(self) != type(other):
        return False
      # Explicitly return super.__eq__'s value in case super returns NotImplemented
      return super().__eq__(other)
    # We define an attribute on the `cls` level definition of `__eq__` that will allow us to detect
    # that it has been overridden.
    __eq__._eq_override_canary = None

    def __ne__(self, other):
      return not (self == other)

    # NB: in Python 3, whenever __eq__ is overridden, __hash__() must also be
    # explicitly implemented, otherwise Python will raise "unhashable type". See
    # https://docs.python.org/3/reference/datamodel.html#object.__hash__.
    def __hash__(self):
      try:
        return super().__hash__()
      except TypeError:
        # If any fields are unhashable, we want to be able to specify which ones in the error
        # message, but we don't want to slow down the normal __hash__ code path, so we take the time
        # to break it down by field if we know the __hash__ fails for some reason.
        for field_name, value in self._asdict().items():
          try:
            hash(value)
          except TypeError as e:
            raise TypeError("For datatype object {} (type '{}'): in field '{}': {}"
                            .format(self, type(self).__name__, field_name, e))
        # If the error doesn't seem to be with hashing any of the fields, just re-raise the
        # original error.
        raise

    # NB: As datatype is not iterable, we need to override both __iter__ and all of the
    # namedtuple methods that expect self to be iterable.
    def __iter__(self):
      raise self.make_type_error("datatype object is not iterable")

    def _super_iter(self):
      # Private escape hatch for the overrides below that genuinely need tuple iteration.
      return super().__iter__()

    def _asdict(self):
      """Return a new OrderedDict which maps field names to their values.

      Overrides a namedtuple() method which calls __iter__.
      """
      return OrderedDict(zip(self._fields, self._super_iter()))

    def _replace(self, **kwargs):
      """Return a new datatype object replacing specified fields with new values.

      Overrides a namedtuple() method which calls __iter__.
      """
      field_dict = self._asdict()
      field_dict.update(**kwargs)
      return type(self)(**field_dict)

    def copy(self, **kwargs):
      # Public alias for _replace(); required by DatatypeMixin.
      return self._replace(**kwargs)

    # NB: it is *not* recommended to rely on the ordering of the tuple returned by this method.
    def __getnewargs__(self):
      """Return self as a plain tuple. Used by copy and pickle."""
      return tuple(self._super_iter())

    def __repr__(self):
      args_formatted = []
      for field_name in field_names:
        field_value = getattr(self, field_name)
        args_formatted.append(f"{field_name}={field_value!r}")
      return '{class_name}({args_joined})'.format(
        class_name=type(self).__name__,
        args_joined=', '.join(args_formatted))

    def __str__(self):
      # Like __repr__, but also shows each field's type constraint (if any) and uses str() of values.
      elements_formatted = []
      for field_name in field_names:
        constraint_for_field = fields_with_constraints.get(field_name, None)
        field_value = getattr(self, field_name)
        if not constraint_for_field:
          elements_formatted.append(
            # TODO: consider using the repr of arguments in this method.
            "{field_name}={field_value}"
            .format(field_name=field_name,
                    field_value=field_value))
        else:
          elements_formatted.append(
            "{field_name}<{type_constraint}>={field_value}"
            .format(field_name=field_name,
                    type_constraint=constraint_for_field,
                    field_value=field_value))
      return '{class_name}({typed_tagged_elements})'.format(
        class_name=type(self).__name__,
        typed_tagged_elements=', '.join(elements_formatted))

  # Return a new type with the given name, inheriting from the DataType class
  # just defined, with an empty class body.
  return type(superclass_name, (DataType,), {})


# TODO: make this error into an attribute on the `TypeConstraint` class object!
Expand Down Expand Up @@ -373,98 +146,3 @@ class SubclassesOf(TypeOnlyConstraint):

  def satisfied_by_type(self, obj_type):
    """Return True if `obj_type` is one of `self._types` or a subclass of one of them."""
    return issubclass(obj_type, self._types)


class TypedCollection(TypeConstraint):
  """A `TypeConstraint` which accepts a TypeOnlyConstraint and validates a collection."""

  @memoized_classproperty
  def iterable_constraint(cls):
    """Define what kind of collection inputs are accepted by this type constraint.

    :rtype: TypeConstraint
    """
    return SubclassesOf(Iterable)

  # TODO: extend TypeConstraint to specify includes and excludes in a single constraint!
  @classproperty
  def exclude_iterable_constraint(cls):
    """Define what collection inputs are *not* accepted by this type constraint.

    Strings (unicode and byte) in Python are considered iterables of substrings, but we only want
    to allow explicit collection types.

    :rtype: TypeConstraint
    """
    return SubclassesOf(str, bytes)

  def __init__(self, constraint):
    """Create a `TypeConstraint` which validates each member of a collection with `constraint`.

    :param TypeOnlyConstraint constraint: the `TypeConstraint` to apply to each element. This is
                                          currently required to be a `TypeOnlyConstraint` to avoid
                                          complex prototypal type relationships.
    """
    if not isinstance(constraint, TypeOnlyConstraint):
      raise TypeError("constraint for collection must be a {}! was: {}"
                      .format(TypeOnlyConstraint.__name__, constraint))

    self._constraint = constraint

    super().__init__(description='{}({})'.format(type(self).__name__, constraint))

  def _is_iterable(self, obj):
    # Accept only objects matching the iterable constraint that are NOT string-like.
    if not self.iterable_constraint.satisfied_by(obj):
      return False
    return not self.exclude_iterable_constraint.satisfied_by(obj)

  # TODO: consider making this a private method of TypeConstraint, as it now duplicates the logic in
  # self.validate_satisfied_by()!
  def satisfied_by(self, obj):
    if not self._is_iterable(obj):
      return False
    return all(self._constraint.satisfied_by(member) for member in obj)

  def make_collection_type_constraint_error(self, base_obj, el):
    # Wrap the per-element failure with the context of the containing collection.
    inner_error = self.make_type_constraint_error(el, self._constraint)
    return TypeConstraintError("in wrapped constraint {} matching iterable object {}: {}"
                               .format(self, base_obj, inner_error))

  def validate_satisfied_by(self, obj):
    """Validate `obj` and each of its members, raising TypeConstraintError on failure.

    Returns `obj` unchanged when every member satisfies the wrapped constraint.
    """
    if not self._is_iterable(obj):
      iterable_error = self.make_type_constraint_error(obj, self.iterable_constraint)
      raise TypeConstraintError(
        "in wrapped constraint {}: {}\nNote that objects matching {} are not considered iterable."
        .format(self, iterable_error, self.exclude_iterable_constraint))
    for member in obj:
      if not self._constraint.satisfied_by(member):
        raise self.make_collection_type_constraint_error(obj, member)
    return obj

  def __hash__(self):
    return hash((type(self), self._constraint))

  def __eq__(self, other):
    return type(self) == type(other) and self._constraint == other._constraint

  def __repr__(self):
    return f"{type(self).__name__}({self._constraint!r})"


# TODO(#6742): Useful type constraints for datatype fields before we start using mypy type hints!
# Only tuples count as hashable collections here (exact type match, not subclasses).
hashable_collection_constraint = Exactly(tuple)


class HashableTypedCollection(TypedCollection):
  # Narrow the accepted iterables from any Iterable down to hashable ones (tuples only).
  iterable_constraint = hashable_collection_constraint


# Convenience constraints for common field shapes.
string_type = Exactly(str)
string_list = TypedCollection(string_type)
string_optional = Exactly(str, type(None))


hashable_string_list = HashableTypedCollection(string_type)
1 change: 0 additions & 1 deletion tests/python/pants_test/help/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ python_tests(
dependencies=[
'src/python/pants/build_graph',
'src/python/pants/help',
'src/python/pants/util:objects',
]
)

Expand Down
Loading

0 comments on commit 08e76b3

Please sign in to comment.