From 749ceda98febd2d5034a5aa1c01c77650827f257 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 8 Nov 2017 20:52:37 -0800 Subject: [PATCH 1/8] Make Indexer classes not inherit from tuple. I'm not entirely sure this is a good idea. The advantage is that it ensures that all our indexing code is entirely explicit: everything that reaches a backend *must* be an ExplicitIndexer. The downside is that it removes a bit of internal flexibility: we can't just use tuples in place of basic indexers anymore. On the whole, I think this is probably worth it but I would appreciate feedback. --- xarray/backends/common.py | 10 +- xarray/backends/h5netcdf_.py | 7 +- xarray/backends/netCDF4_.py | 17 +--- xarray/backends/pydap_.py | 20 ++-- xarray/backends/pynio_.py | 18 ++-- xarray/backends/rasterio_.py | 14 ++- xarray/backends/scipy_.py | 9 +- xarray/conventions.py | 24 ++--- xarray/core/formatting.py | 2 +- xarray/core/indexing.py | 166 +++++++++++++++++++++++----------- xarray/core/utils.py | 10 +- xarray/core/variable.py | 5 +- xarray/tests/__init__.py | 4 +- xarray/tests/test_backends.py | 2 +- xarray/tests/test_variable.py | 2 +- 15 files changed, 173 insertions(+), 137 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 1c63c368260..d33bffb1c1e 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -10,7 +10,8 @@ from distutils.version import LooseVersion from ..conventions import cf_encoder -from ..core.utils import FrozenOrderedDict +from ..core import indexing +from ..core.utils import FrozenOrderedDict, NdimSizeLenMixin from ..core.pycompat import iteritems, dask_array_type try: @@ -76,6 +77,13 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, time.sleep(1e-3 * next_delay) +class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): + + def __array__(self, dtype=None): + key = indexing.BasicIndexer((slice(None),) * self.ndim) + return np.asarray(self[key], dtype=dtype) + + class AbstractDataStore(Mapping): _autoclose = False diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index edd5b34d6d3..b4d2dc7e689 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -15,11 +15,8 @@ class H5NetCDFArrayWrapper(BaseNetCDF4Array): def __getitem__(self, key): - if isinstance(key, indexing.VectorizedIndexer): - raise NotImplementedError( - 'Vectorized indexing for {} is not implemented. Load your ' - 'data first with .load() or .compute().'.format(type(self))) - key = indexing.to_tuple(key) + key = indexing.unwrap_explicit_indexer( + key, self, allow=(indexing.BasicIndexer, indexing.OuterIndexer)) with self.datastore.ensure_open(autoclose=True): return self.get_array()[key] diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 87bea8fe07c..e70810ef51d 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -10,12 +10,10 @@ from .. import Variable from ..conventions import pop_to from ..core import indexing -from ..core.utils import (FrozenOrderedDict, NdimSizeLenMixin, - DunderArrayMixin, close_on_error, - is_remote_uri) +from ..core.utils import (FrozenOrderedDict, close_on_error, is_remote_uri) from ..core.pycompat import iteritems, basestring, OrderedDict, PY3, suppress -from .common import (WritableCFDataStore, robust_getitem, +from .common import (WritableCFDataStore, robust_getitem, BackendArray, DataStorePickleMixin, find_root) from .netcdf3 import (encode_nc3_attr_value, encode_nc3_variable) @@ -27,8 +25,7 @@ '|': 'native'} -class BaseNetCDF4Array(NdimSizeLenMixin, DunderArrayMixin, - indexing.NDArrayIndexable): +class BaseNetCDF4Array(BackendArray): def __init__(self, variable_name, datastore): self.datastore = datastore self.variable_name = variable_name @@ -51,12 +48,8 @@ def get_array(self): class NetCDF4ArrayWrapper(BaseNetCDF4Array): def __getitem__(self, key): - if isinstance(key, indexing.VectorizedIndexer): - raise NotImplementedError( - 'Vectorized indexing for {} is not implemented. Load your ' - 'data first with .load() or .compute().'.format(type(self))) - - key = indexing.to_tuple(key) + key = indexing.unwrap_explicit_indexer( + key, self, allow=(indexing.BasicIndexer, indexing.OuterIndexer)) if self.datastore.is_remote: # pragma: no cover getitem = functools.partial(robust_getitem, catch=RuntimeError) diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 2ab7873cbf3..044dcc21e5f 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -4,14 +4,14 @@ import numpy as np from .. import Variable -from ..core.utils import FrozenOrderedDict, Frozen, NDArrayMixin +from ..core.utils import FrozenOrderedDict, Frozen from ..core import indexing from ..core.pycompat import integer_types -from .common import AbstractDataStore, robust_getitem +from .common import AbstractDataStore, BackendArray, robust_getitem -class PydapArrayWrapper(NDArrayMixin, indexing.NDArrayIndexable): +class PydapArrayWrapper(BackendArray): def __init__(self, array): self.array = array @@ -27,17 +27,9 @@ def dtype(self): return np.dtype(t.typecode + str(t.size)) def __getitem__(self, key): - if isinstance(key, indexing.VectorizedIndexer): - raise NotImplementedError( - 'Vectorized indexing for {} is not implemented. Load your ' - 'data first with .load() or .compute().'.format(type(self))) - key = indexing.to_tuple(key) - if not isinstance(key, tuple): - key = (key,) - for k in key: - if not (isinstance(k, integer_types + (slice,)) or k is Ellipsis): - raise IndexError('pydap only supports indexing with int, ' - 'slice and Ellipsis objects') + key = indexing.unwrap_explicit_indexer( + key, target=self, allow=indexing.BasicIndexer) + # pull the data from the array attribute if possible, to avoid # downloading coordinate data twice array = getattr(self.array, 'array', self.array) diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py index 8b9e49745f4..fedd74d1148 100644 --- a/xarray/backends/pynio_.py +++ b/xarray/backends/pynio_.py @@ -7,16 +7,14 @@ import numpy as np from .. import Variable -from ..core.utils import (FrozenOrderedDict, Frozen, - NdimSizeLenMixin, DunderArrayMixin) +from ..core.utils import (FrozenOrderedDict, Frozen) from ..core import indexing from ..core.pycompat import integer_types -from .common import AbstractDataStore, DataStorePickleMixin +from .common import AbstractDataStore, DataStorePickleMixin, BackendArray -class NioArrayWrapper(NdimSizeLenMixin, DunderArrayMixin, - indexing.NDArrayIndexable): +class NioArrayWrapper(BackendArray): def __init__(self, variable_name, datastore): self.datastore = datastore @@ -30,13 +28,9 @@ def get_array(self): return self.datastore.ds.variables[self.variable_name] def __getitem__(self, key): - if isinstance(key, (indexing.VectorizedIndexer, - indexing.OuterIndexer)): - raise NotImplementedError( - 'Nio backend does not support vectorized / outer indexing. ' - 'Load your data first with .load() or .compute(). ' - 'Given {}'.format(key)) - key = indexing.to_tuple(key) + key = indexing.unwrap_explicit_indexer( + key, target=self, allow=indexing.BasicIndexer) + with self.datastore.ensure_open(autoclose=True): array = self.get_array() if key == () and self.ndim == 0: diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index b39299f3541..13f9c0a3de6 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -3,8 +3,9 @@ import numpy as np from .. import DataArray -from ..core.utils import DunderArrayMixin, NdimSizeLenMixin, is_scalar +from ..core.utils import is_scalar from ..core import indexing +from .common import BackendArray try: from dask.utils import SerializableLock as Lock except ImportError: @@ -17,8 +18,7 @@ 'first.') -class RasterioArrayWrapper(NdimSizeLenMixin, DunderArrayMixin, - indexing.NDArrayIndexable): +class RasterioArrayWrapper(BackendArray): """A wrapper around rasterio dataset objects""" def __init__(self, rasterio_ds): self.rasterio_ds = rasterio_ds @@ -38,11 +38,9 @@ def shape(self): return self._shape def __getitem__(self, key): - if isinstance(key, indexing.VectorizedIndexer): - raise NotImplementedError( - 'Vectorized indexing for {} is not implemented. Load your ' - 'data first with .load() or .compute().'.format(type(self))) - key = indexing.to_tuple(key) + key = indexing.unwrap_explicit_indexer( + key, self, allow=(indexing.BasicIndexer, indexing.OuterIndexer)) + # bands cannot be windowed but they can be listed band_key = key[0] n_bands = self.shape[0] diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 346caf76579..240b8f2ebaa 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -9,11 +9,10 @@ from .. import Variable from ..core.pycompat import iteritems, OrderedDict, basestring -from ..core.utils import (Frozen, FrozenOrderedDict, NdimSizeLenMixin, - DunderArrayMixin) -from ..core.indexing import NumpyIndexingAdapter, NDArrayIndexable +from ..core.utils import (Frozen, FrozenOrderedDict) +from ..core.indexing import NumpyIndexingAdapter -from .common import WritableCFDataStore, DataStorePickleMixin +from .common import WritableCFDataStore, DataStorePickleMixin, BackendArray from .netcdf3 import (is_valid_nc3_name, encode_nc3_attr_value, encode_nc3_variable) @@ -31,7 +30,7 @@ def _decode_attrs(d): for (k, v) in iteritems(d)) -class ScipyArrayWrapper(NdimSizeLenMixin, DunderArrayMixin, NDArrayIndexable): +class ScipyArrayWrapper(BackendArray): def __init__(self, variable_name, datastore): self.datastore = datastore diff --git a/xarray/conventions.py b/xarray/conventions.py index 94c963da765..6836775eb83 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -333,7 +333,7 @@ def encode_cf_timedelta(timedeltas, units=None): return (num, units) -class MaskedAndScaledArray(utils.NDArrayMixin, indexing.NDArrayIndexable): +class MaskedAndScaledArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically scaled and masked according to CF conventions for packed and missing data values. @@ -391,7 +391,7 @@ def __repr__(self): self.scale_factor, self.add_offset, self._dtype)) -class DecodedCFDatetimeArray(utils.NDArrayMixin, indexing.NDArrayIndexable): +class DecodedCFDatetimeArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically converted into datetime objects using decode_cf_datetime. @@ -430,7 +430,7 @@ def __getitem__(self, key): calendar=self.calendar) -class DecodedCFTimedeltaArray(utils.NDArrayMixin, indexing.NDArrayIndexable): +class DecodedCFTimedeltaArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically converted into timedelta objects using decode_cf_timedelta. @@ -447,7 +447,7 @@ def __getitem__(self, key): return decode_cf_timedelta(self.array[key], units=self.units) -class StackedBytesArray(utils.NDArrayMixin, indexing.NDArrayIndexable): +class StackedBytesArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. @@ -478,7 +478,7 @@ def shape(self): def __str__(self): # TODO(shoyer): figure out why we need this special case? if self.ndim == 0: - return str(self[...].item()) + return str(np.array(self).item()) else: return repr(self) @@ -487,13 +487,13 @@ def __repr__(self): def __getitem__(self, key): # require slicing the last dimension completely - key = indexing.expanded_indexer(key, self.array.ndim) - if key[-1] != slice(None): + key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) + if key.tuple[-1] != slice(None): raise IndexError('too many indices') return char_to_bytes(self.array[key]) -class BytesToStringArray(utils.NDArrayMixin, indexing.NDArrayIndexable): +class BytesToStringArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): """Wrapper that decodes bytes to unicode when values are read. >>> BytesToStringArray(np.array([b'abc']))[:] @@ -520,7 +520,7 @@ def dtype(self): def __str__(self): # TODO(shoyer): figure out why we need this special case? if self.ndim == 0: - return str(self[...].item()) + return str(np.array(self).item()) else: return repr(self) @@ -532,7 +532,7 @@ def __getitem__(self, key): return decode_bytes_array(self.array[key], self.encoding) -class NativeEndiannessArray(utils.NDArrayMixin, indexing.NDArrayIndexable): +class NativeEndiannessArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): """Decode arrays on the fly from non-native to native endianness This is useful for decoding arrays from netCDF3 files (which are all @@ -561,7 +561,7 @@ def __getitem__(self, key): return np.asarray(self.array[key], dtype=self.dtype) -class BoolTypeArray(utils.NDArrayMixin, indexing.NDArrayIndexable): +class BoolTypeArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): """Decode arrays on the fly from integer to boolean datatype This is useful for decoding boolean arrays from integer typed netCDF @@ -589,7 +589,7 @@ def __getitem__(self, key): return np.asarray(self.array[key], dtype=self.dtype) -class UnsignedIntTypeArray(utils.NDArrayMixin, indexing.NDArrayIndexable): +class UnsignedIntTypeArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): """Decode arrays on the fly from signed integer to unsigned integer. Typically used when _Unsigned is set at as a netCDF attribute on a signed integer variable. diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index b9e381149b7..10b77495e13 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -99,7 +99,7 @@ def last_item(x): # work around for https://github.com/numpy/numpy/issues/5195 return [] - indexer = (slice(-1, None),) * x.ndim + indexer = BasicIndexer((slice(-1, None),) * x.ndim) return np.ravel(x[indexer]).tolist() diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 183f4e5eaa0..ac4265ea7fd 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -277,38 +277,91 @@ def _index_indexer_1d(old_indexer, applied_indexer, size): return indexer -class IndexerTuple(tuple): - """ Base class for xarray indexing tuples """ +class ExplicitIndexer(object): + """Base class for explicit indexer objects. + + ExplicitIndexer objects wrap a tuple of values given by their ``tuple`` + property. These tuples should always have length equal to the number of + dimensions on the indexed array. + + Do not instantiate BaseIndexer objects directly: instead, use one of the + sub-classes BasicIndexer, OuterIndexer or VectorizedIndexer. + """ + def __init__(self, key): + if type(self) is ExplicitIndexer: + raise TypeError('cannot instantiate base ExplicitIndexer objects') + self._key = tuple(key) + + @property + def tuple(self): + return self._key def __repr__(self): - return type(self).__name__ + super(IndexerTuple, self).__repr__() + return '{}({})'.format(type(self).__name__, self.tuple) -def to_tuple(key): - """ Converts our indexer tuple to a native python tuple """ - return tuple(key) if isinstance(key, IndexerTuple) else key +class BasicIndexer(ExplicitIndexer): + """Tuple for basic indexing. + All elements should be int or slice objects. Indexing follows NumPy's + rules for basic indexing: each axis is independently sliced and axes + indexed with an integer are dropped from the result. + """ -class BasicIndexer(IndexerTuple): - """ Tuple for basic indexing. """ +class OuterIndexer(ExplicitIndexer): + """Tuple for outer/orthogonal indexing. -class OuterIndexer(IndexerTuple): - """ Tuple for outer/orthogonal indexing. - All the items are one of integer, slice, and 1d-np.ndarray. + All elements should be int, slice or 1-dimensional np.ndarray objects with + an integer dtype. Indexing is applied independently along each axis, and + axes indexed with an integer are dropped from the result. This type of + indexing works like MATLAB/Fortran. """ -class VectorizedIndexer(IndexerTuple): - """ Tuple for vectorized indexing """ +class VectorizedIndexer(ExplicitIndexer): + """Tuple for vectorized indexing. + + All elements should be slice or N-dimensional np.ndarray objects with an + integer dtype. Indexing follows proposed rules for np.ndarray.vindex, which + matches NumPy's advanced indexing rules (including broadcasting) except + sliced axes are always moved to the end: + https://github.com/numpy/numpy/pull/6256 + """ + + +class ExplicitlyIndexed(object): + """Mixin to mark support for Indexer subclasses in indexing.""" + + +def unwrap_explicit_indexer(key, target, allow): + """Unwrap an explicit key into a tuple.""" + if not isinstance(key, allow): + key_type_name = { + BasicIndexer: 'Basic', + OuterIndexer: 'Outer', + VectorizedIndexer: 'Vectorized' + }[type(key)] + raise NotImplementedError( + '{} indexing for {} is not implemented. Load your data first with ' + '.load(), .compute() or .persist(), or disable caching by setting ' + 'cache=False in open_dataset.'.format(key_type_name, type(target))) + if not isinstance(key, ExplicitIndexer): + raise TypeError('unexpected key type: {}'.format(key)) + return key.tuple + +class BasicToExplicitArray(utils.NDArrayMixin, ExplicitlyIndexed): -class NDArrayIndexable(object): - """ Mixin to mark support for IndexerTuple subclasses in indexing.""" + def __init__(self, array): + self.array = array + + def __getitem__(self, key): + return self.array[BasicIndexer(key)] -class LazilyIndexedArray(utils.NDArrayMixin, NDArrayIndexable): - """Wrap an array that handles orthogonal indexing to make indexing lazy +class LazilyIndexedArray(utils.NDArrayMixin, ExplicitlyIndexed): + """Wrap an array to make basic and orthogonal indexing lazy. """ def __init__(self, array, key=None): """ @@ -316,37 +369,36 @@ def __init__(self, array, key=None): ---------- array : array_like Array like object to index. - key : tuple, optional + key : ExplicitIndexer, optional Array indexer. If provided, it is assumed to already be in canonical expanded form. """ - # We need to avoid doubly wrapping. - if isinstance(array, type(self)): - self.array = as_indexable(array.array) - self.key = array.key - if key is not None: - self.key = self._updated_key(key) + if isinstance(array, type(self)) and key is None: + # unwrap + key = array.key + array = array.array - else: - if key is None: - key = (slice(None),) * array.ndim - key = BasicIndexer(key) - self.array = as_indexable(array) - self.key = key + if key is None: + key = BasicIndexer((slice(None),) * array.ndim) + + self.array = as_indexable(array) + self.key = key def _updated_key(self, new_key): # TODO should suport VectorizedIndexer if isinstance(new_key, VectorizedIndexer): raise NotImplementedError( 'Vectorized indexing for {} is not implemented. Load your ' - 'data first with .load() or .compute().'.format(type(self))) - new_key = iter(expanded_indexer(new_key, self.ndim)) + 'data first with .load() or .compute(), or disable caching by ' + 'setting cache=False in open_dataset.'.format(type(self))) + + iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) key = [] - for size, k in zip(self.array.shape, self.key): + for size, k in zip(self.array.shape, self.key.tuple): if isinstance(k, integer_types): key.append(k) else: - key.append(_index_indexer_1d(k, next(new_key), size)) + key.append(_index_indexer_1d(k, next(iter_new_key), size)) if all(isinstance(k, integer_types + (slice, )) for k in key): return BasicIndexer(key) return OuterIndexer(key) @@ -354,7 +406,7 @@ def _updated_key(self, new_key): @property def shape(self): shape = [] - for size, k in zip(self.array.shape, self.key): + for size, k in zip(self.array.shape, self.key.tuple): if isinstance(k, slice): shape.append(len(range(*k.indices(size)))) elif isinstance(k, np.ndarray): @@ -365,8 +417,8 @@ def __array__(self, dtype=None): array = as_indexable(self.array) return np.asarray(array[self.key], dtype=None) - def __getitem__(self, key): - return type(self)(self.array, self._updated_key(key)) + def __getitem__(self, indexer): + return type(self)(self.array, self._updated_key(indexer)) def __setitem__(self, key, value): key = self._updated_key(key) @@ -385,7 +437,7 @@ def _wrap_numpy_scalars(array): return array -class CopyOnWriteArray(utils.NDArrayMixin, NDArrayIndexable): +class CopyOnWriteArray(utils.NDArrayMixin, ExplicitlyIndexed): def __init__(self, array): self.array = as_indexable(array) self._copied = False @@ -406,7 +458,7 @@ def __setitem__(self, key, value): self.array[key] = value -class MemoryCachedArray(utils.NDArrayMixin, NDArrayIndexable): +class MemoryCachedArray(utils.NDArrayMixin, ExplicitlyIndexed): def __init__(self, array): self.array = _wrap_numpy_scalars(as_indexable(array)) @@ -427,11 +479,11 @@ def __setitem__(self, key, value): def as_indexable(array): """ - This function always returns a NDArrayIndexable subclass, + This function always returns a ExplicitlyIndexed subclass, so that the vectorized indexing is always possible with the returned object. """ - if isinstance(array, NDArrayIndexable): + if isinstance(array, ExplicitlyIndexed): return array if isinstance(array, np.ndarray): return NumpyIndexingAdapter(array) @@ -442,12 +494,12 @@ def as_indexable(array): raise TypeError('Invalid array type: {}'.format(type(array))) -def _outer_to_numpy_indexer(key, shape): +def _outer_to_numpy_indexer(indexer, shape): """Convert an OuterIndexer into an indexer for NumPy. Parameters ---------- - key : OuterIndexer + indexer : OuterIndexer Outer indexing tuple to convert. shape : tuple Shape of the array subject to the indexing. @@ -457,6 +509,8 @@ def _outer_to_numpy_indexer(key, shape): tuple Base tuple suitable for use to index a NumPy array. """ + key = indexer.tuple + if len([k for k in key if not isinstance(k, slice)]) <= 1: # If there is only one vector and all others are slice, # it can be safely used in mixed basic/advanced indexing. @@ -480,7 +534,7 @@ def _outer_to_numpy_indexer(key, shape): return tuple(new_key) -class NumpyIndexingAdapter(utils.NDArrayMixin, NDArrayIndexable): +class NumpyIndexingAdapter(utils.NDArrayMixin, ExplicitlyIndexed): """Wrap a NumPy array to use broadcasted indexing """ def __init__(self, array): @@ -501,14 +555,16 @@ def _ensure_ndarray(self, value): def _indexing_array_and_key(self, key): if isinstance(key, OuterIndexer): + array = self.array key = _outer_to_numpy_indexer(key, self.array.shape) - - if isinstance(key, VectorizedIndexer): + elif isinstance(key, VectorizedIndexer): array = nputils.NumpyVIndexAdapter(self.array) + key = key.tuple else: array = self.array + key = key.tuple - return array, to_tuple(key) + return array, key def __getitem__(self, key): array, key = self._indexing_array_and_key(key) @@ -519,7 +575,7 @@ def __setitem__(self, key, value): array[key] = value -class DaskIndexingAdapter(utils.NDArrayMixin, NDArrayIndexable): +class DaskIndexingAdapter(utils.NDArrayMixin, ExplicitlyIndexed): """Wrap a dask array to support xarray-style indexing. """ def __init__(self, array): @@ -537,14 +593,14 @@ def to_int_tuple(key): for k in key]) if isinstance(key, BasicIndexer): - return self.array[to_int_tuple(key)] + return self.array[to_int_tuple(key.tuple)] elif isinstance(key, VectorizedIndexer): - return self.array.vindex[to_int_tuple(tuple(key))] + return self.array.vindex[to_int_tuple(key.tuple)] elif key is Ellipsis: return self.array else: assert isinstance(key, OuterIndexer) - key = to_int_tuple(tuple(key)) + key = to_int_tuple(key.tuple) try: return self.array[key] except NotImplementedError: @@ -563,7 +619,7 @@ def __setitem__(self, key, value): 'method or accessing its .values attribute.') -class PandasIndexAdapter(utils.NDArrayMixin, NDArrayIndexable): +class PandasIndexAdapter(utils.NDArrayMixin, ExplicitlyIndexed): """Wrap a pandas.Index to be better about preserving dtypes and to handle indexing by length 1 tuples like numpy """ @@ -600,15 +656,15 @@ def shape(self): # .shape is broken on pandas prior to v0.15.2 return (len(self.array),) - def __getitem__(self, tuple_key): - key = to_tuple(tuple_key) + def __getitem__(self, indexer): + key = indexer.tuple if isinstance(key, tuple) and len(key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) key, = key if getattr(key, 'ndim', 0) > 1: # Return np-array if multidimensional - return NumpyIndexingAdapter(self.array.values)[tuple_key] + return NumpyIndexingAdapter(self.array.values)[indexer] result = self.array[key] diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 91aae25b071..8fb1f9acce3 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -437,12 +437,7 @@ def __len__(self): raise TypeError('len() of unsized object') -class DunderArrayMixin(object): - def __array__(self, dtype=None): - return np.asarray(self[...], dtype=dtype) - - -class NDArrayMixin(NdimSizeLenMixin, DunderArrayMixin): +class NDArrayMixin(NdimSizeLenMixin): """Mixin class for making wrappers of N-dimensional arrays that conform to the ndarray interface required for the data argument to Variable objects. @@ -460,6 +455,9 @@ def shape(self): def __getitem__(self, key): return self.array[key] + def __array__(self, dtype=None): + return np.asarray(self.array, dtype=dtype) + def __repr__(self): return '%s(array=%r)' % (type(self).__name__, self.array) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index aa64dcab1fc..8bc6e28b4d2 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -32,7 +32,7 @@ NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( - indexing.NDArrayIndexable, pd.Index) + dask_array_type + indexing.ExplicitlyIndexed, pd.Index) + dask_array_type BASIC_INDEXING_TYPES = integer_types + (slice,) @@ -766,6 +766,7 @@ def chunk(self, chunks=None, name=None, lock=False): if utils.is_dict_like(chunks): chunks = tuple(chunks.get(n, s) for n, s in enumerate(self.shape)) + data = indexing.BasicToExplicitArray(indexing.as_indexable(data)) data = da.from_array(data, chunks, name=name, lock=lock) return type(self)(self.dims, data, self._attrs, self._encoding, @@ -1261,7 +1262,7 @@ def equals(self, other, equiv=duck_array_ops.array_equiv): return (self.dims == other.dims and (self._data is other._data or equiv(self.data, other.data))) - except (TypeError, AttributeError): + except (TypeError, AttributeError) as e: return False def broadcast_equals(self, other, equiv=duck_array_ops.array_equiv): diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index d4d1b092631..738b0be5a56 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -14,7 +14,7 @@ from xarray.core import utils from xarray.core.pycompat import PY3 -from xarray.core.indexing import NDArrayIndexable +from xarray.core.indexing import ExplicitlyIndexed from xarray.testing import assert_equal, assert_identical, assert_allclose from xarray.plot.utils import import_seaborn @@ -199,7 +199,7 @@ class UnexpectedDataAccess(Exception): pass -class InaccessibleArray(utils.NDArrayMixin, NDArrayIndexable): +class InaccessibleArray(utils.NDArrayMixin, ExplicitlyIndexed): def __init__(self, array): self.array = array diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 1701b8d4957..c454f4c8f75 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -436,7 +436,7 @@ def validate_array_type(self, ds): def find_and_validate_array(obj): # recursively called function. obj: array or array wrapper. if hasattr(obj, 'array'): - if isinstance(obj.array, indexing.NDArrayIndexable): + if isinstance(obj.array, indexing.ExplicitlyIndexed): find_and_validate_array(obj.array) else: if isinstance(obj.array, np.ndarray): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index e07f01af585..d8ef9c29ec2 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1723,7 +1723,7 @@ class CustomArray(NDArrayMixin): def __init__(self, array): self.array = array - class CustomIndexable(CustomArray, indexing.NDArrayIndexable): + class CustomIndexable(CustomArray, indexing.ExplicitlyIndexed): pass array = CustomArray(np.arange(3)) From bc9b2eacdc66311834b205330fd47aa519dea714 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sat, 11 Nov 2017 20:53:51 -0800 Subject: [PATCH 2/8] Add validation to ExplicitIndexer classes --- xarray/conventions.py | 21 ++-- xarray/core/formatting.py | 42 +++---- xarray/core/indexing.py | 156 ++++++++++++++++++------ xarray/core/utils.py | 3 - xarray/core/variable.py | 25 ++-- xarray/tests/__init__.py | 11 ++ xarray/tests/test_conventions.py | 35 ++++-- xarray/tests/test_dataset.py | 3 +- xarray/tests/test_indexing.py | 196 ++++++++++++++++++++++++------- xarray/tests/test_variable.py | 13 +- 10 files changed, 359 insertions(+), 146 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 6836775eb83..5978ec3a5cb 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -333,7 +333,7 @@ def encode_cf_timedelta(timedeltas, units=None): return (num, units) -class MaskedAndScaledArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): +class MaskedAndScaledArray(indexing.ExplicitNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically scaled and masked according to CF conventions for packed and missing data values. @@ -391,7 +391,7 @@ def __repr__(self): self.scale_factor, self.add_offset, self._dtype)) -class DecodedCFDatetimeArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): +class DecodedCFDatetimeArray(indexing.ExplicitNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically converted into datetime objects using decode_cf_datetime. @@ -404,8 +404,9 @@ def __init__(self, array, units, calendar=None): # Verify that at least the first and last date can be decoded # successfully. Otherwise, tracebacks end up swallowed by # Dataset.__repr__ when users try to view their lazily decoded array. - example_value = np.concatenate([first_n_items(array, 1) or [0], - last_item(array) or [0]]) + values = indexing.WrapImplicitIndexing(self.array) + example_value = np.concatenate([first_n_items(values, 1) or [0], + last_item(values) or [0]]) try: result = decode_cf_datetime(example_value, units, calendar) @@ -430,7 +431,7 @@ def __getitem__(self, key): calendar=self.calendar) -class DecodedCFTimedeltaArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): +class DecodedCFTimedeltaArray(indexing.ExplicitNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically converted into timedelta objects using decode_cf_timedelta. @@ -447,7 +448,7 @@ def __getitem__(self, key): return decode_cf_timedelta(self.array[key], units=self.units) -class StackedBytesArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): +class StackedBytesArray(indexing.ExplicitNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. @@ -493,7 +494,7 @@ def __getitem__(self, key): return char_to_bytes(self.array[key]) -class BytesToStringArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): +class BytesToStringArray(indexing.ExplicitNDArrayMixin): """Wrapper that decodes bytes to unicode when values are read. >>> BytesToStringArray(np.array([b'abc']))[:] @@ -532,7 +533,7 @@ def __getitem__(self, key): return decode_bytes_array(self.array[key], self.encoding) -class NativeEndiannessArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): +class NativeEndiannessArray(indexing.ExplicitNDArrayMixin): """Decode arrays on the fly from non-native to native endianness This is useful for decoding arrays from netCDF3 files (which are all @@ -561,7 +562,7 @@ def __getitem__(self, key): return np.asarray(self.array[key], dtype=self.dtype) -class BoolTypeArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): +class BoolTypeArray(indexing.ExplicitNDArrayMixin): """Decode arrays on the fly from integer to boolean datatype This is useful for decoding boolean arrays from integer typed netCDF @@ -589,7 +590,7 @@ def __getitem__(self, key): return np.asarray(self.array[key], dtype=self.dtype) -class UnsignedIntTypeArray(utils.NDArrayMixin, indexing.ExplicitlyIndexed): +class UnsignedIntTypeArray(indexing.ExplicitNDArrayMixin): """Decode arrays on the fly from signed integer to unsigned integer. Typically used when _Unsigned is set at as a netCDF attribute on a signed integer variable. diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 10b77495e13..a449a3b5bb4 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -69,38 +69,39 @@ def _get_indexer_at_least_n_items(shape, n_desired): cum_items = np.cumprod(shape[::-1]) n_steps = np.argmax(cum_items >= n_desired) stop = int(np.ceil(float(n_desired) / np.r_[1, cum_items][n_steps])) - indexer = BasicIndexer((0, ) * (len(shape) - 1 - n_steps) + (slice(stop), ) - + (slice(None), ) * n_steps) + indexer = ((0,) * (len(shape) - 1 - n_steps) + + (slice(stop),) + + (slice(None),) * n_steps) return indexer -def first_n_items(x, n_desired): +def first_n_items(array, n_desired): """Returns the first n_desired items of an array""" - # Unfortunately, we can't just do x.flat[:n_desired] here because x might - # not be a numpy.ndarray. Moreover, access to elements of x could be very - # expensive (e.g. if it's only available over DAP), so go out of our way to - # get them in a single call to __getitem__ using only slices. + # Unfortunately, we can't just do array.flat[:n_desired] here because it + # might not be a numpy.ndarray. Moreover, access to elements of the array + # could be very expensive (e.g. if it's only available over DAP), so go out + # of our way to get them in a single call to __getitem__ using only slices. if n_desired < 1: raise ValueError('must request at least one item') - if x.size == 0: + if array.size == 0: # work around for https://github.com/numpy/numpy/issues/5195 return [] - if n_desired < x.size: - indexer = _get_indexer_at_least_n_items(x.shape, n_desired) - x = x[indexer] - return np.asarray(x).flat[:n_desired] + if n_desired < array.size: + indexer = _get_indexer_at_least_n_items(array.shape, n_desired) + array = array[indexer] + return np.asarray(array).flat[:n_desired] -def last_item(x): +def last_item(array): """Returns the last item of an array in a list or an empty list.""" - if x.size == 0: + if array.size == 0: # work around for https://github.com/numpy/numpy/issues/5195 return [] - indexer = BasicIndexer((slice(-1, None),) * x.ndim) - return np.ravel(x[indexer]).tolist() + indexer = (slice(-1, None),) * array.ndim + return np.ravel(array[indexer]).tolist() def format_timestamp(t): @@ -174,19 +175,18 @@ def format_items(x): return formatted -def format_array_flat(items_ndarray, max_width): +def format_array_flat(array, max_width): """Return a formatted string for as many items in the flattened version of - items_ndarray that will fit within max_width characters + array that will fit within max_width characters. """ # every item will take up at least two characters, but we always want to # print at least one item max_possibly_relevant = max(int(np.ceil(max_width / 2.0)), 1) - relevant_items = first_n_items(items_ndarray, max_possibly_relevant) + relevant_items = first_n_items(array, max_possibly_relevant) pprint_items = format_items(relevant_items) cum_len = np.cumsum([len(s) + 1 for s in pprint_items]) - 1 - if (max_possibly_relevant < items_ndarray.size or - (cum_len > max_width).any()): + if (max_possibly_relevant < array.size or (cum_len > max_width).any()): end_padding = u' ...' count = max(np.argmax((cum_len + len(end_padding)) > max_width), 1) pprint_items = pprint_items[:count] diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index ac4265ea7fd..a4030a189fb 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -3,6 +3,7 @@ from __future__ import print_function from datetime import timedelta from collections import defaultdict, Hashable +import operator import numpy as np import pandas as pd @@ -300,6 +301,17 @@ def __repr__(self): return '{}({})'.format(type(self).__name__, self.tuple) +def as_integer_or_none(value): + return None if value is None else operator.index(value) + + +def as_integer_slice(value): + start = as_integer_or_none(value.start) + stop = as_integer_or_none(value.stop) + step = as_integer_or_none(value.step) + return slice(start, stop, step) + + class BasicIndexer(ExplicitIndexer): """Tuple for basic indexing. @@ -307,6 +319,22 @@ class BasicIndexer(ExplicitIndexer): rules for basic indexing: each axis is independently sliced and axes indexed with an integer are dropped from the result. """ + def __init__(self, key): + if not isinstance(key, tuple): + raise TypeError('key must be a tuple: {!r}'.format(key)) + + new_key = [] + for k in key: + if isinstance(k, integer_types): + k = int(k) + elif isinstance(k, slice): + k = as_integer_slice(k) + else: + raise TypeError('unexpected indexer type for {}: {!r}' + .format(type(self).__name__, k)) + new_key.append(k) + + super(BasicIndexer, self).__init__(new_key) class OuterIndexer(ExplicitIndexer): @@ -317,6 +345,31 @@ class OuterIndexer(ExplicitIndexer): axes indexed with an integer are dropped from the result. This type of indexing works like MATLAB/Fortran. """ + def __init__(self, key): + if not isinstance(key, tuple): + raise TypeError('key must be a tuple: {!r}'.format(key)) + + new_key = [] + for k in key: + if isinstance(k, integer_types): + k = int(k) + elif isinstance(k, slice): + k = as_integer_slice(k) + elif isinstance(k, np.ndarray): + if not np.issubdtype(k.dtype, np.integer): + raise TypeError('invalid indexer array, does not have ' + 'integer dtype: {!r}'.format(k)) + if k.ndim != 1: + raise TypeError('invalid indexer array for {}, must have ' + 'exactly 1 dimension: ' + .format(type(self).__name__, k)) + k = np.asarray(k, dtype=np.int64) + else: + raise TypeError('unexpected indexer type for {}: {!r}' + .format(type(self).__name__, k)) + new_key.append(k) + + super(OuterIndexer, self).__init__(new_key) class VectorizedIndexer(ExplicitIndexer): @@ -328,14 +381,42 @@ class VectorizedIndexer(ExplicitIndexer): sliced axes are always moved to the end: https://github.com/numpy/numpy/pull/6256 """ + def __init__(self, key): + if not isinstance(key, tuple): + raise TypeError('key must be a tuple: {!r}'.format(key)) + + new_key = [] + for k in key: + if isinstance(k, slice): + k = as_integer_slice(k) + elif isinstance(k, np.ndarray): + if not np.issubdtype(k.dtype, np.integer): + raise TypeError('invalid indexer array, does not have ' + 'integer dtype: {!r}'.format(k)) + k = np.asarray(k, dtype=np.int64) + else: + raise TypeError('unexpected indexer type for {}: {!r}' + .format(type(self).__name__, k)) + new_key.append(k) + + super(VectorizedIndexer, self).__init__(new_key) class ExplicitlyIndexed(object): """Mixin to mark support for Indexer subclasses in indexing.""" +class ExplicitNDArrayMixin(utils.NDArrayMixin, ExplicitlyIndexed): + + def __array__(self, dtype=None): + key = BasicIndexer((slice(None),) * self.ndim) + return np.asarray(self[key], dtype=dtype) + + def unwrap_explicit_indexer(key, target, allow): """Unwrap an explicit key into a tuple.""" + if not isinstance(key, ExplicitIndexer): + raise TypeError('unexpected key type: {}'.format(key)) if not isinstance(key, allow): key_type_name = { BasicIndexer: 'Basic', @@ -346,21 +427,27 @@ def unwrap_explicit_indexer(key, target, allow): '{} indexing for {} is not implemented. Load your data first with ' '.load(), .compute() or .persist(), or disable caching by setting ' 'cache=False in open_dataset.'.format(key_type_name, type(target))) - if not isinstance(key, ExplicitIndexer): - raise TypeError('unexpected key type: {}'.format(key)) return key.tuple -class BasicToExplicitArray(utils.NDArrayMixin, ExplicitlyIndexed): +class WrapImplicitIndexing(utils.NDArrayMixin): + """Wrap an array, converting tuples into the indicated explicit indexer. - def __init__(self, array): - self.array = array + We use this for wrapping xarray's array types with dask. + """ + def __init__(self, array, indexer_cls=BasicIndexer): + self.array = as_indexable(array) + self.indexer_cls = indexer_cls + + def __array__(self, dtype=None): + return np.asarray(self.array, dtype=dtype) def __getitem__(self, key): - return self.array[BasicIndexer(key)] + key = expanded_indexer(key, self.ndim) + return self.array[self.indexer_cls(key)] -class LazilyIndexedArray(utils.NDArrayMixin, ExplicitlyIndexed): +class LazilyIndexedArray(ExplicitNDArrayMixin): """Wrap an array to make basic and orthogonal indexing lazy. """ def __init__(self, array, key=None): @@ -393,15 +480,17 @@ def _updated_key(self, new_key): 'setting cache=False in open_dataset.'.format(type(self))) iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) - key = [] + full_key = [] for size, k in zip(self.array.shape, self.key.tuple): if isinstance(k, integer_types): - key.append(k) + full_key.append(k) else: - key.append(_index_indexer_1d(k, next(iter_new_key), size)) - if all(isinstance(k, integer_types + (slice, )) for k in key): - return BasicIndexer(key) - return OuterIndexer(key) + full_key.append(_index_indexer_1d(k, next(iter_new_key), size)) + full_key = tuple(full_key) + + if all(isinstance(k, integer_types + (slice, )) for k in full_key): + return BasicIndexer(full_key) + return OuterIndexer(full_key) @property def shape(self): @@ -421,8 +510,8 @@ def __getitem__(self, indexer): return type(self)(self.array, self._updated_key(indexer)) def __setitem__(self, key, value): - key = self._updated_key(key) - self.array[key] = value + full_key = self._updated_key(key) + self.array[full_key] = value def __repr__(self): return ('%s(array=%r, key=%r)' % @@ -437,7 +526,7 @@ def _wrap_numpy_scalars(array): return array -class CopyOnWriteArray(utils.NDArrayMixin, ExplicitlyIndexed): +class CopyOnWriteArray(ExplicitNDArrayMixin): def __init__(self, array): self.array = as_indexable(array) self._copied = False @@ -458,7 +547,7 @@ def __setitem__(self, key, value): self.array[key] = value -class MemoryCachedArray(utils.NDArrayMixin, ExplicitlyIndexed): +class MemoryCachedArray(ExplicitNDArrayMixin): def __init__(self, array): self.array = _wrap_numpy_scalars(as_indexable(array)) @@ -494,12 +583,12 @@ def as_indexable(array): raise TypeError('Invalid array type: {}'.format(type(array))) -def _outer_to_numpy_indexer(indexer, shape): +def _outer_to_numpy_indexer(key, shape): """Convert an OuterIndexer into an indexer for NumPy. Parameters ---------- - indexer : OuterIndexer + key : tuple Outer indexing tuple to convert. shape : tuple Shape of the array subject to the indexing. @@ -509,8 +598,6 @@ def _outer_to_numpy_indexer(indexer, shape): tuple Base tuple suitable for use to index a NumPy array. """ - key = indexer.tuple - if len([k for k in key if not isinstance(k, slice)]) <= 1: # If there is only one vector and all others are slice, # it can be safely used in mixed basic/advanced indexing. @@ -534,7 +621,7 @@ def _outer_to_numpy_indexer(indexer, shape): return tuple(new_key) -class NumpyIndexingAdapter(utils.NDArrayMixin, ExplicitlyIndexed): +class NumpyIndexingAdapter(ExplicitNDArrayMixin): """Wrap a NumPy array to use broadcasted indexing """ def __init__(self, array): @@ -556,13 +643,15 @@ def _ensure_ndarray(self, value): def _indexing_array_and_key(self, key): if isinstance(key, OuterIndexer): array = self.array - key = _outer_to_numpy_indexer(key, self.array.shape) + key = _outer_to_numpy_indexer(key.tuple, self.array.shape) elif isinstance(key, VectorizedIndexer): array = nputils.NumpyVIndexAdapter(self.array) key = key.tuple - else: + elif isinstance(key, BasicIndexer): array = self.array key = key.tuple + else: + raise TypeError('unexpected key type: {}'.format(type(key))) return array, key @@ -575,7 +664,7 @@ def __setitem__(self, key, value): array[key] = value -class DaskIndexingAdapter(utils.NDArrayMixin, ExplicitlyIndexed): +class DaskIndexingAdapter(ExplicitNDArrayMixin): """Wrap a dask array to support xarray-style indexing. """ def __init__(self, array): @@ -585,22 +674,13 @@ def __init__(self, array): self.array = array def __getitem__(self, key): - def to_int_tuple(key): - # workaround for uint64 indexer (GH:1406) - # TODO remove here after next dask release (0.15.3) - return tuple([k.astype(int) if isinstance(k, np.ndarray) - else int(k) if isinstance(k, np.integer) else k - for k in key]) - if isinstance(key, BasicIndexer): - return self.array[to_int_tuple(key.tuple)] + return self.array[key.tuple] elif isinstance(key, VectorizedIndexer): - return self.array.vindex[to_int_tuple(key.tuple)] - elif key is Ellipsis: - return self.array + return self.array.vindex[key.tuple] else: assert isinstance(key, OuterIndexer) - key = to_int_tuple(key.tuple) + key = key.tuple try: return self.array[key] except NotImplementedError: @@ -619,7 +699,7 @@ def __setitem__(self, key, value): 'method or accessing its .values attribute.') -class PandasIndexAdapter(utils.NDArrayMixin, ExplicitlyIndexed): +class PandasIndexAdapter(ExplicitNDArrayMixin): """Wrap a pandas.Index to be better about preserving dtypes and to handle indexing by length 1 tuples like numpy """ diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 8fb1f9acce3..ac70d7f7aea 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -455,9 +455,6 @@ def shape(self): def __getitem__(self, key): return self.array[key] - def __array__(self, dtype=None): - return np.asarray(self.array, dtype=dtype) - def __repr__(self): return '%s(array=%r)' % (type(self).__name__, self.array) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 8bc6e28b4d2..f22f5305ca8 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -468,8 +468,10 @@ def _broadcast_indexes(self, key): # key is a tuple of full size key = indexing.expanded_indexer(key, self.ndim) # Convert a scalar Variable as an integer - key = tuple([(k.data.item() if isinstance(k, Variable) and k.ndim == 0 - else k) for k in key]) + key = tuple( + k.data.item() if isinstance(k, Variable) and k.ndim == 0 else k + for k in key) + if all(isinstance(k, BASIC_INDEXING_TYPES) for k in key): return self._broadcast_indexes_basic(key) @@ -532,17 +534,18 @@ def _broadcast_indexes_outer(self, key): dims = tuple(k.dims[0] if isinstance(k, Variable) else dim for k, dim in zip(key, self.dims) if not isinstance(k, integer_types)) - indexer = [] + + new_key = [] for k in key: if isinstance(k, Variable): k = k.data - - if isinstance(k, BASIC_INDEXING_TYPES): - indexer.append(k) - else: + if not isinstance(k, BASIC_INDEXING_TYPES): k = np.asarray(k) - indexer.append(k if k.dtype.kind != 'b' else np.flatnonzero(k)) - return dims, OuterIndexer(indexer), None + if k.dtype.kind == 'b': + (k,) = np.nonzero(k) + new_key.append(k) + + return dims, OuterIndexer(tuple(new_key)), None def _nonzero(self): """ Equivalent numpy's nonzero but returns a tuple of Varibles. """ @@ -604,7 +607,7 @@ def _broadcast_indexes_vectorized(self, key): else: new_order = None - return out_dims, VectorizedIndexer(out_key), new_order + return out_dims, VectorizedIndexer(tuple(out_key)), new_order def __getitem__(self, key): """Return a new Array object whose contents are consistent with @@ -766,7 +769,7 @@ def chunk(self, chunks=None, name=None, lock=False): if utils.is_dict_like(chunks): chunks = tuple(chunks.get(n, s) for n, s in enumerate(self.shape)) - data = indexing.BasicToExplicitArray(indexing.as_indexable(data)) + data = indexing.WrapImplicitIndexing(data, indexing.OuterIndexer) data = da.from_array(data, chunks, name=name, lock=lock) return type(self)(self.dims, data, self._attrs, self._encoding, diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 738b0be5a56..1716c3ee537 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -214,6 +214,17 @@ def __getitem__(self, key): return key +class IndexerMaker(object): + + def __init__(self, indexer_cls): + self._indexer_cls = indexer_cls + + def __getitem__(self, key): + if not isinstance(key, tuple): + key = (key,) + return self._indexer_cls(key) + + def source_ndarray(array): """Given an ndarray, return the base object which holds its memory, or the object itself. diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 28ffbf1cdf1..4fb649537bc 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -12,7 +12,7 @@ from xarray import conventions, Variable, Dataset, open_dataset from xarray.core import utils, indexing -from . import TestCase, requires_netCDF4, unittest, raises_regex +from . import TestCase, requires_netCDF4, unittest, raises_regex, IndexerMaker from .test_backends import CFEncodedDataTest from xarray.core.pycompat import iteritems from xarray.backends.memory import InMemoryDataStore @@ -20,6 +20,11 @@ from xarray.conventions import decode_cf +B = IndexerMaker(indexing.BasicIndexer) +O = IndexerMaker(indexing.OuterIndexer) +V = IndexerMaker(indexing.VectorizedIndexer) + + class TestMaskedAndScaledArray(TestCase): def test(self): x = conventions.MaskedAndScaledArray(np.arange(3), fill_value=0) @@ -44,10 +49,10 @@ def test(self): def test_0d(self): x = conventions.MaskedAndScaledArray(np.array(0), fill_value=0) self.assertTrue(np.isnan(x)) - self.assertTrue(np.isnan(x[...])) + self.assertTrue(np.isnan(x[B[()]])) x = conventions.MaskedAndScaledArray(np.array(0), fill_value=10) - self.assertEqual(0, x[...]) + self.assertEqual(0, x[B[()]]) def test_multiple_fill_value(self): x = conventions.MaskedAndScaledArray( @@ -57,7 +62,7 @@ def test_multiple_fill_value(self): x = conventions.MaskedAndScaledArray( np.array(0), fill_value=np.array([0, 1])) self.assertTrue(np.isnan(x)) - self.assertTrue(np.isnan(x[...])) + self.assertTrue(np.isnan(x[B[()]])) class TestStackedBytesArray(TestCase): @@ -71,9 +76,9 @@ def test_wrapper_class(self): self.assertEqual(actual.ndim, expected.ndim) self.assertEqual(len(actual), len(expected)) self.assertArrayEqual(expected, actual) - self.assertArrayEqual(expected[:1], actual[:1]) + self.assertArrayEqual(expected[:1], actual[B[:1]]) with pytest.raises(IndexError): - actual[:, :2] + actual[B[:, :2]] def test_scalar(self): array = np.array([b'a', b'b', b'c'], dtype='S') @@ -88,7 +93,7 @@ def test_scalar(self): len(actual) np.testing.assert_array_equal(expected, actual) with pytest.raises(IndexError): - actual[:2] + actual[B[:2]] assert str(actual) == str(expected) def test_char_to_bytes(self): @@ -124,6 +129,14 @@ def test_bytes_to_char(self): actual = conventions.bytes_to_char(array.T) self.assertArrayEqual(actual, expected) + def test_vectorized_indexing(self): + array = np.array([[b'a', b'b', b'c'], [b'd', b'e', b'f']], dtype='S') + stacked = conventions.StackedBytesArray(array) + expected = np.array([[b'abc', b'def'], [b'def', b'abc']]) + indexer = V[np.array([[0, 1], [1, 0]])] + actual = stacked[indexer] + self.assertArrayEqual(actual, expected) + class TestBytesToStringArray(TestCase): @@ -138,7 +151,7 @@ def test_encoding(self): self.assertEqual(actual.size, expected.size) self.assertEqual(actual.ndim, expected.ndim) self.assertArrayEqual(expected, actual) - self.assertArrayEqual(expected[0], actual[0]) + self.assertArrayEqual(expected[0], actual[B[0]]) def test_scalar(self): expected = np.array(u'abc', dtype=object) @@ -152,7 +165,7 @@ def test_scalar(self): len(actual) np.testing.assert_array_equal(expected, actual) with pytest.raises(IndexError): - actual[:2] + actual[B[:2]] assert str(actual) == str(expected) def test_decode_bytes_array(self): @@ -302,13 +315,13 @@ def test_slice_decoded_cf_datetime_array(self): np.array([0, 1, 2]), 'days since 1900-01-01', 'standard') expected = pd.date_range('1900-01-01', periods=3).values self.assertEqual(actual.dtype, np.dtype('datetime64[ns]')) - self.assertArrayEqual(actual[slice(0, 2)], expected[slice(0, 2)]) + self.assertArrayEqual(actual[B[0:2]], expected[slice(0, 2)]) actual = conventions.DecodedCFDatetimeArray( np.array([0, 1, 2]), 'days since 1900-01-01', 'standard') expected = pd.date_range('1900-01-01', periods=3).values self.assertEqual(actual.dtype, np.dtype('datetime64[ns]')) - self.assertArrayEqual(actual[[0, 2]], expected[[0, 2]]) + self.assertArrayEqual(actual[O[np.array([0, 2]),]], expected[[0, 2]]) def test_decode_cf_datetime_non_standard_units(self): expected = pd.date_range(periods=100, start='1970-01-01', freq='h') diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 4b79fa3aa6c..0ffdeb61419 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1556,7 +1556,8 @@ def test_reindex(self): expected = Dataset({'x': ('time', np.random.randn(5))}, {'time': range(5)}) time2 = DataArray(np.arange(5), dims="time2") - actual = expected.reindex(time=time2) + with pytest.warns(FutureWarning): + actual = expected.reindex(time=time2) self.assertDatasetIdentical(actual, expected) # another regression test diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index 553c3012dd8..6890c499eca 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -12,7 +12,10 @@ from xarray.core import indexing from xarray.core import nputils from xarray.core.pycompat import integer_types -from . import TestCase, ReturnItem, raises_regex +from . import TestCase, ReturnItem, raises_regex, IndexerMaker + + +B = IndexerMaker(indexing.BasicIndexer) class TestIndexers(TestCase): @@ -173,7 +176,7 @@ def test_lazily_indexed_array(self): # make sure actual.key is appropriate type if all(isinstance(k, integer_types + (slice, )) - for k in v_lazy._data.key): + for k in v_lazy._data.key.tuple): assert isinstance(v_lazy._data.key, indexing.BasicIndexer) else: @@ -197,16 +200,16 @@ class TestCopyOnWriteArray(TestCase): def test_setitem(self): original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - wrapped[:] = 0 + wrapped[B[:]] = 0 self.assertArrayEqual(original, np.arange(10)) self.assertArrayEqual(wrapped, np.zeros(10)) def test_sub_array(self): original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - child = wrapped[:5] + child = wrapped[B[:5]] self.assertIsInstance(child, indexing.CopyOnWriteArray) - child[:] = 0 + child[B[:]] = 0 self.assertArrayEqual(original, np.arange(10)) self.assertArrayEqual(wrapped, np.arange(10)) self.assertArrayEqual(child, np.zeros(5)) @@ -214,7 +217,7 @@ def test_sub_array(self): def test_index_scalar(self): # regression test for GH1374 x = indexing.CopyOnWriteArray(np.array(['foo', 'bar'])) - assert np.array(x[0][()]) == 'foo' + assert np.array(x[B[0]][B[()]]) == 'foo' class TestMemoryCachedArray(TestCase): @@ -227,7 +230,7 @@ def test_wrapper(self): def test_sub_array(self): original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) - child = wrapped[:5] + child = wrapped[B[:5]] self.assertIsInstance(child, indexing.MemoryCachedArray) self.assertArrayEqual(child, np.arange(5)) self.assertIsInstance(child.array, indexing.NumpyIndexingAdapter) @@ -236,46 +239,149 @@ def test_sub_array(self): def test_setitem(self): original = np.arange(10) wrapped = indexing.MemoryCachedArray(original) - wrapped[:] = 0 + wrapped[B[:]] = 0 self.assertArrayEqual(original, np.zeros(10)) def test_index_scalar(self): # regression test for GH1374 x = indexing.MemoryCachedArray(np.array(['foo', 'bar'])) - assert np.array(x[0][()]) == 'foo' - - -class TestIndexerTuple(TestCase): - """ Make sure _outer_to_numpy_indexer gives similar result to - Variable._broadcast_indexes_vectorized - """ - def test_outer_indexer(self): - def nonzero(x): - if isinstance(x, np.ndarray) and x.dtype.kind == 'b': - x = x.nonzero()[0] - return x - original = np.random.rand(10, 20, 30) - v = Variable(['i', 'j', 'k'], original) - I = ReturnItem() - # test orthogonally applied indexers - indexers = [I[:], 0, -2, I[:3], np.array([0, 1, 2, 3]), np.array([0]), - np.arange(10) < 5] - for i, j, k in itertools.product(indexers, repeat=3): - - if isinstance(j, np.ndarray) and j.dtype.kind == 'b': # match size - j = np.arange(20) < 4 - if isinstance(k, np.ndarray) and k.dtype.kind == 'b': - k = np.arange(30) < 8 - - _, expected, new_order = v._broadcast_indexes_vectorized((i, j, k)) - expected_data = nputils.NumpyVIndexAdapter(v.data)[expected] - if new_order: - old_order = range(len(new_order)) - expected_data = np.moveaxis(expected_data, old_order, - new_order) - - outer_index = indexing.OuterIndexer( - (nonzero(i), nonzero(j), nonzero(k))) - actual = indexing._outer_to_numpy_indexer(outer_index, v.shape) - actual_data = v.data[actual] - self.assertArrayEqual(actual_data, expected_data) + assert np.array(x[B[0]][B[()]]) == 'foo' + + +def test_base_explicit_indexer(): + with pytest.raises(TypeError): + indexing.ExplicitIndexer(()) + + class Subclass(indexing.ExplicitIndexer): + pass + + value = Subclass((1, 2, 3)) + assert value.tuple == (1, 2, 3) + assert repr(value) == 'Subclass((1, 2, 3))' + + +@pytest.mark.parametrize('indexer_cls', [indexing.BasicIndexer, + indexing.OuterIndexer, + indexing.VectorizedIndexer]) +def test_invalid_for_all(indexer_cls): + with pytest.raises(TypeError): + indexer_cls(None) + with pytest.raises(TypeError): + indexer_cls(([],)) + with pytest.raises(TypeError): + indexer_cls((None,)) + with pytest.raises(TypeError): + indexer_cls(('foo',)) + with pytest.raises(TypeError): + indexer_cls((1.0,)) + with pytest.raises(TypeError): + indexer_cls((slice('foo'),)) + with pytest.raises(TypeError): + indexer_cls((np.array(['foo']),)) + + +def check_integer(indexer_cls): + value = indexer_cls((1, np.uint64(2),)).tuple + assert all(isinstance(v, int) for v in value) + assert value == (1, 2) + + +def check_slice(indexer_cls): + (value,) = indexer_cls((slice(1, None, np.int64(2)),)).tuple + assert value == slice(1, None, 2) + assert isinstance(value.step, int) + + +def check_array1d(indexer_cls): + (value,) = indexer_cls((np.arange(3, dtype=np.int32),)).tuple + assert value.dtype == np.int64 + np.testing.assert_array_equal(value, [0, 1, 2]) + + +def check_array2d(indexer_cls): + array = np.array([[1, 2], [3, 4]], dtype=np.int64) + (value,) = indexer_cls((array,)).tuple + assert value.dtype == np.int64 + np.testing.assert_array_equal(value, array) + + +def test_basic_indexer(): + check_integer(indexing.BasicIndexer) + check_slice(indexing.BasicIndexer) + with pytest.raises(TypeError): + check_array1d(indexing.BasicIndexer) + with pytest.raises(TypeError): + check_array2d(indexing.BasicIndexer) + + +def test_outer_indexer(): + check_integer(indexing.OuterIndexer) + check_slice(indexing.OuterIndexer) + check_array1d(indexing.OuterIndexer) + with pytest.raises(TypeError): + check_array2d(indexing.OuterIndexer) + + +def test_vectorized_indexer(): + with pytest.raises(TypeError): + check_integer(indexing.VectorizedIndexer) + check_slice(indexing.VectorizedIndexer) + check_array1d(indexing.VectorizedIndexer) + check_array2d(indexing.VectorizedIndexer) + + +def test_unwrap_explicit_indexer(): + indexer = indexing.BasicIndexer((1, 2)) + target = None + + unwrapped = indexing.unwrap_explicit_indexer( + indexer, target, allow=indexing.BasicIndexer) + assert unwrapped == (1, 2) + + with raises_regex(NotImplementedError, 'Load your data'): + indexing.unwrap_explicit_indexer( + indexer, target, allow=indexing.OuterIndexer) + + with raises_regex(TypeError, 'unexpected key type'): + indexing.unwrap_explicit_indexer( + indexer.tuple, target, allow=indexing.OuterIndexer) + + +def test_wrap_implicit_indexing(): + array = np.arange(10) + implicit = indexing.WrapImplicitIndexing( + indexing.NumpyIndexingAdapter(array), indexing.BasicIndexer) + np.testing.assert_array_equal(array, np.asarray(implicit)) + np.testing.assert_array_equal(array, implicit[:]) + + +def test_outer_indexer_consistency_with_broadcast_indexes_vectorized(): + def nonzero(x): + if isinstance(x, np.ndarray) and x.dtype.kind == 'b': + x = x.nonzero()[0] + return x + + original = np.random.rand(10, 20, 30) + v = Variable(['i', 'j', 'k'], original) + I = ReturnItem() + # test orthogonally applied indexers + indexers = [I[:], 0, -2, I[:3], np.array([0, 1, 2, 3]), np.array([0]), + np.arange(10) < 5] + for i, j, k in itertools.product(indexers, repeat=3): + + if isinstance(j, np.ndarray) and j.dtype.kind == 'b': # match size + j = np.arange(20) < 4 + if isinstance(k, np.ndarray) and k.dtype.kind == 'b': + k = np.arange(30) < 8 + + _, expected, new_order = v._broadcast_indexes_vectorized((i, j, k)) + expected_data = nputils.NumpyVIndexAdapter(v.data)[expected.tuple] + if new_order: + old_order = range(len(new_order)) + expected_data = np.moveaxis(expected_data, old_order, + new_order) + + outer_index = (nonzero(i), nonzero(j), nonzero(k)) + actual = indexing._outer_to_numpy_indexer(outer_index, v.shape) + actual_data = v.data[actual] + np.testing.assert_array_equal(actual_data, expected_data) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index d8ef9c29ec2..bdaee5edb7a 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -59,28 +59,29 @@ def test_getitem_dict(self): self.assertVariableIdentical(expected, actual) def test_getitem_1d(self): - v = self.cls(['x'], [0, 1, 2]) + data = np.array([0, 1, 2]) + v = self.cls(['x'], data) v_new = v[dict(x=[0, 1])] assert v_new.dims == ('x', ) - self.assertArrayEqual(v_new, v._data[[0, 1]]) + self.assertArrayEqual(v_new, data[[0, 1]]) v_new = v[dict(x=slice(None))] assert v_new.dims == ('x', ) - self.assertArrayEqual(v_new, v._data) + self.assertArrayEqual(v_new, data) v_new = v[dict(x=Variable('a', [0, 1]))] assert v_new.dims == ('a', ) - self.assertArrayEqual(v_new, v._data[[0, 1]]) + self.assertArrayEqual(v_new, data[[0, 1]]) v_new = v[dict(x=1)] assert v_new.dims == () - self.assertArrayEqual(v_new, v._data[1]) + self.assertArrayEqual(v_new, data[1]) # tuple argument v_new = v[slice(None)] assert v_new.dims == ('x', ) - self.assertArrayEqual(v_new, v._data) + self.assertArrayEqual(v_new, data) def test_getitem_1d_fancy(self): v = self.cls(['x'], [0, 1, 2]) From e64bc89e1c0a6bfedca922a99ce7cbcd76ae808b Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 12 Nov 2017 11:14:08 -0800 Subject: [PATCH 3/8] Fix pynio test failure --- xarray/tests/test_backends.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ed7725535b8..d0321788b5f 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1768,11 +1768,11 @@ def test_write_store(self): def test_orthogonal_indexing(self): # pynio also does not support list-like indexing - with raises_regex(NotImplementedError, 'Nio backend does not '): + with raises_regex(NotImplementedError, 'Outer indexing'): super(TestPyNio, self).test_orthogonal_indexing() def test_isel_dataarray(self): - with raises_regex(NotImplementedError, 'Nio backend does not '): + with raises_regex(NotImplementedError, 'Outer indexing'): super(TestPyNio, self).test_isel_dataarray() def test_array_type_after_indexing(self): From e203ec9917385ca73ef3acdcb9b39aaf071a75d0 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 12 Nov 2017 12:24:11 -0800 Subject: [PATCH 4/8] Rename and add comments --- xarray/conventions.py | 18 +++++++++--------- xarray/core/indexing.py | 33 +++++++++++++++------------------ xarray/core/variable.py | 7 ++++++- xarray/tests/test_indexing.py | 4 ++-- 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index 5978ec3a5cb..3d4bfeadbf2 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -333,7 +333,7 @@ def encode_cf_timedelta(timedeltas, units=None): return (num, units) -class MaskedAndScaledArray(indexing.ExplicitNDArrayMixin): +class MaskedAndScaledArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically scaled and masked according to CF conventions for packed and missing data values. @@ -391,7 +391,7 @@ def __repr__(self): self.scale_factor, self.add_offset, self._dtype)) -class DecodedCFDatetimeArray(indexing.ExplicitNDArrayMixin): +class DecodedCFDatetimeArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically converted into datetime objects using decode_cf_datetime. @@ -404,7 +404,7 @@ def __init__(self, array, units, calendar=None): # Verify that at least the first and last date can be decoded # successfully. Otherwise, tracebacks end up swallowed by # Dataset.__repr__ when users try to view their lazily decoded array. - values = indexing.WrapImplicitIndexing(self.array) + values = indexing.ImplicitToExplicitIndexingAdapter(self.array) example_value = np.concatenate([first_n_items(values, 1) or [0], last_item(values) or [0]]) @@ -431,7 +431,7 @@ def __getitem__(self, key): calendar=self.calendar) -class DecodedCFTimedeltaArray(indexing.ExplicitNDArrayMixin): +class DecodedCFTimedeltaArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically converted into timedelta objects using decode_cf_timedelta. @@ -448,7 +448,7 @@ def __getitem__(self, key): return decode_cf_timedelta(self.array[key], units=self.units) -class StackedBytesArray(indexing.ExplicitNDArrayMixin): +class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. @@ -494,7 +494,7 @@ def __getitem__(self, key): return char_to_bytes(self.array[key]) -class BytesToStringArray(indexing.ExplicitNDArrayMixin): +class BytesToStringArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper that decodes bytes to unicode when values are read. >>> BytesToStringArray(np.array([b'abc']))[:] @@ -533,7 +533,7 @@ def __getitem__(self, key): return decode_bytes_array(self.array[key], self.encoding) -class NativeEndiannessArray(indexing.ExplicitNDArrayMixin): +class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): """Decode arrays on the fly from non-native to native endianness This is useful for decoding arrays from netCDF3 files (which are all @@ -562,7 +562,7 @@ def __getitem__(self, key): return np.asarray(self.array[key], dtype=self.dtype) -class BoolTypeArray(indexing.ExplicitNDArrayMixin): +class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): """Decode arrays on the fly from integer to boolean datatype This is useful for decoding boolean arrays from integer typed netCDF @@ -590,7 +590,7 @@ def __getitem__(self, key): return np.asarray(self.array[key], dtype=self.dtype) -class UnsignedIntTypeArray(indexing.ExplicitNDArrayMixin): +class UnsignedIntTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): """Decode arrays on the fly from signed integer to unsigned integer. Typically used when _Unsigned is set at as a netCDF attribute on a signed integer variable. diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index a4030a189fb..3aea8ca6b8a 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -406,7 +406,7 @@ class ExplicitlyIndexed(object): """Mixin to mark support for Indexer subclasses in indexing.""" -class ExplicitNDArrayMixin(utils.NDArrayMixin, ExplicitlyIndexed): +class ExplicitlyIndexedNDArrayMixin(utils.NDArrayMixin, ExplicitlyIndexed): def __array__(self, dtype=None): key = BasicIndexer((slice(None),) * self.ndim) @@ -430,11 +430,9 @@ def unwrap_explicit_indexer(key, target, allow): return key.tuple -class WrapImplicitIndexing(utils.NDArrayMixin): - """Wrap an array, converting tuples into the indicated explicit indexer. +class ImplicitToExplicitIndexingAdapter(utils.NDArrayMixin): + """Wrap an array, converting tuples into the indicated explicit indexer.""" - We use this for wrapping xarray's array types with dask. - """ def __init__(self, array, indexer_cls=BasicIndexer): self.array = as_indexable(array) self.indexer_cls = indexer_cls @@ -447,7 +445,7 @@ def __getitem__(self, key): return self.array[self.indexer_cls(key)] -class LazilyIndexedArray(ExplicitNDArrayMixin): +class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make basic and orthogonal indexing lazy. """ def __init__(self, array, key=None): @@ -526,7 +524,7 @@ def _wrap_numpy_scalars(array): return array -class CopyOnWriteArray(ExplicitNDArrayMixin): +class CopyOnWriteArray(ExplicitlyIndexedNDArrayMixin): def __init__(self, array): self.array = as_indexable(array) self._copied = False @@ -547,7 +545,7 @@ def __setitem__(self, key, value): self.array[key] = value -class MemoryCachedArray(ExplicitNDArrayMixin): +class MemoryCachedArray(ExplicitlyIndexedNDArrayMixin): def __init__(self, array): self.array = _wrap_numpy_scalars(as_indexable(array)) @@ -621,9 +619,9 @@ def _outer_to_numpy_indexer(key, shape): return tuple(new_key) -class NumpyIndexingAdapter(ExplicitNDArrayMixin): - """Wrap a NumPy array to use broadcasted indexing - """ +class NumpyIndexingAdapter(ExplicitlyIndexedNDArrayMixin): + """Wrap a NumPy array to use explicit indexing.""" + def __init__(self, array): # In NumpyIndexingAdapter we only allow to store bare np.ndarray if not isinstance(array, np.ndarray): @@ -664,9 +662,9 @@ def __setitem__(self, key, value): array[key] = value -class DaskIndexingAdapter(ExplicitNDArrayMixin): - """Wrap a dask array to support xarray-style indexing. - """ +class DaskIndexingAdapter(ExplicitlyIndexedNDArrayMixin): + """Wrap a dask array to support explicit indexing.""" + def __init__(self, array): """ This adapter is created in Variable.__getitem__ in Variable._broadcast_indexes. @@ -699,10 +697,9 @@ def __setitem__(self, key, value): 'method or accessing its .values attribute.') -class PandasIndexAdapter(ExplicitNDArrayMixin): - """Wrap a pandas.Index to be better about preserving dtypes and to handle - indexing by length 1 tuples like numpy - """ +class PandasIndexAdapter(ExplicitlyIndexedNDArrayMixin): + """Wrap a pandas.Index to preserve dtypes and handle explicit indexing.""" + def __init__(self, array, dtype=None): self.array = utils.safe_cast_to_index(array) if dtype is None: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index f22f5305ca8..795a5a2e55f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -769,7 +769,12 @@ def chunk(self, chunks=None, name=None, lock=False): if utils.is_dict_like(chunks): chunks = tuple(chunks.get(n, s) for n, s in enumerate(self.shape)) - data = indexing.WrapImplicitIndexing(data, indexing.OuterIndexer) + # da.from_array works by using lazily indexing with a tuple of + # slices. Using OuterIndexer is a pragmatic choice: dask does not + # yet handle different indexing types in an explicit way: + # https://github.com/dask/dask/issues/2883 + data = indexing.ImplicitToExplicitIndexingAdapter( + data, indexing.OuterIndexer) data = da.from_array(data, chunks, name=name, lock=lock) return type(self)(self.dims, data, self._attrs, self._encoding, diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index 6890c499eca..6bae06874f2 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -347,9 +347,9 @@ def test_unwrap_explicit_indexer(): indexer.tuple, target, allow=indexing.OuterIndexer) -def test_wrap_implicit_indexing(): +def test_implicit_indexing_adapter(): array = np.arange(10) - implicit = indexing.WrapImplicitIndexing( + implicit = indexing.ImplicitToExplicitIndexingAdapter( indexing.NumpyIndexingAdapter(array), indexing.BasicIndexer) np.testing.assert_array_equal(array, np.asarray(implicit)) np.testing.assert_array_equal(array, implicit[:]) From 3f9b522d1afa47c1866ea9c3e62a73d7b116a445 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 12 Nov 2017 12:25:23 -0800 Subject: [PATCH 5/8] flake8 --- xarray/backends/pynio_.py | 1 - xarray/tests/test_conventions.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py index fedd74d1148..ffa936c0466 100644 --- a/xarray/backends/pynio_.py +++ b/xarray/backends/pynio_.py @@ -9,7 +9,6 @@ from .. import Variable from ..core.utils import (FrozenOrderedDict, Frozen) from ..core import indexing -from ..core.pycompat import integer_types from .common import AbstractDataStore, DataStorePickleMixin, BackendArray diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 4fb649537bc..ca88ea661c7 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -321,7 +321,7 @@ def test_slice_decoded_cf_datetime_array(self): np.array([0, 1, 2]), 'days since 1900-01-01', 'standard') expected = pd.date_range('1900-01-01', periods=3).values self.assertEqual(actual.dtype, np.dtype('datetime64[ns]')) - self.assertArrayEqual(actual[O[np.array([0, 2]),]], expected[[0, 2]]) + self.assertArrayEqual(actual[O[np.array([0, 2])]], expected[[0, 2]]) def test_decode_cf_datetime_non_standard_units(self): expected = pd.date_range(periods=100, start='1970-01-01', freq='h') From 9c5318f4db58e3f2e02bfded209b2f0315931612 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 12 Nov 2017 20:17:12 -0800 Subject: [PATCH 6/8] Fix windows test failure --- xarray/core/indexing.py | 2 +- xarray/core/pycompat.py | 7 +++++-- xarray/tests/test_indexing.py | 6 +++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 3aea8ca6b8a..4e4f6e6e454 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -302,7 +302,7 @@ def __repr__(self): def as_integer_or_none(value): - return None if value is None else operator.index(value) + return None if value is None operator.index(value) def as_integer_slice(value): diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index 305fe38b159..a73a27f9643 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -12,7 +12,7 @@ basestring = str unicode_type = str bytes_type = bytes - integer_types = (int, np.integer) + native_int_types = (int,) def iteritems(d): return iter(d.items()) @@ -31,7 +31,7 @@ def itervalues(d): basestring = basestring # noqa unicode_type = unicode # noqa bytes_type = str - integer_types = (int, long, np.integer) # noqa + native_int_types = (int, long) # noqa def iteritems(d): return d.iteritems() @@ -45,6 +45,9 @@ def itervalues(d): import __builtin__ as builtins from urllib import urlretrieve from inspect import getargspec + +integer_types = native_int_types + (np.integer,) + try: from cyordereddict import OrderedDict except ImportError: # pragma: no cover diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index 6bae06874f2..28fecdb4827 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -11,7 +11,7 @@ from xarray import Dataset, DataArray, Variable from xarray.core import indexing from xarray.core import nputils -from xarray.core.pycompat import integer_types +from xarray.core.pycompat import native_int_types from . import TestCase, ReturnItem, raises_regex, IndexerMaker @@ -175,7 +175,7 @@ def test_lazily_indexed_array(self): indexing.LazilyIndexedArray) # make sure actual.key is appropriate type - if all(isinstance(k, integer_types + (slice, )) + if all(isinstance(k, native_int_types + (slice, )) for k in v_lazy._data.key.tuple): assert isinstance(v_lazy._data.key, indexing.BasicIndexer) @@ -289,7 +289,7 @@ def check_integer(indexer_cls): def check_slice(indexer_cls): (value,) = indexer_cls((slice(1, None, np.int64(2)),)).tuple assert value == slice(1, None, 2) - assert isinstance(value.step, int) + assert isinstance(value.step, native_int_types) def check_array1d(indexer_cls): From 00bd0fa6d8b923bd7698b01d52517cd674733e61 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 12 Nov 2017 20:54:15 -0800 Subject: [PATCH 7/8] typo --- xarray/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 4e4f6e6e454..3aea8ca6b8a 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -302,7 +302,7 @@ def __repr__(self): def as_integer_or_none(value): - return None if value is None operator.index(value) + return None if value is None else operator.index(value) def as_integer_slice(value): From 53bda8af12840e31ebb1affd3a8c27564e622db0 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Mon, 13 Nov 2017 19:08:46 -0800 Subject: [PATCH 8/8] leftover from debugging --- xarray/core/variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 795a5a2e55f..9f8792d2a87 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1270,7 +1270,7 @@ def equals(self, other, equiv=duck_array_ops.array_equiv): return (self.dims == other.dims and (self._data is other._data or equiv(self.data, other.data))) - except (TypeError, AttributeError) as e: + except (TypeError, AttributeError): return False def broadcast_equals(self, other, equiv=duck_array_ops.array_equiv):