From 69f0315b520a731c9f5958c2b4e0908036a9f8d0 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 27 Mar 2024 21:50:21 -0700 Subject: [PATCH 1/8] add .oindex and .vindex to BackendArray --- xarray/backends/common.py | 18 ++++++++++++++++++ xarray/backends/scipy_.py | 33 ++++++++++++++++++++++++--------- xarray/core/indexing.py | 8 +------- 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index f318b4dd42f..04f2d604b1d 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -210,6 +210,24 @@ def get_duck_array(self, dtype: np.typing.DTypeLike = None): key = indexing.BasicIndexer((slice(None),) * self.ndim) return self[key] # type: ignore [index] + def _oindex_get(self, indexer: indexing.OuterIndexer): + raise NotImplementedError( + f"{self.__class__.__name__}._oindex_get method should be overridden" + ) + + def _vindex_get(self, indexer: indexing.VectorizedIndexer): + raise NotImplementedError( + f"{self.__class__.__name__}._vindex_get method should be overridden" + ) + + @property + def oindex(self) -> indexing.IndexCallable: + return indexing.IndexCallable(self._oindex_get) + + @property + def vindex(self) -> indexing.IndexCallable: + return indexing.IndexCallable(self._vindex_get) + class AbstractDataStore: __slots__ = () diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index f8c486e512c..92962d86904 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -67,15 +67,7 @@ def get_variable(self, needs_lock=True): ds = self.datastore._manager.acquire(needs_lock) return ds.variables[self.variable_name] - def _getitem(self, key): - with self.datastore.lock: - data = self.get_variable(needs_lock=False).data - return data[key] - - def __getitem__(self, key): - data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem - ) + def _finalize_result(self, data): # Copy data if the source file is mmapped. This makes things consistent # with the netCDF4 library by ensuring we can safely read arrays even # after closing associated files. @@ -88,6 +80,29 @@ def __getitem__(self, key): return np.array(data, dtype=self.dtype, copy=copy) + def _getitem(self, key): + with self.datastore.lock: + data = self.get_variable(needs_lock=False).data + return data[key] + + def _vindex_get(self, indexer: indexing.VectorizedIndexer): + data = indexing.explicit_indexing_adapter( + indexer, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + return self._finalize_result(data) + + def _oindex_get(self, indexer: indexing.OuterIndexer): + data = indexing.explicit_indexing_adapter( + indexer, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + return self._finalize_result(data) + + def __getitem__(self, key): + data = indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + return self._finalize_result(data) + def __setitem__(self, key, value): with self.datastore.lock: data = self.get_variable(needs_lock=False) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index e26c50c8b90..121f0044d2b 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -615,13 +615,7 @@ def shape(self) -> _Shape: return tuple(shape) def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): - array = apply_indexer(self.array, self.key) - else: - # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray so use its __getitem__ - array = self.array[self.key] - + array = apply_indexer(self.array, self.key) # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) From a452182a9e1c06048b259eeadd7d2dfd8e2afe91 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 27 Mar 2024 21:55:12 -0700 Subject: [PATCH 2/8] Add support for .oindex and .vindex in H5NetCDFArrayWrapper --- xarray/backends/common.py | 4 ++-- xarray/backends/h5netcdf_.py | 12 +++++++++++- xarray/backends/scipy_.py | 8 ++++---- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 04f2d604b1d..f8f073f86a1 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -210,12 +210,12 @@ def get_duck_array(self, dtype: np.typing.DTypeLike = None): key = indexing.BasicIndexer((slice(None),) * self.ndim) return self[key] # type: ignore [index] - def _oindex_get(self, indexer: indexing.OuterIndexer): + def _oindex_get(self, key: indexing.OuterIndexer): raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: indexing.VectorizedIndexer): + def _vindex_get(self, key: indexing.VectorizedIndexer): raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 71463193939..07973c3cbd9 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -48,7 +48,17 @@ def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) return ds.variables[self.variable_name] - def __getitem__(self, key): + def _oindex_get(self, key: indexing.OuterIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + + def __getitem__(self, key: indexing.BasicIndexer): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem ) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 92962d86904..cd2217c567f 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -85,15 +85,15 @@ def _getitem(self, key): data = self.get_variable(needs_lock=False).data return data[key] - def _vindex_get(self, indexer: indexing.VectorizedIndexer): + def _vindex_get(self, key: indexing.VectorizedIndexer): data = indexing.explicit_indexing_adapter( - indexer, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem ) return self._finalize_result(data) - def _oindex_get(self, indexer: indexing.OuterIndexer): + def _oindex_get(self, key: indexing.OuterIndexer): data = indexing.explicit_indexing_adapter( - indexer, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem ) return self._finalize_result(data) From 67bda60c9dc604891dc3d04d8b102bf3f2096ec6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 27 Mar 2024 22:22:13 -0700 Subject: [PATCH 3/8] Add support for .oindex and .vindex in NetCDF4ArrayWrapper, PydapArrayWrapper, NioArrayWrapper, and ZarrArrayWrapper --- xarray/backends/netCDF4_.py | 12 ++++++++++- xarray/backends/pydap_.py | 12 ++++++++++- xarray/backends/pynio_.py | 12 ++++++++++- xarray/backends/zarr.py | 40 ++++++++++++++++++++++++++++--------- xarray/core/indexing.py | 8 ++------ 5 files changed, 66 insertions(+), 18 deletions(-) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index ae86c4ce384..33d636b59cf 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -97,7 +97,17 @@ def get_array(self, needs_lock=True): variable.set_auto_chartostring(False) return variable - def __getitem__(self, key): + def _oindex_get(self, key: indexing.OuterIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + ) + + def __getitem__(self, key: indexing.BasicIndexer): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.OUTER, self._getitem ) diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 5a475a7c3be..2ce3a579b2d 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -43,7 +43,17 @@ def shape(self) -> tuple[int, ...]: def dtype(self): return self.array.dtype - def __getitem__(self, key): + def _oindex_get(self, key: indexing.OuterIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + ) + + def __getitem__(self, key: indexing.BasicIndexer): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.BASIC, self._getitem ) diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py index 75e96ffdc0a..0d9188f229c 100644 --- a/xarray/backends/pynio_.py +++ b/xarray/backends/pynio_.py @@ -50,7 +50,17 @@ def get_array(self, needs_lock=True): ds = self.datastore._manager.acquire(needs_lock) return ds.variables[self.variable_name] - def __getitem__(self, key): + def _oindex_get(self, key: indexing.OuterIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + ) + + def __getitem__(self, key: indexing.BasicIndexer): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.BASIC, self._getitem ) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 13b1819f206..b1cbd7d2fd9 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -93,16 +93,38 @@ def _vindex(self, key): def _getitem(self, key): return self._array[key] - def __getitem__(self, key): - array = self._array - if isinstance(key, indexing.BasicIndexer): - method = self._getitem - elif isinstance(key, indexing.VectorizedIndexer): - method = self._vindex - elif isinstance(key, indexing.OuterIndexer): - method = self._oindex + def _oindex_get(self, key: indexing.OuterIndexer): + def raw_indexing_method(key): + return self._array.oindex[key] + + return indexing.explicit_indexing_adapter( + key, + self._array.shape, + indexing.IndexingSupport.VECTORIZED, + raw_indexing_method, + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + + def raw_indexing_method(key): + return self._array.vindex[key] + + return indexing.explicit_indexing_adapter( + key, + self._array.shape, + indexing.IndexingSupport.VECTORIZED, + raw_indexing_method, + ) + + def __getitem__(self, key: indexing.BasicIndexer): + def raw_indexing_method(key): + return self._array[key] + return indexing.explicit_indexing_adapter( - key, array.shape, indexing.IndexingSupport.VECTORIZED, method + key, + self._array.shape, + indexing.IndexingSupport.VECTORIZED, + raw_indexing_method, ) # if self.ndim == 0: diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 121f0044d2b..0b3033ae792 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -685,12 +685,8 @@ def shape(self) -> _Shape: return np.broadcast(*self.key.tuple).shape def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): - array = apply_indexer(self.array, self.key) - else: - # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray so use its __getitem__ - array = self.array[self.key] + array = apply_indexer(self.array, self.key) + # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) From a8b0d6cbbd7edbde590fa90c442808c5c3a1b8ac Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 1 Apr 2024 13:09:51 -0700 Subject: [PATCH 4/8] add deprecation warning --- xarray/core/indexing.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 0b3033ae792..d524a2a97c6 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -3,6 +3,7 @@ import enum import functools import operator +import warnings from collections import Counter, defaultdict from collections.abc import Hashable, Iterable, Mapping from contextlib import suppress @@ -564,6 +565,14 @@ def __getitem__(self, key: Any): return result +BackendArray_fallback_warning_message = ( + "The array does not support indexing using the .vindex and .oindex properties. " + "The __getitem__ method is being used instead. This fallback behavior will be " + "removed in a future version. Please ensure that the backend array implements " + "support for the .vindex and .oindex properties to avoid potential issues." +) + + class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make basic and outer indexing lazy.""" @@ -615,7 +624,17 @@ def shape(self) -> _Shape: return tuple(shape) def get_duck_array(self): - array = apply_indexer(self.array, self.key) + try: + array = apply_indexer(self.array, self.key) + except AttributeError as _: + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + warnings.warn( + BackendArray_fallback_warning_message, + category=DeprecationWarning, + ) + array = self.array[self.key] + # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) @@ -685,7 +704,16 @@ def shape(self) -> _Shape: return np.broadcast(*self.key.tuple).shape def get_duck_array(self): - array = apply_indexer(self.array, self.key) + try: + array = apply_indexer(self.array, self.key) + except AttributeError as _: + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + warnings.warn( + BackendArray_fallback_warning_message, + category=DeprecationWarning, + ) + array = self.array[self.key] # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass From fe0993c7320326e814dcfe34f837cf8bdb70edd7 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 1 Apr 2024 13:29:08 -0700 Subject: [PATCH 5/8] Fix deprecation warning message formatting --- xarray/core/indexing.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index d524a2a97c6..c94aa6882c3 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -566,9 +566,9 @@ def __getitem__(self, key: Any): BackendArray_fallback_warning_message = ( - "The array does not support indexing using the .vindex and .oindex properties. " + "The array {0} does not support indexing using the .vindex and .oindex properties. " "The __getitem__ method is being used instead. This fallback behavior will be " - "removed in a future version. Please ensure that the backend array implements " + "removed in a future version. Please ensure that the backend array {1} implements " "support for the .vindex and .oindex properties to avoid potential issues." ) @@ -626,11 +626,13 @@ def shape(self) -> _Shape: def get_duck_array(self): try: array = apply_indexer(self.array, self.key) - except AttributeError as _: + except NotImplementedError as _: # If the array is not an ExplicitlyIndexedNDArrayMixin, # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ warnings.warn( - BackendArray_fallback_warning_message, + BackendArray_fallback_warning_message.format( + self.array.__class__.__name__, self.array.__class__.__name__ + ), category=DeprecationWarning, ) array = self.array[self.key] @@ -706,11 +708,13 @@ def shape(self) -> _Shape: def get_duck_array(self): try: array = apply_indexer(self.array, self.key) - except AttributeError as _: + except NotImplementedError as _: # If the array is not an ExplicitlyIndexedNDArrayMixin, # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ warnings.warn( - BackendArray_fallback_warning_message, + BackendArray_fallback_warning_message.format( + self.array.__class__.__name__, self.array.__class__.__name__ + ), category=DeprecationWarning, ) array = self.array[self.key] From ef6afc4e8fbfd3cbdc6891f6f2fbd19a5af7ebf5 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 1 Apr 2024 17:55:09 -0700 Subject: [PATCH 6/8] add tests --- xarray/core/indexing.py | 6 +++-- xarray/tests/test_backends.py | 46 +++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index c94aa6882c3..da9086212d4 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -566,9 +566,9 @@ def __getitem__(self, key: Any): BackendArray_fallback_warning_message = ( - "The array {0} does not support indexing using the .vindex and .oindex properties. " + "The array `{0}` does not support indexing using the .vindex and .oindex properties. " "The __getitem__ method is being used instead. This fallback behavior will be " - "removed in a future version. Please ensure that the backend array {1} implements " + "removed in a future version. Please ensure that the backend array `{1}` implements " "support for the .vindex and .oindex properties to avoid potential issues." ) @@ -634,6 +634,7 @@ def get_duck_array(self): self.array.__class__.__name__, self.array.__class__.__name__ ), category=DeprecationWarning, + stacklevel=2, ) array = self.array[self.key] @@ -716,6 +717,7 @@ def get_duck_array(self): self.array.__class__.__name__, self.array.__class__.__name__ ), category=DeprecationWarning, + stacklevel=2, ) array = self.array[self.key] diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index be9b3ef0422..29be17cc316 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5828,3 +5828,49 @@ def test_zarr_region_chunk_partial_offset(tmp_path): # This write is unsafe, and should raise an error, but does not. # with pytest.raises(ValueError): # da.isel(x=slice(5, 25)).chunk(x=(10, 10)).to_zarr(store, region="auto") + + +def test_backend_array_deprecation_warning(capsys): + class CustomBackendArray(xr.backends.common.BackendArray): + def __init__(self): + array = self.get_array() + self.shape = array.shape + self.dtype = array.dtype + + def get_array(self): + return np.arange(10) + + def __getitem__(self, key): + return xr.core.indexing.explicit_indexing_adapter( + key, self.shape, xr.core.indexing.IndexingSupport.BASIC, self._getitem + ) + + def _getitem(self, key): + array = self.get_array() + return array[key] + + cba = CustomBackendArray() + indexer = xr.core.indexing.VectorizedIndexer(key=(np.array([0]),)) + + la = xr.core.indexing.LazilyIndexedArray(cba, indexer) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + la.vindex[indexer].get_duck_array() + + captured = capsys.readouterr() + assert len(w) == 1 + assert issubclass(w[-1].category, DeprecationWarning) + assert ( + "The array `CustomBackendArray` does not support indexing using the .vindex and .oindex properties." + in str(w[-1].message) + ) + assert "The __getitem__ method is being used instead." in str(w[-1].message) + assert "This fallback behavior will be removed in a future version." in str( + w[-1].message + ) + assert ( + "Please ensure that the backend array `CustomBackendArray` implements support for the .vindex and .oindex properties to avoid potential issues." + in str(w[-1].message) + ) + assert captured.out == "" From 3270b7a43858f8aa99e8c1e8438c8f0764317bb1 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Wed, 3 Apr 2024 13:54:25 -0700 Subject: [PATCH 7/8] Update xarray/core/indexing.py Co-authored-by: Deepak Cherian --- xarray/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index da9086212d4..f2bf3a830b9 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -716,7 +716,7 @@ def get_duck_array(self): BackendArray_fallback_warning_message.format( self.array.__class__.__name__, self.array.__class__.__name__ ), - category=DeprecationWarning, + category=PendingDeprecationWarning, stacklevel=2, ) array = self.array[self.key] From 889dcf883294b6f4fadf2dbf86ead1e1baafc242 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 3 Apr 2024 13:59:29 -0700 Subject: [PATCH 8/8] Update ZarrArrayWrapper class in xarray/backends/zarr.py Co-authored-by: Deepak Cherian --- xarray/backends/zarr.py | 9 --------- xarray/tests/test_backends.py | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index b3e78f7d8ff..69f8b56c083 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -84,15 +84,6 @@ def __init__(self, zarr_array): def get_array(self): return self._array - def _oindex(self, key): - return self._array.oindex[key] - - def _vindex(self, key): - return self._array.vindex[key] - - def _getitem(self, key): - return self._array[key] - def _oindex_get(self, key: indexing.OuterIndexer): def raw_indexing_method(key): return self._array.oindex[key] diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 29be17cc316..529bfbd0dda 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5860,7 +5860,7 @@ def _getitem(self, key): captured = capsys.readouterr() assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) + assert issubclass(w[-1].category, PendingDeprecationWarning) assert ( "The array `CustomBackendArray` does not support indexing using the .vindex and .oindex properties." in str(w[-1].message)