Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial refactor for NamedArray #8075

Merged
merged 83 commits into main from named-array
Sep 27, 2023
Merged
Changes from 1 commit
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
81098cf
initial prototype for NamedArray
andersy005 Aug 16, 2023
27910dc
move NDArrayMixin and NdimSizeLenMixin inside named_array
andersy005 Aug 16, 2023
1a02dac
vendor is_duck_dask_array
andersy005 Aug 16, 2023
636b156
vendor Frozen object
andersy005 Aug 16, 2023
9ba6c84
update import
andersy005 Aug 17, 2023
b1a1de0
move _default sentinel value
andersy005 Aug 17, 2023
1e11e87
rename subpackage to namedarray per @TomNicholas suggestion
andersy005 Aug 17, 2023
ad364f0
Remove NdimSizeLenMixin
andersy005 Aug 17, 2023
d1e8d2a
fix typing
andersy005 Aug 17, 2023
5654063
Merge branch 'main' into named-array
andersy005 Aug 17, 2023
098eb0c
add annotations
andersy005 Aug 17, 2023
38c105a
Remove NDArrayMixin
andersy005 Aug 17, 2023
1fdd281
Apply suggestions from code review
andersy005 Aug 18, 2023
7060268
Merge branch 'main' into named-array
andersy005 Aug 18, 2023
33c2216
fix typing
andersy005 Aug 21, 2023
2c9223d
fix return type
andersy005 Aug 21, 2023
0e4afe0
revert NDArrayMixin
andersy005 Aug 22, 2023
ab79fb1
[WIP] as_compatible_data refactor
dcherian Aug 22, 2023
e70e98a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 22, 2023
a393d7f
duplicate sentinel value and leave the original sentinel object alone
andersy005 Aug 23, 2023
7b8316e
Apply suggestions from code review
andersy005 Aug 23, 2023
d74b802
use DuckArray
andersy005 Aug 23, 2023
acfdb90
Apply suggestions from code review
andersy005 Aug 23, 2023
d8b79eb
Merge branch 'main' into named-array
andersy005 Aug 23, 2023
2ece3c0
use sentinel value from xarray
andersy005 Aug 23, 2023
6fb79e6
remove unused code
andersy005 Aug 23, 2023
9545ca2
fix variable constructor
andersy005 Aug 23, 2023
e41a27c
fix as_compatible_data utility function
andersy005 Aug 23, 2023
259e0bd
move _to_dense and _non_zero to NamedArray
andersy005 Aug 23, 2023
a7ec770
more typing
andersy005 Aug 24, 2023
c55f35a
add initial tests
andersy005 Aug 24, 2023
2335bba
Merge branch 'main' into named-array
andersy005 Aug 30, 2023
34a262a
Apply suggestions from code review
andersy005 Aug 31, 2023
4b22b29
Merge branch 'main' into named-array
andersy005 Aug 31, 2023
790bfc2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 31, 2023
b909c87
Merge branch 'main' into pr/8075
Illviljan Sep 11, 2023
a31da00
attempt to fix some mypy errors
Illviljan Sep 11, 2023
b6c0af5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 11, 2023
b1e42aa
Update core.py
Illviljan Sep 11, 2023
45f9d99
Merge branch 'named-array' of https://github.com/andersy005/xarray in…
Illviljan Sep 11, 2023
2661001
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 11, 2023
b2a1cda
Update core.py
Illviljan Sep 11, 2023
d2971cc
Merge branch 'named-array' of https://github.com/andersy005/xarray in…
Illviljan Sep 11, 2023
b25a8ff
All input data can be arraylike
Illviljan Sep 11, 2023
06d77ad
Update core.py
Illviljan Sep 11, 2023
96ac4ec
Update core.py
Illviljan Sep 11, 2023
760cb48
get and set attrs at the same level.
Illviljan Sep 11, 2023
15c7300
data doesn't have to be ndarray
Illviljan Sep 11, 2023
bbe3db4
avoid redefining typing use new variable names instead
Illviljan Sep 11, 2023
2233662
import on runtime as well to be able to cast
Illviljan Sep 11, 2023
fb2ca4d
requires ufunc and function to be a valid duck array
Illviljan Sep 11, 2023
cf91823
Add array_namespace
Illviljan Sep 15, 2023
f21297b
Update test_dataset.py
Illviljan Sep 15, 2023
4fafb02
Update test_dataset.py
Illviljan Sep 15, 2023
c07fa0d
Merge branch 'main' into named-array
andersy005 Sep 15, 2023
c5fb91d
remove Frozen
andersy005 Sep 15, 2023
f2d3c95
Merge branch 'main' into named-array
andersy005 Sep 19, 2023
abc02c5
Merge branch 'main' into named-array
andersy005 Sep 19, 2023
4708ca2
update tests
andersy005 Sep 19, 2023
ff1b4de
update tests
andersy005 Sep 20, 2023
5455a44
Merge branch 'main' into named-array
andersy005 Sep 20, 2023
2162063
switch to functional API
andersy005 Sep 20, 2023
e530dd1
add fastpath
andersy005 Sep 20, 2023
9b3590c
Merge branch 'main' into named-array
andersy005 Sep 20, 2023
0f42857
Test making sizes dict[Hashable, int]
Illviljan Sep 20, 2023
afc7228
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 20, 2023
32ec4ea
A lot of errors... Try Mapping instead
Illviljan Sep 20, 2023
76bb881
Update groupby.py
Illviljan Sep 20, 2023
2d59cf5
Merge branch 'main' into named-array
andersy005 Sep 21, 2023
df77741
Update types.py
Illviljan Sep 21, 2023
8bf13b5
Apply suggestions from code review
andersy005 Sep 25, 2023
89a0010
Merge branch 'main' into named-array
andersy005 Sep 25, 2023
2f0192f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 25, 2023
3f22902
update docstrings
andersy005 Sep 25, 2023
f618625
update error messages
andersy005 Sep 25, 2023
94bf6c4
update tests
andersy005 Sep 25, 2023
0ec7876
test explicitly index array
andersy005 Sep 25, 2023
fb4ed12
update tests
andersy005 Sep 25, 2023
f0cfc11
remove unused types
andersy005 Sep 25, 2023
48fcf9b
Update xarray/tests/test_namedarray.py
andersy005 Sep 26, 2023
5f4e127
Merge branch 'main' into named-array
andersy005 Sep 26, 2023
2d9d7ff
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 26, 2023
2ef5064
use Self
andersy005 Sep 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
use Self
andersy005 committed Sep 26, 2023

Verified

This commit was signed with the committer’s verified signature.
bep Bjørn Erik Pedersen
commit 2ef5064f1adb0cd9f314fff7bfe9bd6c24caf5ae
93 changes: 50 additions & 43 deletions xarray/namedarray/core.py
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@

import copy
import math
import sys
import typing
from collections.abc import Hashable, Iterable, Mapping

@@ -24,6 +25,18 @@
Dims = tuple[Hashable, ...]


# `Self` is used to annotate methods that return an instance of the same
# class. It is imported from the stdlib `typing` module on Python 3.11+,
# and from `typing_extensions` on older interpreters.
try:
    if sys.version_info >= (3, 11):
        from typing import Self
    else:
        from typing_extensions import Self
except ImportError:
    # Under static type checking a real `Self` is required, so re-raise.
    if typing.TYPE_CHECKING:
        raise
    else:
        # At runtime, bind a placeholder so the name `Self` still exists
        # even though neither import succeeded.
        Self: typing.Any = None


# TODO: Add tests!
def as_compatible_data(
data: T_DuckArray | np.typing.ArrayLike, fastpath: bool = False
@@ -97,7 +110,7 @@ def __init__(
self._attrs: dict | None = dict(attrs) if attrs else None

@property
def ndim(self: T_NamedArray) -> int:
def ndim(self) -> int:
"""
Number of array dimensions.

@@ -108,7 +121,7 @@ def ndim(self: T_NamedArray) -> int:
return len(self.shape)

@property
def size(self: T_NamedArray) -> int:
def size(self) -> int:
"""
Number of elements in the array.

@@ -127,7 +140,7 @@ def __len__(self) -> int:
raise TypeError("len() of unsized object") from exc

@property
def dtype(self: T_NamedArray) -> np.dtype:
def dtype(self) -> np.dtype:
"""
Data-type of the array’s elements.

@@ -139,7 +152,7 @@ def dtype(self: T_NamedArray) -> np.dtype:
return self._data.dtype

@property
def shape(self: T_NamedArray) -> tuple[int, ...]:
def shape(self) -> tuple[int, ...]:
"""


@@ -157,7 +170,7 @@ def shape(self: T_NamedArray) -> tuple[int, ...]:
return self._data.shape

@property
def nbytes(self: T_NamedArray) -> int:
def nbytes(self) -> int:
"""
Total bytes consumed by the elements of the data array.

@@ -170,15 +183,15 @@ def nbytes(self: T_NamedArray) -> int:
return self.size * self.dtype.itemsize

@property
def dims(self: T_NamedArray) -> Dims:
def dims(self) -> Dims:
"""Tuple of dimension names with which this NamedArray is associated."""
return self._dims

@dims.setter
def dims(self: T_NamedArray, value: DimsInput) -> None:
def dims(self, value: DimsInput) -> None:
self._dims = self._parse_dimensions(value)

def _parse_dimensions(self: T_NamedArray, dims: DimsInput) -> Dims:
def _parse_dimensions(self, dims: DimsInput) -> Dims:
dims = (dims,) if isinstance(dims, str) else tuple(dims)
if len(dims) != self.ndim:
raise ValueError(
@@ -188,14 +201,14 @@ def _parse_dimensions(self: T_NamedArray, dims: DimsInput) -> Dims:
return dims

@property
def attrs(self: T_NamedArray) -> dict[typing.Any, typing.Any]:
def attrs(self) -> dict[typing.Any, typing.Any]:
"""Dictionary of local attributes on this NamedArray."""
if self._attrs is None:
self._attrs = {}
return self._attrs

@attrs.setter
def attrs(self: T_NamedArray, value: Mapping) -> None:
def attrs(self, value: Mapping) -> None:
self._attrs = dict(value)

def _check_shape(self, new_data: T_DuckArray) -> None:
@@ -206,7 +219,7 @@ def _check_shape(self, new_data: T_DuckArray) -> None:
)

@property
def data(self: T_NamedArray):
def data(self):
"""
The NamedArray's data as an array. The underlying array type
(e.g. dask, sparse, pint) is preserved.
@@ -216,13 +229,13 @@ def data(self: T_NamedArray):
return self._data
dcherian marked this conversation as resolved.
Show resolved Hide resolved

@data.setter
def data(self: T_NamedArray, data: T_DuckArray | np.typing.ArrayLike) -> None:
def data(self, data: T_DuckArray | np.typing.ArrayLike) -> None:
data = as_compatible_data(data)
self._check_shape(data)
self._data = data

@property
def real(self: T_NamedArray) -> T_NamedArray:
def real(self) -> Self:
"""
The real part of the NamedArray.

@@ -233,7 +246,7 @@ def real(self: T_NamedArray) -> T_NamedArray:
return self._replace(data=self.data.real)

@property
def imag(self: T_NamedArray) -> T_NamedArray:
def imag(self) -> Self:
"""
The imaginary part of the NamedArray.

@@ -243,50 +256,48 @@ def imag(self: T_NamedArray) -> T_NamedArray:
"""
return self._replace(data=self.data.imag)

def __dask_tokenize__(self: T_NamedArray):
def __dask_tokenize__(self):
# Use self.data, instead of self._data, in order to cope with the wrappers
# around NetCDF and the like
from dask.base import normalize_token

return normalize_token((type(self), self._dims, self.data, self.attrs))

def __dask_graph__(self: T_NamedArray):
def __dask_graph__(self):
return self._data.__dask_graph__() if is_duck_dask_array(self._data) else None

def __dask_keys__(self: T_NamedArray):
def __dask_keys__(self):
return self._data.__dask_keys__()

def __dask_layers__(self: T_NamedArray):
def __dask_layers__(self):
return self._data.__dask_layers__()

@property
def __dask_optimize__(self: T_NamedArray) -> typing.Callable:
def __dask_optimize__(self) -> typing.Callable:
return self._data.__dask_optimize__

@property
def __dask_scheduler__(self: T_NamedArray) -> typing.Callable:
def __dask_scheduler__(self) -> typing.Callable:
return self._data.__dask_scheduler__

def __dask_postcompute__(
self: T_NamedArray,
self,
) -> tuple[typing.Callable, tuple[typing.Any, ...]]:
array_func, array_args = self._data.__dask_postcompute__()
return self._dask_finalize, (array_func,) + array_args

def __dask_postpersist__(
self: T_NamedArray,
self,
) -> tuple[typing.Callable, tuple[typing.Any, ...]]:
array_func, array_args = self._data.__dask_postpersist__()
return self._dask_finalize, (array_func,) + array_args

def _dask_finalize(
self: T_NamedArray, results, array_func, *args, **kwargs
) -> T_NamedArray:
def _dask_finalize(self, results, array_func, *args, **kwargs) -> Self:
data = array_func(results, *args, **kwargs)
return type(self)(self._dims, data, attrs=self._attrs)

@property
def chunks(self: T_NamedArray) -> tuple[tuple[int, ...], ...] | None:
def chunks(self) -> tuple[tuple[int, ...], ...] | None:
"""
Tuple of block lengths for this NamedArray's data, in order of dimensions, or None if
the underlying data is not a dask array.
@@ -301,7 +312,7 @@ def chunks(self: T_NamedArray) -> tuple[tuple[int, ...], ...] | None:

@property
def chunksizes(
self: T_NamedArray,
self,
) -> typing.Mapping[typing.Any, tuple[int, ...]]:
"""
Mapping from dimension names to block lengths for this NamedArray's data, or None if
@@ -323,13 +334,11 @@ def chunksizes(
return {}
andersy005 marked this conversation as resolved.
Show resolved Hide resolved

@property
def sizes(self: T_NamedArray) -> dict[Hashable, int]:
def sizes(self) -> dict[Hashable, int]:
"""Ordered mapping from dimension names to lengths."""
return dict(zip(self.dims, self.shape))

def _replace(
self: T_NamedArray, dims=_default, data=_default, attrs=_default
) -> T_NamedArray:
def _replace(self, dims=_default, data=_default, attrs=_default) -> Self:
if dims is _default:
dims = copy.copy(self._dims)
if data is _default:
@@ -339,11 +348,11 @@ def _replace(
return type(self)(dims, data, attrs)

def _copy(
self: T_NamedArray,
self,
deep: bool = True,
data: T_DuckArray | np.typing.ArrayLike | None = None,
memo: dict[int, typing.Any] | None = None,
) -> T_NamedArray:
) -> Self:
if data is None:
ndata = self._data
if deep:
@@ -358,19 +367,17 @@ def _copy(

return self._replace(data=ndata, attrs=attrs)

def __copy__(self: T_NamedArray) -> T_NamedArray:
def __copy__(self) -> Self:
return self._copy(deep=False)

def __deepcopy__(
self: T_NamedArray, memo: dict[int, typing.Any] | None = None
) -> T_NamedArray:
def __deepcopy__(self, memo: dict[int, typing.Any] | None = None) -> Self:
return self._copy(deep=True, memo=memo)

def copy(
self: T_NamedArray,
self,
deep: bool = True,
data: T_DuckArray | np.typing.ArrayLike | None = None,
) -> T_NamedArray:
) -> Self:
"""Returns a copy of this object.

If `deep=True`, the data array is loaded into memory and copied onto
@@ -398,18 +405,18 @@ def copy(
"""
return self._copy(deep=deep, data=data)

def _nonzero(self: T_NamedArray) -> tuple[T_NamedArray, ...]:
def _nonzero(self) -> tuple[Self, ...]:
"""Equivalent of numpy's nonzero, but returns a tuple of NamedArrays."""
# TODO we should replace dask's native nonzero
# after https://github.com/dask/dask/issues/1076 is implemented.
nonzeros = np.nonzero(self.data)
return tuple(type(self)((dim,), nz) for nz, dim in zip(nonzeros, self.dims))

def _as_sparse(
self: T_NamedArray,
self,
sparse_format: str | Default = _default,
fill_value=dtypes.NA,
) -> T_NamedArray:
) -> Self:
"""
Use sparse-array as backend.
"""
@@ -431,7 +438,7 @@ def _as_sparse(
data = as_sparse(self.data.astype(dtype), fill_value=fill_value)
return self._replace(data=data)

def _to_dense(self: T_NamedArray) -> T_NamedArray:
def _to_dense(self) -> Self:
"""
Change backend from sparse to np.array
"""