diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index ac60994d35b..dea110b5e46 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -55,6 +55,10 @@ Bug fixes
 - Sync with cftime by removing `dayofwk=-1` for cftime>=1.0.4.
   By `Anderson Banihirwe `_.
 
+- Fix :py:meth:`xarray.core.groupby.DataArrayGroupBy.reduce` and
+  :py:meth:`xarray.core.groupby.DatasetGroupBy.reduce` when reducing over multiple dimensions.
+  (:issue:`3402`). By `Deepak Cherian `_
+
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index 68bd28ddb12..62c055fed51 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -15,6 +15,7 @@
 from .utils import (
     either_dict_or_kwargs,
     hashable,
+    is_scalar,
     maybe_wrap_array,
     peek_at,
     safe_cast_to_index,
@@ -22,6 +23,18 @@
 from .variable import IndexVariable, Variable, as_variable
 
 
+def check_reduce_dims(reduce_dims, dimensions):
+
+    if reduce_dims is not ...:
+        if is_scalar(reduce_dims):
+            reduce_dims = [reduce_dims]
+        if any([dim not in dimensions for dim in reduce_dims]):
+            raise ValueError(
+                "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r."
+                % (reduce_dims, dimensions)
+            )
+
+
 def unique_value_groups(ar, sort=True):
     """Group an array by its unique values.
 
@@ -794,15 +807,11 @@ def reduce(
         if keep_attrs is None:
             keep_attrs = _get_keep_attrs(default=False)
 
-        if dim is not ... and dim not in self.dims:
-            raise ValueError(
-                "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r."
-                % (dim, self.dims)
-            )
-
         def reduce_array(ar):
             return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs)
 
+        check_reduce_dims(dim, self.dims)
+
         return self.apply(reduce_array, shortcut=shortcut)
 
@@ -895,11 +904,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs):
         def reduce_dataset(ds):
             return ds.reduce(func, dim, keep_attrs, **kwargs)
 
-        if dim is not ... and dim not in self.dims:
-            raise ValueError(
-                "cannot reduce over dimension %r. expected either '...' to reduce over all dimensions or one or more of %r."
-                % (dim, self.dims)
-            )
+        check_reduce_dims(dim, self.dims)
 
         return self.apply(reduce_dataset)
 
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index b13527bc098..101bb44660c 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -2560,15 +2560,6 @@ def change_metadata(x):
         expected = change_metadata(expected)
         assert_equal(expected, actual)
 
-    def test_groupby_reduce_dimension_error(self):
-        array = self.make_groupby_example_array()
-        grouped = array.groupby("y")
-        with raises_regex(ValueError, "cannot reduce over dimension 'y'"):
-            grouped.mean()
-
-        grouped = array.groupby("y", squeeze=False)
-        assert_identical(array, grouped.mean())
-
     def test_groupby_math(self):
         array = self.make_groupby_example_array()
         for squeeze in [True, False]:
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index a6de41beb66..d74d684dc54 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -5,7 +5,23 @@
 import xarray as xr
 from xarray.core.groupby import _consolidate_slices
 
-from . import assert_identical, raises_regex
+from . import assert_allclose, assert_identical, raises_regex
+
+
+@pytest.fixture
+def dataset():
+    ds = xr.Dataset(
+        {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))},
+        {"x": ["a", "b", "c"], "y": [1, 2, 3, 4], "z": [1, 2]},
+    )
+    ds["boo"] = (("z", "y"), [["f", "g", "h", "j"]] * 2)
+
+    return ds
+
+
+@pytest.fixture
+def array(dataset):
+    return dataset["foo"]
 
 
 def test_consolidate_slices():
@@ -21,25 +37,17 @@ def test_consolidate_slices():
         _consolidate_slices([slice(3), 4])
 
 
-def test_groupby_dims_property():
-    ds = xr.Dataset(
-        {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))},
-        {"x": ["a", "bcd", "c"], "y": [1, 2, 3, 4], "z": [1, 2]},
-    )
+def test_groupby_dims_property(dataset):
+    assert dataset.groupby("x").dims == dataset.isel(x=1).dims
+    assert dataset.groupby("y").dims == dataset.isel(y=1).dims
 
-    assert ds.groupby("x").dims == ds.isel(x=1).dims
-    assert ds.groupby("y").dims == ds.isel(y=1).dims
-
-    stacked = ds.stack({"xy": ("x", "y")})
+    stacked = dataset.stack({"xy": ("x", "y")})
     assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims
 
 
-def test_multi_index_groupby_apply():
+def test_multi_index_groupby_apply(dataset):
     # regression test for GH873
-    ds = xr.Dataset(
-        {"foo": (("x", "y"), np.random.randn(3, 4))},
-        {"x": ["a", "b", "c"], "y": [1, 2, 3, 4]},
-    )
+    ds = dataset.isel(z=1, drop=True)[["foo"]]
     doubled = 2 * ds
     group_doubled = (
         ds.stack(space=["x", "y"])
@@ -276,6 +284,24 @@ def test_groupby_grouping_errors():
         dataset.to_array().groupby(dataset.foo * np.nan)
 
 
+def test_groupby_reduce_dimension_error(array):
+    grouped = array.groupby("y")
+    with raises_regex(ValueError, "cannot reduce over dimensions"):
+        grouped.mean()
+
+    with raises_regex(ValueError, "cannot reduce over dimensions"):
+        grouped.mean("huh")
+
+    with raises_regex(ValueError, "cannot reduce over dimensions"):
+        grouped.mean(("x", "y", "asd"))
+
+    grouped = array.groupby("y", squeeze=False)
+    assert_identical(array, grouped.mean())
+
+    assert_identical(array.mean("x"), grouped.reduce(np.mean, "x"))
+    assert_allclose(array.mean(["x", "z"]), grouped.reduce(np.mean, ["x", "z"]))
+
+
 def test_groupby_bins_timeseries():
     ds = xr.Dataset()
     ds["time"] = xr.DataArray(