diff --git a/doc/release_notes.rst b/doc/release_notes.rst index be256a60..e3fc607a 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -6,6 +6,8 @@ Upcoming Version * When writing out an LP file, large variables and constraints are now chunked to avoid memory issues. This is especially useful for large models with constraints with many terms. The chunk size can be set with the `slice_size` argument in the `solve` function. * Constraints of the form `<= infinity` and `>= -infinity` are now automatically filtered out when solving. The `solve` function now has a new argument `sanitize_infinities` to control this feature. Default is set to `True`. +* Grouping expressions is now supported on dimensions called "group" and dimensions that have the same name as the grouping object. +* Grouping dimensions which have multiindexed coordinates is now supported. Version 0.3.15 -------------- diff --git a/linopy/constants.py b/linopy/constants.py index 5941d014..22694297 100644 --- a/linopy/constants.py +++ b/linopy/constants.py @@ -36,6 +36,7 @@ TERM_DIM = "_term" STACKED_TERM_DIM = "_stacked_term" GROUPED_TERM_DIM = "_grouped_term" +GROUP_DIM = "_group" FACTOR_DIM = "_factor" CONCAT_DIM = "_concat" HELPER_DIMS = [TERM_DIM, STACKED_TERM_DIM, GROUPED_TERM_DIM, FACTOR_DIM, CONCAT_DIM] diff --git a/linopy/expressions.py b/linopy/expressions.py index 08af65a8..713efd77 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -62,6 +62,7 @@ EQUAL, FACTOR_DIM, GREATER_EQUAL, + GROUP_DIM, GROUPED_TERM_DIM, HELPER_DIMS, LESS_EQUAL, @@ -218,42 +219,43 @@ def sum(self, use_fallback: bool = False, **kwargs) -> LinearExpression: group: pd.Series | pd.DataFrame | xr.DataArray = self.group if isinstance(group, pd.DataFrame): # dataframes do not have a name, so we need to set it - group_name = "group" + final_group_name = "group" else: - group_name = getattr(group, "name", "group") or "group" + final_group_name = getattr(group, "name", "group") or "group" if 
isinstance(group, DataArray): group = group.to_pandas() int_map = None if isinstance(group, pd.DataFrame): + index_name = group.index.name group = group.reindex(self.data.indexes[group.index.name]) + group.index.name = index_name # ensure name for multiindex int_map = get_index_map(*group.values.T) orig_group = group group = group.apply(tuple, axis=1).map(int_map) group_dim = group.index.name - if group_name == group_dim: - raise ValueError( - "Group name cannot be the same as group dimension in non-fallback mode." - ) arrays = [group, group.groupby(group).cumcount()] - idx = pd.MultiIndex.from_arrays( - arrays, names=[group_name, GROUPED_TERM_DIM] - ) - coords = Coordinates.from_pandas_multiindex(idx, group_dim) - ds = self.data.assign_coords(coords) + idx = pd.MultiIndex.from_arrays(arrays, names=[GROUP_DIM, GROUPED_TERM_DIM]) + new_coords = Coordinates.from_pandas_multiindex(idx, group_dim) + coords = self.data.indexes[group_dim] + names_to_drop = [coords.name] + if isinstance(coords, pd.MultiIndex): + names_to_drop += list(coords.names) + ds = self.data.drop_vars(names_to_drop).assign_coords(new_coords) ds = ds.unstack(group_dim, fill_value=LinearExpression._fill_value) ds = LinearExpression._sum(ds, dim=GROUPED_TERM_DIM) if int_map is not None: - index = ds.indexes["group"].map({v: k for k, v in int_map.items()}) + index = ds.indexes[GROUP_DIM].map({v: k for k, v in int_map.items()}) index.names = [str(col) for col in orig_group.columns] - index.name = group_name - coords = Coordinates.from_pandas_multiindex(index, group_name) - ds = xr.Dataset(ds.assign_coords(coords)) + index.name = GROUP_DIM + new_coords = Coordinates.from_pandas_multiindex(index, GROUP_DIM) + ds = xr.Dataset(ds.assign_coords(new_coords)) + ds = ds.rename({GROUP_DIM: final_group_name}) return LinearExpression(ds, self.model) def func(ds): @@ -1428,6 +1430,8 @@ def to_polars(self) -> pl.DataFrame: drop = exprwrap(Dataset.drop) + drop_vars = exprwrap(Dataset.drop_vars) + drop_sel = 
exprwrap(Dataset.drop_sel) drop_isel = exprwrap(Dataset.drop_isel) @@ -1452,6 +1456,8 @@ def to_polars(self) -> pl.DataFrame: rename = exprwrap(Dataset.rename) + reset_index = exprwrap(Dataset.reset_index) + rename_dims = exprwrap(Dataset.rename_dims) roll = exprwrap(Dataset.roll) diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py index 277e6443..b5db29b8 100644 --- a/test/test_linear_expression.py +++ b/test/test_linear_expression.py @@ -668,6 +668,17 @@ def test_linear_expression_diff(v): @pytest.mark.parametrize("use_fallback", [True, False]) def test_linear_expression_groupby(v, use_fallback): + expr = 1 * v + dim = v.dims[0] + groups = xr.DataArray([1] * 10 + [2] * 10, coords=v.coords, name=dim) + grouped = expr.groupby(groups).sum(use_fallback=use_fallback) + assert dim in grouped.dims + assert (grouped.data[dim] == [1, 2]).all() + assert grouped.nterm == 10 + + +@pytest.mark.parametrize("use_fallback", [True, False]) +def test_linear_expression_groupby_on_same_name_as_target_dim(v, use_fallback): expr = 1 * v groups = xr.DataArray([1] * 10 + [2] * 10, coords=v.coords) grouped = expr.groupby(groups).sum(use_fallback=use_fallback) @@ -719,20 +730,31 @@ def test_linear_expression_groupby_series_with_name(v, use_fallback): @pytest.mark.parametrize("use_fallback", [True, False]) -def test_linear_expression_groupby_with_series_false(v, use_fallback): +def test_linear_expression_groupby_with_series_with_same_group_name(v, use_fallback): + """ + Test that the group by works with a series whose name is the same as + the dimension to group. 
+ """ expr = 1 * v groups = pd.Series([1] * 10 + [2] * 10, index=v.indexes["dim_2"]) groups.name = "dim_2" - if not use_fallback: - with pytest.raises(ValueError): - expr.groupby(groups).sum(use_fallback=use_fallback) - return grouped = expr.groupby(groups).sum(use_fallback=use_fallback) assert "dim_2" in grouped.dims assert (grouped.data.dim_2 == [1, 2]).all() assert grouped.nterm == 10 +@pytest.mark.parametrize("use_fallback", [True, False]) +def test_linear_expression_groupby_with_series_on_multiindex(u, use_fallback): + expr = 1 * u + len_grouped_dim = len(u.data["dim_3"]) + groups = pd.Series([1] * len_grouped_dim, index=u.indexes["dim_3"]) + grouped = expr.groupby(groups).sum(use_fallback=use_fallback) + assert "group" in grouped.dims + assert (grouped.data.group == [1]).all() + assert grouped.nterm == len_grouped_dim + + @pytest.mark.parametrize("use_fallback", [True, False]) def test_linear_expression_groupby_with_dataframe(v, use_fallback): expr = 1 * v @@ -751,6 +773,45 @@ def test_linear_expression_groupby_with_dataframe(v, use_fallback): assert grouped.nterm == 3 +@pytest.mark.parametrize("use_fallback", [True, False]) +def test_linear_expression_groupby_with_dataframe_with_same_group_name(v, use_fallback): + """ + Test that the group by works with a dataframe whose column name is the same as + the dimension to group. 
+ """ + expr = 1 * v + groups = pd.DataFrame( + {"dim_2": [1] * 10 + [2] * 10, "b": list(range(4)) * 5}, + index=v.indexes["dim_2"], + ) + if use_fallback: + with pytest.raises(ValueError): + expr.groupby(groups).sum(use_fallback=use_fallback) + return + + grouped = expr.groupby(groups).sum(use_fallback=use_fallback) + index = pd.MultiIndex.from_frame(groups) + assert "group" in grouped.dims + assert set(grouped.data.group.values) == set(index.values) + assert grouped.nterm == 3 + + +@pytest.mark.parametrize("use_fallback", [True, False]) +def test_linear_expression_groupby_with_dataframe_on_multiindex(u, use_fallback): + expr = 1 * u + len_grouped_dim = len(u.data["dim_3"]) + groups = pd.DataFrame({"a": [1] * len_grouped_dim}, index=u.indexes["dim_3"]) + + if use_fallback: + with pytest.raises(ValueError): + expr.groupby(groups).sum(use_fallback=use_fallback) + return + grouped = expr.groupby(groups).sum(use_fallback=use_fallback) + assert "group" in grouped.dims + assert isinstance(grouped.indexes["group"], pd.MultiIndex) + assert grouped.nterm == len_grouped_dim + + @pytest.mark.parametrize("use_fallback", [True, False]) def test_linear_expression_groupby_with_dataarray(v, use_fallback): expr = 1 * v