Skip to content

Commit

Permalink
fix: support multiindexed and arbitrarily-named dimensions for grouping
Browse files Browse the repository at this point in the history
  • Loading branch information
FabianHofmann committed Nov 6, 2024
1 parent 78331b9 commit c935112
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 20 deletions.
2 changes: 2 additions & 0 deletions doc/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Upcoming Version

* When writing out an LP file, large variables and constraints are now chunked to avoid memory issues. This is especially useful for large models with constraints with many terms. The chunk size can be set with the `slice_size` argument in the `solve` function.
* Constraints of the form `<= infinity` and `>= -infinity` are now automatically filtered out when solving. The `solve` function now has a new argument `sanitize_infinities` to control this feature. Default is set to `True`.
* Grouping expressions is now supported on dimensions called "group" and dimensions that have the same name as the grouping object.
* Grouping dimensions which have multiindexed coordinates is now supported.

Version 0.3.15
--------------
Expand Down
1 change: 1 addition & 0 deletions linopy/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
TERM_DIM = "_term"
STACKED_TERM_DIM = "_stacked_term"
GROUPED_TERM_DIM = "_grouped_term"
GROUP_DIM = "_group"
FACTOR_DIM = "_factor"
CONCAT_DIM = "_concat"
HELPER_DIMS = [TERM_DIM, STACKED_TERM_DIM, GROUPED_TERM_DIM, FACTOR_DIM, CONCAT_DIM]
Expand Down
36 changes: 21 additions & 15 deletions linopy/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
EQUAL,
FACTOR_DIM,
GREATER_EQUAL,
GROUP_DIM,
GROUPED_TERM_DIM,
HELPER_DIMS,
LESS_EQUAL,
Expand Down Expand Up @@ -218,42 +219,43 @@ def sum(self, use_fallback: bool = False, **kwargs) -> LinearExpression:
group: pd.Series | pd.DataFrame | xr.DataArray = self.group
if isinstance(group, pd.DataFrame):
# dataframes do not have a name, so we need to set it
group_name = "group"
final_group_name = "group"
else:
group_name = getattr(group, "name", "group") or "group"
final_group_name = getattr(group, "name", "group") or "group"

if isinstance(group, DataArray):
group = group.to_pandas()

int_map = None
if isinstance(group, pd.DataFrame):
index_name = group.index.name
group = group.reindex(self.data.indexes[group.index.name])
group.index.name = index_name # ensure name for multiindex
int_map = get_index_map(*group.values.T)
orig_group = group
group = group.apply(tuple, axis=1).map(int_map)

group_dim = group.index.name
if group_name == group_dim:
raise ValueError(
"Group name cannot be the same as group dimension in non-fallback mode."
)

arrays = [group, group.groupby(group).cumcount()]
idx = pd.MultiIndex.from_arrays(
arrays, names=[group_name, GROUPED_TERM_DIM]
)
coords = Coordinates.from_pandas_multiindex(idx, group_dim)
ds = self.data.assign_coords(coords)
idx = pd.MultiIndex.from_arrays(arrays, names=[GROUP_DIM, GROUPED_TERM_DIM])
new_coords = Coordinates.from_pandas_multiindex(idx, group_dim)
coords = self.data.indexes[group_dim]
names_to_drop = [coords.name]
if isinstance(coords, pd.MultiIndex):
names_to_drop += list(coords.names)
ds = self.data.drop_vars(names_to_drop).assign_coords(new_coords)
ds = ds.unstack(group_dim, fill_value=LinearExpression._fill_value)
ds = LinearExpression._sum(ds, dim=GROUPED_TERM_DIM)

if int_map is not None:
index = ds.indexes["group"].map({v: k for k, v in int_map.items()})
index = ds.indexes[GROUP_DIM].map({v: k for k, v in int_map.items()})
index.names = [str(col) for col in orig_group.columns]
index.name = group_name
coords = Coordinates.from_pandas_multiindex(index, group_name)
ds = xr.Dataset(ds.assign_coords(coords))
index.name = GROUP_DIM
new_coords = Coordinates.from_pandas_multiindex(index, GROUP_DIM)
ds = xr.Dataset(ds.assign_coords(new_coords))

ds = ds.rename({GROUP_DIM: final_group_name})
return LinearExpression(ds, self.model)

def func(ds):
Expand Down Expand Up @@ -1428,6 +1430,8 @@ def to_polars(self) -> pl.DataFrame:

drop = exprwrap(Dataset.drop)

drop_vars = exprwrap(Dataset.drop_vars)

drop_sel = exprwrap(Dataset.drop_sel)

drop_isel = exprwrap(Dataset.drop_isel)
Expand All @@ -1452,6 +1456,8 @@ def to_polars(self) -> pl.DataFrame:

rename = exprwrap(Dataset.rename)

reset_index = exprwrap(Dataset.reset_index)

rename_dims = exprwrap(Dataset.rename_dims)

roll = exprwrap(Dataset.roll)
Expand Down
71 changes: 66 additions & 5 deletions test/test_linear_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,17 @@ def test_linear_expression_diff(v):

@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby(v, use_fallback):
    """
    Group by a DataArray that carries the name of the grouped dimension.

    Despite its generic name, this test exercises the case where the grouper
    is named after the first dimension of ``v``.
    """
    target_dim = v.dims[0]
    labels = [1] * 10 + [2] * 10
    groups = xr.DataArray(labels, coords=v.coords, name=target_dim)

    grouped = (1 * v).groupby(groups).sum(use_fallback=use_fallback)

    # The grouper's name is reused as the name of the resulting dimension,
    # whose coordinates are the two group labels.
    assert target_dim in grouped.dims
    assert (grouped.data[target_dim] == [1, 2]).all()
    assert grouped.nterm == 10


@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_on_same_name_as_target_dim(v, use_fallback):
expr = 1 * v
groups = xr.DataArray([1] * 10 + [2] * 10, coords=v.coords)
grouped = expr.groupby(groups).sum(use_fallback=use_fallback)
Expand Down Expand Up @@ -719,20 +730,31 @@ def test_linear_expression_groupby_series_with_name(v, use_fallback):


@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_with_series_false(v, use_fallback):
def test_linear_expression_groupby_with_series_with_same_group_name(v, use_fallback):
"""
Test that the group by works with a series whose name is the same as
the dimension to group.
"""
expr = 1 * v
groups = pd.Series([1] * 10 + [2] * 10, index=v.indexes["dim_2"])
groups.name = "dim_2"
if not use_fallback:
with pytest.raises(ValueError):
expr.groupby(groups).sum(use_fallback=use_fallback)
return
grouped = expr.groupby(groups).sum(use_fallback=use_fallback)
assert "dim_2" in grouped.dims
assert (grouped.data.dim_2 == [1, 2]).all()
assert grouped.nterm == 10


@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_with_series_on_multiindex(u, use_fallback):
    """
    Group by an unnamed series that is indexed by the "dim_3" index of ``u``.

    NOTE(review): "dim_3" is presumably a multiindexed dimension of the ``u``
    fixture, as the test name suggests -- confirm against the fixture.
    """
    n_entries = len(u.data["dim_3"])
    # Every entry gets the same label, so a single group is expected.
    groups = pd.Series([1] * n_entries, index=u.indexes["dim_3"])

    grouped = (1 * u).groupby(groups).sum(use_fallback=use_fallback)

    # The series has no name, so the result dimension is called "group".
    assert "group" in grouped.dims
    assert (grouped.data.group == [1]).all()
    assert grouped.nterm == n_entries


@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_with_dataframe(v, use_fallback):
expr = 1 * v
Expand All @@ -751,6 +773,45 @@ def test_linear_expression_groupby_with_dataframe(v, use_fallback):
assert grouped.nterm == 3


@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_with_dataframe_with_same_group_name(v, use_fallback):
    """
    Check grouping by a dataframe with a column named like the grouped
    dimension ("dim_2").

    With ``use_fallback=True`` the call is expected to raise a ValueError;
    otherwise the result is grouped along a "group" dimension.
    """
    expr = 1 * v
    columns = {"dim_2": [1] * 10 + [2] * 10, "b": list(range(4)) * 5}
    groups = pd.DataFrame(columns, index=v.indexes["dim_2"])

    if not use_fallback:
        grouped = expr.groupby(groups).sum(use_fallback=use_fallback)
        # Each distinct row of the dataframe is one expected group label.
        expected = pd.MultiIndex.from_frame(groups)
        assert "group" in grouped.dims
        assert set(grouped.data.group.values) == set(expected.values)
        assert grouped.nterm == 3
        return

    with pytest.raises(ValueError):
        expr.groupby(groups).sum(use_fallback=use_fallback)


@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_with_dataframe_on_multiindex(u, use_fallback):
    """
    Check grouping by a single-column dataframe indexed by "dim_3" of ``u``.

    With ``use_fallback=True`` the call is expected to raise a ValueError;
    otherwise the result carries a multiindexed "group" dimension.

    NOTE(review): "dim_3" is presumably a multiindexed dimension of the ``u``
    fixture, as the test name suggests -- confirm against the fixture.
    """
    n_entries = len(u.data["dim_3"])
    groups = pd.DataFrame({"a": [1] * n_entries}, index=u.indexes["dim_3"])
    expr = 1 * u

    if not use_fallback:
        grouped = expr.groupby(groups).sum(use_fallback=use_fallback)
        assert "group" in grouped.dims
        assert isinstance(grouped.indexes["group"], pd.MultiIndex)
        assert grouped.nterm == n_entries
        return

    with pytest.raises(ValueError):
        expr.groupby(groups).sum(use_fallback=use_fallback)


@pytest.mark.parametrize("use_fallback", [True, False])
def test_linear_expression_groupby_with_dataarray(v, use_fallback):
expr = 1 * v
Expand Down

0 comments on commit c935112

Please sign in to comment.