Skip to content

Commit

Permalink
Fix groupby-resample KeyError when resampling on Index and giving exp…
Browse files Browse the repository at this point in the history
…licit list of columns. (pandas-dev#50876)

* Add failing test reproducing groupby-resample KeyError (pandas-dev#50840)

* Fix groupby-resample KeyError (pandas-dev#50840) by adding None check.

* Update whatsnew for pandas-dev#50840

* Improve coverage with multi and missing column groupby-resample tests.

* Refactor groupby-resample tests via TestCase to remove duplicate code.

* Revert "Refactor groupby-resample tests via TestCase to remove duplicate code."

This reverts commit d522606.

* Fix typo in bug fix entry for pandas-dev#50840 in doc/source/whatsnew/v2.0.0.rst
  • Loading branch information
czroth authored and pooja-subramaniam committed Jan 25, 2023
1 parent c17c138 commit 75ed44e
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,7 @@ Groupby/resample/rolling
- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`)
- Bug in :class:`.DataFrameGroupBy` would raise when used with an empty DataFrame, categorical grouper, and ``dropna=False`` (:issue:`50634`)
- Bug in :meth:`.SeriesGroupBy.value_counts` did not respect ``sort=False`` (:issue:`50482`)
- Bug in :meth:`.DataFrameGroupBy.resample` would raise ``KeyError`` when selecting columns with a list of keys while resampling on a time index (:issue:`50840`)
-

Reshaping
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1202,7 +1202,7 @@ def _gotitem(self, key, ndim, subset=None):

# Try to select from a DataFrame, falling back to a Series
try:
if isinstance(key, list) and self.key not in key:
if isinstance(key, list) and self.key not in key and self.key is not None:
key.append(self.key)
groupby = self._groupby[key]
except IndexError:
Expand Down
79 changes: 79 additions & 0 deletions pandas/tests/resample/test_resampler_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,3 +536,82 @@ def test_groupby_resample_size_all_index_same():
),
)
tm.assert_series_equal(result, expected)


def test_groupby_resample_on_index_with_list_of_keys():
    # GH 50840
    # Resample on the named datetime index, then pick columns with a list.
    date_index = Series(date_range(start="2016-01-01", periods=8), name="date")
    frame = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [3, 1, 4, 1, 5, 9, 2, 6],
        },
        index=date_index,
    )

    resampler = frame.groupby("group").resample("2D")
    result = resampler[["val"]].mean()

    # One (group, bin start) pair per two-day bin.
    bin_labels = [
        (0, "2016-01-01"),
        (0, "2016-01-03"),
        (1, "2016-01-05"),
        (1, "2016-01-07"),
    ]
    expected_index = Index(
        data=[(group, Timestamp(day)) for group, day in bin_labels],
        name=("group", "date"),
    )
    expected = DataFrame(
        data={"val": [2.0, 2.5, 7.0, 4.0]},
        index=expected_index,
    )
    tm.assert_frame_equal(result, expected)


def test_groupby_resample_on_index_with_list_of_keys_multi_columns():
    # GH 50876
    # Same setup as the single-column case, but two of the three value
    # columns are selected after resampling on the datetime index.
    date_index = Series(date_range(start="2016-01-01", periods=8), name="date")
    frame = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "first_val": [3, 1, 4, 1, 5, 9, 2, 6],
            "second_val": [2, 7, 1, 8, 2, 8, 1, 8],
            "third_val": [1, 4, 1, 4, 2, 1, 3, 5],
        },
        index=date_index,
    )

    selected_columns = ["first_val", "second_val"]
    resampler = frame.groupby("group").resample("2D")
    result = resampler[selected_columns].mean()

    # One (group, bin start) pair per two-day bin.
    bin_labels = [
        (0, "2016-01-01"),
        (0, "2016-01-03"),
        (1, "2016-01-05"),
        (1, "2016-01-07"),
    ]
    expected_index = Index(
        data=[(group, Timestamp(day)) for group, day in bin_labels],
        name=("group", "date"),
    )
    expected = DataFrame(
        data={
            "first_val": [2.0, 2.5, 7.0, 4.0],
            "second_val": [4.5, 4.5, 5.0, 4.5],
        },
        index=expected_index,
    )
    tm.assert_frame_equal(result, expected)


def test_groupby_resample_on_index_with_list_of_keys_missing_column():
    # GH 50876
    # Selecting a column that is absent from the frame must raise KeyError.
    date_index = Series(date_range(start="2016-01-01", periods=8), name="date")
    frame = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [3, 1, 4, 1, 5, 9, 2, 6],
        },
        index=date_index,
    )

    # The whole call chain stays inside the context manager, so a matching
    # KeyError from any step of it satisfies the assertion.
    with pytest.raises(KeyError, match="Columns not found"):
        frame.groupby("group").resample("2D")[["val_not_in_dataframe"]].mean()

0 comments on commit 75ed44e

Please sign in to comment.