Skip to content

Commit

Permalink
Implement Groupby pct_change (#11144)
Browse files Browse the repository at this point in the history
Subsequent to #9805, this PR adds support for Groupby.pct_change()

Fixes #9606
Replaces #10444

Authors:
  - Sheilah Kirui (https://github.com/skirui-source)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)

URL: #11144
  • Loading branch information
skirui-source authored Jul 18, 2022
1 parent 5f9da83 commit f220e90
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 0 deletions.
42 changes: 42 additions & 0 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1416,6 +1416,48 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
result = self._mimic_pandas_order(result)
return result._copy_type_metadata(values)

def pct_change(
    self, periods=1, fill_method="ffill", axis=0, limit=None, freq=None
):
    """
    Calculates the percent change between sequential elements
    in the group.

    Parameters
    ----------
    periods : int, default 1
        Periods to shift for forming percent change.
    fill_method : str, default 'ffill'
        How to handle NAs before computing percent changes.
        Must be one of 'ffill', 'pad', 'bfill' or 'backfill'.
    axis : int, default 0
        Only ``axis=0`` is supported.
    limit : int, optional
        The number of consecutive NAs to fill before stopping.
        Not yet implemented.
    freq : str, optional
        Increment to use from time series API.
        Not yet implemented.

    Returns
    -------
    Series or DataFrame
        Percentage changes within each group

    Raises
    ------
    NotImplementedError
        If ``axis`` is not 0, or if ``limit`` or ``freq`` is not None.
    ValueError
        If ``fill_method`` is not one of the accepted values.
    """
    # Each argument is validated by its own independent guard clause.
    if axis != 0:
        raise NotImplementedError("Only axis=0 is supported.")
    if limit is not None:
        raise NotImplementedError("limit parameter not supported yet.")
    if freq is not None:
        raise NotImplementedError("freq parameter not supported yet.")
    if fill_method not in {"ffill", "pad", "bfill", "backfill"}:
        raise ValueError(
            "fill_method must be one of 'ffill', 'pad', "
            "'bfill', or 'backfill'."
        )

    # ``limit`` and ``freq`` are necessarily None at this point (guarded
    # above); they are still forwarded so the calls keep their shape once
    # those parameters become supported.
    filled = self.fillna(method=fill_method, limit=limit)
    # Regroup the filled values by the same grouping so that the shift
    # below is performed through a groupby rather than on the whole frame.
    fill_grp = filled.groupby(self.grouping)
    shifted = fill_grp.shift(periods=periods, freq=freq)
    # Percent change: current / previous - 1.
    return (filled / shifted) - 1

def _mimic_pandas_order(
self, result: DataFrameOrSeries
) -> DataFrameOrSeries:
Expand Down
73 changes: 73 additions & 0 deletions python/cudf/cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2604,3 +2604,76 @@ def test_groupby_transform_maintain_index(by):
assert_groupby_results_equal(
pdf.groupby(by).transform("max"), gdf.groupby(by).transform("max")
)


@pytest.mark.parametrize(
    "data, gkey",
    [
        (
            {
                "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
                "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2],
                "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1],
                "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1],
            },
            ["id"],
        ),
        (
            {
                "id": [0, 0, 0, 0, 1, 1, 1],
                "a": [1, 3, 4, 2.0, -3.0, 9.0, 10.0],
                "b": [10.0, 23, -4.0, 2, -3.0, None, 19.0],
            },
            ["id", "a"],
        ),
        (
            {
                "id": ["a", "a", "b", "b", "c", "c"],
                "val1": [None, None, None, None, None, None],
            },
            ["id"],
        ),
    ],
)
@pytest.mark.parametrize("periods", [-5, -2, 0, 2, 5])
@pytest.mark.parametrize("fill_method", ["ffill", "bfill", "pad", "backfill"])
def test_groupby_pct_change(data, gkey, periods, fill_method):
    """Check groupby ``pct_change`` on cudf against the pandas result.

    The same frame is grouped by ``gkey`` in both libraries and
    ``pct_change`` is called with identical ``periods`` and
    ``fill_method`` arguments.
    """
    cudf_frame = cudf.DataFrame(data)
    pandas_frame = cudf_frame.to_pandas()
    call_kwargs = {"periods": periods, "fill_method": fill_method}

    got = cudf_frame.groupby(gkey).pct_change(**call_kwargs)
    want = pandas_frame.groupby(gkey).pct_change(**call_kwargs)

    assert_eq(want, got)


@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/11259")
@pytest.mark.parametrize("periods", [-5, 5])
def test_groupby_pct_change_multiindex_dataframe(periods):
    """Compare cudf and pandas ``pct_change`` when grouping by index levels.

    Marked xfail; see the issue linked in the marker's reason.
    """
    columns = {
        "a": [1, 1, 2, 2],
        "b": [1, 1, 2, 3],
        "c": [2, 3, 4, 5],
        "d": [6, 8, 9, 1],
    }
    cudf_frame = cudf.DataFrame(columns).set_index(["a", "b"])
    pandas_frame = cudf_frame.to_pandas()

    got = cudf_frame.groupby(level=["a", "b"]).pct_change(periods)
    want = pandas_frame.groupby(level=["a", "b"]).pct_change(periods)

    assert_eq(want, got)


def test_groupby_pct_change_empty_columns():
    """Compare cudf and pandas ``pct_change`` on a frame with no rows."""
    cudf_frame = cudf.DataFrame(columns=["id", "val1", "val2"])
    pandas_frame = cudf_frame.to_pandas()

    got = cudf_frame.groupby("id").pct_change()
    want = pandas_frame.groupby("id").pct_change()

    assert_eq(want, got)

0 comments on commit f220e90

Please sign in to comment.