From 1fb55d1ce8a972fbb4d119aedf0cd08943930ae5 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Fri, 8 Nov 2024 21:02:16 +0000
Subject: [PATCH] fix: In group_by_dynamic, period and every were getting applied in reverse order for the window upper boundary

---
 crates/polars-time/src/windows/window.rs      |  6 ++--
 .../unit/operations/test_group_by_dynamic.py  | 33 +++++++++++++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/crates/polars-time/src/windows/window.rs b/crates/polars-time/src/windows/window.rs
index c7a29b846c58..9609f2abc514 100644
--- a/crates/polars-time/src/windows/window.rs
+++ b/crates/polars-time/src/windows/window.rs
@@ -327,15 +327,15 @@ impl Iterator for BoundsIter<'_> {
                 // Issue is that `next` needs to return `Option`.
                 TimeUnit::Nanoseconds => {
                     self.bi.start = self.window.every.add_ns(self.bi.start, self.tz).unwrap();
-                    self.bi.stop = self.window.every.add_ns(self.bi.stop, self.tz).unwrap();
+                    self.bi.stop = self.window.period.add_ns(self.bi.start, self.tz).unwrap();
                 },
                 TimeUnit::Microseconds => {
                     self.bi.start = self.window.every.add_us(self.bi.start, self.tz).unwrap();
-                    self.bi.stop = self.window.every.add_us(self.bi.stop, self.tz).unwrap();
+                    self.bi.stop = self.window.period.add_us(self.bi.start, self.tz).unwrap();
                 },
                 TimeUnit::Milliseconds => {
                     self.bi.start = self.window.every.add_ms(self.bi.start, self.tz).unwrap();
-                    self.bi.stop = self.window.every.add_ms(self.bi.stop, self.tz).unwrap();
+                    self.bi.stop = self.window.period.add_ms(self.bi.start, self.tz).unwrap();
                 },
             }
             Some(out)
diff --git a/py-polars/tests/unit/operations/test_group_by_dynamic.py b/py-polars/tests/unit/operations/test_group_by_dynamic.py
index 0c083e204725..0b4cc1ec2531 100644
--- a/py-polars/tests/unit/operations/test_group_by_dynamic.py
+++ b/py-polars/tests/unit/operations/test_group_by_dynamic.py
@@ -1043,3 +1043,36 @@ def test_group_by_dynamic_exclude_index_from_expansion_17075() -> None:
         "n": [0, 2, 4, 6],
         "m": [0, 2, 4, 6],
     }
+
+
+def test_group_by_dynamic_overlapping_19704() -> None:
+    df = pl.DataFrame(
+        {
+            "a": [datetime(2020, 1, 1), datetime(2020, 2, 1), datetime(2020, 3, 1)],
+            "b": [1, 2, 3],
+        }
+    )
+    result = df.group_by_dynamic(
+        "a", every="1mo", period="45d", include_boundaries=True
+    ).agg(pl.col("b").sum())
+    expected = pl.DataFrame(
+        {
+            "_lower_boundary": [
+                datetime(2020, 1, 1, 0, 0),
+                datetime(2020, 2, 1, 0, 0),
+                datetime(2020, 3, 1, 0, 0),
+            ],
+            "_upper_boundary": [
+                datetime(2020, 2, 15, 0, 0),
+                datetime(2020, 3, 17, 0, 0),
+                datetime(2020, 4, 15, 0, 0),
+            ],
+            "a": [
+                datetime(2020, 1, 1, 0, 0),
+                datetime(2020, 2, 1, 0, 0),
+                datetime(2020, 3, 1, 0, 0),
+            ],
+            "b": [3, 5, 3],
+        }
+    )
+    assert_frame_equal(result, expected)