From fe5547d4dbf9417cb2e7cd90d945f2b19f3c1d37 Mon Sep 17 00:00:00 2001
From: Zeynep Arikoglu <zeynep.arikoglu@gmail.com>
Date: Sun, 6 Nov 2022 17:54:35 +0300
Subject: [PATCH 1/7] Implementation of GROUPS mode in window frame

---
 datafusion/common/src/bisect.rs               |  25 +-
 datafusion/core/src/physical_plan/planner.rs  |   8 +-
 datafusion/core/tests/sql/window.rs           | 116 ++-
 .../physical-expr/src/window/aggregate.rs     |   4 +-
 .../physical-expr/src/window/built_in.rs      |   4 +
 datafusion/physical-expr/src/window/groups.rs | 712 ++++++++++++++++++
 datafusion/physical-expr/src/window/mod.rs    |   1 +
 .../physical-expr/src/window/window_expr.rs   | 143 +++-
 .../sqls/simple_window_groups.sql             |  71 ++
 integration-tests/test_psql_parity.py         |   3 +-
 10 files changed, 1046 insertions(+), 41 deletions(-)
 create mode 100644 datafusion/physical-expr/src/window/groups.rs
 create mode 100644 integration-tests/sqls/simple_window_groups.sql
diff --git a/datafusion/common/src/bisect.rs b/datafusion/common/src/bisect.rs
index 5060bf013120..bf31f055d617 100644
--- a/datafusion/common/src/bisect.rs
+++ b/datafusion/common/src/bisect.rs
@@ -60,22 +60,37 @@ pub fn bisect<const SIDE: bool>(
     target: &[ScalarValue],
     sort_options: &[SortOptions],
 ) -> Result<usize> {
-    let mut low: usize = 0;
-    let mut high: usize = item_columns
+    let low: usize = 0;
+    let high: usize = item_columns
         .get(0)
         .ok_or_else(|| {
             DataFusionError::Internal("Column array shouldn't be empty".to_string())
         })?
         .len();
+    let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
+        let cmp = compare(current, target, sort_options)?;
+        Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
+    };
+    find_bisect_point(item_columns, target, compare_fn, low, high)
+}
+
+pub fn find_bisect_point<F>(
+    item_columns: &[ArrayRef],
+    target: &[ScalarValue],
+    compare_fn: F,
+    mut low: usize,
+    mut high: usize,
+) -> Result<usize>
+where
+    F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
+{
     while low < high {
         let mid = ((high - low) / 2) + low;
         let val = item_columns
             .iter()
             .map(|arr| ScalarValue::try_from_array(arr, mid))
             .collect::<Result<Vec<ScalarValue>>>()?;
-        let cmp = compare(&val, target, sort_options)?;
-        let flag = if SIDE { cmp.is_lt() } else { cmp.is_le() };
-        if flag {
+        if compare_fn(&val, target)? {
             low = mid + 1;
         } else {
             high = mid;
diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs
index 88a43e1117e6..6318e7963381 100644
--- a/datafusion/core/src/physical_plan/planner.rs
+++ b/datafusion/core/src/physical_plan/planner.rs
@@ -64,7 +64,7 @@ use datafusion_expr::expr::{
 };
 use datafusion_expr::expr_rewriter::unnormalize_cols;
 use datafusion_expr::utils::{expand_wildcard, expr_to_columns};
-use datafusion_expr::{WindowFrame, WindowFrameBound, WindowFrameUnits};
+use datafusion_expr::{WindowFrame, WindowFrameBound};
 use datafusion_optimizer::utils::unalias;
 use datafusion_physical_expr::expressions::Literal;
 use datafusion_sql::utils::window_expr_common_partition_keys;
@@ -1511,12 +1511,6 @@ pub fn create_window_expr_with_name(
                 })
                 .collect::<Result<Vec<_>>>()?;
             if let Some(ref window_frame) = window_frame {
-                if window_frame.units == WindowFrameUnits::Groups {
-                    return Err(DataFusionError::NotImplemented(
-                        "Window frame definitions involving GROUPS are not supported yet"
-                            .to_string(),
-                    ));
-                }
                 if !is_window_valid(window_frame) {
                     return Err(DataFusionError::Execution(format!(
                         "Invalid window frame: start bound ({}) cannot be larger than end bound ({})",
diff --git a/datafusion/core/tests/sql/window.rs b/datafusion/core/tests/sql/window.rs
index a36d90c2a10d..0ab96e44213b 100644
--- a/datafusion/core/tests/sql/window.rs
+++ b/datafusion/core/tests/sql/window.rs
@@ -1189,24 +1189,120 @@ async fn window_frame_ranges_unbounded_preceding_err() -> Result<()> {
 }
 
 #[tokio::test]
-async fn window_frame_groups_query() -> Result<()> {
+async fn window_frame_groups_preceding_following_desc() -> Result<()> {
+    let ctx = SessionContext::new();
+    register_aggregate_csv(&ctx).await?;
+    let sql = "SELECT
+        SUM(c4) OVER(ORDER BY c2 DESC GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+        SUM(c3) OVER(ORDER BY c2 DESC GROUPS BETWEEN 10000 PRECEDING AND 10000 FOLLOWING),
+        COUNT(*) OVER(ORDER BY c2 DESC GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
+        FROM aggregate_test_100
+        ORDER BY c9
+        LIMIT 5";
+    let actual = execute_to_batches(&ctx, sql).await;
+    let expected = vec![
+        "+----------------------------+----------------------------+-----------------+",
+        "| SUM(aggregate_test_100.c4) | SUM(aggregate_test_100.c3) | COUNT(UInt8(1)) |",
+        "+----------------------------+----------------------------+-----------------+",
+        "| 52276                      | 781                        | 56              |",
+        "| 260620                     | 781                        | 63              |",
+        "| -28623                     | 781                        | 37              |",
+        "| 260620                     | 781                        | 63              |",
+        "| 260620                     | 781                        | 63              |",
+        "+----------------------------+----------------------------+-----------------+",
+    ];
+    assert_batches_eq!(expected, &actual);
+    Ok(())
+}
+
+#[tokio::test]
+async fn window_frame_groups_order_by_null_desc() -> Result<()> {
+    let ctx = SessionContext::new();
+    register_aggregate_null_cases_csv(&ctx).await?;
+    let sql = "SELECT
+        COUNT(c2) OVER (ORDER BY c1 DESC GROUPS BETWEEN 5 PRECEDING AND 3 FOLLOWING)
+        FROM null_cases
+        LIMIT 5";
+    let actual = execute_to_batches(&ctx, sql).await;
+    let expected = vec![
+        "+----------------------+",
+        "| COUNT(null_cases.c2) |",
+        "+----------------------+",
+        "| 12                   |",
+        "| 12                   |",
+        "| 12                   |",
+        "| 12                   |",
+        "| 12                   |",
+        "+----------------------+",
+    ];
+    assert_batches_eq!(expected, &actual);
+    Ok(())
+}
+
+#[tokio::test]
+async fn window_frame_groups() -> Result<()> {
+    let ctx = SessionContext::new();
+    register_aggregate_null_cases_csv(&ctx).await?;
+    let sql = "SELECT
+        SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as a,
+        SUM(c1) OVER (ORDER BY c3 DESC GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as b,
+        SUM(c1) OVER (ORDER BY c3 NULLS first GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as c,
+        SUM(c1) OVER (ORDER BY c3 DESC NULLS last GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as d,
+        SUM(c1) OVER (ORDER BY c3 DESC NULLS first GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as e,
+        SUM(c1) OVER (ORDER BY c3 NULLS first GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as f,
+        SUM(c1) OVER (ORDER BY c3 GROUPS current row) as a1,
+        SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN 9 PRECEDING AND 5 PRECEDING) as a2,
+        SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN UNBOUNDED PRECEDING AND 5 PRECEDING) as a3,
+        SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING) as a4,
+        SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN UNBOUNDED PRECEDING AND current row) as a5,
+        SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as a6,
+        SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) as a7,
+        SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN 3 FOLLOWING AND UNBOUNDED FOLLOWING) as a8,
+        SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN current row AND UNBOUNDED FOLLOWING) as a9,
+        SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN current row AND 3 FOLLOWING) as a10,
+        SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN 5 FOLLOWING AND 7 FOLLOWING) as a11
+        FROM null_cases
+        ORDER BY c3
+        LIMIT 10";
+    let actual = execute_to_batches(&ctx, sql).await;
+    let expected = vec![
+        "+-----+-----+-----+-----+-----+-----+----+-----+-----+-----+-----+------+------+------+------+-----+-----+",
+        "| a   | b   | c   | d   | e   | f   | a1 | a2  | a3  | a4  | a5  | a6   | a7   | a8   | a9   | a10 | a11 |",
+        "+-----+-----+-----+-----+-----+-----+----+-----+-----+-----+-----+------+------+------+------+-----+-----+",
+        "| 412 | 307 | 412 | 307 | 307 | 412 |    |     |     | 412 |     | 4627 | 4627 | 4531 | 4627 | 115 | 85  |",
+        "| 488 | 339 | 488 | 339 | 339 | 488 | 72 |     |     | 488 | 72  | 4627 | 4627 | 4512 | 4627 | 140 | 153 |",
+        "| 543 | 412 | 543 | 412 | 412 | 543 | 24 |     |     | 543 | 96  | 4627 | 4627 | 4487 | 4555 | 82  | 122 |",
+        "| 553 | 488 | 553 | 488 | 488 | 553 | 19 |     |     | 553 | 115 | 4627 | 4555 | 4473 | 4531 | 89  | 114 |",
+        "| 553 | 543 | 553 | 543 | 543 | 553 | 25 |     |     | 553 | 140 | 4627 | 4531 | 4442 | 4512 | 110 | 105 |",
+        "| 591 | 553 | 591 | 553 | 553 | 591 | 14 |     |     | 591 | 154 | 4627 | 4512 | 4402 | 4487 | 167 | 181 |",
+        "| 651 | 553 | 651 | 553 | 553 | 651 | 31 | 72  | 72  | 651 | 185 | 4627 | 4487 | 4320 | 4473 | 153 | 204 |",
+        "| 662 | 591 | 662 | 591 | 591 | 662 | 40 | 96  | 96  | 662 | 225 | 4627 | 4473 | 4320 | 4442 | 154 | 141 |",
+        "| 697 | 651 | 697 | 651 | 651 | 697 | 82 | 115 | 115 | 697 | 307 | 4627 | 4442 | 4288 | 4402 | 187 | 65  |",
+        "| 758 | 662 | 758 | 662 | 662 | 758 |    | 140 | 140 | 758 | 307 | 4627 | 4402 | 4215 | 4320 | 181 | 48  |",
+        "+-----+-----+-----+-----+-----+-----+----+-----+-----+-----+-----+------+------+------+------+-----+-----+",
+    ];
+    assert_batches_eq!(expected, &actual);
+    Ok(())
+}
+
+#[tokio::test]
+async fn window_frame_groups_without_order_by() -> Result<()> {
     let ctx = SessionContext::new();
     register_aggregate_csv(&ctx).await?;
     // execute the query
     let df = ctx
         .sql(
             "SELECT
-                COUNT(c1) OVER (ORDER BY c2 GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
-                FROM aggregate_test_100;",
+            SUM(c4) OVER(PARTITION BY c2 GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
+            FROM aggregate_test_100
+            ORDER BY c9;",
         )
         .await?;
-    let results = df.collect().await;
-    assert!(results
-        .as_ref()
-        .err()
-        .unwrap()
-        .to_string()
-        .contains("Window frame definitions involving GROUPS are not supported yet"));
+    let err = df.collect().await.unwrap_err();
+    assert_contains!(
+        err.to_string(),
+        "Execution error: GROUPS mode requires an ORDER BY clause".to_owned()
+    );
     Ok(())
 }
 
diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs
index e6c754387f1c..1c41f2bb2ed7 100644
--- a/datafusion/physical-expr/src/window/aggregate.rs
+++ b/datafusion/physical-expr/src/window/aggregate.rs
@@ -31,7 +31,7 @@ use datafusion_common::ScalarValue;
 use datafusion_expr::WindowFrame;
 
 use crate::{expressions::PhysicalSortExpr, PhysicalExpr};
-use crate::{window::WindowExpr, AggregateExpr};
+use crate::{window::groups::WindowFrameGroups, window::WindowExpr, AggregateExpr};
 
 /// A window expr that takes the form of an aggregate function
 #[derive(Debug)]
@@ -114,6 +114,7 @@ impl WindowExpr for AggregateWindowExpr {
                 .map(|v| v.slice(partition_range.start, length))
                 .collect::<Vec<_>>();
 
+            let mut window_frame_groups = WindowFrameGroups::default();
             let mut last_range: (usize, usize) = (0, 0);
 
             // We iterate on each row to perform a running calculation.
@@ -121,6 +122,7 @@ impl WindowExpr for AggregateWindowExpr {
             for i in 0..length {
                 let cur_range = self.calculate_range(
                     &window_frame,
+                    &mut window_frame_groups,
                     &slice_order_bys,
                     &sort_options,
                     length,
diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs
index e4e377175653..698bffdf92bd 100644
--- a/datafusion/physical-expr/src/window/built_in.rs
+++ b/datafusion/physical-expr/src/window/built_in.rs
@@ -31,6 +31,8 @@ use std::any::Any;
 use std::ops::Range;
 use std::sync::Arc;
 
+use crate::window::groups::WindowFrameGroups;
+
 /// A window expr that takes the form of a built in window function
 #[derive(Debug)]
 pub struct BuiltInWindowExpr {
@@ -113,10 +115,12 @@ impl WindowExpr for BuiltInWindowExpr {
                     .iter()
                     .map(|v| v.slice(partition_range.start, length))
                     .collect::<Vec<_>>();
+                let mut window_frame_groups = WindowFrameGroups::default();
                 // We iterate on each row to calculate window frame range and and window function result
                 for idx in 0..length {
                     let range = self.calculate_range(
                         &window_frame,
+                        &mut window_frame_groups,
                         &slice_order_bys,
                         &sort_options,
                         num_rows,
diff --git a/datafusion/physical-expr/src/window/groups.rs b/datafusion/physical-expr/src/window/groups.rs
new file mode 100644
index 000000000000..d1d79ba8700d
--- /dev/null
+++ b/datafusion/physical-expr/src/window/groups.rs
@@ -0,0 +1,712 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This module provides the utilities for the GROUPS window frame index calculations.
+
+use arrow::array::ArrayRef;
+use std::cmp;
+use std::collections::VecDeque;
+
+use datafusion_common::bisect::find_bisect_point;
+use datafusion_common::{DataFusionError, Result, ScalarValue};
+
+/// In the GROUPS mode, the rows with duplicate sorting values are grouped together.
+/// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode.
+/// The syntax is as follows:
+///     GROUPS frame_start [ frame_exclusion ]
+///     GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ]
+/// The frame_exclusion is not yet supported.
+/// The frame_start and frame_end parameters allow us to specify which rows the window frame starts and ends with.
+/// They accept the following values:
+///    - UNBOUNDED PRECEDING: (possible only in frame_start) start with the first row of the partition
+///    - offset PRECEDING: When used as frame_start, it means the first row of the group which is a given number of
+///                        groups before the current group (the group containing the current row).
+///                        When used as frame_end, it means the last row of the group which is a given number of groups
+///                        before the current group.
+///    - CURRENT ROW: When used as frame_start, it means the first row in a group containing the current row.
+///                   When used as frame_end, it means the last row in a group containing the current row.
+///    - offset FOLLOWING: When used as frame_start, it means the first row of the group which is a given number of
+///                        groups after the current group (the group containing the current row).
+///                        When used as frame_end, it means the last row of the group which is a given number of groups
+///                        after the current group.
+///    - UNBOUNDED FOLLOWING: (possible only as a frame_end) end with the last row of the partition
+///
+/// In the following implementation, 'process' is the only public interface of the WindowFrameGroups. It is called for
+/// frame_start and frame_end of each row, consecutively.
+/// The call for frame_start, first, tries to initialize the WindowFrameGroups handler if it has not already been
+/// initialized. The initialization means the setting of the window frame start index. The window frame start index can
+/// only be set, if the current row's window frame start is greater or equal to zero depending on the offset. If the
+/// window frame start index is greater or equal to zero, it means the current row can be stored in the
+/// WindowFrameGroups handler's window frame (group_start_indices), the row values as the group identifier and the row
+/// index as the start index of the group. Then, it keeps track of the previous row values, so that a group change can
+/// be identified. If there is a group change, the WindowFrameGroups handler's window frame is proceeded by one group.
+/// The call for frame_end, first, calculates the current row's window frame end index from the offset. If the current
+/// row's window frame end index is greater than zero, the WindowFrameGroups handler's window frame is extended
+/// one-by-one until the current row's window frame end index is reached by finding the next group.
+/// The call to 'process' for frame_start returns the index of the the WindowFrameGroups handler's window frame's first
+/// entry, if there is a window frame, otherwise it returns 0.
+/// The call to 'process' for frame_end returns the index of the the WindowFrameGroups handler's window frame's last
+/// entry, if there is a window frame, otherwise it returns the last index of the partition (length).
+
+#[derive(Debug, Default)]
+pub struct WindowFrameGroups {
+    current_group_idx: u64,
+    group_start_indices: VecDeque<(Vec<ScalarValue>, usize)>,
+    previous_row_values: Option<Vec<ScalarValue>>,
+    reached_end: bool,
+    window_frame_end_idx: u64,
+    window_frame_start_idx: u64,
+}
+
+impl WindowFrameGroups {
+    fn extend_window_frame_if_necessary<
+        const BISECT_SIDE: bool,
+        const SEARCH_SIDE: bool,
+    >(
+        &mut self,
+        item_columns: &[ArrayRef],
+        delta: u64,
+    ) -> Result<()> {
+        if BISECT_SIDE || self.reached_end {
+            return Ok(());
+        }
+        let current_window_frame_end_idx = if !SEARCH_SIDE {
+            self.current_group_idx + delta + 1
+        } else if self.current_group_idx >= delta {
+            self.current_group_idx - delta + 1
+        } else {
+            0
+        };
+        if current_window_frame_end_idx == 0 {
+            // the end index of the window frame is still before the first index
+            return Ok(());
+        }
+        while !self.reached_end
+            && current_window_frame_end_idx >= self.window_frame_end_idx
+        {
+            if self.group_start_indices.is_empty() {
+                self.initialize_window_frame_start(item_columns)?;
+                continue;
+            }
+            self.proceed_one_group::<BISECT_SIDE, SEARCH_SIDE>(item_columns, delta)?;
+        }
+        Ok(())
+    }
+
+    fn initialize<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
+        &mut self,
+        delta: u64,
+        item_columns: &[ArrayRef],
+    ) -> Result<()> {
+        if BISECT_SIDE {
+            if !SEARCH_SIDE {
+                self.window_frame_start_idx = self.current_group_idx + delta;
+                return self.initialize_window_frame_start(item_columns);
+            } else if self.current_group_idx >= delta {
+                self.window_frame_start_idx = self.current_group_idx - delta;
+                return self.initialize_window_frame_start(item_columns);
+            }
+        }
+        Ok(())
+    }
+
+    fn initialize_window_frame_start(&mut self, item_columns: &[ArrayRef]) -> Result<()> {
+        let mut group_values = item_columns
+            .iter()
+            .map(|col| ScalarValue::try_from_array(col, 0))
+            .collect::<Result<Vec<ScalarValue>>>()?;
+        let mut start_idx: usize = 0;
+        for _ in 0..self.window_frame_start_idx {
+            let next_group_and_start_index =
+                WindowFrameGroups::find_next_group_and_start_index(
+                    item_columns,
+                    &group_values,
+                    start_idx,
+                    None,
+                )?;
+            match next_group_and_start_index {
+                Some((group, index)) => {
+                    group_values = group;
+                    start_idx = index;
+                }
+                // not enough groups to generate a window frame
+                None => {
+                    self.reached_end = true;
+                    self.window_frame_end_idx = self.window_frame_start_idx;
+                    return Ok(());
+                }
+            }
+        }
+        self.group_start_indices
+            .push_back((group_values, start_idx));
+        self.window_frame_end_idx = self.window_frame_start_idx + 1;
+        Ok(())
+    }
+
+    fn initialized(&self) -> bool {
+        self.reached_end || !self.group_start_indices.is_empty()
+    }
+
+    /// This function proceeds the window frame by one group.
+    /// First, it extends the window frame by adding one next group as the last entry.
+    /// Then, if this function is called due to a group change (with BISECT_SIDE true), it pops the front entry.
+    /// If this function is called with BISECT_SIDE false, it means we are trying to extend the window frame to reach the
+    /// window frame size, so no popping of the front entry.
+    fn proceed_one_group<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
+        &mut self,
+        item_columns: &[ArrayRef],
+        delta: u64,
+    ) -> Result<()> {
+        let last_group_values = self.group_start_indices.back();
+        let last_group_values = if let Some(values) = last_group_values {
+            values
+        } else {
+            return Ok(());
+        };
+        let next_group_and_start_index =
+            WindowFrameGroups::find_next_group_and_start_index(
+                item_columns,
+                &last_group_values.0,
+                last_group_values.1,
+                None,
+            )?;
+        match next_group_and_start_index {
+            Some((group, index)) => {
+                self.group_start_indices.push_back((group, index));
+                self.window_frame_end_idx += 1;
+            }
+            // not enough groups to proceed
+            None => {
+                self.reached_end = true;
+            }
+        }
+        if BISECT_SIDE {
+            let current_window_frame_start_idx = if !SEARCH_SIDE {
+                self.current_group_idx + delta
+            } else if self.current_group_idx >= delta {
+                self.current_group_idx - delta
+            } else {
+                0
+            };
+            if current_window_frame_start_idx > self.window_frame_start_idx {
+                self.group_start_indices.pop_front();
+                self.window_frame_start_idx += 1;
+            }
+        }
+        Ok(())
+    }
+
+    pub fn process<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
+        &mut self,
+        current_row_values: &[ScalarValue],
+        delta: u64,
+        item_columns: &[ArrayRef],
+        length: usize,
+    ) -> Result<usize> {
+        if !self.initialized() {
+            self.initialize::<BISECT_SIDE, SEARCH_SIDE>(delta, item_columns)?;
+        }
+        self.extend_window_frame_if_necessary::<BISECT_SIDE, SEARCH_SIDE>(
+            item_columns,
+            delta,
+        )?;
+        let group_change = match &self.previous_row_values {
+            None => false,
+            Some(values) => current_row_values != values,
+        };
+        if self.previous_row_values.is_none() || group_change {
+            self.previous_row_values = Some(current_row_values.to_vec());
+        }
+        if group_change {
+            self.current_group_idx += 1;
+            self.proceed_one_group::<BISECT_SIDE, SEARCH_SIDE>(item_columns, delta)?;
+        }
+        Ok(if self.group_start_indices.is_empty() {
+            if self.reached_end {
+                length
+            } else {
+                0
+            }
+        } else if BISECT_SIDE {
+            match self.group_start_indices.get(0) {
+                Some(&(_, idx)) => idx,
+                None => 0,
+            }
+        } else {
+            match (self.reached_end, self.group_start_indices.back()) {
+                (false, Some(&(_, idx))) => idx,
+                _ => length,
+            }
+        })
+    }
+
+    /// This function finds the next group and its start index for a given group and start index.
+    /// It uses the bisect function, thus it should work on a bounded frame with a start and an end point.
+    /// To find the end point, a step_size is used with a default value of 100. After proceeding the end point,
+    /// if the entry at the end point still has the same group identifier (row values), the start point is set as the
+    /// end point, and the end point is proceeded one step size more. The end point is found when the entry at the end
+    /// point has a different group identifier (different row values), or the end of the partition is reached.
+    /// Then, the bisect function can be applied with low as the start point, and high as the end point.
+    ///
+    /// Improvement:
+    /// For small group sizes, proceeding one-by-one to find the group change can be more efficient.
+    /// A statistics about the group sizes can be utilized to decide upon which approach to use, or even set the
+    /// step_size.
+    /// We will also need a benchmark implementation to prove the efficiency.
+    fn find_next_group_and_start_index(
+        item_columns: &[ArrayRef],
+        current_row_values: &[ScalarValue],
+        idx: usize,
+        step_size: Option<usize>,
+    ) -> Result<Option<(Vec<ScalarValue>, usize)>> {
+        let step_size = if let Some(n) = step_size { n } else { 100 };
+        if step_size == 0 {
+            return Err(DataFusionError::Internal(
+                "Step size cannot be 0".to_string(),
+            ));
+        }
+        let data_size: usize = item_columns
+            .get(0)
+            .ok_or_else(|| {
+                DataFusionError::Internal("Column array shouldn't be empty".to_string())
+            })?
+            .len();
+        let mut low: usize = idx;
+        let mut high: usize = cmp::min(low + step_size, data_size);
+        while high < data_size {
+            let val = item_columns
+                .iter()
+                .map(|arr| ScalarValue::try_from_array(arr, high))
+                .collect::<Result<Vec<ScalarValue>>>()?;
+            if val == current_row_values {
+                low = high;
+                high = cmp::min(low + step_size, data_size);
+            } else {
+                break;
+            }
+        }
+        low = find_bisect_point(
+            item_columns,
+            current_row_values,
+            |current, to_compare| Ok(current == to_compare),
+            low,
+            high,
+        )?;
+        if low == data_size {
+            return Ok(None);
+        }
+        let val = item_columns
+            .iter()
+            .map(|arr| ScalarValue::try_from_array(arr, low))
+            .collect::<Result<Vec<ScalarValue>>>()?;
+        Ok(Some((val, low)))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow::array::Float64Array;
+    use datafusion_common::ScalarValue;
+    use std::sync::Arc;
+
+    use crate::from_slice::FromSlice;
+
+    use super::*;
+
+    #[test]
+    fn test_find_next_group_and_start_index() {
+        const NUM_ROWS: usize = 6;
+        const NEXT_INDICES: [usize; 5] = [1, 2, 4, 4, 5];
+        const STEP_SIZE: usize = 1;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 3., 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 8., 5., 0.0])),
+        ];
+        for (current_idx, next_idx) in NEXT_INDICES.iter().enumerate() {
+            let current_row_values = arrays
+                .iter()
+                .map(|col| ScalarValue::try_from_array(col, current_idx))
+                .collect::<Result<Vec<ScalarValue>>>()
+                .unwrap();
+            let next_row_values = arrays
+                .iter()
+                .map(|col| ScalarValue::try_from_array(col, *next_idx))
+                .collect::<Result<Vec<ScalarValue>>>()
+                .unwrap();
+            let res = WindowFrameGroups::find_next_group_and_start_index(
+                &arrays,
+                &current_row_values,
+                current_idx,
+                None,
+            )
+            .unwrap();
+            assert_eq!(res, Some((next_row_values.clone(), *next_idx)));
+            let res = WindowFrameGroups::find_next_group_and_start_index(
+                &arrays,
+                &current_row_values,
+                current_idx,
+                Some(STEP_SIZE),
+            )
+            .unwrap();
+            assert_eq!(res, Some((next_row_values, *next_idx)));
+        }
+        let current_idx = NUM_ROWS - 1;
+        let current_row_values = arrays
+            .iter()
+            .map(|col| ScalarValue::try_from_array(col, current_idx))
+            .collect::<Result<Vec<ScalarValue>>>()
+            .unwrap();
+        let res = WindowFrameGroups::find_next_group_and_start_index(
+            &arrays,
+            &current_row_values,
+            current_idx,
+            None,
+        )
+        .unwrap();
+        assert_eq!(res, None);
+        let res = WindowFrameGroups::find_next_group_and_start_index(
+            &arrays,
+            &current_row_values,
+            current_idx,
+            Some(STEP_SIZE),
+        )
+        .unwrap();
+        assert_eq!(res, None);
+    }
+
+    #[test]
+    fn test_window_frame_groups_preceding_huge_delta() {
+        const START: bool = true;
+        const END: bool = false;
+        const PRECEDING: bool = true;
+        const DELTA: u64 = 10;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameGroups::default();
+        window_frame_groups
+            .initialize::<START, PRECEDING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        window_frame_groups
+            .extend_window_frame_if_necessary::<END, PRECEDING>(&arrays, DELTA)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        for idx in 0..NUM_ROWS {
+            let current_row_values = arrays
+                .iter()
+                .map(|col| ScalarValue::try_from_array(col, idx))
+                .collect::<Result<Vec<ScalarValue>>>()
+                .unwrap();
+            let start = window_frame_groups
+                .process::<START, PRECEDING>(
+                    &current_row_values,
+                    DELTA,
+                    &arrays,
+                    NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, 0);
+            let end = window_frame_groups
+                .process::<END, PRECEDING>(&current_row_values, DELTA, &arrays, NUM_ROWS)
+                .unwrap();
+            assert_eq!(end, 0);
+        }
+    }
+
+    #[test]
+    fn test_window_frame_groups_following_huge_delta() {
+        const START: bool = true;
+        const END: bool = false;
+        const FOLLOWING: bool = false;
+        const DELTA: u64 = 10;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameGroups::default();
+        window_frame_groups
+            .initialize::<START, FOLLOWING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, DELTA);
+        assert_eq!(window_frame_groups.window_frame_end_idx, DELTA);
+        assert!(window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        window_frame_groups
+            .extend_window_frame_if_necessary::<END, FOLLOWING>(&arrays, DELTA)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, DELTA);
+        assert_eq!(window_frame_groups.window_frame_end_idx, DELTA);
+        assert!(window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        for idx in 0..NUM_ROWS {
+            let current_row_values = arrays
+                .iter()
+                .map(|col| ScalarValue::try_from_array(col, idx))
+                .collect::<Result<Vec<ScalarValue>>>()
+                .unwrap();
+            let start = window_frame_groups
+                .process::<START, FOLLOWING>(
+                    &current_row_values,
+                    DELTA,
+                    &arrays,
+                    NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, NUM_ROWS);
+            let end = window_frame_groups
+                .process::<END, FOLLOWING>(&current_row_values, DELTA, &arrays, NUM_ROWS)
+                .unwrap();
+            assert_eq!(end, NUM_ROWS);
+        }
+    }
+
+    #[test]
+    fn test_window_frame_groups_preceding_and_following_huge_delta() {
+        const START: bool = true;
+        const END: bool = false;
+        const FOLLOWING: bool = false;
+        const PRECEDING: bool = true;
+        const DELTA: u64 = 10;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameGroups::default();
+        window_frame_groups
+            .initialize::<START, PRECEDING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        window_frame_groups
+            .extend_window_frame_if_necessary::<END, FOLLOWING>(&arrays, DELTA)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, NUM_ROWS as u64);
+        assert!(window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), NUM_ROWS);
+
+        for idx in 0..NUM_ROWS {
+            let current_row_values = arrays
+                .iter()
+                .map(|col| ScalarValue::try_from_array(col, idx))
+                .collect::<Result<Vec<ScalarValue>>>()
+                .unwrap();
+            let start = window_frame_groups
+                .process::<START, PRECEDING>(
+                    &current_row_values,
+                    DELTA,
+                    &arrays,
+                    NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, 0);
+            let end = window_frame_groups
+                .process::<END, FOLLOWING>(&current_row_values, DELTA, &arrays, NUM_ROWS)
+                .unwrap();
+            assert_eq!(end, NUM_ROWS);
+        }
+    }
+
+    #[test]
+    fn test_window_frame_groups_preceding_and_following_1() {
+        const START: bool = true;
+        const END: bool = false;
+        const FOLLOWING: bool = false;
+        const PRECEDING: bool = true;
+        const DELTA: u64 = 1;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameGroups::default();
+        window_frame_groups
+            .initialize::<START, PRECEDING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        window_frame_groups
+            .extend_window_frame_if_necessary::<END, FOLLOWING>(&arrays, DELTA)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 2 * DELTA + 1);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(
+            window_frame_groups.group_start_indices.len(),
+            2 * DELTA as usize + 1
+        );
+
+        for idx in 0..NUM_ROWS {
+            let current_row_values = arrays
+                .iter()
+                .map(|col| ScalarValue::try_from_array(col, idx))
+                .collect::<Result<Vec<ScalarValue>>>()
+                .unwrap();
+            let start_idx = if idx < DELTA as usize {
+                0
+            } else {
+                idx - DELTA as usize
+            };
+            let start = window_frame_groups
+                .process::<START, PRECEDING>(
+                    &current_row_values,
+                    DELTA,
+                    &arrays,
+                    NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, start_idx);
+            let end_idx = if idx + 1 + DELTA as usize > NUM_ROWS {
+                NUM_ROWS
+            } else {
+                idx + 1 + DELTA as usize
+            };
+            let end = window_frame_groups
+                .process::<END, FOLLOWING>(&current_row_values, DELTA, &arrays, NUM_ROWS)
+                .unwrap();
+            assert_eq!(end, end_idx);
+        }
+    }
+
+    #[test]
+    fn test_window_frame_groups_preceding_1_and_unbounded_following() {
+        const START: bool = true;
+        const PRECEDING: bool = true;
+        const DELTA: u64 = 1;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameGroups::default();
+        window_frame_groups
+            .initialize::<START, PRECEDING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        for idx in 0..NUM_ROWS {
+            let current_row_values = arrays
+                .iter()
+                .map(|col| ScalarValue::try_from_array(col, idx))
+                .collect::<Result<Vec<ScalarValue>>>()
+                .unwrap();
+            let start_idx = if idx < DELTA as usize {
+                0
+            } else {
+                idx - DELTA as usize
+            };
+            let start = window_frame_groups
+                .process::<START, PRECEDING>(
+                    &current_row_values,
+                    DELTA,
+                    &arrays,
+                    NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, start_idx);
+        }
+    }
+
+    #[test]
+    fn test_window_frame_groups_current_and_unbounded_following() {
+        const START: bool = true;
+        const PRECEDING: bool = true;
+        const DELTA: u64 = 0;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameGroups::default();
+        window_frame_groups
+            .initialize::<START, PRECEDING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 1);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 1);
+
+        for idx in 0..NUM_ROWS {
+            let current_row_values = arrays
+                .iter()
+                .map(|col| ScalarValue::try_from_array(col, idx))
+                .collect::<Result<Vec<ScalarValue>>>()
+                .unwrap();
+
+            let start = window_frame_groups
+                .process::<START, PRECEDING>(
+                    &current_row_values,
+                    DELTA,
+                    &arrays,
+                    NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, idx);
+        }
+    }
+}
diff --git a/datafusion/physical-expr/src/window/mod.rs b/datafusion/physical-expr/src/window/mod.rs
index 059f8b88637c..7de388b621cc 100644
--- a/datafusion/physical-expr/src/window/mod.rs
+++ b/datafusion/physical-expr/src/window/mod.rs
@@ -19,6 +19,7 @@ mod aggregate;
 mod built_in;
 mod built_in_window_function_expr;
 pub(crate) mod cume_dist;
+mod groups;
 pub(crate) mod lead_lag;
 pub(crate) mod nth_value;
 pub(crate) mod partition_evaluator;
diff --git a/datafusion/physical-expr/src/window/window_expr.rs b/datafusion/physical-expr/src/window/window_expr.rs
index 9c4b1b17970d..86010349c171 100644
--- a/datafusion/physical-expr/src/window/window_expr.rs
+++ b/datafusion/physical-expr/src/window/window_expr.rs
@@ -31,6 +31,8 @@ use std::sync::Arc;
 use datafusion_expr::WindowFrameBound;
 use datafusion_expr::{WindowFrame, WindowFrameUnits};
 
+use crate::window::groups::WindowFrameGroups;
+
 /// A window expression that:
 /// * knows its resulting field
 pub trait WindowExpr: Send + Sync + Debug {
@@ -117,10 +119,11 @@ pub trait WindowExpr: Send + Sync + Debug {
     }
 
     /// We use start and end bounds to calculate current row's starting and ending range.
-    /// This function supports different modes, but we currently do not support window calculation for GROUPS inside window frames.
+    /// This function supports different modes.
     fn calculate_range(
         &self,
         window_frame: &Option<Arc<WindowFrame>>,
+        window_frame_groups: &mut WindowFrameGroups,
         range_columns: &[ArrayRef],
         sort_options: &[SortOptions],
         length: usize,
@@ -212,11 +215,6 @@ pub trait WindowExpr: Send + Sync + Debug {
                                 0
                             }
                         }
-                        WindowFrameBound::Preceding(_) => {
-                            return Err(DataFusionError::Internal(
-                                "Rows should be Uint".to_string(),
-                            ))
-                        }
                         WindowFrameBound::CurrentRow => idx,
                         // UNBOUNDED FOLLOWING
                         WindowFrameBound::Following(ScalarValue::UInt64(None)) => {
@@ -228,7 +226,9 @@ pub trait WindowExpr: Send + Sync + Debug {
                         WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
                             min(idx + n as usize, length)
                         }
-                        WindowFrameBound::Following(_) => {
+                        // ERRONEOUS FRAMES
+                        WindowFrameBound::Preceding(_)
+                        | WindowFrameBound::Following(_) => {
                             return Err(DataFusionError::Internal(
                                 "Rows should be Uint".to_string(),
                             ))
@@ -249,18 +249,15 @@ pub trait WindowExpr: Send + Sync + Debug {
                                 0
                             }
                         }
-                        WindowFrameBound::Preceding(_) => {
-                            return Err(DataFusionError::Internal(
-                                "Rows should be Uint".to_string(),
-                            ))
-                        }
                         WindowFrameBound::CurrentRow => idx + 1,
                         // UNBOUNDED FOLLOWING
                         WindowFrameBound::Following(ScalarValue::UInt64(None)) => length,
                         WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
                             min(idx + n as usize + 1, length)
                         }
-                        WindowFrameBound::Following(_) => {
+                        // ERRONEOUS FRAMES
+                        WindowFrameBound::Preceding(_)
+                        | WindowFrameBound::Following(_) => {
                             return Err(DataFusionError::Internal(
                                 "Rows should be Uint".to_string(),
                             ))
@@ -268,9 +265,104 @@ pub trait WindowExpr: Send + Sync + Debug {
                     };
                     Ok((start, end))
                 }
-                WindowFrameUnits::Groups => Err(DataFusionError::NotImplemented(
-                    "Window frame for groups is not implemented".to_string(),
-                )),
+                WindowFrameUnits::Groups => {
+                    if range_columns.is_empty() {
+                        return Err(DataFusionError::Execution(
+                            "GROUPS mode requires an ORDER BY clause".to_string(),
+                        ));
+                    }
+                    let start = match window_frame.start_bound {
+                        // UNBOUNDED PRECEDING
+                        WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0,
+                        WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
+                            calculate_index_of_group::<true, true>(
+                                range_columns,
+                                window_frame_groups,
+                                idx,
+                                n,
+                                length,
+                            )?
+                        }
+                        WindowFrameBound::CurrentRow => {
+                            calculate_index_of_group::<true, true>(
+                                range_columns,
+                                window_frame_groups,
+                                idx,
+                                0,
+                                length,
+                            )?
+                        }
+                        WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
+                            calculate_index_of_group::<true, false>(
+                                range_columns,
+                                window_frame_groups,
+                                idx,
+                                n,
+                                length,
+                            )?
+                        }
+                        // UNBOUNDED FOLLOWING
+                        WindowFrameBound::Following(ScalarValue::UInt64(None)) => {
+                            return Err(DataFusionError::Internal(format!(
+                                "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'",
+                                window_frame
+                            )))
+                        }
+                        // ERRONEOUS FRAMES
+                        WindowFrameBound::Preceding(_)
+                        | WindowFrameBound::Following(_) => {
+                            return Err(DataFusionError::Internal(
+                                "Groups should be Uint".to_string(),
+                            ))
+                        }
+                    };
+                    let end = match window_frame.end_bound {
+                        // UNBOUNDED PRECEDING
+                        WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => {
+                            return Err(DataFusionError::Internal(format!(
+                                "Frame end cannot be UNBOUNDED PRECEDING '{:?}'",
+                                window_frame
+                            )))
+                        }
+                        WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
+                            calculate_index_of_group::<false, true>(
+                                range_columns,
+                                window_frame_groups,
+                                idx,
+                                n,
+                                length,
+                            )?
+                        }
+                        WindowFrameBound::CurrentRow => {
+                            calculate_index_of_group::<false, false>(
+                                range_columns,
+                                window_frame_groups,
+                                idx,
+                                0,
+                                length,
+                            )?
+                        }
+                        WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
+                            calculate_index_of_group::<false, false>(
+                                range_columns,
+                                window_frame_groups,
+                                idx,
+                                n,
+                                length,
+                            )?
+                        }
+                        // UNBOUNDED FOLLOWING
+                        WindowFrameBound::Following(ScalarValue::UInt64(None)) => length,
+                        // ERRONEOUS FRAMES
+                        WindowFrameBound::Preceding(_)
+                        | WindowFrameBound::Following(_) => {
+                            return Err(DataFusionError::Internal(
+                                "Groups should be Uint".to_string(),
+                            ))
+                        }
+                    };
+                    Ok((start, end))
+                }
             }
         } else {
             Ok((0, length))
@@ -320,3 +412,22 @@ fn calculate_index_of_row<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
     // `BISECT_SIDE` true means bisect_left, false means bisect_right
     bisect::<BISECT_SIDE>(range_columns, &end_range, sort_options)
 }
+
+fn calculate_index_of_group<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
+    range_columns: &[ArrayRef],
+    window_frame_groups: &mut WindowFrameGroups,
+    idx: usize,
+    delta: u64,
+    length: usize,
+) -> Result<usize> {
+    let current_row_values = range_columns
+        .iter()
+        .map(|col| ScalarValue::try_from_array(col, idx))
+        .collect::<Result<Vec<ScalarValue>>>()?;
+    window_frame_groups.process::<BISECT_SIDE, SEARCH_SIDE>(
+        &current_row_values,
+        delta,
+        range_columns,
+        length,
+    )
+}
diff --git a/integration-tests/sqls/simple_window_groups.sql b/integration-tests/sqls/simple_window_groups.sql
new file mode 100644
index 000000000000..dd369c28ca4d
--- /dev/null
+++ b/integration-tests/sqls/simple_window_groups.sql
@@ -0,0 +1,71 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+
+-- http://www.apache.org/licenses/LICENSE-2.0
+
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+SELECT
+    SUM(c4) OVER(ORDER BY c3 DESC GROUPS BETWEEN 3 PRECEDING AND 1 FOLLOWING) as summation1,
+    SUM(c4) OVER(ORDER BY c3 DESC GROUPS BETWEEN 3 PRECEDING AND 2 PRECEDING) as summation2,
+    SUM(c5) OVER(ORDER BY c4 DESC GROUPS BETWEEN 2 PRECEDING AND 2 PRECEDING) as summation3,
+    SUM(c4) OVER(ORDER BY c3 DESC GROUPS BETWEEN 1 FOLLOWING AND 3 FOLLOWING) as summation4,
+    SUM(c3) OVER(ORDER BY c4 DESC GROUPS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) as summation5,
+    SUM(c5) OVER(ORDER BY c3 DESC GROUPS 2 PRECEDING) as summation6,
+    SUM(c5) OVER(ORDER BY c3 DESC GROUPS BETWEEN CURRENT ROW AND 3 FOLLOWING) as summation7,
+    SUM(c5) OVER(ORDER BY c4 DESC GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as summation8,
+    SUM(c4) OVER(ORDER BY c4 DESC GROUPS UNBOUNDED PRECEDING) as summation9,
+    SUM(c5) OVER(ORDER BY c4 DESC GROUPS CURRENT ROW) as summation10,
+    SUM(c5) OVER(ORDER BY c4 DESC GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as summation11,
+    SUM(c5) OVER(ORDER BY c4 DESC GROUPS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) as summation12,
+    SUM(c5) OVER(ORDER BY c4 DESC GROUPS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) as summation13,
+    SUM(c4) OVER(PARTITION BY c4 ORDER BY c3 GROUPS BETWEEN 3 PRECEDING AND 1 FOLLOWING) as summation21,
+    SUM(c4) OVER(PARTITION BY c4 ORDER BY c3 GROUPS BETWEEN 3 PRECEDING AND 2 PRECEDING) as summation22,
+    SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN 2 PRECEDING AND 2 PRECEDING) as summation23,
+    SUM(c4) OVER(PARTITION BY c4 ORDER BY c3 GROUPS BETWEEN 1 FOLLOWING AND 3 FOLLOWING) as summation24,
+    SUM(c3) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) as summation25,
+    SUM(c5) OVER(PARTITION BY c4 ORDER BY c3 GROUPS 2 PRECEDING) as summation26,
+    SUM(c5) OVER(PARTITION BY c4 ORDER BY c3 GROUPS BETWEEN CURRENT ROW AND 3 FOLLOWING) as summation27,
+    SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as summation28,
+    SUM(c4) OVER(PARTITION BY c4 ORDER BY c4 GROUPS UNBOUNDED PRECEDING) as summation29,
+    SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS CURRENT ROW) as summation30,
+    SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as summation31,
+    SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) as summation32,
+    SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) as summation33,
+    SUM(c4) OVER(ORDER BY c5, c3 GROUPS BETWEEN 3 PRECEDING AND 1 FOLLOWING) as summation41,
+    SUM(c4) OVER(ORDER BY c5, c3 GROUPS BETWEEN 3 PRECEDING AND 2 PRECEDING) as summation42,
+    SUM(c5) OVER(ORDER BY c5, c4 GROUPS BETWEEN 2 PRECEDING AND 2 PRECEDING) as summation43,
+    SUM(c4) OVER(ORDER BY c5, c3 GROUPS BETWEEN 1 FOLLOWING AND 3 FOLLOWING) as summation44,
+    SUM(c3) OVER(ORDER BY c5, c4 GROUPS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) as summation45,
+    SUM(c5) OVER(ORDER BY c5, c3 GROUPS 2 PRECEDING) as summation46,
+    SUM(c5) OVER(ORDER BY c5, c3 GROUPS BETWEEN CURRENT ROW AND 3 FOLLOWING) as summation47,
+    SUM(c5) OVER(ORDER BY c5, c4 GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as summation48,
+    SUM(c4) OVER(ORDER BY c5, c4 GROUPS UNBOUNDED PRECEDING) as summation49,
+    SUM(c5) OVER(ORDER BY c5, c4 GROUPS CURRENT ROW) as summation50,
+    SUM(c5) OVER(ORDER BY c5, c4 GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as summation51,
+    SUM(c5) OVER(ORDER BY c5, c4 GROUPS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) as summation52,
+    SUM(c5) OVER(ORDER BY c5, c4 GROUPS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) as summation53,
+    SUM(c4) OVER(ORDER BY c3 GROUPS BETWEEN 3 PRECEDING AND 1 FOLLOWING) as summation61,
+    SUM(c4) OVER(ORDER BY c3 GROUPS BETWEEN 3 PRECEDING AND 2 PRECEDING) as summation62,
+    SUM(c5) OVER(ORDER BY c4 GROUPS BETWEEN 2 PRECEDING AND 2 PRECEDING) as summation63,
+    SUM(c4) OVER(ORDER BY c3 GROUPS BETWEEN 1 FOLLOWING AND 3 FOLLOWING) as summation64,
+    SUM(c3) OVER(ORDER BY c4 GROUPS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) as summation65,
+    SUM(c5) OVER(ORDER BY c3 GROUPS 2 PRECEDING) as summation66,
+    SUM(c5) OVER(ORDER BY c3 GROUPS BETWEEN CURRENT ROW AND 3 FOLLOWING) as summation67,
+    SUM(c5) OVER(ORDER BY c4 GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as summation68,
+    SUM(c4) OVER(ORDER BY c4 GROUPS UNBOUNDED PRECEDING) as summation69,
+    SUM(c5) OVER(ORDER BY c4 GROUPS CURRENT ROW) as summation70,
+    SUM(c5) OVER(ORDER BY c4 GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as summation71,
+    SUM(c5) OVER(ORDER BY c4 GROUPS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) as summation72,
+    SUM(c5) OVER(ORDER BY c4 GROUPS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) as summation73
+FROM test
+ORDER BY c9;
diff --git a/integration-tests/test_psql_parity.py b/integration-tests/test_psql_parity.py
index be197eb612ff..e4c2bb17edb6 100644
--- a/integration-tests/test_psql_parity.py
+++ b/integration-tests/test_psql_parity.py
@@ -82,11 +82,10 @@ def generate_csv_from_psql(fname: str):
 
 class TestPsqlParity:
     def test_tests_count(self):
-        assert len(test_files) == 25, "tests are missed"
+        assert len(test_files) == 26, "tests are missed"
 
     @pytest.mark.parametrize("fname", test_files, ids=str)
     def test_sql_file(self, fname):
         datafusion_output = pd.read_csv(io.BytesIO(generate_csv_from_datafusion(fname)))
         psql_output = pd.read_csv(io.BytesIO(generate_csv_from_psql(fname)))
         np.testing.assert_allclose(datafusion_output, psql_output, equal_nan=True, verbose=True)
-

From 6c8e2bb3f0794576091b417f0c5024fb683b892b Mon Sep 17 00:00:00 2001
From: Mehmet Ozan Kabak <ozankabak@gmail.com>
Date: Mon, 7 Nov 2022 08:45:29 +0300
Subject: [PATCH 2/7] Break down/flatten some functions, move comments inline

---
 datafusion/physical-expr/src/window/groups.rs | 205 ++++++++----------
 .../physical-expr/src/window/window_expr.rs   |   2 +-
 2 files changed, 95 insertions(+), 112 deletions(-)

diff --git a/datafusion/physical-expr/src/window/groups.rs b/datafusion/physical-expr/src/window/groups.rs
index d1d79ba8700d..06f5695298ec 100644
--- a/datafusion/physical-expr/src/window/groups.rs
+++ b/datafusion/physical-expr/src/window/groups.rs
@@ -15,7 +15,28 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! This module provides the utilities for the GROUPS window frame index calculations.
+//! This module provides utilities for GROUPS mode window frame index calculations.
+// In GROUPS mode, rows with duplicate sorting values are grouped together.
+// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode.
+// The syntax is as follows:
+//     GROUPS frame_start [ frame_exclusion ]
+//     GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ]
+// The optional frame_exclusion specifier is not yet supported.
+// The frame_start and frame_end parameters allow us to specify which rows the window
+// frame starts and ends with. They accept the following values:
+//    - UNBOUNDED PRECEDING: Start with the first row of the partition. Possible only in frame_start.
+//    - offset PRECEDING: When used in frame_start, it refers to the first row of the group
+//                        that comes "offset" groups before the current group (i.e. the group
+//                        containing the current row). When used in frame_end, it refers to the
+//                        last row of the group that comes "offset" groups before the current group.
+//    - CURRENT ROW: When used in frame_start, it refers to the first row of the group containing
+//                   the current row. When used in frame_end, it refers to the last row of the group
+//                   containing the current row.
+//    - offset FOLLOWING: When used in frame_start, it refers to the first row of the group
+//                        that comes "offset" groups after the current group (i.e. the group
+//                        containing the current row). When used in frame_end, it refers to the
+//                        last row of the group that comes "offset" groups after the current group.
+//    - UNBOUNDED FOLLOWING: End with the last row of the partition. Possible only in frame_end.
 
 use arrow::array::ArrayRef;
 use std::cmp;
@@ -24,44 +45,8 @@ use std::collections::VecDeque;
 use datafusion_common::bisect::find_bisect_point;
 use datafusion_common::{DataFusionError, Result, ScalarValue};
 
-/// In the GROUPS mode, the rows with duplicate sorting values are grouped together.
-/// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode.
-/// The syntax is as follows:
-///     GROUPS frame_start [ frame_exclusion ]
-///     GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ]
-/// The frame_exclusion is not yet supported.
-/// The frame_start and frame_end parameters allow us to specify which rows the window frame starts and ends with.
-/// They accept the following values:
-///    - UNBOUNDED PRECEDING: (possible only in frame_start) start with the first row of the partition
-///    - offset PRECEDING: When used as frame_start, it means the first row of the group which is a given number of
-///                        groups before the current group (the group containing the current row).
-///                        When used as frame_end, it means the last row of the group which is a given number of groups
-///                        before the current group.
-///    - CURRENT ROW: When used as frame_start, it means the first row in a group containing the current row.
-///                   When used as frame_end, it means the last row in a group containing the current row.
-///    - offset FOLLOWING: When used as frame_start, it means the first row of the group which is a given number of
-///                        groups after the current group (the group containing the current row).
-///                        When used as frame_end, it means the last row of the group which is a given number of groups
-///                        after the current group.
-///    - UNBOUNDED FOLLOWING: (possible only as a frame_end) end with the last row of the partition
-///
-/// In the following implementation, 'process' is the only public interface of the WindowFrameGroups. It is called for
-/// frame_start and frame_end of each row, consecutively.
-/// The call for frame_start, first, tries to initialize the WindowFrameGroups handler if it has not already been
-/// initialized. The initialization means the setting of the window frame start index. The window frame start index can
-/// only be set, if the current row's window frame start is greater or equal to zero depending on the offset. If the
-/// window frame start index is greater or equal to zero, it means the current row can be stored in the
-/// WindowFrameGroups handler's window frame (group_start_indices), the row values as the group identifier and the row
-/// index as the start index of the group. Then, it keeps track of the previous row values, so that a group change can
-/// be identified. If there is a group change, the WindowFrameGroups handler's window frame is proceeded by one group.
-/// The call for frame_end, first, calculates the current row's window frame end index from the offset. If the current
-/// row's window frame end index is greater than zero, the WindowFrameGroups handler's window frame is extended
-/// one-by-one until the current row's window frame end index is reached by finding the next group.
-/// The call to 'process' for frame_start returns the index of the the WindowFrameGroups handler's window frame's first
-/// entry, if there is a window frame, otherwise it returns 0.
-/// The call to 'process' for frame_end returns the index of the the WindowFrameGroups handler's window frame's last
-/// entry, if there is a window frame, otherwise it returns the last index of the partition (length).
-
+/// This structure encapsulates all the state information we require as we
+/// scan groups of data while processing window frames.
 #[derive(Debug, Default)]
 pub struct WindowFrameGroups {
     current_group_idx: u64,
@@ -73,17 +58,11 @@ pub struct WindowFrameGroups {
 }
 
 impl WindowFrameGroups {
-    fn extend_window_frame_if_necessary<
-        const BISECT_SIDE: bool,
-        const SEARCH_SIDE: bool,
-    >(
+    fn extend_window_frame_if_necessary<const SEARCH_SIDE: bool>(
         &mut self,
         item_columns: &[ArrayRef],
         delta: u64,
     ) -> Result<()> {
-        if BISECT_SIDE || self.reached_end {
-            return Ok(());
-        }
         let current_window_frame_end_idx = if !SEARCH_SIDE {
             self.current_group_idx + delta + 1
         } else if self.current_group_idx >= delta {
@@ -95,33 +74,32 @@ impl WindowFrameGroups {
             // the end index of the window frame is still before the first index
             return Ok(());
         }
+        if self.group_start_indices.is_empty() {
+            // TODO: Do we need to check for self.window_frame_end_idx <= current_window_frame_end_idx?
+            self.initialize_window_frame_start(item_columns)?;
+        }
         while !self.reached_end
-            && current_window_frame_end_idx >= self.window_frame_end_idx
+            && self.window_frame_end_idx <= current_window_frame_end_idx
         {
-            if self.group_start_indices.is_empty() {
-                self.initialize_window_frame_start(item_columns)?;
-                continue;
-            }
-            self.proceed_one_group::<BISECT_SIDE, SEARCH_SIDE>(item_columns, delta)?;
+            self.advance_one_group::<SEARCH_SIDE>(item_columns)?;
         }
         Ok(())
     }
 
-    fn initialize<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
+    fn initialize<const SEARCH_SIDE: bool>(
         &mut self,
         delta: u64,
         item_columns: &[ArrayRef],
     ) -> Result<()> {
-        if BISECT_SIDE {
-            if !SEARCH_SIDE {
-                self.window_frame_start_idx = self.current_group_idx + delta;
-                return self.initialize_window_frame_start(item_columns);
-            } else if self.current_group_idx >= delta {
-                self.window_frame_start_idx = self.current_group_idx - delta;
-                return self.initialize_window_frame_start(item_columns);
-            }
+        if !SEARCH_SIDE {
+            self.window_frame_start_idx = self.current_group_idx + delta;
+            self.initialize_window_frame_start(item_columns)
+        } else if self.current_group_idx >= delta {
+            self.window_frame_start_idx = self.current_group_idx - delta;
+            self.initialize_window_frame_start(item_columns)
+        } else {
+            Ok(())
         }
-        Ok(())
     }
 
     fn initialize_window_frame_start(&mut self, item_columns: &[ArrayRef]) -> Result<()> {
@@ -161,15 +139,10 @@ impl WindowFrameGroups {
         self.reached_end || !self.group_start_indices.is_empty()
     }
 
-    /// This function proceeds the window frame by one group.
-    /// First, it extends the window frame by adding one next group as the last entry.
-    /// Then, if this function is called due to a group change (with BISECT_SIDE true), it pops the front entry.
-    /// If this function is called with BISECT_SIDE false, it means we are trying to extend the window frame to reach the
-    /// window frame size, so no popping of the front entry.
-    fn proceed_one_group<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
+    /// This function advances the window frame by one group.
+    fn advance_one_group<const SEARCH_SIDE: bool>(
         &mut self,
         item_columns: &[ArrayRef],
-        delta: u64,
     ) -> Result<()> {
         let last_group_values = self.group_start_indices.back();
         let last_group_values = if let Some(values) = last_group_values {
@@ -194,22 +167,27 @@ impl WindowFrameGroups {
                 self.reached_end = true;
             }
         }
-        if BISECT_SIDE {
-            let current_window_frame_start_idx = if !SEARCH_SIDE {
-                self.current_group_idx + delta
-            } else if self.current_group_idx >= delta {
-                self.current_group_idx - delta
-            } else {
-                0
-            };
-            if current_window_frame_start_idx > self.window_frame_start_idx {
-                self.group_start_indices.pop_front();
-                self.window_frame_start_idx += 1;
-            }
-        }
         Ok(())
     }
 
+    /// This function drops the oldest group from the window frame.
+    fn shift_one_group<const SEARCH_SIDE: bool>(&mut self, delta: u64) {
+        let current_window_frame_start_idx = if !SEARCH_SIDE {
+            self.current_group_idx + delta
+        } else if self.current_group_idx >= delta {
+            self.current_group_idx - delta
+        } else {
+            0
+        };
+        if current_window_frame_start_idx > self.window_frame_start_idx {
+            self.group_start_indices.pop_front();
+            self.window_frame_start_idx += 1;
+        }
+    }
+
+    /// This function is the main public interface of WindowFrameGroups. It is meant to be
+    /// called twice, in succession, to get window frame start and end indices (with `BISECT_SIDE`
+    /// supplied as false and true, respectively).
     pub fn process<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
         &mut self,
         current_row_values: &[ScalarValue],
@@ -217,13 +195,23 @@ impl WindowFrameGroups {
         item_columns: &[ArrayRef],
         length: usize,
     ) -> Result<usize> {
-        if !self.initialized() {
-            self.initialize::<BISECT_SIDE, SEARCH_SIDE>(delta, item_columns)?;
+        if BISECT_SIDE {
+            // When we call this function to get the window frame start index, it tries to initialize
+            // the internal grouping state if this is not already done before. This initialization takes
+            // place only when the window frame start index is greater than or equal to zero. In this
+            // case, the current row is stored in group_start_indices, with row values as the group
+            // identifier and row index as the start index of the group.
+            if !self.initialized() {
+                self.initialize::<SEARCH_SIDE>(delta, item_columns)?;
+            }
+        } else if !self.reached_end {
+            // When we call this function to get the window frame end index, it extends the window
+            // frame one by one until the current row's window frame end index is reached by finding
+            // the next group.
+            self.extend_window_frame_if_necessary::<SEARCH_SIDE>(item_columns, delta)?;
         }
-        self.extend_window_frame_if_necessary::<BISECT_SIDE, SEARCH_SIDE>(
-            item_columns,
-            delta,
-        )?;
+        // We keep track of previous row values, so that a group change can be identified.
+        // If there is a group change, the window frame is advanced and shifted by one group.
         let group_change = match &self.previous_row_values {
             None => false,
             Some(values) => current_row_values != values,
@@ -233,7 +221,8 @@ impl WindowFrameGroups {
         }
         if group_change {
             self.current_group_idx += 1;
-            self.proceed_one_group::<BISECT_SIDE, SEARCH_SIDE>(item_columns, delta)?;
+            self.advance_one_group::<SEARCH_SIDE>(item_columns)?;
+            self.shift_one_group::<SEARCH_SIDE>(delta);
         }
         Ok(if self.group_start_indices.is_empty() {
             if self.reached_end {
@@ -255,18 +244,12 @@ impl WindowFrameGroups {
     }
 
     /// This function finds the next group and its start index for a given group and start index.
-    /// It uses the bisect function, thus it should work on a bounded frame with a start and an end point.
-    /// To find the end point, a step_size is used with a default value of 100. After proceeding the end point,
-    /// if the entry at the end point still has the same group identifier (row values), the start point is set as the
-    /// end point, and the end point is proceeded one step size more. The end point is found when the entry at the end
-    /// point has a different group identifier (different row values), or the end of the partition is reached.
-    /// Then, the bisect function can be applied with low as the start point, and high as the end point.
-    ///
-    /// Improvement:
-    /// For small group sizes, proceeding one-by-one to find the group change can be more efficient.
-    /// A statistics about the group sizes can be utilized to decide upon which approach to use, or even set the
-    /// step_size.
-    /// We will also need a benchmark implementation to prove the efficiency.
+    /// TODO: Explain the algorithm clearly.
+    // FUTURE WORK/ENHANCEMENTS:
+    // For small group sizes, proceeding one-by-one to find the group change can be more efficient.
+    // Statistics about previous group sizes can be utilized to choose the approach to use, or even
+    // to set the base step_size. We can also create a benchmark implementation to get insights about
+    // the crossover point.
     fn find_next_group_and_start_index(
         item_columns: &[ArrayRef],
         current_row_values: &[ScalarValue],
@@ -408,7 +391,7 @@ mod tests {
 
         let mut window_frame_groups = WindowFrameGroups::default();
         window_frame_groups
-            .initialize::<START, PRECEDING>(DELTA, &arrays)
+            .initialize::<PRECEDING>(DELTA, &arrays)
             .unwrap();
         assert_eq!(window_frame_groups.window_frame_start_idx, 0);
         assert_eq!(window_frame_groups.window_frame_end_idx, 0);
@@ -416,7 +399,7 @@ mod tests {
         assert_eq!(window_frame_groups.group_start_indices.len(), 0);
 
         window_frame_groups
-            .extend_window_frame_if_necessary::<END, PRECEDING>(&arrays, DELTA)
+            .extend_window_frame_if_necessary::<PRECEDING>(&arrays, DELTA)
             .unwrap();
         assert_eq!(window_frame_groups.window_frame_start_idx, 0);
         assert_eq!(window_frame_groups.window_frame_end_idx, 0);
@@ -462,7 +445,7 @@ mod tests {
 
         let mut window_frame_groups = WindowFrameGroups::default();
         window_frame_groups
-            .initialize::<START, FOLLOWING>(DELTA, &arrays)
+            .initialize::<FOLLOWING>(DELTA, &arrays)
             .unwrap();
         assert_eq!(window_frame_groups.window_frame_start_idx, DELTA);
         assert_eq!(window_frame_groups.window_frame_end_idx, DELTA);
@@ -470,7 +453,7 @@ mod tests {
         assert_eq!(window_frame_groups.group_start_indices.len(), 0);
 
         window_frame_groups
-            .extend_window_frame_if_necessary::<END, FOLLOWING>(&arrays, DELTA)
+            .extend_window_frame_if_necessary::<FOLLOWING>(&arrays, DELTA)
             .unwrap();
         assert_eq!(window_frame_groups.window_frame_start_idx, DELTA);
         assert_eq!(window_frame_groups.window_frame_end_idx, DELTA);
@@ -517,7 +500,7 @@ mod tests {
 
         let mut window_frame_groups = WindowFrameGroups::default();
         window_frame_groups
-            .initialize::<START, PRECEDING>(DELTA, &arrays)
+            .initialize::<PRECEDING>(DELTA, &arrays)
             .unwrap();
         assert_eq!(window_frame_groups.window_frame_start_idx, 0);
         assert_eq!(window_frame_groups.window_frame_end_idx, 0);
@@ -525,7 +508,7 @@ mod tests {
         assert_eq!(window_frame_groups.group_start_indices.len(), 0);
 
         window_frame_groups
-            .extend_window_frame_if_necessary::<END, FOLLOWING>(&arrays, DELTA)
+            .extend_window_frame_if_necessary::<FOLLOWING>(&arrays, DELTA)
             .unwrap();
         assert_eq!(window_frame_groups.window_frame_start_idx, 0);
         assert_eq!(window_frame_groups.window_frame_end_idx, NUM_ROWS as u64);
@@ -572,7 +555,7 @@ mod tests {
 
         let mut window_frame_groups = WindowFrameGroups::default();
         window_frame_groups
-            .initialize::<START, PRECEDING>(DELTA, &arrays)
+            .initialize::<PRECEDING>(DELTA, &arrays)
             .unwrap();
         assert_eq!(window_frame_groups.window_frame_start_idx, 0);
         assert_eq!(window_frame_groups.window_frame_end_idx, 0);
@@ -580,7 +563,7 @@ mod tests {
         assert_eq!(window_frame_groups.group_start_indices.len(), 0);
 
         window_frame_groups
-            .extend_window_frame_if_necessary::<END, FOLLOWING>(&arrays, DELTA)
+            .extend_window_frame_if_necessary::<FOLLOWING>(&arrays, DELTA)
             .unwrap();
         assert_eq!(window_frame_groups.window_frame_start_idx, 0);
         assert_eq!(window_frame_groups.window_frame_end_idx, 2 * DELTA + 1);
@@ -638,7 +621,7 @@ mod tests {
 
         let mut window_frame_groups = WindowFrameGroups::default();
         window_frame_groups
-            .initialize::<START, PRECEDING>(DELTA, &arrays)
+            .initialize::<PRECEDING>(DELTA, &arrays)
             .unwrap();
         assert_eq!(window_frame_groups.window_frame_start_idx, 0);
         assert_eq!(window_frame_groups.window_frame_end_idx, 0);
@@ -684,7 +667,7 @@ mod tests {
 
         let mut window_frame_groups = WindowFrameGroups::default();
         window_frame_groups
-            .initialize::<START, PRECEDING>(DELTA, &arrays)
+            .initialize::<PRECEDING>(DELTA, &arrays)
             .unwrap();
         assert_eq!(window_frame_groups.window_frame_start_idx, 0);
         assert_eq!(window_frame_groups.window_frame_end_idx, 1);
diff --git a/datafusion/physical-expr/src/window/window_expr.rs b/datafusion/physical-expr/src/window/window_expr.rs
index 86010349c171..45352c0409d9 100644
--- a/datafusion/physical-expr/src/window/window_expr.rs
+++ b/datafusion/physical-expr/src/window/window_expr.rs
@@ -133,9 +133,9 @@ pub trait WindowExpr: Send + Sync + Debug {
             match window_frame.units {
                 WindowFrameUnits::Range => {
                     let start = match &window_frame.start_bound {
-                        // UNBOUNDED PRECEDING
                         WindowFrameBound::Preceding(n) => {
                             if n.is_null() {
+                                // UNBOUNDED PRECEDING
                                 0
                             } else {
                                 calculate_index_of_row::<true, true>(

From 6bc0d7266eb2d43783cbd9857e6d9285d5d942a3 Mon Sep 17 00:00:00 2001
From: Mehmet Ozan Kabak <ozankabak@gmail.com>
Date: Mon, 7 Nov 2022 18:29:41 +0300
Subject: [PATCH 3/7] Change find_next_group_and_start_index to use an
 exponentially growing search algorithm

---
 datafusion/physical-expr/src/window/groups.rs | 78 ++++++-------------
 1 file changed, 22 insertions(+), 56 deletions(-)

diff --git a/datafusion/physical-expr/src/window/groups.rs b/datafusion/physical-expr/src/window/groups.rs
index 06f5695298ec..90920513975b 100644
--- a/datafusion/physical-expr/src/window/groups.rs
+++ b/datafusion/physical-expr/src/window/groups.rs
@@ -114,19 +114,14 @@ impl WindowFrameGroups {
                     item_columns,
                     &group_values,
                     start_idx,
-                    None,
                 )?;
-            match next_group_and_start_index {
-                Some((group, index)) => {
-                    group_values = group;
-                    start_idx = index;
-                }
+            if let Some(entry) = next_group_and_start_index {
+                (group_values, start_idx) = entry;
+            } else {
                 // not enough groups to generate a window frame
-                None => {
-                    self.reached_end = true;
-                    self.window_frame_end_idx = self.window_frame_start_idx;
-                    return Ok(());
-                }
+                self.window_frame_end_idx = self.window_frame_start_idx;
+                self.reached_end = true;
+                return Ok(());
             }
         }
         self.group_start_indices
@@ -155,17 +150,13 @@ impl WindowFrameGroups {
                 item_columns,
                 &last_group_values.0,
                 last_group_values.1,
-                None,
             )?;
-        match next_group_and_start_index {
-            Some((group, index)) => {
-                self.group_start_indices.push_back((group, index));
-                self.window_frame_end_idx += 1;
-            }
+        if let Some(entry) = next_group_and_start_index {
+            self.group_start_indices.push_back(entry);
+            self.window_frame_end_idx += 1;
+        } else {
             // not enough groups to proceed
-            None => {
-                self.reached_end = true;
-            }
+            self.reached_end = true;
         }
         Ok(())
     }
@@ -244,32 +235,25 @@ impl WindowFrameGroups {
     }
 
     /// This function finds the next group and its start index for a given group and start index.
-    /// TODO: Explain the algorithm clearly.
-    // FUTURE WORK/ENHANCEMENTS:
-    // For small group sizes, proceeding one-by-one to find the group change can be more efficient.
-    // Statistics about previous group sizes can be utilized to choose the approach to use, or even
-    // to set the base step_size. We can also create a benchmark implementation to get insights about
-    // the crossover point.
+    /// It utilizes an exponentially growing step size to find the group boundary.
+    // TODO: For small group sizes, proceeding one-by-one to find the group change can be more efficient.
+    // Statistics about previous group sizes can be used to choose one-by-one vs. exponentially growing,
+    // or even to set the base step_size when exponentially growing. We can also create a benchmark
+    // implementation to get insights about the crossover point.
     fn find_next_group_and_start_index(
         item_columns: &[ArrayRef],
         current_row_values: &[ScalarValue],
         idx: usize,
-        step_size: Option<usize>,
     ) -> Result<Option<(Vec<ScalarValue>, usize)>> {
-        let step_size = if let Some(n) = step_size { n } else { 100 };
-        if step_size == 0 {
-            return Err(DataFusionError::Internal(
-                "Step size cannot be 0".to_string(),
-            ));
-        }
+        let mut step_size: usize = 1;
         let data_size: usize = item_columns
             .get(0)
             .ok_or_else(|| {
                 DataFusionError::Internal("Column array shouldn't be empty".to_string())
             })?
             .len();
-        let mut low: usize = idx;
-        let mut high: usize = cmp::min(low + step_size, data_size);
+        let mut low = idx;
+        let mut high = idx + step_size;
         while high < data_size {
             let val = item_columns
                 .iter()
@@ -277,7 +261,8 @@ impl WindowFrameGroups {
                 .collect::<Result<Vec<ScalarValue>>>()?;
             if val == current_row_values {
                 low = high;
-                high = cmp::min(low + step_size, data_size);
+                step_size *= 2;
+                high += step_size;
             } else {
                 break;
             }
@@ -287,7 +272,7 @@ impl WindowFrameGroups {
             current_row_values,
             |current, to_compare| Ok(current == to_compare),
             low,
-            high,
+            cmp::min(high, data_size),
         )?;
         if low == data_size {
             return Ok(None);
@@ -314,7 +299,6 @@ mod tests {
     fn test_find_next_group_and_start_index() {
         const NUM_ROWS: usize = 6;
         const NEXT_INDICES: [usize; 5] = [1, 2, 4, 4, 5];
-        const STEP_SIZE: usize = 1;
 
         let arrays: Vec<ArrayRef> = vec![
             Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 9., 10.])),
@@ -337,15 +321,6 @@ mod tests {
                 &arrays,
                 &current_row_values,
                 current_idx,
-                None,
-            )
-            .unwrap();
-            assert_eq!(res, Some((next_row_values.clone(), *next_idx)));
-            let res = WindowFrameGroups::find_next_group_and_start_index(
-                &arrays,
-                &current_row_values,
-                current_idx,
-                Some(STEP_SIZE),
             )
             .unwrap();
             assert_eq!(res, Some((next_row_values, *next_idx)));
@@ -360,15 +335,6 @@ mod tests {
             &arrays,
             &current_row_values,
             current_idx,
-            None,
-        )
-        .unwrap();
-        assert_eq!(res, None);
-        let res = WindowFrameGroups::find_next_group_and_start_index(
-            &arrays,
-            &current_row_values,
-            current_idx,
-            Some(STEP_SIZE),
         )
         .unwrap();
         assert_eq!(res, None);

From 81caf4be6a90b11d820cf0306400e4990c578304 Mon Sep 17 00:00:00 2001
From: Zeynep Arikoglu <zeynep.arikoglu@gmail.com>
Date: Tue, 8 Nov 2022 14:27:20 +0300
Subject: [PATCH 4/7] Removed the TODO after verification of correct
 implementation

---
 datafusion/physical-expr/src/window/groups.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/datafusion/physical-expr/src/window/groups.rs b/datafusion/physical-expr/src/window/groups.rs
index 90920513975b..8e0a28f1df64 100644
--- a/datafusion/physical-expr/src/window/groups.rs
+++ b/datafusion/physical-expr/src/window/groups.rs
@@ -75,7 +75,6 @@ impl WindowFrameGroups {
             return Ok(());
         }
         if self.group_start_indices.is_empty() {
-            // TODO: Do we need to check for self.window_frame_end_idx <= current_window_frame_end_idx?
             self.initialize_window_frame_start(item_columns)?;
         }
         while !self.reached_end

From 0d809f6a2220ae5a4d1daea48c5d4f96c8af57e7 Mon Sep 17 00:00:00 2001
From: Zeynep Arikoglu <zeynep.arikoglu@gmail.com>
Date: Wed, 9 Nov 2022 15:57:01 +0300
Subject: [PATCH 5/7] Window frame state capturing the state of the window
 frame calculations

---
 .../physical-expr/src/window/aggregate.rs     |    9 +-
 .../physical-expr/src/window/built_in.rs      |    8 +-
 datafusion/physical-expr/src/window/groups.rs |  660 -----------
 datafusion/physical-expr/src/window/mod.rs    |    2 +-
 .../physical-expr/src/window/window_expr.rs   |  322 +----
 .../src/window/window_frame_state.rs          | 1033 +++++++++++++++++
 6 files changed, 1043 insertions(+), 991 deletions(-)
 delete mode 100644 datafusion/physical-expr/src/window/groups.rs
 create mode 100644 datafusion/physical-expr/src/window/window_frame_state.rs

diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs
index 1c41f2bb2ed7..0fc13c1f4683 100644
--- a/datafusion/physical-expr/src/window/aggregate.rs
+++ b/datafusion/physical-expr/src/window/aggregate.rs
@@ -31,7 +31,9 @@ use datafusion_common::ScalarValue;
 use datafusion_expr::WindowFrame;
 
 use crate::{expressions::PhysicalSortExpr, PhysicalExpr};
-use crate::{window::groups::WindowFrameGroups, window::WindowExpr, AggregateExpr};
+use crate::{window::WindowExpr, AggregateExpr};
+
+use super::window_frame_state::{WindowFrameState, WindowFrameStateImpl};
 
 /// A window expr that takes the form of an aggregate function
 #[derive(Debug)]
@@ -114,15 +116,14 @@ impl WindowExpr for AggregateWindowExpr {
                 .map(|v| v.slice(partition_range.start, length))
                 .collect::<Vec<_>>();
 
-            let mut window_frame_groups = WindowFrameGroups::default();
+            let mut window_frame_state = WindowFrameState::new(&window_frame);
             let mut last_range: (usize, usize) = (0, 0);
 
             // We iterate on each row to perform a running calculation.
             // First, cur_range is calculated, then it is compared with last_range.
             for i in 0..length {
-                let cur_range = self.calculate_range(
+                let cur_range = window_frame_state.calculate_range(
                     &window_frame,
-                    &mut window_frame_groups,
                     &slice_order_bys,
                     &sort_options,
                     length,
diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs
index 698bffdf92bd..04cbf4f523ad 100644
--- a/datafusion/physical-expr/src/window/built_in.rs
+++ b/datafusion/physical-expr/src/window/built_in.rs
@@ -17,6 +17,7 @@
 
 //! Physical exec for built-in window function expressions.
 
+use super::window_frame_state::{WindowFrameState, WindowFrameStateImpl};
 use super::BuiltInWindowFunctionExpr;
 use super::WindowExpr;
 use crate::{expressions::PhysicalSortExpr, PhysicalExpr};
@@ -31,8 +32,6 @@ use std::any::Any;
 use std::ops::Range;
 use std::sync::Arc;
 
-use crate::window::groups::WindowFrameGroups;
-
 /// A window expr that takes the form of a built in window function
 #[derive(Debug)]
 pub struct BuiltInWindowExpr {
@@ -115,12 +114,11 @@ impl WindowExpr for BuiltInWindowExpr {
                     .iter()
                     .map(|v| v.slice(partition_range.start, length))
                     .collect::<Vec<_>>();
-                let mut window_frame_groups = WindowFrameGroups::default();
+                let mut window_frame_state = WindowFrameState::new(&window_frame);
                 // We iterate on each row to calculate window frame range and and window function result
                 for idx in 0..length {
-                    let range = self.calculate_range(
+                    let range = window_frame_state.calculate_range(
                         &window_frame,
-                        &mut window_frame_groups,
                         &slice_order_bys,
                         &sort_options,
                         num_rows,
diff --git a/datafusion/physical-expr/src/window/groups.rs b/datafusion/physical-expr/src/window/groups.rs
deleted file mode 100644
index 8e0a28f1df64..000000000000
--- a/datafusion/physical-expr/src/window/groups.rs
+++ /dev/null
@@ -1,660 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! This module provides utilities for GROUPS mode window frame index calculations.
-// In GROUPS mode, rows with duplicate sorting values are grouped together.
-// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode.
-// The syntax is as follows:
-//     GROUPS frame_start [ frame_exclusion ]
-//     GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ]
-// The optional frame_exclusion specifier is not yet supported.
-// The frame_start and frame_end parameters allow us to specify which rows the window
-// frame starts and ends with. They accept the following values:
-//    - UNBOUNDED PRECEDING: Start with the first row of the partition. Possible only in frame_start.
-//    - offset PRECEDING: When used in frame_start, it refers to the first row of the group
-//                        that comes "offset" groups before the current group (i.e. the group
-//                        containing the current row). When used in frame_end, it refers to the
-//                        last row of the group that comes "offset" groups before the current group.
-//    - CURRENT ROW: When used in frame_start, it refers to the first row of the group containing
-//                   the current row. When used in frame_end, it refers to the last row of the group
-//                   containing the current row.
-//    - offset FOLLOWING: When used in frame_start, it refers to the first row of the group
-//                        that comes "offset" groups after the current group (i.e. the group
-//                        containing the current row). When used in frame_end, it refers to the
-//                        last row of the group that comes "offset" groups after the current group.
-//    - UNBOUNDED FOLLOWING: End with the last row of the partition. Possible only in frame_end.
-
-use arrow::array::ArrayRef;
-use std::cmp;
-use std::collections::VecDeque;
-
-use datafusion_common::bisect::find_bisect_point;
-use datafusion_common::{DataFusionError, Result, ScalarValue};
-
-/// This structure encapsulates all the state information we require as we
-/// scan groups of data while processing window frames.
-#[derive(Debug, Default)]
-pub struct WindowFrameGroups {
-    current_group_idx: u64,
-    group_start_indices: VecDeque<(Vec<ScalarValue>, usize)>,
-    previous_row_values: Option<Vec<ScalarValue>>,
-    reached_end: bool,
-    window_frame_end_idx: u64,
-    window_frame_start_idx: u64,
-}
-
-impl WindowFrameGroups {
-    fn extend_window_frame_if_necessary<const SEARCH_SIDE: bool>(
-        &mut self,
-        item_columns: &[ArrayRef],
-        delta: u64,
-    ) -> Result<()> {
-        let current_window_frame_end_idx = if !SEARCH_SIDE {
-            self.current_group_idx + delta + 1
-        } else if self.current_group_idx >= delta {
-            self.current_group_idx - delta + 1
-        } else {
-            0
-        };
-        if current_window_frame_end_idx == 0 {
-            // the end index of the window frame is still before the first index
-            return Ok(());
-        }
-        if self.group_start_indices.is_empty() {
-            self.initialize_window_frame_start(item_columns)?;
-        }
-        while !self.reached_end
-            && self.window_frame_end_idx <= current_window_frame_end_idx
-        {
-            self.advance_one_group::<SEARCH_SIDE>(item_columns)?;
-        }
-        Ok(())
-    }
-
-    fn initialize<const SEARCH_SIDE: bool>(
-        &mut self,
-        delta: u64,
-        item_columns: &[ArrayRef],
-    ) -> Result<()> {
-        if !SEARCH_SIDE {
-            self.window_frame_start_idx = self.current_group_idx + delta;
-            self.initialize_window_frame_start(item_columns)
-        } else if self.current_group_idx >= delta {
-            self.window_frame_start_idx = self.current_group_idx - delta;
-            self.initialize_window_frame_start(item_columns)
-        } else {
-            Ok(())
-        }
-    }
-
-    fn initialize_window_frame_start(&mut self, item_columns: &[ArrayRef]) -> Result<()> {
-        let mut group_values = item_columns
-            .iter()
-            .map(|col| ScalarValue::try_from_array(col, 0))
-            .collect::<Result<Vec<ScalarValue>>>()?;
-        let mut start_idx: usize = 0;
-        for _ in 0..self.window_frame_start_idx {
-            let next_group_and_start_index =
-                WindowFrameGroups::find_next_group_and_start_index(
-                    item_columns,
-                    &group_values,
-                    start_idx,
-                )?;
-            if let Some(entry) = next_group_and_start_index {
-                (group_values, start_idx) = entry;
-            } else {
-                // not enough groups to generate a window frame
-                self.window_frame_end_idx = self.window_frame_start_idx;
-                self.reached_end = true;
-                return Ok(());
-            }
-        }
-        self.group_start_indices
-            .push_back((group_values, start_idx));
-        self.window_frame_end_idx = self.window_frame_start_idx + 1;
-        Ok(())
-    }
-
-    fn initialized(&self) -> bool {
-        self.reached_end || !self.group_start_indices.is_empty()
-    }
-
-    /// This function advances the window frame by one group.
-    fn advance_one_group<const SEARCH_SIDE: bool>(
-        &mut self,
-        item_columns: &[ArrayRef],
-    ) -> Result<()> {
-        let last_group_values = self.group_start_indices.back();
-        let last_group_values = if let Some(values) = last_group_values {
-            values
-        } else {
-            return Ok(());
-        };
-        let next_group_and_start_index =
-            WindowFrameGroups::find_next_group_and_start_index(
-                item_columns,
-                &last_group_values.0,
-                last_group_values.1,
-            )?;
-        if let Some(entry) = next_group_and_start_index {
-            self.group_start_indices.push_back(entry);
-            self.window_frame_end_idx += 1;
-        } else {
-            // not enough groups to proceed
-            self.reached_end = true;
-        }
-        Ok(())
-    }
-
-    /// This function drops the oldest group from the window frame.
-    fn shift_one_group<const SEARCH_SIDE: bool>(&mut self, delta: u64) {
-        let current_window_frame_start_idx = if !SEARCH_SIDE {
-            self.current_group_idx + delta
-        } else if self.current_group_idx >= delta {
-            self.current_group_idx - delta
-        } else {
-            0
-        };
-        if current_window_frame_start_idx > self.window_frame_start_idx {
-            self.group_start_indices.pop_front();
-            self.window_frame_start_idx += 1;
-        }
-    }
-
-    /// This function is the main public interface of WindowFrameGroups. It is meant to be
-    /// called twice, in succession, to get window frame start and end indices (with `BISECT_SIDE`
-    /// supplied as false and true, respectively).
-    pub fn process<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
-        &mut self,
-        current_row_values: &[ScalarValue],
-        delta: u64,
-        item_columns: &[ArrayRef],
-        length: usize,
-    ) -> Result<usize> {
-        if BISECT_SIDE {
-            // When we call this function to get the window frame start index, it tries to initialize
-            // the internal grouping state if this is not already done before. This initialization takes
-            // place only when the window frame start index is greater than or equal to zero. In this
-            // case, the current row is stored in group_start_indices, with row values as the group
-            // identifier and row index as the start index of the group.
-            if !self.initialized() {
-                self.initialize::<SEARCH_SIDE>(delta, item_columns)?;
-            }
-        } else if !self.reached_end {
-            // When we call this function to get the window frame end index, it extends the window
-            // frame one by one until the current row's window frame end index is reached by finding
-            // the next group.
-            self.extend_window_frame_if_necessary::<SEARCH_SIDE>(item_columns, delta)?;
-        }
-        // We keep track of previous row values, so that a group change can be identified.
-        // If there is a group change, the window frame is advanced and shifted by one group.
-        let group_change = match &self.previous_row_values {
-            None => false,
-            Some(values) => current_row_values != values,
-        };
-        if self.previous_row_values.is_none() || group_change {
-            self.previous_row_values = Some(current_row_values.to_vec());
-        }
-        if group_change {
-            self.current_group_idx += 1;
-            self.advance_one_group::<SEARCH_SIDE>(item_columns)?;
-            self.shift_one_group::<SEARCH_SIDE>(delta);
-        }
-        Ok(if self.group_start_indices.is_empty() {
-            if self.reached_end {
-                length
-            } else {
-                0
-            }
-        } else if BISECT_SIDE {
-            match self.group_start_indices.get(0) {
-                Some(&(_, idx)) => idx,
-                None => 0,
-            }
-        } else {
-            match (self.reached_end, self.group_start_indices.back()) {
-                (false, Some(&(_, idx))) => idx,
-                _ => length,
-            }
-        })
-    }
-
-    /// This function finds the next group and its start index for a given group and start index.
-    /// It utilizes an exponentially growing step size to find the group boundary.
-    // TODO: For small group sizes, proceeding one-by-one to find the group change can be more efficient.
-    // Statistics about previous group sizes can be used to choose one-by-one vs. exponentially growing,
-    // or even to set the base step_size when exponentially growing. We can also create a benchmark
-    // implementation to get insights about the crossover point.
-    fn find_next_group_and_start_index(
-        item_columns: &[ArrayRef],
-        current_row_values: &[ScalarValue],
-        idx: usize,
-    ) -> Result<Option<(Vec<ScalarValue>, usize)>> {
-        let mut step_size: usize = 1;
-        let data_size: usize = item_columns
-            .get(0)
-            .ok_or_else(|| {
-                DataFusionError::Internal("Column array shouldn't be empty".to_string())
-            })?
-            .len();
-        let mut low = idx;
-        let mut high = idx + step_size;
-        while high < data_size {
-            let val = item_columns
-                .iter()
-                .map(|arr| ScalarValue::try_from_array(arr, high))
-                .collect::<Result<Vec<ScalarValue>>>()?;
-            if val == current_row_values {
-                low = high;
-                step_size *= 2;
-                high += step_size;
-            } else {
-                break;
-            }
-        }
-        low = find_bisect_point(
-            item_columns,
-            current_row_values,
-            |current, to_compare| Ok(current == to_compare),
-            low,
-            cmp::min(high, data_size),
-        )?;
-        if low == data_size {
-            return Ok(None);
-        }
-        let val = item_columns
-            .iter()
-            .map(|arr| ScalarValue::try_from_array(arr, low))
-            .collect::<Result<Vec<ScalarValue>>>()?;
-        Ok(Some((val, low)))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use arrow::array::Float64Array;
-    use datafusion_common::ScalarValue;
-    use std::sync::Arc;
-
-    use crate::from_slice::FromSlice;
-
-    use super::*;
-
-    #[test]
-    fn test_find_next_group_and_start_index() {
-        const NUM_ROWS: usize = 6;
-        const NEXT_INDICES: [usize; 5] = [1, 2, 4, 4, 5];
-
-        let arrays: Vec<ArrayRef> = vec![
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 9., 10.])),
-            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 3., 4.0, 5.0])),
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 10., 11.0])),
-            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 8., 5., 0.0])),
-        ];
-        for (current_idx, next_idx) in NEXT_INDICES.iter().enumerate() {
-            let current_row_values = arrays
-                .iter()
-                .map(|col| ScalarValue::try_from_array(col, current_idx))
-                .collect::<Result<Vec<ScalarValue>>>()
-                .unwrap();
-            let next_row_values = arrays
-                .iter()
-                .map(|col| ScalarValue::try_from_array(col, *next_idx))
-                .collect::<Result<Vec<ScalarValue>>>()
-                .unwrap();
-            let res = WindowFrameGroups::find_next_group_and_start_index(
-                &arrays,
-                &current_row_values,
-                current_idx,
-            )
-            .unwrap();
-            assert_eq!(res, Some((next_row_values, *next_idx)));
-        }
-        let current_idx = NUM_ROWS - 1;
-        let current_row_values = arrays
-            .iter()
-            .map(|col| ScalarValue::try_from_array(col, current_idx))
-            .collect::<Result<Vec<ScalarValue>>>()
-            .unwrap();
-        let res = WindowFrameGroups::find_next_group_and_start_index(
-            &arrays,
-            &current_row_values,
-            current_idx,
-        )
-        .unwrap();
-        assert_eq!(res, None);
-    }
-
-    #[test]
-    fn test_window_frame_groups_preceding_huge_delta() {
-        const START: bool = true;
-        const END: bool = false;
-        const PRECEDING: bool = true;
-        const DELTA: u64 = 10;
-        const NUM_ROWS: usize = 5;
-
-        let arrays: Vec<ArrayRef> = vec![
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
-            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
-            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
-        ];
-
-        let mut window_frame_groups = WindowFrameGroups::default();
-        window_frame_groups
-            .initialize::<PRECEDING>(DELTA, &arrays)
-            .unwrap();
-        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
-        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
-        assert!(!window_frame_groups.reached_end);
-        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
-
-        window_frame_groups
-            .extend_window_frame_if_necessary::<PRECEDING>(&arrays, DELTA)
-            .unwrap();
-        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
-        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
-        assert!(!window_frame_groups.reached_end);
-        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
-
-        for idx in 0..NUM_ROWS {
-            let current_row_values = arrays
-                .iter()
-                .map(|col| ScalarValue::try_from_array(col, idx))
-                .collect::<Result<Vec<ScalarValue>>>()
-                .unwrap();
-            let start = window_frame_groups
-                .process::<START, PRECEDING>(
-                    &current_row_values,
-                    DELTA,
-                    &arrays,
-                    NUM_ROWS,
-                )
-                .unwrap();
-            assert_eq!(start, 0);
-            let end = window_frame_groups
-                .process::<END, PRECEDING>(&current_row_values, DELTA, &arrays, NUM_ROWS)
-                .unwrap();
-            assert_eq!(end, 0);
-        }
-    }
-
-    #[test]
-    fn test_window_frame_groups_following_huge_delta() {
-        const START: bool = true;
-        const END: bool = false;
-        const FOLLOWING: bool = false;
-        const DELTA: u64 = 10;
-        const NUM_ROWS: usize = 5;
-
-        let arrays: Vec<ArrayRef> = vec![
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
-            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
-            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
-        ];
-
-        let mut window_frame_groups = WindowFrameGroups::default();
-        window_frame_groups
-            .initialize::<FOLLOWING>(DELTA, &arrays)
-            .unwrap();
-        assert_eq!(window_frame_groups.window_frame_start_idx, DELTA);
-        assert_eq!(window_frame_groups.window_frame_end_idx, DELTA);
-        assert!(window_frame_groups.reached_end);
-        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
-
-        window_frame_groups
-            .extend_window_frame_if_necessary::<FOLLOWING>(&arrays, DELTA)
-            .unwrap();
-        assert_eq!(window_frame_groups.window_frame_start_idx, DELTA);
-        assert_eq!(window_frame_groups.window_frame_end_idx, DELTA);
-        assert!(window_frame_groups.reached_end);
-        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
-
-        for idx in 0..NUM_ROWS {
-            let current_row_values = arrays
-                .iter()
-                .map(|col| ScalarValue::try_from_array(col, idx))
-                .collect::<Result<Vec<ScalarValue>>>()
-                .unwrap();
-            let start = window_frame_groups
-                .process::<START, FOLLOWING>(
-                    &current_row_values,
-                    DELTA,
-                    &arrays,
-                    NUM_ROWS,
-                )
-                .unwrap();
-            assert_eq!(start, NUM_ROWS);
-            let end = window_frame_groups
-                .process::<END, FOLLOWING>(&current_row_values, DELTA, &arrays, NUM_ROWS)
-                .unwrap();
-            assert_eq!(end, NUM_ROWS);
-        }
-    }
-
-    #[test]
-    fn test_window_frame_groups_preceding_and_following_huge_delta() {
-        const START: bool = true;
-        const END: bool = false;
-        const FOLLOWING: bool = false;
-        const PRECEDING: bool = true;
-        const DELTA: u64 = 10;
-        const NUM_ROWS: usize = 5;
-
-        let arrays: Vec<ArrayRef> = vec![
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
-            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
-            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
-        ];
-
-        let mut window_frame_groups = WindowFrameGroups::default();
-        window_frame_groups
-            .initialize::<PRECEDING>(DELTA, &arrays)
-            .unwrap();
-        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
-        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
-        assert!(!window_frame_groups.reached_end);
-        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
-
-        window_frame_groups
-            .extend_window_frame_if_necessary::<FOLLOWING>(&arrays, DELTA)
-            .unwrap();
-        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
-        assert_eq!(window_frame_groups.window_frame_end_idx, NUM_ROWS as u64);
-        assert!(window_frame_groups.reached_end);
-        assert_eq!(window_frame_groups.group_start_indices.len(), NUM_ROWS);
-
-        for idx in 0..NUM_ROWS {
-            let current_row_values = arrays
-                .iter()
-                .map(|col| ScalarValue::try_from_array(col, idx))
-                .collect::<Result<Vec<ScalarValue>>>()
-                .unwrap();
-            let start = window_frame_groups
-                .process::<START, PRECEDING>(
-                    &current_row_values,
-                    DELTA,
-                    &arrays,
-                    NUM_ROWS,
-                )
-                .unwrap();
-            assert_eq!(start, 0);
-            let end = window_frame_groups
-                .process::<END, FOLLOWING>(&current_row_values, DELTA, &arrays, NUM_ROWS)
-                .unwrap();
-            assert_eq!(end, NUM_ROWS);
-        }
-    }
-
-    #[test]
-    fn test_window_frame_groups_preceding_and_following_1() {
-        const START: bool = true;
-        const END: bool = false;
-        const FOLLOWING: bool = false;
-        const PRECEDING: bool = true;
-        const DELTA: u64 = 1;
-        const NUM_ROWS: usize = 5;
-
-        let arrays: Vec<ArrayRef> = vec![
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
-            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
-            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
-        ];
-
-        let mut window_frame_groups = WindowFrameGroups::default();
-        window_frame_groups
-            .initialize::<PRECEDING>(DELTA, &arrays)
-            .unwrap();
-        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
-        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
-        assert!(!window_frame_groups.reached_end);
-        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
-
-        window_frame_groups
-            .extend_window_frame_if_necessary::<FOLLOWING>(&arrays, DELTA)
-            .unwrap();
-        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
-        assert_eq!(window_frame_groups.window_frame_end_idx, 2 * DELTA + 1);
-        assert!(!window_frame_groups.reached_end);
-        assert_eq!(
-            window_frame_groups.group_start_indices.len(),
-            2 * DELTA as usize + 1
-        );
-
-        for idx in 0..NUM_ROWS {
-            let current_row_values = arrays
-                .iter()
-                .map(|col| ScalarValue::try_from_array(col, idx))
-                .collect::<Result<Vec<ScalarValue>>>()
-                .unwrap();
-            let start_idx = if idx < DELTA as usize {
-                0
-            } else {
-                idx - DELTA as usize
-            };
-            let start = window_frame_groups
-                .process::<START, PRECEDING>(
-                    &current_row_values,
-                    DELTA,
-                    &arrays,
-                    NUM_ROWS,
-                )
-                .unwrap();
-            assert_eq!(start, start_idx);
-            let end_idx = if idx + 1 + DELTA as usize > NUM_ROWS {
-                NUM_ROWS
-            } else {
-                idx + 1 + DELTA as usize
-            };
-            let end = window_frame_groups
-                .process::<END, FOLLOWING>(&current_row_values, DELTA, &arrays, NUM_ROWS)
-                .unwrap();
-            assert_eq!(end, end_idx);
-        }
-    }
-
-    #[test]
-    fn test_window_frame_groups_preceding_1_and_unbounded_following() {
-        const START: bool = true;
-        const PRECEDING: bool = true;
-        const DELTA: u64 = 1;
-        const NUM_ROWS: usize = 5;
-
-        let arrays: Vec<ArrayRef> = vec![
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
-            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
-            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
-        ];
-
-        let mut window_frame_groups = WindowFrameGroups::default();
-        window_frame_groups
-            .initialize::<PRECEDING>(DELTA, &arrays)
-            .unwrap();
-        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
-        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
-        assert!(!window_frame_groups.reached_end);
-        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
-
-        for idx in 0..NUM_ROWS {
-            let current_row_values = arrays
-                .iter()
-                .map(|col| ScalarValue::try_from_array(col, idx))
-                .collect::<Result<Vec<ScalarValue>>>()
-                .unwrap();
-            let start_idx = if idx < DELTA as usize {
-                0
-            } else {
-                idx - DELTA as usize
-            };
-            let start = window_frame_groups
-                .process::<START, PRECEDING>(
-                    &current_row_values,
-                    DELTA,
-                    &arrays,
-                    NUM_ROWS,
-                )
-                .unwrap();
-            assert_eq!(start, start_idx);
-        }
-    }
-
-    #[test]
-    fn test_window_frame_groups_current_and_unbounded_following() {
-        const START: bool = true;
-        const PRECEDING: bool = true;
-        const DELTA: u64 = 0;
-        const NUM_ROWS: usize = 5;
-
-        let arrays: Vec<ArrayRef> = vec![
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
-            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
-            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
-            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
-        ];
-
-        let mut window_frame_groups = WindowFrameGroups::default();
-        window_frame_groups
-            .initialize::<PRECEDING>(DELTA, &arrays)
-            .unwrap();
-        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
-        assert_eq!(window_frame_groups.window_frame_end_idx, 1);
-        assert!(!window_frame_groups.reached_end);
-        assert_eq!(window_frame_groups.group_start_indices.len(), 1);
-
-        for idx in 0..NUM_ROWS {
-            let current_row_values = arrays
-                .iter()
-                .map(|col| ScalarValue::try_from_array(col, idx))
-                .collect::<Result<Vec<ScalarValue>>>()
-                .unwrap();
-
-            let start = window_frame_groups
-                .process::<START, PRECEDING>(
-                    &current_row_values,
-                    DELTA,
-                    &arrays,
-                    NUM_ROWS,
-                )
-                .unwrap();
-            assert_eq!(start, idx);
-        }
-    }
-}
diff --git a/datafusion/physical-expr/src/window/mod.rs b/datafusion/physical-expr/src/window/mod.rs
index 7de388b621cc..40ed658ee38a 100644
--- a/datafusion/physical-expr/src/window/mod.rs
+++ b/datafusion/physical-expr/src/window/mod.rs
@@ -19,13 +19,13 @@ mod aggregate;
 mod built_in;
 mod built_in_window_function_expr;
 pub(crate) mod cume_dist;
-mod groups;
 pub(crate) mod lead_lag;
 pub(crate) mod nth_value;
 pub(crate) mod partition_evaluator;
 pub(crate) mod rank;
 pub(crate) mod row_number;
 mod window_expr;
+mod window_frame_state;
 
 pub use aggregate::AggregateWindowExpr;
 pub use built_in::BuiltInWindowExpr;
diff --git a/datafusion/physical-expr/src/window/window_expr.rs b/datafusion/physical-expr/src/window/window_expr.rs
index 45352c0409d9..67caba51dcab 100644
--- a/datafusion/physical-expr/src/window/window_expr.rs
+++ b/datafusion/physical-expr/src/window/window_expr.rs
@@ -20,19 +20,12 @@ use arrow::compute::kernels::partition::lexicographical_partition_ranges;
 use arrow::compute::kernels::sort::{SortColumn, SortOptions};
 use arrow::record_batch::RecordBatch;
 use arrow::{array::ArrayRef, datatypes::Field};
-use datafusion_common::bisect::bisect;
-use datafusion_common::{DataFusionError, Result, ScalarValue};
+use datafusion_common::{DataFusionError, Result};
 use std::any::Any;
-use std::cmp::min;
 use std::fmt::Debug;
 use std::ops::Range;
 use std::sync::Arc;
 
-use datafusion_expr::WindowFrameBound;
-use datafusion_expr::{WindowFrame, WindowFrameUnits};
-
-use crate::window::groups::WindowFrameGroups;
-
 /// A window expression that:
 /// * knows its resulting field
 pub trait WindowExpr: Send + Sync + Debug {
@@ -117,317 +110,4 @@ pub trait WindowExpr: Send + Sync + Debug {
         sort_columns.extend(order_by_columns);
         Ok(sort_columns)
     }
-
-    /// We use start and end bounds to calculate current row's starting and ending range.
-    /// This function supports different modes.
-    fn calculate_range(
-        &self,
-        window_frame: &Option<Arc<WindowFrame>>,
-        window_frame_groups: &mut WindowFrameGroups,
-        range_columns: &[ArrayRef],
-        sort_options: &[SortOptions],
-        length: usize,
-        idx: usize,
-    ) -> Result<(usize, usize)> {
-        if let Some(window_frame) = window_frame {
-            match window_frame.units {
-                WindowFrameUnits::Range => {
-                    let start = match &window_frame.start_bound {
-                        WindowFrameBound::Preceding(n) => {
-                            if n.is_null() {
-                                // UNBOUNDED PRECEDING
-                                0
-                            } else {
-                                calculate_index_of_row::<true, true>(
-                                    range_columns,
-                                    sort_options,
-                                    idx,
-                                    Some(n),
-                                )?
-                            }
-                        }
-                        WindowFrameBound::CurrentRow => {
-                            if range_columns.is_empty() {
-                                0
-                            } else {
-                                calculate_index_of_row::<true, true>(
-                                    range_columns,
-                                    sort_options,
-                                    idx,
-                                    None,
-                                )?
-                            }
-                        }
-                        WindowFrameBound::Following(n) => {
-                            calculate_index_of_row::<true, false>(
-                                range_columns,
-                                sort_options,
-                                idx,
-                                Some(n),
-                            )?
-                        }
-                    };
-                    let end = match &window_frame.end_bound {
-                        WindowFrameBound::Preceding(n) => {
-                            calculate_index_of_row::<false, true>(
-                                range_columns,
-                                sort_options,
-                                idx,
-                                Some(n),
-                            )?
-                        }
-                        WindowFrameBound::CurrentRow => {
-                            if range_columns.is_empty() {
-                                length
-                            } else {
-                                calculate_index_of_row::<false, false>(
-                                    range_columns,
-                                    sort_options,
-                                    idx,
-                                    None,
-                                )?
-                            }
-                        }
-                        WindowFrameBound::Following(n) => {
-                            if n.is_null() {
-                                // UNBOUNDED FOLLOWING
-                                length
-                            } else {
-                                calculate_index_of_row::<false, false>(
-                                    range_columns,
-                                    sort_options,
-                                    idx,
-                                    Some(n),
-                                )?
-                            }
-                        }
-                    };
-                    Ok((start, end))
-                }
-                WindowFrameUnits::Rows => {
-                    let start = match window_frame.start_bound {
-                        // UNBOUNDED PRECEDING
-                        WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0,
-                        WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
-                            if idx >= n as usize {
-                                idx - n as usize
-                            } else {
-                                0
-                            }
-                        }
-                        WindowFrameBound::CurrentRow => idx,
-                        // UNBOUNDED FOLLOWING
-                        WindowFrameBound::Following(ScalarValue::UInt64(None)) => {
-                            return Err(DataFusionError::Internal(format!(
-                                "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'",
-                                window_frame
-                            )))
-                        }
-                        WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
-                            min(idx + n as usize, length)
-                        }
-                        // ERRONEOUS FRAMES
-                        WindowFrameBound::Preceding(_)
-                        | WindowFrameBound::Following(_) => {
-                            return Err(DataFusionError::Internal(
-                                "Rows should be Uint".to_string(),
-                            ))
-                        }
-                    };
-                    let end = match window_frame.end_bound {
-                        // UNBOUNDED PRECEDING
-                        WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => {
-                            return Err(DataFusionError::Internal(format!(
-                                "Frame end cannot be UNBOUNDED PRECEDING '{:?}'",
-                                window_frame
-                            )))
-                        }
-                        WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
-                            if idx >= n as usize {
-                                idx - n as usize + 1
-                            } else {
-                                0
-                            }
-                        }
-                        WindowFrameBound::CurrentRow => idx + 1,
-                        // UNBOUNDED FOLLOWING
-                        WindowFrameBound::Following(ScalarValue::UInt64(None)) => length,
-                        WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
-                            min(idx + n as usize + 1, length)
-                        }
-                        // ERRONEOUS FRAMES
-                        WindowFrameBound::Preceding(_)
-                        | WindowFrameBound::Following(_) => {
-                            return Err(DataFusionError::Internal(
-                                "Rows should be Uint".to_string(),
-                            ))
-                        }
-                    };
-                    Ok((start, end))
-                }
-                WindowFrameUnits::Groups => {
-                    if range_columns.is_empty() {
-                        return Err(DataFusionError::Execution(
-                            "GROUPS mode requires an ORDER BY clause".to_string(),
-                        ));
-                    }
-                    let start = match window_frame.start_bound {
-                        // UNBOUNDED PRECEDING
-                        WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0,
-                        WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
-                            calculate_index_of_group::<true, true>(
-                                range_columns,
-                                window_frame_groups,
-                                idx,
-                                n,
-                                length,
-                            )?
-                        }
-                        WindowFrameBound::CurrentRow => {
-                            calculate_index_of_group::<true, true>(
-                                range_columns,
-                                window_frame_groups,
-                                idx,
-                                0,
-                                length,
-                            )?
-                        }
-                        WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
-                            calculate_index_of_group::<true, false>(
-                                range_columns,
-                                window_frame_groups,
-                                idx,
-                                n,
-                                length,
-                            )?
-                        }
-                        // UNBOUNDED FOLLOWING
-                        WindowFrameBound::Following(ScalarValue::UInt64(None)) => {
-                            return Err(DataFusionError::Internal(format!(
-                                "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'",
-                                window_frame
-                            )))
-                        }
-                        // ERRONEOUS FRAMES
-                        WindowFrameBound::Preceding(_)
-                        | WindowFrameBound::Following(_) => {
-                            return Err(DataFusionError::Internal(
-                                "Groups should be Uint".to_string(),
-                            ))
-                        }
-                    };
-                    let end = match window_frame.end_bound {
-                        // UNBOUNDED PRECEDING
-                        WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => {
-                            return Err(DataFusionError::Internal(format!(
-                                "Frame end cannot be UNBOUNDED PRECEDING '{:?}'",
-                                window_frame
-                            )))
-                        }
-                        WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
-                            calculate_index_of_group::<false, true>(
-                                range_columns,
-                                window_frame_groups,
-                                idx,
-                                n,
-                                length,
-                            )?
-                        }
-                        WindowFrameBound::CurrentRow => {
-                            calculate_index_of_group::<false, false>(
-                                range_columns,
-                                window_frame_groups,
-                                idx,
-                                0,
-                                length,
-                            )?
-                        }
-                        WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
-                            calculate_index_of_group::<false, false>(
-                                range_columns,
-                                window_frame_groups,
-                                idx,
-                                n,
-                                length,
-                            )?
-                        }
-                        // UNBOUNDED FOLLOWING
-                        WindowFrameBound::Following(ScalarValue::UInt64(None)) => length,
-                        // ERRONEOUS FRAMES
-                        WindowFrameBound::Preceding(_)
-                        | WindowFrameBound::Following(_) => {
-                            return Err(DataFusionError::Internal(
-                                "Groups should be Uint".to_string(),
-                            ))
-                        }
-                    };
-                    Ok((start, end))
-                }
-            }
-        } else {
-            Ok((0, length))
-        }
-    }
-}
-
-fn calculate_index_of_row<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
-    range_columns: &[ArrayRef],
-    sort_options: &[SortOptions],
-    idx: usize,
-    delta: Option<&ScalarValue>,
-) -> Result<usize> {
-    let current_row_values = range_columns
-        .iter()
-        .map(|col| ScalarValue::try_from_array(col, idx))
-        .collect::<Result<Vec<ScalarValue>>>()?;
-    let end_range = if let Some(delta) = delta {
-        let is_descending: bool = sort_options
-            .first()
-            .ok_or_else(|| DataFusionError::Internal("Array is empty".to_string()))?
-            .descending;
-
-        current_row_values
-            .iter()
-            .map(|value| {
-                if value.is_null() {
-                    return Ok(value.clone());
-                }
-                if SEARCH_SIDE == is_descending {
-                    // TODO: Handle positive overflows
-                    value.add(delta)
-                } else if value.is_unsigned() && value < delta {
-                    // NOTE: This gets a polymorphic zero without having long coercion code for ScalarValue.
-                    //       If we decide to implement a "default" construction mechanism for ScalarValue,
-                    //       change the following statement to use that.
-                    value.sub(value)
-                } else {
-                    // TODO: Handle negative overflows
-                    value.sub(delta)
-                }
-            })
-            .collect::<Result<Vec<ScalarValue>>>()?
-    } else {
-        current_row_values
-    };
-    // `BISECT_SIDE` true means bisect_left, false means bisect_right
-    bisect::<BISECT_SIDE>(range_columns, &end_range, sort_options)
-}
-
-fn calculate_index_of_group<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
-    range_columns: &[ArrayRef],
-    window_frame_groups: &mut WindowFrameGroups,
-    idx: usize,
-    delta: u64,
-    length: usize,
-) -> Result<usize> {
-    let current_row_values = range_columns
-        .iter()
-        .map(|col| ScalarValue::try_from_array(col, idx))
-        .collect::<Result<Vec<ScalarValue>>>()?;
-    window_frame_groups.process::<BISECT_SIDE, SEARCH_SIDE>(
-        &current_row_values,
-        delta,
-        range_columns,
-        length,
-    )
 }
diff --git a/datafusion/physical-expr/src/window/window_frame_state.rs b/datafusion/physical-expr/src/window/window_frame_state.rs
new file mode 100644
index 000000000000..1e3ba4310bc2
--- /dev/null
+++ b/datafusion/physical-expr/src/window/window_frame_state.rs
@@ -0,0 +1,1033 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This module provides utilities for window frame index calculations depending on the window frame mode:
+//! RANGE, ROWS, GROUPS.
+
+use arrow::array::ArrayRef;
+use arrow::compute::kernels::sort::SortOptions;
+use datafusion_common::bisect::{bisect, find_bisect_point};
+use datafusion_common::{DataFusionError, Result, ScalarValue};
+use datafusion_expr::{WindowFrame, WindowFrameBound, WindowFrameUnits};
+use std::cmp::min;
+use std::collections::VecDeque;
+use std::fmt::Debug;
+use std::sync::Arc;
+
+pub trait WindowFrameStateImpl: Debug {
+    /// We use start and end bounds to calculate current row's starting and ending range.
+    fn calculate_range(
+        &mut self,
+        window_frame: &Option<Arc<WindowFrame>>,
+        range_columns: &[ArrayRef],
+        sort_options: &[SortOptions],
+        length: usize,
+        idx: usize,
+    ) -> Result<(usize, usize)>;
+}
+
+#[derive(Debug)]
+pub enum WindowFrameState {
+    Rows(WindowFrameStateRows),
+    Range(WindowFrameStateRange),
+    Groups(WindowFrameStateGroups),
+    NoFrame(WindowFrameStateNoFrame),
+}
+
+impl WindowFrameStateImpl for WindowFrameState {
+    fn calculate_range(
+        &mut self,
+        window_frame: &Option<Arc<WindowFrame>>,
+        range_columns: &[ArrayRef],
+        sort_options: &[SortOptions],
+        length: usize,
+        idx: usize,
+    ) -> Result<(usize, usize)> {
+        match *self {
+            WindowFrameState::Rows(ref mut frame) => frame.calculate_range(
+                window_frame,
+                range_columns,
+                sort_options,
+                length,
+                idx,
+            ),
+            WindowFrameState::Range(ref mut frame) => frame.calculate_range(
+                window_frame,
+                range_columns,
+                sort_options,
+                length,
+                idx,
+            ),
+            WindowFrameState::Groups(ref mut frame) => frame.calculate_range(
+                window_frame,
+                range_columns,
+                sort_options,
+                length,
+                idx,
+            ),
+            WindowFrameState::NoFrame(ref mut frame) => frame.calculate_range(
+                window_frame,
+                range_columns,
+                sort_options,
+                length,
+                idx,
+            ),
+        }
+    }
+}
+
+impl WindowFrameState {
+    pub fn new(window_frame: &Option<Arc<WindowFrame>>) -> WindowFrameState {
+        if let Some(window_frame) = window_frame {
+            match window_frame.units {
+                WindowFrameUnits::Rows => {
+                    WindowFrameState::Rows(WindowFrameStateRows::default())
+                }
+                WindowFrameUnits::Range => {
+                    WindowFrameState::Range(WindowFrameStateRange::default())
+                }
+                WindowFrameUnits::Groups => {
+                    WindowFrameState::Groups(WindowFrameStateGroups::default())
+                }
+            }
+        } else {
+            WindowFrameState::NoFrame(WindowFrameStateNoFrame::default())
+        }
+    }
+}
+
+#[derive(Debug, Default)]
+pub struct WindowFrameStateNoFrame {}
+
+impl WindowFrameStateImpl for WindowFrameStateNoFrame {
+    fn calculate_range(
+        &mut self,
+        _window_frame: &Option<Arc<WindowFrame>>,
+        _range_columns: &[ArrayRef],
+        _sort_options: &[SortOptions],
+        length: usize,
+        _idx: usize,
+    ) -> Result<(usize, usize)> {
+        Ok((0, length))
+    }
+}
+
+#[derive(Debug, Default)]
+pub struct WindowFrameStateRows {}
+
+impl WindowFrameStateImpl for WindowFrameStateRows {
+    fn calculate_range(
+        &mut self,
+        window_frame: &Option<Arc<WindowFrame>>,
+        _range_columns: &[ArrayRef],
+        _sort_options: &[SortOptions],
+        length: usize,
+        idx: usize,
+    ) -> Result<(usize, usize)> {
+        if let Some(window_frame) = window_frame {
+            let start = match window_frame.start_bound {
+                // UNBOUNDED PRECEDING
+                WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0,
+                WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
+                    if idx >= n as usize {
+                        idx - n as usize
+                    } else {
+                        0
+                    }
+                }
+                WindowFrameBound::CurrentRow => idx,
+                // UNBOUNDED FOLLOWING
+                WindowFrameBound::Following(ScalarValue::UInt64(None)) => {
+                    return Err(DataFusionError::Internal(format!(
+                        "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'",
+                        window_frame
+                    )))
+                }
+                WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
+                    min(idx + n as usize, length)
+                }
+                // ERRONEOUS FRAMES
+                WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
+                    return Err(DataFusionError::Internal(
+                        "Rows should be Uint".to_string(),
+                    ))
+                }
+            };
+            let end = match window_frame.end_bound {
+                // UNBOUNDED PRECEDING
+                WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => {
+                    return Err(DataFusionError::Internal(format!(
+                        "Frame end cannot be UNBOUNDED PRECEDING '{:?}'",
+                        window_frame
+                    )))
+                }
+                WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
+                    if idx >= n as usize {
+                        idx - n as usize + 1
+                    } else {
+                        0
+                    }
+                }
+                WindowFrameBound::CurrentRow => idx + 1,
+                // UNBOUNDED FOLLOWING
+                WindowFrameBound::Following(ScalarValue::UInt64(None)) => length,
+                WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
+                    min(idx + n as usize + 1, length)
+                }
+                // ERRONEOUS FRAMES
+                WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
+                    return Err(DataFusionError::Internal(
+                        "Rows should be Uint".to_string(),
+                    ))
+                }
+            };
+            Ok((start, end))
+        } else {
+            Ok((0, length))
+        }
+    }
+}
+
+/// This structure encapsulates all the state information we require as we
+/// scan ranges of data while processing window frames.
+#[derive(Debug, Default)]
+pub struct WindowFrameStateRange {}
+
+impl WindowFrameStateImpl for WindowFrameStateRange {
+    fn calculate_range(
+        &mut self,
+        window_frame: &Option<Arc<WindowFrame>>,
+        range_columns: &[ArrayRef],
+        sort_options: &[SortOptions],
+        length: usize,
+        idx: usize,
+    ) -> Result<(usize, usize)> {
+        if let Some(window_frame) = window_frame {
+            let start = match &window_frame.start_bound {
+                WindowFrameBound::Preceding(n) => {
+                    if n.is_null() {
+                        // UNBOUNDED PRECEDING
+                        0
+                    } else {
+                        self.calculate_index_of_row::<true, true>(
+                            range_columns,
+                            sort_options,
+                            idx,
+                            Some(n),
+                        )?
+                    }
+                }
+                WindowFrameBound::CurrentRow => {
+                    if range_columns.is_empty() {
+                        0
+                    } else {
+                        self.calculate_index_of_row::<true, true>(
+                            range_columns,
+                            sort_options,
+                            idx,
+                            None,
+                        )?
+                    }
+                }
+                WindowFrameBound::Following(n) => self
+                    .calculate_index_of_row::<true, false>(
+                        range_columns,
+                        sort_options,
+                        idx,
+                        Some(n),
+                    )?,
+            };
+            let end = match &window_frame.end_bound {
+                WindowFrameBound::Preceding(n) => self
+                    .calculate_index_of_row::<false, true>(
+                        range_columns,
+                        sort_options,
+                        idx,
+                        Some(n),
+                    )?,
+                WindowFrameBound::CurrentRow => {
+                    if range_columns.is_empty() {
+                        length
+                    } else {
+                        self.calculate_index_of_row::<false, false>(
+                            range_columns,
+                            sort_options,
+                            idx,
+                            None,
+                        )?
+                    }
+                }
+                WindowFrameBound::Following(n) => {
+                    if n.is_null() {
+                        // UNBOUNDED FOLLOWING
+                        length
+                    } else {
+                        self.calculate_index_of_row::<false, false>(
+                            range_columns,
+                            sort_options,
+                            idx,
+                            Some(n),
+                        )?
+                    }
+                }
+            };
+            Ok((start, end))
+        } else {
+            Ok((0, length))
+        }
+    }
+}
+
+impl WindowFrameStateRange {
+    fn calculate_index_of_row<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
+        &mut self,
+        range_columns: &[ArrayRef],
+        sort_options: &[SortOptions],
+        idx: usize,
+        delta: Option<&ScalarValue>,
+    ) -> Result<usize> {
+        let current_row_values = range_columns
+            .iter()
+            .map(|col| ScalarValue::try_from_array(col, idx))
+            .collect::<Result<Vec<ScalarValue>>>()?;
+        let end_range = if let Some(delta) = delta {
+            let is_descending: bool = sort_options
+                .first()
+                .ok_or_else(|| DataFusionError::Internal("Array is empty".to_string()))?
+                .descending;
+
+            current_row_values
+                .iter()
+                .map(|value| {
+                    if value.is_null() {
+                        return Ok(value.clone());
+                    }
+                    if SEARCH_SIDE == is_descending {
+                        // TODO: Handle positive overflows
+                        value.add(delta)
+                    } else if value.is_unsigned() && value < delta {
+                        // NOTE: This gets a polymorphic zero without having long coercion code for ScalarValue.
+                        //       If we decide to implement a "default" construction mechanism for ScalarValue,
+                        //       change the following statement to use that.
+                        value.sub(value)
+                    } else {
+                        // TODO: Handle negative overflows
+                        value.sub(delta)
+                    }
+                })
+                .collect::<Result<Vec<ScalarValue>>>()?
+        } else {
+            current_row_values
+        };
+        // `BISECT_SIDE` true means bisect_left, false means bisect_right
+        bisect::<BISECT_SIDE>(range_columns, &end_range, sort_options)
+    }
+}
+
+/// In GROUPS mode, rows with duplicate sorting values are grouped together.
+/// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode.
+/// The syntax is as follows:
+///     GROUPS frame_start [ frame_exclusion ]
+///     GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ]
+/// The optional frame_exclusion specifier is not yet supported.
+/// The frame_start and frame_end parameters allow us to specify which rows the window
+/// frame starts and ends with. They accept the following values:
+///    - UNBOUNDED PRECEDING: Start with the first row of the partition. Possible only in frame_start.
+///    - offset PRECEDING: When used in frame_start, it refers to the first row of the group
+///                        that comes "offset" groups before the current group (i.e. the group
+///                        containing the current row). When used in frame_end, it refers to the
+///                        last row of the group that comes "offset" groups before the current group.
+///    - CURRENT ROW: When used in frame_start, it refers to the first row of the group containing
+///                   the current row. When used in frame_end, it refers to the last row of the group
+///                   containing the current row.
+///    - offset FOLLOWING: When used in frame_start, it refers to the first row of the group
+///                        that comes "offset" groups after the current group (i.e. the group
+///                        containing the current row). When used in frame_end, it refers to the
+///                        last row of the group that comes "offset" groups after the current group.
+///    - UNBOUNDED FOLLOWING: End with the last row of the partition. Possible only in frame_end.
+
+/// This structure encapsulates all the state information we require as we
+/// scan groups of data while processing window frames.
+
+#[derive(Debug, Default)]
+pub struct WindowFrameStateGroups {
+    current_group_idx: u64,
+    group_start_indices: VecDeque<(Vec<ScalarValue>, usize)>,
+    previous_row_values: Option<Vec<ScalarValue>>,
+    reached_end: bool,
+    window_frame_end_idx: u64,
+    window_frame_start_idx: u64,
+}
+
+impl WindowFrameStateImpl for WindowFrameStateGroups {
+    fn calculate_range(
+        &mut self,
+        window_frame: &Option<Arc<WindowFrame>>,
+        range_columns: &[ArrayRef],
+        _sort_options: &[SortOptions],
+        length: usize,
+        idx: usize,
+    ) -> Result<(usize, usize)> {
+        if range_columns.is_empty() {
+            return Err(DataFusionError::Execution(
+                "GROUPS mode requires an ORDER BY clause".to_string(),
+            ));
+        }
+        if let Some(window_frame) = window_frame {
+            let start = match window_frame.start_bound {
+                // UNBOUNDED PRECEDING
+                WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0,
+                WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self
+                    .calculate_index_of_group::<true, true>(
+                        range_columns,
+                        idx,
+                        n,
+                        length,
+                    )?,
+                WindowFrameBound::CurrentRow => self
+                    .calculate_index_of_group::<true, true>(
+                        range_columns,
+                        idx,
+                        0,
+                        length,
+                    )?,
+                WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self
+                    .calculate_index_of_group::<true, false>(
+                        range_columns,
+                        idx,
+                        n,
+                        length,
+                    )?,
+                // UNBOUNDED FOLLOWING
+                WindowFrameBound::Following(ScalarValue::UInt64(None)) => {
+                    return Err(DataFusionError::Internal(format!(
+                        "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'",
+                        window_frame
+                    )))
+                }
+                // ERRONEOUS FRAMES
+                WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
+                    return Err(DataFusionError::Internal(
+                        "Groups should be Uint".to_string(),
+                    ))
+                }
+            };
+            let end = match window_frame.end_bound {
+                // UNBOUNDED PRECEDING
+                WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => {
+                    return Err(DataFusionError::Internal(format!(
+                        "Frame end cannot be UNBOUNDED PRECEDING '{:?}'",
+                        window_frame
+                    )))
+                }
+                WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self
+                    .calculate_index_of_group::<false, true>(
+                        range_columns,
+                        idx,
+                        n,
+                        length,
+                    )?,
+                WindowFrameBound::CurrentRow => self
+                    .calculate_index_of_group::<false, false>(
+                        range_columns,
+                        idx,
+                        0,
+                        length,
+                    )?,
+                WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self
+                    .calculate_index_of_group::<false, false>(
+                        range_columns,
+                        idx,
+                        n,
+                        length,
+                    )?,
+                // UNBOUNDED FOLLOWING
+                WindowFrameBound::Following(ScalarValue::UInt64(None)) => length,
+                // ERRONEOUS FRAMES
+                WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
+                    return Err(DataFusionError::Internal(
+                        "Groups should be Uint".to_string(),
+                    ))
+                }
+            };
+            Ok((start, end))
+        } else {
+            Ok((0, length))
+        }
+    }
+}
+
+impl WindowFrameStateGroups {
+    /// This function is the main public interface of WindowFrameStateGroups. It is meant to be
+    /// called twice, in succession, to get window frame start and end indices (with `BISECT_SIDE`
+    /// supplied as false and true, respectively).
+    pub fn calculate_index_of_group<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
+        &mut self,
+        range_columns: &[ArrayRef],
+        idx: usize,
+        delta: u64,
+        length: usize,
+    ) -> Result<usize> {
+        let current_row_values = range_columns
+            .iter()
+            .map(|col| ScalarValue::try_from_array(col, idx))
+            .collect::<Result<Vec<ScalarValue>>>()?;
+
+        if BISECT_SIDE {
+            // When we call this function to get the window frame start index, it tries to initialize
+            // the internal grouping state if this is not already done before. This initialization takes
+            // place only when the window frame start index is greater than or equal to zero. In this
+            // case, the current row is stored in group_start_indices, with row values as the group
+            // identifier and row index as the start index of the group.
+            if !self.initialized() {
+                self.initialize::<SEARCH_SIDE>(delta, range_columns)?;
+            }
+        } else if !self.reached_end {
+            // When we call this function to get the window frame end index, it extends the window
+            // frame one by one until the current row's window frame end index is reached by finding
+            // the next group.
+            self.extend_window_frame_if_necessary::<SEARCH_SIDE>(range_columns, delta)?;
+        }
+        // We keep track of previous row values, so that a group change can be identified.
+        // If there is a group change, the window frame is advanced and shifted by one group.
+        let group_change = match &self.previous_row_values {
+            None => false,
+            Some(values) => &current_row_values != values,
+        };
+        if self.previous_row_values.is_none() || group_change {
+            self.previous_row_values = Some(current_row_values);
+        }
+        if group_change {
+            self.current_group_idx += 1;
+            self.advance_one_group::<SEARCH_SIDE>(range_columns)?;
+            self.shift_one_group::<SEARCH_SIDE>(delta);
+        }
+        Ok(if self.group_start_indices.is_empty() {
+            if self.reached_end {
+                length
+            } else {
+                0
+            }
+        } else if BISECT_SIDE {
+            match self.group_start_indices.get(0) {
+                Some(&(_, idx)) => idx,
+                None => 0,
+            }
+        } else {
+            match (self.reached_end, self.group_start_indices.back()) {
+                (false, Some(&(_, idx))) => idx,
+                _ => length,
+            }
+        })
+    }
+
+    fn extend_window_frame_if_necessary<const SEARCH_SIDE: bool>(
+        &mut self,
+        range_columns: &[ArrayRef],
+        delta: u64,
+    ) -> Result<()> {
+        let current_window_frame_end_idx = if !SEARCH_SIDE {
+            self.current_group_idx + delta + 1
+        } else if self.current_group_idx >= delta {
+            self.current_group_idx - delta + 1
+        } else {
+            0
+        };
+        if current_window_frame_end_idx == 0 {
+            // the end index of the window frame is still before the first index
+            return Ok(());
+        }
+        if self.group_start_indices.is_empty() {
+            self.initialize_window_frame_start(range_columns)?;
+        }
+        while !self.reached_end
+            && self.window_frame_end_idx <= current_window_frame_end_idx
+        {
+            self.advance_one_group::<SEARCH_SIDE>(range_columns)?;
+        }
+        Ok(())
+    }
+
+    fn initialize<const SEARCH_SIDE: bool>(
+        &mut self,
+        delta: u64,
+        range_columns: &[ArrayRef],
+    ) -> Result<()> {
+        if !SEARCH_SIDE {
+            self.window_frame_start_idx = self.current_group_idx + delta;
+            self.initialize_window_frame_start(range_columns)
+        } else if self.current_group_idx >= delta {
+            self.window_frame_start_idx = self.current_group_idx - delta;
+            self.initialize_window_frame_start(range_columns)
+        } else {
+            Ok(())
+        }
+    }
+
+    fn initialize_window_frame_start(
+        &mut self,
+        range_columns: &[ArrayRef],
+    ) -> Result<()> {
+        let mut group_values = range_columns
+            .iter()
+            .map(|col| ScalarValue::try_from_array(col, 0))
+            .collect::<Result<Vec<ScalarValue>>>()?;
+        let mut start_idx: usize = 0;
+        for _ in 0..self.window_frame_start_idx {
+            let next_group_and_start_index =
+                WindowFrameStateGroups::find_next_group_and_start_index(
+                    range_columns,
+                    &group_values,
+                    start_idx,
+                )?;
+            if let Some(entry) = next_group_and_start_index {
+                (group_values, start_idx) = entry;
+            } else {
+                // not enough groups to generate a window frame
+                self.window_frame_end_idx = self.window_frame_start_idx;
+                self.reached_end = true;
+                return Ok(());
+            }
+        }
+        self.group_start_indices
+            .push_back((group_values, start_idx));
+        self.window_frame_end_idx = self.window_frame_start_idx + 1;
+        Ok(())
+    }
+
+    fn initialized(&self) -> bool {
+        self.reached_end || !self.group_start_indices.is_empty()
+    }
+
+    /// This function advances the window frame by one group.
+    fn advance_one_group<const SEARCH_SIDE: bool>(
+        &mut self,
+        range_columns: &[ArrayRef],
+    ) -> Result<()> {
+        let last_group_values = self.group_start_indices.back();
+        let last_group_values = if let Some(values) = last_group_values {
+            values
+        } else {
+            return Ok(());
+        };
+        let next_group_and_start_index =
+            WindowFrameStateGroups::find_next_group_and_start_index(
+                range_columns,
+                &last_group_values.0,
+                last_group_values.1,
+            )?;
+        if let Some(entry) = next_group_and_start_index {
+            self.group_start_indices.push_back(entry);
+            self.window_frame_end_idx += 1;
+        } else {
+            // not enough groups to proceed
+            self.reached_end = true;
+        }
+        Ok(())
+    }
+
+    /// This function drops the oldest group from the window frame.
+    fn shift_one_group<const SEARCH_SIDE: bool>(&mut self, delta: u64) {
+        let current_window_frame_start_idx = if !SEARCH_SIDE {
+            self.current_group_idx + delta
+        } else if self.current_group_idx >= delta {
+            self.current_group_idx - delta
+        } else {
+            0
+        };
+        if current_window_frame_start_idx > self.window_frame_start_idx {
+            self.group_start_indices.pop_front();
+            self.window_frame_start_idx += 1;
+        }
+    }
+
+    /// This function finds the next group and its start index for a given group and start index.
+    /// It utilizes an exponentially growing step size to find the group boundary.
+    // TODO: For small group sizes, proceeding one-by-one to find the group change can be more efficient.
+    // Statistics about previous group sizes can be used to choose one-by-one vs. exponentially growing,
+    // or even to set the base step_size when exponentially growing. We can also create a benchmark
+    // implementation to get insights about the crossover point.
+    fn find_next_group_and_start_index(
+        range_columns: &[ArrayRef],
+        current_row_values: &[ScalarValue],
+        idx: usize,
+    ) -> Result<Option<(Vec<ScalarValue>, usize)>> {
+        let mut step_size: usize = 1;
+        let data_size: usize = range_columns
+            .get(0)
+            .ok_or_else(|| {
+                DataFusionError::Internal("Column array shouldn't be empty".to_string())
+            })?
+            .len();
+        let mut low = idx;
+        let mut high = idx + step_size;
+        while high < data_size {
+            let val = range_columns
+                .iter()
+                .map(|arr| ScalarValue::try_from_array(arr, high))
+                .collect::<Result<Vec<ScalarValue>>>()?;
+            if val == current_row_values {
+                low = high;
+                step_size *= 2;
+                high += step_size;
+            } else {
+                break;
+            }
+        }
+        low = find_bisect_point(
+            range_columns,
+            current_row_values,
+            |current, to_compare| Ok(current == to_compare),
+            low,
+            min(high, data_size),
+        )?;
+        if low == data_size {
+            return Ok(None);
+        }
+        let val = range_columns
+            .iter()
+            .map(|arr| ScalarValue::try_from_array(arr, low))
+            .collect::<Result<Vec<ScalarValue>>>()?;
+        Ok(Some((val, low)))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow::array::Float64Array;
+    use datafusion_common::ScalarValue;
+    use std::sync::Arc;
+
+    use crate::from_slice::FromSlice;
+
+    use super::*;
+
+    #[test]
+    fn test_find_next_group_and_start_index() {
+        const NUM_ROWS: usize = 6;
+        const NEXT_INDICES: [usize; 5] = [1, 2, 4, 4, 5];
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 3., 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 8., 5., 0.0])),
+        ];
+        for (current_idx, next_idx) in NEXT_INDICES.iter().enumerate() {
+            let current_row_values = arrays
+                .iter()
+                .map(|col| ScalarValue::try_from_array(col, current_idx))
+                .collect::<Result<Vec<ScalarValue>>>()
+                .unwrap();
+            let next_row_values = arrays
+                .iter()
+                .map(|col| ScalarValue::try_from_array(col, *next_idx))
+                .collect::<Result<Vec<ScalarValue>>>()
+                .unwrap();
+            let res = WindowFrameStateGroups::find_next_group_and_start_index(
+                &arrays,
+                &current_row_values,
+                current_idx,
+            )
+            .unwrap();
+            assert_eq!(res, Some((next_row_values, *next_idx)));
+        }
+        let current_idx = NUM_ROWS - 1;
+        let current_row_values = arrays
+            .iter()
+            .map(|col| ScalarValue::try_from_array(col, current_idx))
+            .collect::<Result<Vec<ScalarValue>>>()
+            .unwrap();
+        let res = WindowFrameStateGroups::find_next_group_and_start_index(
+            &arrays,
+            &current_row_values,
+            current_idx,
+        )
+        .unwrap();
+        assert_eq!(res, None);
+    }
+
+    #[test]
+    fn test_window_frame_groups_preceding_huge_delta() {
+        const START: bool = true;
+        const END: bool = false;
+        const PRECEDING: bool = true;
+        const DELTA: u64 = 10;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameStateGroups::default();
+        window_frame_groups
+            .initialize::<PRECEDING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        window_frame_groups
+            .extend_window_frame_if_necessary::<PRECEDING>(&arrays, DELTA)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        for idx in 0..NUM_ROWS {
+            let start = window_frame_groups
+                .calculate_index_of_group::<START, PRECEDING>(
+                    &arrays, idx, DELTA, NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, 0);
+            let end = window_frame_groups
+                .calculate_index_of_group::<END, PRECEDING>(&arrays, idx, DELTA, NUM_ROWS)
+                .unwrap();
+            assert_eq!(end, 0);
+        }
+    }
+
+    #[test]
+    fn test_window_frame_groups_following_huge_delta() {
+        const START: bool = true;
+        const END: bool = false;
+        const FOLLOWING: bool = false;
+        const DELTA: u64 = 10;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameStateGroups::default();
+        window_frame_groups
+            .initialize::<FOLLOWING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, DELTA);
+        assert_eq!(window_frame_groups.window_frame_end_idx, DELTA);
+        assert!(window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        window_frame_groups
+            .extend_window_frame_if_necessary::<FOLLOWING>(&arrays, DELTA)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, DELTA);
+        assert_eq!(window_frame_groups.window_frame_end_idx, DELTA);
+        assert!(window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        for idx in 0..NUM_ROWS {
+            let start = window_frame_groups
+                .calculate_index_of_group::<START, FOLLOWING>(
+                    &arrays, idx, DELTA, NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, NUM_ROWS);
+            let end = window_frame_groups
+                .calculate_index_of_group::<END, FOLLOWING>(&arrays, idx, DELTA, NUM_ROWS)
+                .unwrap();
+            assert_eq!(end, NUM_ROWS);
+        }
+    }
+
+    #[test]
+    fn test_window_frame_groups_preceding_and_following_huge_delta() {
+        const START: bool = true;
+        const END: bool = false;
+        const FOLLOWING: bool = false;
+        const PRECEDING: bool = true;
+        const DELTA: u64 = 10;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameStateGroups::default();
+        window_frame_groups
+            .initialize::<PRECEDING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        window_frame_groups
+            .extend_window_frame_if_necessary::<FOLLOWING>(&arrays, DELTA)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, NUM_ROWS as u64);
+        assert!(window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), NUM_ROWS);
+
+        for idx in 0..NUM_ROWS {
+            let start = window_frame_groups
+                .calculate_index_of_group::<START, PRECEDING>(
+                    &arrays, idx, DELTA, NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, 0);
+            let end = window_frame_groups
+                .calculate_index_of_group::<END, FOLLOWING>(&arrays, idx, DELTA, NUM_ROWS)
+                .unwrap();
+            assert_eq!(end, NUM_ROWS);
+        }
+    }
+
+    #[test]
+    fn test_window_frame_groups_preceding_and_following_1() {
+        const START: bool = true;
+        const END: bool = false;
+        const FOLLOWING: bool = false;
+        const PRECEDING: bool = true;
+        const DELTA: u64 = 1;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameStateGroups::default();
+        window_frame_groups
+            .initialize::<PRECEDING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        window_frame_groups
+            .extend_window_frame_if_necessary::<FOLLOWING>(&arrays, DELTA)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 2 * DELTA + 1);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(
+            window_frame_groups.group_start_indices.len(),
+            2 * DELTA as usize + 1
+        );
+
+        for idx in 0..NUM_ROWS {
+            let start_idx = if idx < DELTA as usize {
+                0
+            } else {
+                idx - DELTA as usize
+            };
+            let start = window_frame_groups
+                .calculate_index_of_group::<START, PRECEDING>(
+                    &arrays, idx, DELTA, NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, start_idx);
+            let end_idx = if idx + 1 + DELTA as usize > NUM_ROWS {
+                NUM_ROWS
+            } else {
+                idx + 1 + DELTA as usize
+            };
+            let end = window_frame_groups
+                .calculate_index_of_group::<END, FOLLOWING>(&arrays, idx, DELTA, NUM_ROWS)
+                .unwrap();
+            assert_eq!(end, end_idx);
+        }
+    }
+
+    #[test]
+    fn test_window_frame_groups_preceding_1_and_unbounded_following() {
+        const START: bool = true;
+        const PRECEDING: bool = true;
+        const DELTA: u64 = 1;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameStateGroups::default();
+        window_frame_groups
+            .initialize::<PRECEDING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 0);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 0);
+
+        for idx in 0..NUM_ROWS {
+            let start_idx = if idx < DELTA as usize {
+                0
+            } else {
+                idx - DELTA as usize
+            };
+            let start = window_frame_groups
+                .calculate_index_of_group::<START, PRECEDING>(
+                    &arrays, idx, DELTA, NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, start_idx);
+        }
+    }
+
+    #[test]
+    fn test_window_frame_groups_current_and_unbounded_following() {
+        const START: bool = true;
+        const PRECEDING: bool = true;
+        const DELTA: u64 = 0;
+        const NUM_ROWS: usize = 5;
+
+        let arrays: Vec<ArrayRef> = vec![
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])),
+            Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])),
+            Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])),
+            Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])),
+        ];
+
+        let mut window_frame_groups = WindowFrameStateGroups::default();
+        window_frame_groups
+            .initialize::<PRECEDING>(DELTA, &arrays)
+            .unwrap();
+        assert_eq!(window_frame_groups.window_frame_start_idx, 0);
+        assert_eq!(window_frame_groups.window_frame_end_idx, 1);
+        assert!(!window_frame_groups.reached_end);
+        assert_eq!(window_frame_groups.group_start_indices.len(), 1);
+
+        for idx in 0..NUM_ROWS {
+            let start = window_frame_groups
+                .calculate_index_of_group::<START, PRECEDING>(
+                    &arrays, idx, DELTA, NUM_ROWS,
+                )
+                .unwrap();
+            assert_eq!(start, idx);
+        }
+    }
+}

From b757d626c7dcc2bf16d1ec3d5526ebaa93eacd8b Mon Sep 17 00:00:00 2001
From: Mehmet Ozan Kabak <ozankabak@gmail.com>
Date: Wed, 9 Nov 2022 16:39:39 +0300
Subject: [PATCH 6/7] Do not use unnecessary traits and structs for window
 frame states

---
 .../physical-expr/src/window/aggregate.rs     |   2 +-
 .../physical-expr/src/window/built_in.rs      |   2 +-
 .../src/window/window_frame_state.rs          | 124 +++++++-----------
 3 files changed, 49 insertions(+), 79 deletions(-)

diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs
index 0fc13c1f4683..672bd539036d 100644
--- a/datafusion/physical-expr/src/window/aggregate.rs
+++ b/datafusion/physical-expr/src/window/aggregate.rs
@@ -33,7 +33,7 @@ use datafusion_expr::WindowFrame;
 use crate::{expressions::PhysicalSortExpr, PhysicalExpr};
 use crate::{window::WindowExpr, AggregateExpr};
 
-use super::window_frame_state::{WindowFrameState, WindowFrameStateImpl};
+use super::window_frame_state::WindowFrameState;
 
 /// A window expr that takes the form of an aggregate function
 #[derive(Debug)]
diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs
index 04cbf4f523ad..126b77fcf2f6 100644
--- a/datafusion/physical-expr/src/window/built_in.rs
+++ b/datafusion/physical-expr/src/window/built_in.rs
@@ -17,7 +17,7 @@
 
 //! Physical exec for built-in window function expressions.
 
-use super::window_frame_state::{WindowFrameState, WindowFrameStateImpl};
+use super::window_frame_state::WindowFrameState;
 use super::BuiltInWindowFunctionExpr;
 use super::WindowExpr;
 use crate::{expressions::PhysicalSortExpr, PhysicalExpr};
diff --git a/datafusion/physical-expr/src/window/window_frame_state.rs b/datafusion/physical-expr/src/window/window_frame_state.rs
index 1e3ba4310bc2..931ee232aac3 100644
--- a/datafusion/physical-expr/src/window/window_frame_state.rs
+++ b/datafusion/physical-expr/src/window/window_frame_state.rs
@@ -28,28 +28,18 @@ use std::collections::VecDeque;
 use std::fmt::Debug;
 use std::sync::Arc;
 
-pub trait WindowFrameStateImpl: Debug {
-    /// We use start and end bounds to calculate current row's starting and ending range.
-    fn calculate_range(
-        &mut self,
-        window_frame: &Option<Arc<WindowFrame>>,
-        range_columns: &[ArrayRef],
-        sort_options: &[SortOptions],
-        length: usize,
-        idx: usize,
-    ) -> Result<(usize, usize)>;
-}
-
+/// This object stores the window frame state for use in incremental calculations.
 #[derive(Debug)]
 pub enum WindowFrameState {
     Rows(WindowFrameStateRows),
     Range(WindowFrameStateRange),
     Groups(WindowFrameStateGroups),
-    NoFrame(WindowFrameStateNoFrame),
+    Default,
 }
 
-impl WindowFrameStateImpl for WindowFrameState {
-    fn calculate_range(
+impl WindowFrameState {
+    /// This function calculates beginning/ending indices for the frame of the current row.
+    pub fn calculate_range(
         &mut self,
         window_frame: &Option<Arc<WindowFrame>>,
         range_columns: &[ArrayRef],
@@ -79,19 +69,12 @@ impl WindowFrameStateImpl for WindowFrameState {
                 length,
                 idx,
             ),
-            WindowFrameState::NoFrame(ref mut frame) => frame.calculate_range(
-                window_frame,
-                range_columns,
-                sort_options,
-                length,
-                idx,
-            ),
+            WindowFrameState::Default => Ok((0, length)),
         }
     }
-}
 
-impl WindowFrameState {
-    pub fn new(window_frame: &Option<Arc<WindowFrame>>) -> WindowFrameState {
+    /// Create a new default state for the given window frame.
+    pub fn new(window_frame: &Option<Arc<WindowFrame>>) -> Self {
         if let Some(window_frame) = window_frame {
             match window_frame.units {
                 WindowFrameUnits::Rows => {
@@ -105,31 +88,17 @@ impl WindowFrameState {
                 }
             }
         } else {
-            WindowFrameState::NoFrame(WindowFrameStateNoFrame::default())
+            WindowFrameState::Default
         }
     }
 }
 
-#[derive(Debug, Default)]
-pub struct WindowFrameStateNoFrame {}
-
-impl WindowFrameStateImpl for WindowFrameStateNoFrame {
-    fn calculate_range(
-        &mut self,
-        _window_frame: &Option<Arc<WindowFrame>>,
-        _range_columns: &[ArrayRef],
-        _sort_options: &[SortOptions],
-        length: usize,
-        _idx: usize,
-    ) -> Result<(usize, usize)> {
-        Ok((0, length))
-    }
-}
-
+/// This empty struct reflects the stateless nature of row-based window frames.
 #[derive(Debug, Default)]
 pub struct WindowFrameStateRows {}
 
-impl WindowFrameStateImpl for WindowFrameStateRows {
+impl WindowFrameStateRows {
+    /// This function calculates beginning/ending indices for the frame of the current row.
     fn calculate_range(
         &mut self,
         window_frame: &Option<Arc<WindowFrame>>,
@@ -203,11 +172,13 @@ impl WindowFrameStateImpl for WindowFrameStateRows {
 }
 
 /// This structure encapsulates all the state information we require as we
-/// scan ranges of data while processing window frames.
+/// scan ranges of data while processing window frames. Currently we calculate
+/// things from scratch every time, but we will make this incremental in the future.
 #[derive(Debug, Default)]
 pub struct WindowFrameStateRange {}
 
-impl WindowFrameStateImpl for WindowFrameStateRange {
+impl WindowFrameStateRange {
+    /// This function calculates beginning/ending indices for the frame of the current row.
     fn calculate_range(
         &mut self,
         window_frame: &Option<Arc<WindowFrame>>,
@@ -290,9 +261,10 @@ impl WindowFrameStateImpl for WindowFrameStateRange {
             Ok((0, length))
         }
     }
-}
 
-impl WindowFrameStateRange {
+    /// This function does the heavy lifting when finding range boundaries. It is meant to be
+    /// called twice, in succession, to get window frame start and end indices (with `BISECT_SIDE`
+    /// supplied as false and true, respectively).
     fn calculate_index_of_row<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
         &mut self,
         range_columns: &[ArrayRef],
@@ -338,31 +310,30 @@ impl WindowFrameStateRange {
     }
 }
 
-/// In GROUPS mode, rows with duplicate sorting values are grouped together.
-/// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode.
-/// The syntax is as follows:
-///     GROUPS frame_start [ frame_exclusion ]
-///     GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ]
-/// The optional frame_exclusion specifier is not yet supported.
-/// The frame_start and frame_end parameters allow us to specify which rows the window
-/// frame starts and ends with. They accept the following values:
-///    - UNBOUNDED PRECEDING: Start with the first row of the partition. Possible only in frame_start.
-///    - offset PRECEDING: When used in frame_start, it refers to the first row of the group
-///                        that comes "offset" groups before the current group (i.e. the group
-///                        containing the current row). When used in frame_end, it refers to the
-///                        last row of the group that comes "offset" groups before the current group.
-///    - CURRENT ROW: When used in frame_start, it refers to the first row of the group containing
-///                   the current row. When used in frame_end, it refers to the last row of the group
-///                   containing the current row.
-///    - offset FOLLOWING: When used in frame_start, it refers to the first row of the group
-///                        that comes "offset" groups after the current group (i.e. the group
-///                        containing the current row). When used in frame_end, it refers to the
-///                        last row of the group that comes "offset" groups after the current group.
-///    - UNBOUNDED FOLLOWING: End with the last row of the partition. Possible only in frame_end.
-
-/// This structure encapsulates all the state information we require as we
-/// scan groups of data while processing window frames.
-
+// In GROUPS mode, rows with duplicate sorting values are grouped together.
+// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode.
+// The syntax is as follows:
+//     GROUPS frame_start [ frame_exclusion ]
+//     GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ]
+// The optional frame_exclusion specifier is not yet supported.
+// The frame_start and frame_end parameters allow us to specify which rows the window
+// frame starts and ends with. They accept the following values:
+//    - UNBOUNDED PRECEDING: Start with the first row of the partition. Possible only in frame_start.
+//    - offset PRECEDING: When used in frame_start, it refers to the first row of the group
+//                        that comes "offset" groups before the current group (i.e. the group
+//                        containing the current row). When used in frame_end, it refers to the
+//                        last row of the group that comes "offset" groups before the current group.
+//    - CURRENT ROW: When used in frame_start, it refers to the first row of the group containing
+//                   the current row. When used in frame_end, it refers to the last row of the group
+//                   containing the current row.
+//    - offset FOLLOWING: When used in frame_start, it refers to the first row of the group
+//                        that comes "offset" groups after the current group (i.e. the group
+//                        containing the current row). When used in frame_end, it refers to the
+//                        last row of the group that comes "offset" groups after the current group.
+//    - UNBOUNDED FOLLOWING: End with the last row of the partition. Possible only in frame_end.
+
+// This structure encapsulates all the state information we require as we
+// scan groups of data while processing window frames.
 #[derive(Debug, Default)]
 pub struct WindowFrameStateGroups {
     current_group_idx: u64,
@@ -373,7 +344,8 @@ pub struct WindowFrameStateGroups {
     window_frame_start_idx: u64,
 }
 
-impl WindowFrameStateImpl for WindowFrameStateGroups {
+impl WindowFrameStateGroups {
+    /// This function calculates beginning/ending indices for the frame of the current row.
     fn calculate_range(
         &mut self,
         window_frame: &Option<Arc<WindowFrame>>,
@@ -469,13 +441,11 @@ impl WindowFrameStateImpl for WindowFrameStateGroups {
             Ok((0, length))
         }
     }
-}
 
-impl WindowFrameStateGroups {
-    /// This function is the main public interface of WindowFrameStateGroups. It is meant to be
+    /// This function does the heavy lifting when finding group boundaries. It is meant to be
     /// called twice, in succession, to get window frame start and end indices (with `BISECT_SIDE`
     /// supplied as false and true, respectively).
-    pub fn calculate_index_of_group<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
+    fn calculate_index_of_group<const BISECT_SIDE: bool, const SEARCH_SIDE: bool>(
         &mut self,
         range_columns: &[ArrayRef],
         idx: usize,

From 1e952890b33d90b3bfad9ea019b9a6eb5af13750 Mon Sep 17 00:00:00 2001
From: Mehmet Ozan Kabak <ozankabak@gmail.com>
Date: Thu, 10 Nov 2022 10:50:55 +0300
Subject: [PATCH 7/7] Refactor to avoid carrying the window frame object around

---
 .../physical-expr/src/window/aggregate.rs     |   7 +-
 .../physical-expr/src/window/built_in.rs      |   7 +-
 .../src/window/window_frame_state.rs          | 469 ++++++++----------
 3 files changed, 225 insertions(+), 258 deletions(-)

diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs
index 672bd539036d..69b0812c1aa6 100644
--- a/datafusion/physical-expr/src/window/aggregate.rs
+++ b/datafusion/physical-expr/src/window/aggregate.rs
@@ -33,7 +33,7 @@ use datafusion_expr::WindowFrame;
 use crate::{expressions::PhysicalSortExpr, PhysicalExpr};
 use crate::{window::WindowExpr, AggregateExpr};
 
-use super::window_frame_state::WindowFrameState;
+use super::window_frame_state::WindowFrameContext;
 
 /// A window expr that takes the form of an aggregate function
 #[derive(Debug)]
@@ -116,14 +116,13 @@ impl WindowExpr for AggregateWindowExpr {
                 .map(|v| v.slice(partition_range.start, length))
                 .collect::<Vec<_>>();
 
-            let mut window_frame_state = WindowFrameState::new(&window_frame);
+            let mut window_frame_ctx = WindowFrameContext::new(&window_frame);
             let mut last_range: (usize, usize) = (0, 0);
 
             // We iterate on each row to perform a running calculation.
             // First, cur_range is calculated, then it is compared with last_range.
             for i in 0..length {
-                let cur_range = window_frame_state.calculate_range(
-                    &window_frame,
+                let cur_range = window_frame_ctx.calculate_range(
                     &slice_order_bys,
                     &sort_options,
                     length,
diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs
index 126b77fcf2f6..da551d5347d4 100644
--- a/datafusion/physical-expr/src/window/built_in.rs
+++ b/datafusion/physical-expr/src/window/built_in.rs
@@ -17,7 +17,7 @@
 
 //! Physical exec for built-in window function expressions.
 
-use super::window_frame_state::WindowFrameState;
+use super::window_frame_state::WindowFrameContext;
 use super::BuiltInWindowFunctionExpr;
 use super::WindowExpr;
 use crate::{expressions::PhysicalSortExpr, PhysicalExpr};
@@ -114,11 +114,10 @@ impl WindowExpr for BuiltInWindowExpr {
                     .iter()
                     .map(|v| v.slice(partition_range.start, length))
                     .collect::<Vec<_>>();
-                let mut window_frame_state = WindowFrameState::new(&window_frame);
+                let mut window_frame_ctx = WindowFrameContext::new(&window_frame);
                 // We iterate on each row to calculate window frame range and and window function result
                 for idx in 0..length {
-                    let range = window_frame_state.calculate_range(
-                        &window_frame,
+                    let range = window_frame_ctx.calculate_range(
                         &slice_order_bys,
                         &sort_options,
                         num_rows,
diff --git a/datafusion/physical-expr/src/window/window_frame_state.rs b/datafusion/physical-expr/src/window/window_frame_state.rs
index 931ee232aac3..6ccab1d8df48 100644
--- a/datafusion/physical-expr/src/window/window_frame_state.rs
+++ b/datafusion/physical-expr/src/window/window_frame_state.rs
@@ -30,144 +30,137 @@ use std::sync::Arc;
 
 /// This object stores the window frame state for use in incremental calculations.
 #[derive(Debug)]
-pub enum WindowFrameState {
-    Rows(WindowFrameStateRows),
-    Range(WindowFrameStateRange),
-    Groups(WindowFrameStateGroups),
+pub enum WindowFrameContext<'a> {
+    // ROWS-frames are inherently stateless:
+    Rows(&'a Arc<WindowFrame>),
+    // RANGE-frames will soon have a stateful implementation that is more efficient than a stateless one:
+    Range {
+        window_frame: &'a Arc<WindowFrame>,
+        state: WindowFrameStateRange,
+    },
+    // GROUPS-frames have a stateful implementation that is more efficient than a stateless one:
+    Groups {
+        window_frame: &'a Arc<WindowFrame>,
+        state: WindowFrameStateGroups,
+    },
     Default,
 }
 
-impl WindowFrameState {
+impl<'a> WindowFrameContext<'a> {
+    /// Create a new default state for the given window frame.
+    pub fn new(window_frame: &'a Option<Arc<WindowFrame>>) -> Self {
+        if let Some(window_frame) = window_frame {
+            match window_frame.units {
+                WindowFrameUnits::Rows => WindowFrameContext::Rows(window_frame),
+                WindowFrameUnits::Range => WindowFrameContext::Range {
+                    window_frame,
+                    state: WindowFrameStateRange::default(),
+                },
+                WindowFrameUnits::Groups => WindowFrameContext::Groups {
+                    window_frame,
+                    state: WindowFrameStateGroups::default(),
+                },
+            }
+        } else {
+            WindowFrameContext::Default
+        }
+    }
+
     /// This function calculates beginning/ending indices for the frame of the current row.
     pub fn calculate_range(
         &mut self,
-        window_frame: &Option<Arc<WindowFrame>>,
         range_columns: &[ArrayRef],
         sort_options: &[SortOptions],
         length: usize,
         idx: usize,
     ) -> Result<(usize, usize)> {
         match *self {
-            WindowFrameState::Rows(ref mut frame) => frame.calculate_range(
+            WindowFrameContext::Rows(window_frame) => {
+                Self::calculate_range_rows(window_frame, length, idx)
+            }
+            WindowFrameContext::Range {
                 window_frame,
-                range_columns,
-                sort_options,
-                length,
-                idx,
-            ),
-            WindowFrameState::Range(ref mut frame) => frame.calculate_range(
+                ref mut state,
+            } => state.calculate_range(
                 window_frame,
                 range_columns,
                 sort_options,
                 length,
                 idx,
             ),
-            WindowFrameState::Groups(ref mut frame) => frame.calculate_range(
+            WindowFrameContext::Groups {
+                window_frame,
+                ref mut state,
+            } => state.calculate_range(
                 window_frame,
                 range_columns,
                 sort_options,
                 length,
                 idx,
             ),
-            WindowFrameState::Default => Ok((0, length)),
+            WindowFrameContext::Default => Ok((0, length)),
         }
     }
 
-    /// Create a new default state for the given window frame.
-    pub fn new(window_frame: &Option<Arc<WindowFrame>>) -> Self {
-        if let Some(window_frame) = window_frame {
-            match window_frame.units {
-                WindowFrameUnits::Rows => {
-                    WindowFrameState::Rows(WindowFrameStateRows::default())
-                }
-                WindowFrameUnits::Range => {
-                    WindowFrameState::Range(WindowFrameStateRange::default())
-                }
-                WindowFrameUnits::Groups => {
-                    WindowFrameState::Groups(WindowFrameStateGroups::default())
-                }
-            }
-        } else {
-            WindowFrameState::Default
-        }
-    }
-}
-
-/// This empty struct reflects the stateless nature of row-based window frames.
-#[derive(Debug, Default)]
-pub struct WindowFrameStateRows {}
-
-impl WindowFrameStateRows {
     /// This function calculates beginning/ending indices for the frame of the current row.
-    fn calculate_range(
-        &mut self,
-        window_frame: &Option<Arc<WindowFrame>>,
-        _range_columns: &[ArrayRef],
-        _sort_options: &[SortOptions],
+    fn calculate_range_rows(
+        window_frame: &Arc<WindowFrame>,
         length: usize,
         idx: usize,
     ) -> Result<(usize, usize)> {
-        if let Some(window_frame) = window_frame {
-            let start = match window_frame.start_bound {
-                // UNBOUNDED PRECEDING
-                WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0,
-                WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
-                    if idx >= n as usize {
-                        idx - n as usize
-                    } else {
-                        0
-                    }
-                }
-                WindowFrameBound::CurrentRow => idx,
-                // UNBOUNDED FOLLOWING
-                WindowFrameBound::Following(ScalarValue::UInt64(None)) => {
-                    return Err(DataFusionError::Internal(format!(
-                        "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'",
-                        window_frame
-                    )))
-                }
-                WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
-                    min(idx + n as usize, length)
-                }
-                // ERRONEOUS FRAMES
-                WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
-                    return Err(DataFusionError::Internal(
-                        "Rows should be Uint".to_string(),
-                    ))
+        let start = match window_frame.start_bound {
+            // UNBOUNDED PRECEDING
+            WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0,
+            WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
+                if idx >= n as usize {
+                    idx - n as usize
+                } else {
+                    0
                 }
-            };
-            let end = match window_frame.end_bound {
-                // UNBOUNDED PRECEDING
-                WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => {
-                    return Err(DataFusionError::Internal(format!(
-                        "Frame end cannot be UNBOUNDED PRECEDING '{:?}'",
-                        window_frame
-                    )))
-                }
-                WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
-                    if idx >= n as usize {
-                        idx - n as usize + 1
-                    } else {
-                        0
-                    }
-                }
-                WindowFrameBound::CurrentRow => idx + 1,
-                // UNBOUNDED FOLLOWING
-                WindowFrameBound::Following(ScalarValue::UInt64(None)) => length,
-                WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
-                    min(idx + n as usize + 1, length)
-                }
-                // ERRONEOUS FRAMES
-                WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
-                    return Err(DataFusionError::Internal(
-                        "Rows should be Uint".to_string(),
-                    ))
+            }
+            WindowFrameBound::CurrentRow => idx,
+            // UNBOUNDED FOLLOWING
+            WindowFrameBound::Following(ScalarValue::UInt64(None)) => {
+                return Err(DataFusionError::Internal(format!(
+                    "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'",
+                    window_frame
+                )))
+            }
+            WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
+                min(idx + n as usize, length)
+            }
+            // ERRONEOUS FRAMES
+            WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
+                return Err(DataFusionError::Internal("Rows should be Uint".to_string()))
+            }
+        };
+        let end = match window_frame.end_bound {
+            // UNBOUNDED PRECEDING
+            WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => {
+                return Err(DataFusionError::Internal(format!(
+                    "Frame end cannot be UNBOUNDED PRECEDING '{:?}'",
+                    window_frame
+                )))
+            }
+            WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
+                if idx >= n as usize {
+                    idx - n as usize + 1
+                } else {
+                    0
                 }
-            };
-            Ok((start, end))
-        } else {
-            Ok((0, length))
-        }
+            }
+            WindowFrameBound::CurrentRow => idx + 1,
+            // UNBOUNDED FOLLOWING
+            WindowFrameBound::Following(ScalarValue::UInt64(None)) => length,
+            WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
+                min(idx + n as usize + 1, length)
+            }
+            // ERRONEOUS FRAMES
+            WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
+                return Err(DataFusionError::Internal("Rows should be Uint".to_string()))
+            }
+        };
+        Ok((start, end))
     }
 }
 
@@ -181,85 +174,81 @@ impl WindowFrameStateRange {
     /// This function calculates beginning/ending indices for the frame of the current row.
     fn calculate_range(
         &mut self,
-        window_frame: &Option<Arc<WindowFrame>>,
+        window_frame: &Arc<WindowFrame>,
         range_columns: &[ArrayRef],
         sort_options: &[SortOptions],
         length: usize,
         idx: usize,
     ) -> Result<(usize, usize)> {
-        if let Some(window_frame) = window_frame {
-            let start = match &window_frame.start_bound {
-                WindowFrameBound::Preceding(n) => {
-                    if n.is_null() {
-                        // UNBOUNDED PRECEDING
-                        0
-                    } else {
-                        self.calculate_index_of_row::<true, true>(
-                            range_columns,
-                            sort_options,
-                            idx,
-                            Some(n),
-                        )?
-                    }
+        let start = match window_frame.start_bound {
+            WindowFrameBound::Preceding(ref n) => {
+                if n.is_null() {
+                    // UNBOUNDED PRECEDING
+                    0
+                } else {
+                    self.calculate_index_of_row::<true, true>(
+                        range_columns,
+                        sort_options,
+                        idx,
+                        Some(n),
+                    )?
                 }
-                WindowFrameBound::CurrentRow => {
-                    if range_columns.is_empty() {
-                        0
-                    } else {
-                        self.calculate_index_of_row::<true, true>(
-                            range_columns,
-                            sort_options,
-                            idx,
-                            None,
-                        )?
-                    }
+            }
+            WindowFrameBound::CurrentRow => {
+                if range_columns.is_empty() {
+                    0
+                } else {
+                    self.calculate_index_of_row::<true, true>(
+                        range_columns,
+                        sort_options,
+                        idx,
+                        None,
+                    )?
                 }
-                WindowFrameBound::Following(n) => self
-                    .calculate_index_of_row::<true, false>(
+            }
+            WindowFrameBound::Following(ref n) => self
+                .calculate_index_of_row::<true, false>(
+                    range_columns,
+                    sort_options,
+                    idx,
+                    Some(n),
+                )?,
+        };
+        let end = match window_frame.end_bound {
+            WindowFrameBound::Preceding(ref n) => self
+                .calculate_index_of_row::<false, true>(
+                    range_columns,
+                    sort_options,
+                    idx,
+                    Some(n),
+                )?,
+            WindowFrameBound::CurrentRow => {
+                if range_columns.is_empty() {
+                    length
+                } else {
+                    self.calculate_index_of_row::<false, false>(
                         range_columns,
                         sort_options,
                         idx,
-                        Some(n),
-                    )?,
-            };
-            let end = match &window_frame.end_bound {
-                WindowFrameBound::Preceding(n) => self
-                    .calculate_index_of_row::<false, true>(
+                        None,
+                    )?
+                }
+            }
+            WindowFrameBound::Following(ref n) => {
+                if n.is_null() {
+                    // UNBOUNDED FOLLOWING
+                    length
+                } else {
+                    self.calculate_index_of_row::<false, false>(
                         range_columns,
                         sort_options,
                         idx,
                         Some(n),
-                    )?,
-                WindowFrameBound::CurrentRow => {
-                    if range_columns.is_empty() {
-                        length
-                    } else {
-                        self.calculate_index_of_row::<false, false>(
-                            range_columns,
-                            sort_options,
-                            idx,
-                            None,
-                        )?
-                    }
-                }
-                WindowFrameBound::Following(n) => {
-                    if n.is_null() {
-                        // UNBOUNDED FOLLOWING
-                        length
-                    } else {
-                        self.calculate_index_of_row::<false, false>(
-                            range_columns,
-                            sort_options,
-                            idx,
-                            Some(n),
-                        )?
-                    }
+                    )?
                 }
-            };
-            Ok((start, end))
-        } else {
-            Ok((0, length))
-        }
+            }
+        };
+        Ok((start, end))
     }
 
     /// This function does the heavy lifting when finding range boundaries. It is meant to be
@@ -348,7 +337,7 @@ impl WindowFrameStateGroups {
     /// This function calculates beginning/ending indices for the frame of the current row.
     fn calculate_range(
         &mut self,
-        window_frame: &Option<Arc<WindowFrame>>,
+        window_frame: &Arc<WindowFrame>,
         range_columns: &[ArrayRef],
         _sort_options: &[SortOptions],
         length: usize,
@@ -359,87 +348,67 @@ impl WindowFrameStateGroups {
                 "GROUPS mode requires an ORDER BY clause".to_string(),
             ));
         }
-        if let Some(window_frame) = window_frame {
-            let start = match window_frame.start_bound {
-                // UNBOUNDED PRECEDING
-                WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0,
-                WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self
-                    .calculate_index_of_group::<true, true>(
-                        range_columns,
-                        idx,
-                        n,
-                        length,
-                    )?,
-                WindowFrameBound::CurrentRow => self
-                    .calculate_index_of_group::<true, true>(
-                        range_columns,
-                        idx,
-                        0,
-                        length,
-                    )?,
-                WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self
-                    .calculate_index_of_group::<true, false>(
-                        range_columns,
-                        idx,
-                        n,
-                        length,
-                    )?,
-                // UNBOUNDED FOLLOWING
-                WindowFrameBound::Following(ScalarValue::UInt64(None)) => {
-                    return Err(DataFusionError::Internal(format!(
-                        "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'",
-                        window_frame
-                    )))
-                }
-                // ERRONEOUS FRAMES
-                WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
-                    return Err(DataFusionError::Internal(
-                        "Groups should be Uint".to_string(),
-                    ))
-                }
-            };
-            let end = match window_frame.end_bound {
-                // UNBOUNDED PRECEDING
-                WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => {
-                    return Err(DataFusionError::Internal(format!(
-                        "Frame end cannot be UNBOUNDED PRECEDING '{:?}'",
-                        window_frame
-                    )))
-                }
-                WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self
-                    .calculate_index_of_group::<false, true>(
-                        range_columns,
-                        idx,
-                        n,
-                        length,
-                    )?,
-                WindowFrameBound::CurrentRow => self
-                    .calculate_index_of_group::<false, false>(
-                        range_columns,
-                        idx,
-                        0,
-                        length,
-                    )?,
-                WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self
-                    .calculate_index_of_group::<false, false>(
-                        range_columns,
-                        idx,
-                        n,
-                        length,
-                    )?,
-                // UNBOUNDED FOLLOWING
-                WindowFrameBound::Following(ScalarValue::UInt64(None)) => length,
-                // ERRONEOUS FRAMES
-                WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
-                    return Err(DataFusionError::Internal(
-                        "Groups should be Uint".to_string(),
-                    ))
-                }
-            };
-            Ok((start, end))
-        } else {
-            Ok((0, length))
-        }
+        let start = match window_frame.start_bound {
+            // UNBOUNDED PRECEDING
+            WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0,
+            WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self
+                .calculate_index_of_group::<true, true>(range_columns, idx, n, length)?,
+            WindowFrameBound::CurrentRow => self.calculate_index_of_group::<true, true>(
+                range_columns,
+                idx,
+                0,
+                length,
+            )?,
+            WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self
+                .calculate_index_of_group::<true, false>(range_columns, idx, n, length)?,
+            // UNBOUNDED FOLLOWING
+            WindowFrameBound::Following(ScalarValue::UInt64(None)) => {
+                return Err(DataFusionError::Internal(format!(
+                    "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'",
+                    window_frame
+                )))
+            }
+            // ERRONEOUS FRAMES
+            WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
+                return Err(DataFusionError::Internal(
+                    "Groups should be Uint".to_string(),
+                ))
+            }
+        };
+        let end = match window_frame.end_bound {
+            // UNBOUNDED PRECEDING
+            WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => {
+                return Err(DataFusionError::Internal(format!(
+                    "Frame end cannot be UNBOUNDED PRECEDING '{:?}'",
+                    window_frame
+                )))
+            }
+            WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self
+                .calculate_index_of_group::<false, true>(range_columns, idx, n, length)?,
+            WindowFrameBound::CurrentRow => self
+                .calculate_index_of_group::<false, false>(
+                    range_columns,
+                    idx,
+                    0,
+                    length,
+                )?,
+            WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self
+                .calculate_index_of_group::<false, false>(
+                    range_columns,
+                    idx,
+                    n,
+                    length,
+                )?,
+            // UNBOUNDED FOLLOWING
+            WindowFrameBound::Following(ScalarValue::UInt64(None)) => length,
+            // ERRONEOUS FRAMES
+            WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
+                return Err(DataFusionError::Internal(
+                    "Groups should be Uint".to_string(),
+                ))
+            }
+        };
+        Ok((start, end))
     }
 
     /// This function does the heavy lifting when finding group boundaries. It is meant to be