From fe5547d4dbf9417cb2e7cd90d945f2b19f3c1d37 Mon Sep 17 00:00:00 2001 From: Zeynep Arikoglu Date: Sun, 6 Nov 2022 17:54:35 +0300 Subject: [PATCH 1/7] Implementation of GROUPS mode in window frame --- datafusion/common/src/bisect.rs | 25 +- datafusion/core/src/physical_plan/planner.rs | 8 +- datafusion/core/tests/sql/window.rs | 116 ++- .../physical-expr/src/window/aggregate.rs | 4 +- .../physical-expr/src/window/built_in.rs | 4 + datafusion/physical-expr/src/window/groups.rs | 712 ++++++++++++++++++ datafusion/physical-expr/src/window/mod.rs | 1 + .../physical-expr/src/window/window_expr.rs | 143 +++- .../sqls/simple_window_groups.sql | 71 ++ integration-tests/test_psql_parity.py | 3 +- 10 files changed, 1046 insertions(+), 41 deletions(-) create mode 100644 datafusion/physical-expr/src/window/groups.rs create mode 100644 integration-tests/sqls/simple_window_groups.sql diff --git a/datafusion/common/src/bisect.rs b/datafusion/common/src/bisect.rs index 5060bf013120..bf31f055d617 100644 --- a/datafusion/common/src/bisect.rs +++ b/datafusion/common/src/bisect.rs @@ -60,22 +60,37 @@ pub fn bisect( target: &[ScalarValue], sort_options: &[SortOptions], ) -> Result { - let mut low: usize = 0; - let mut high: usize = item_columns + let low: usize = 0; + let high: usize = item_columns .get(0) .ok_or_else(|| { DataFusionError::Internal("Column array shouldn't be empty".to_string()) })? .len(); + let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| { + let cmp = compare(current, target, sort_options)?; + Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() }) + }; + find_bisect_point(item_columns, target, compare_fn, low, high) +} + +pub fn find_bisect_point( + item_columns: &[ArrayRef], + target: &[ScalarValue], + compare_fn: F, + mut low: usize, + mut high: usize, +) -> Result +where + F: Fn(&[ScalarValue], &[ScalarValue]) -> Result, +{ while low < high { let mid = ((high - low) / 2) + low; let val = item_columns .iter() .map(|arr| ScalarValue::try_from_array(arr, mid)) .collect::>>()?; - let cmp = compare(&val, target, sort_options)?; - let flag = if SIDE { cmp.is_lt() } else { cmp.is_le() }; - if flag { + if compare_fn(&val, target)? { low = mid + 1; } else { high = mid; diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs index 88a43e1117e6..6318e7963381 100644 --- a/datafusion/core/src/physical_plan/planner.rs +++ b/datafusion/core/src/physical_plan/planner.rs @@ -64,7 +64,7 @@ use datafusion_expr::expr::{ }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::utils::{expand_wildcard, expr_to_columns}; -use datafusion_expr::{WindowFrame, WindowFrameBound, WindowFrameUnits}; +use datafusion_expr::{WindowFrame, WindowFrameBound}; use datafusion_optimizer::utils::unalias; use datafusion_physical_expr::expressions::Literal; use datafusion_sql::utils::window_expr_common_partition_keys; @@ -1511,12 +1511,6 @@ pub fn create_window_expr_with_name( }) .collect::>>()?; if let Some(ref window_frame) = window_frame { - if window_frame.units == WindowFrameUnits::Groups { - return Err(DataFusionError::NotImplemented( - "Window frame definitions involving GROUPS are not supported yet" - .to_string(), - )); - } if !is_window_valid(window_frame) { return Err(DataFusionError::Execution(format!( "Invalid window frame: start bound ({}) cannot be larger than end bound ({})", diff --git a/datafusion/core/tests/sql/window.rs b/datafusion/core/tests/sql/window.rs index a36d90c2a10d..0ab96e44213b 100644 --- a/datafusion/core/tests/sql/window.rs +++ b/datafusion/core/tests/sql/window.rs @@ -1189,24 +1189,120 @@ async fn window_frame_ranges_unbounded_preceding_err() -> Result<()> { } #[tokio::test] -async fn window_frame_groups_query() -> Result<()> { +async fn window_frame_groups_preceding_following_desc() -> Result<()> { + let ctx = SessionContext::new(); + register_aggregate_csv(&ctx).await?; + let sql = "SELECT + SUM(c4) OVER(ORDER BY c2 DESC GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING), + SUM(c3) OVER(ORDER BY c2 DESC GROUPS BETWEEN 10000 PRECEDING AND 10000 FOLLOWING), + COUNT(*) OVER(ORDER BY c2 DESC GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING) + FROM aggregate_test_100 + ORDER BY c9 + LIMIT 5"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------------------------+----------------------------+-----------------+", + "| SUM(aggregate_test_100.c4) | SUM(aggregate_test_100.c3) | COUNT(UInt8(1)) |", + "+----------------------------+----------------------------+-----------------+", + "| 52276 | 781 | 56 |", + "| 260620 | 781 | 63 |", + "| -28623 | 781 | 37 |", + "| 260620 | 781 | 63 |", + "| 260620 | 781 | 63 |", + "+----------------------------+----------------------------+-----------------+", + ]; + assert_batches_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn window_frame_groups_order_by_null_desc() -> Result<()> { + let ctx = SessionContext::new(); + register_aggregate_null_cases_csv(&ctx).await?; + let sql = "SELECT + COUNT(c2) OVER (ORDER BY c1 DESC GROUPS BETWEEN 5 PRECEDING AND 3 FOLLOWING) + FROM null_cases + LIMIT 5"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+----------------------+", + "| COUNT(null_cases.c2) |", + "+----------------------+", + "| 12 |", + "| 12 |", + "| 12 |", + "| 12 |", + "| 12 |", + "+----------------------+", + ]; + assert_batches_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn window_frame_groups() -> Result<()> { + let ctx = SessionContext::new(); + register_aggregate_null_cases_csv(&ctx).await?; + let sql = "SELECT + SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as a, + SUM(c1) OVER (ORDER BY c3 DESC GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as b, + SUM(c1) OVER (ORDER BY c3 NULLS first GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as c, + SUM(c1) OVER (ORDER BY c3 DESC NULLS last GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as d, + SUM(c1) OVER (ORDER BY c3 DESC NULLS first GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as e, + SUM(c1) OVER (ORDER BY c3 NULLS first GROUPS BETWEEN 9 PRECEDING AND 11 FOLLOWING) as f, + SUM(c1) OVER (ORDER BY c3 GROUPS current row) as a1, + SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN 9 PRECEDING AND 5 PRECEDING) as a2, + SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN UNBOUNDED PRECEDING AND 5 PRECEDING) as a3, + SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING) as a4, + SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN UNBOUNDED PRECEDING AND current row) as a5, + SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as a6, + SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) as a7, + SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN 3 FOLLOWING AND UNBOUNDED FOLLOWING) as a8, + SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN current row AND UNBOUNDED FOLLOWING) as a9, + SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN current row AND 3 FOLLOWING) as a10, + SUM(c1) OVER (ORDER BY c3 GROUPS BETWEEN 5 FOLLOWING AND 7 FOLLOWING) as a11 + FROM null_cases + ORDER BY c3 + LIMIT 10"; + let actual = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-----+-----+-----+-----+-----+-----+----+-----+-----+-----+-----+------+------+------+------+-----+-----+", + "| a | b | c | d | e | f | a1 | a2 | a3 | a4 | a5 | a6 | a7 | a8 | a9 | a10 | a11 |", + "+-----+-----+-----+-----+-----+-----+----+-----+-----+-----+-----+------+------+------+------+-----+-----+", + "| 412 | 307 | 412 | 307 | 307 | 412 | | | | 412 | | 4627 | 4627 | 4531 | 4627 | 115 | 85 |", + "| 488 | 339 | 488 | 339 | 339 | 488 | 72 | | | 488 | 72 | 4627 | 4627 | 4512 | 4627 | 140 | 153 |", + "| 543 | 412 | 543 | 412 | 412 | 543 | 24 | | | 543 | 96 | 4627 | 4627 | 4487 | 4555 | 82 | 122 |", + "| 553 | 488 | 553 | 488 | 488 | 553 | 19 | | | 553 | 115 | 4627 | 4555 | 4473 | 4531 | 89 | 114 |", + "| 553 | 543 | 553 | 543 | 543 | 553 | 25 | | | 553 | 140 | 4627 | 4531 | 4442 | 4512 | 110 | 105 |", + "| 591 | 553 | 591 | 553 | 553 | 591 | 14 | | | 591 | 154 | 4627 | 4512 | 4402 | 4487 | 167 | 181 |", + "| 651 | 553 | 651 | 553 | 553 | 651 | 31 | 72 | 72 | 651 | 185 | 4627 | 4487 | 4320 | 4473 | 153 | 204 |", + "| 662 | 591 | 662 | 591 | 591 | 662 | 40 | 96 | 96 | 662 | 225 | 4627 | 4473 | 4320 | 4442 | 154 | 141 |", + "| 697 | 651 | 697 | 651 | 651 | 697 | 82 | 115 | 115 | 697 | 307 | 4627 | 4442 | 4288 | 4402 | 187 | 65 |", + "| 758 | 662 | 758 | 662 | 662 | 758 | | 140 | 140 | 758 | 307 | 4627 | 4402 | 4215 | 4320 | 181 | 48 |", + "+-----+-----+-----+-----+-----+-----+----+-----+-----+-----+-----+------+------+------+------+-----+-----+", + ]; + assert_batches_eq!(expected, &actual); + Ok(()) +} + +#[tokio::test] +async fn window_frame_groups_without_order_by() -> Result<()> { let ctx = SessionContext::new(); register_aggregate_csv(&ctx).await?; // execute the query let df = ctx .sql( "SELECT - COUNT(c1) OVER (ORDER BY c2 GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING) - FROM aggregate_test_100;", + SUM(c4) OVER(PARTITION BY c2 GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING) + FROM aggregate_test_100 + ORDER BY c9;", ) .await?; - let results = df.collect().await; - assert!(results - .as_ref() - .err() - .unwrap() - .to_string() - .contains("Window frame definitions involving GROUPS are not supported yet")); + let err = df.collect().await.unwrap_err(); + assert_contains!( + err.to_string(), + "Execution error: GROUPS mode requires an ORDER BY clause".to_owned() + ); Ok(()) } diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs index e6c754387f1c..1c41f2bb2ed7 100644 --- a/datafusion/physical-expr/src/window/aggregate.rs +++ b/datafusion/physical-expr/src/window/aggregate.rs @@ -31,7 +31,7 @@ use datafusion_common::ScalarValue; use datafusion_expr::WindowFrame; use crate::{expressions::PhysicalSortExpr, PhysicalExpr}; -use crate::{window::WindowExpr, AggregateExpr}; +use crate::{window::groups::WindowFrameGroups, window::WindowExpr, AggregateExpr}; /// A window expr that takes the form of an aggregate function #[derive(Debug)] @@ -114,6 +114,7 @@ impl WindowExpr for AggregateWindowExpr { .map(|v| v.slice(partition_range.start, length)) .collect::>(); + let mut window_frame_groups = WindowFrameGroups::default(); let mut last_range: (usize, usize) = (0, 0); // We iterate on each row to perform a running calculation. @@ -121,6 +122,7 @@ impl WindowExpr for AggregateWindowExpr { for i in 0..length { let cur_range = self.calculate_range( &window_frame, + &mut window_frame_groups, &slice_order_bys, &sort_options, length, diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs index e4e377175653..698bffdf92bd 100644 --- a/datafusion/physical-expr/src/window/built_in.rs +++ b/datafusion/physical-expr/src/window/built_in.rs @@ -31,6 +31,8 @@ use std::any::Any; use std::ops::Range; use std::sync::Arc; +use crate::window::groups::WindowFrameGroups; + /// A window expr that takes the form of a built in window function #[derive(Debug)] pub struct BuiltInWindowExpr { @@ -113,10 +115,12 @@ impl WindowExpr for BuiltInWindowExpr { .iter() .map(|v| v.slice(partition_range.start, length)) .collect::>(); + let mut window_frame_groups = WindowFrameGroups::default(); // We iterate on each row to calculate window frame range and and window function result for idx in 0..length { let range = self.calculate_range( &window_frame, + &mut window_frame_groups, &slice_order_bys, &sort_options, num_rows, diff --git a/datafusion/physical-expr/src/window/groups.rs b/datafusion/physical-expr/src/window/groups.rs new file mode 100644 index 000000000000..d1d79ba8700d --- /dev/null +++ b/datafusion/physical-expr/src/window/groups.rs @@ -0,0 +1,712 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This module provides the utilities for the GROUPS window frame index calculations. + +use arrow::array::ArrayRef; +use std::cmp; +use std::collections::VecDeque; + +use datafusion_common::bisect::find_bisect_point; +use datafusion_common::{DataFusionError, Result, ScalarValue}; + +/// In the GROUPS mode, the rows with duplicate sorting values are grouped together. +/// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode. +/// The syntax is as follows: +/// GROUPS frame_start [ frame_exclusion ] +/// GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ] +/// The frame_exclusion is not yet supported. +/// The frame_start and frame_end parameters allow us to specify which rows the window frame starts and ends with. +/// They accept the following values: +/// - UNBOUNDED PRECEDING: (possible only in frame_start) start with the first row of the partition +/// - offset PRECEDING: When used as frame_start, it means the first row of the group which is a given number of +/// groups before the current group (the group containing the current row). +/// When used as frame_end, it means the last row of the group which is a given number of groups +/// before the current group. +/// - CURRENT ROW: When used as frame_start, it means the first row in a group containing the current row. +/// When used as frame_end, it means the last row in a group containing the current row. +/// - offset FOLLOWING: When used as frame_start, it means the first row of the group which is a given number of +/// groups after the current group (the group containing the current row). +/// When used as frame_end, it means the last row of the group which is a given number of groups +/// after the current group. +/// - UNBOUNDED FOLLOWING: (possible only as a frame_end) end with the last row of the partition +/// +/// In the following implementation, 'process' is the only public interface of the WindowFrameGroups. It is called for +/// frame_start and frame_end of each row, consecutively. +/// The call for frame_start, first, tries to initialize the WindowFrameGroups handler if it has not already been +/// initialized. The initialization means the setting of the window frame start index. The window frame start index can +/// only be set, if the current row's window frame start is greater or equal to zero depending on the offset. If the +/// window frame start index is greater or equal to zero, it means the current row can be stored in the +/// WindowFrameGroups handler's window frame (group_start_indices), the row values as the group identifier and the row +/// index as the start index of the group. Then, it keeps track of the previous row values, so that a group change can +/// be identified. If there is a group change, the WindowFrameGroups handler's window frame is proceeded by one group. +/// The call for frame_end, first, calculates the current row's window frame end index from the offset. If the current +/// row's window frame end index is greater than zero, the WindowFrameGroups handler's window frame is extended +/// one-by-one until the current row's window frame end index is reached by finding the next group. +/// The call to 'process' for frame_start returns the index of the the WindowFrameGroups handler's window frame's first +/// entry, if there is a window frame, otherwise it returns 0. +/// The call to 'process' for frame_end returns the index of the the WindowFrameGroups handler's window frame's last +/// entry, if there is a window frame, otherwise it returns the last index of the partition (length). + +#[derive(Debug, Default)] +pub struct WindowFrameGroups { + current_group_idx: u64, + group_start_indices: VecDeque<(Vec, usize)>, + previous_row_values: Option>, + reached_end: bool, + window_frame_end_idx: u64, + window_frame_start_idx: u64, +} + +impl WindowFrameGroups { + fn extend_window_frame_if_necessary< + const BISECT_SIDE: bool, + const SEARCH_SIDE: bool, + >( + &mut self, + item_columns: &[ArrayRef], + delta: u64, + ) -> Result<()> { + if BISECT_SIDE || self.reached_end { + return Ok(()); + } + let current_window_frame_end_idx = if !SEARCH_SIDE { + self.current_group_idx + delta + 1 + } else if self.current_group_idx >= delta { + self.current_group_idx - delta + 1 + } else { + 0 + }; + if current_window_frame_end_idx == 0 { + // the end index of the window frame is still before the first index + return Ok(()); + } + while !self.reached_end + && current_window_frame_end_idx >= self.window_frame_end_idx + { + if self.group_start_indices.is_empty() { + self.initialize_window_frame_start(item_columns)?; + continue; + } + self.proceed_one_group::(item_columns, delta)?; + } + Ok(()) + } + + fn initialize( + &mut self, + delta: u64, + item_columns: &[ArrayRef], + ) -> Result<()> { + if BISECT_SIDE { + if !SEARCH_SIDE { + self.window_frame_start_idx = self.current_group_idx + delta; + return self.initialize_window_frame_start(item_columns); + } else if self.current_group_idx >= delta { + self.window_frame_start_idx = self.current_group_idx - delta; + return self.initialize_window_frame_start(item_columns); + } + } + Ok(()) + } + + fn initialize_window_frame_start(&mut self, item_columns: &[ArrayRef]) -> Result<()> { + let mut group_values = item_columns + .iter() + .map(|col| ScalarValue::try_from_array(col, 0)) + .collect::>>()?; + let mut start_idx: usize = 0; + for _ in 0..self.window_frame_start_idx { + let next_group_and_start_index = + WindowFrameGroups::find_next_group_and_start_index( + item_columns, + &group_values, + start_idx, + None, + )?; + match next_group_and_start_index { + Some((group, index)) => { + group_values = group; + start_idx = index; + } + // not enough groups to generate a window frame + None => { + self.reached_end = true; + self.window_frame_end_idx = self.window_frame_start_idx; + return Ok(()); + } + } + } + self.group_start_indices + .push_back((group_values, start_idx)); + self.window_frame_end_idx = self.window_frame_start_idx + 1; + Ok(()) + } + + fn initialized(&self) -> bool { + self.reached_end || !self.group_start_indices.is_empty() + } + + /// This function proceeds the window frame by one group. + /// First, it extends the window frame by adding one next group as the last entry. + /// Then, if this function is called due to a group change (with BISECT_SIDE true), it pops the front entry. + /// If this function is called with BISECT_SIDE false, it means we are trying to extend the window frame to reach the + /// window frame size, so no popping of the front entry. + fn proceed_one_group( + &mut self, + item_columns: &[ArrayRef], + delta: u64, + ) -> Result<()> { + let last_group_values = self.group_start_indices.back(); + let last_group_values = if let Some(values) = last_group_values { + values + } else { + return Ok(()); + }; + let next_group_and_start_index = + WindowFrameGroups::find_next_group_and_start_index( + item_columns, + &last_group_values.0, + last_group_values.1, + None, + )?; + match next_group_and_start_index { + Some((group, index)) => { + self.group_start_indices.push_back((group, index)); + self.window_frame_end_idx += 1; + } + // not enough groups to proceed + None => { + self.reached_end = true; + } + } + if BISECT_SIDE { + let current_window_frame_start_idx = if !SEARCH_SIDE { + self.current_group_idx + delta + } else if self.current_group_idx >= delta { + self.current_group_idx - delta + } else { + 0 + }; + if current_window_frame_start_idx > self.window_frame_start_idx { + self.group_start_indices.pop_front(); + self.window_frame_start_idx += 1; + } + } + Ok(()) + } + + pub fn process( + &mut self, + current_row_values: &[ScalarValue], + delta: u64, + item_columns: &[ArrayRef], + length: usize, + ) -> Result { + if !self.initialized() { + self.initialize::(delta, item_columns)?; + } + self.extend_window_frame_if_necessary::( + item_columns, + delta, + )?; + let group_change = match &self.previous_row_values { + None => false, + Some(values) => current_row_values != values, + }; + if self.previous_row_values.is_none() || group_change { + self.previous_row_values = Some(current_row_values.to_vec()); + } + if group_change { + self.current_group_idx += 1; + self.proceed_one_group::(item_columns, delta)?; + } + Ok(if self.group_start_indices.is_empty() { + if self.reached_end { + length + } else { + 0 + } + } else if BISECT_SIDE { + match self.group_start_indices.get(0) { + Some(&(_, idx)) => idx, + None => 0, + } + } else { + match (self.reached_end, self.group_start_indices.back()) { + (false, Some(&(_, idx))) => idx, + _ => length, + } + }) + } + + /// This function finds the next group and its start index for a given group and start index. + /// It uses the bisect function, thus it should work on a bounded frame with a start and an end point. + /// To find the end point, a step_size is used with a default value of 100. After proceeding the end point, + /// if the entry at the end point still has the same group identifier (row values), the start point is set as the + /// end point, and the end point is proceeded one step size more. The end point is found when the entry at the end + /// point has a different group identifier (different row values), or the end of the partition is reached. + /// Then, the bisect function can be applied with low as the start point, and high as the end point. + /// + /// Improvement: + /// For small group sizes, proceeding one-by-one to find the group change can be more efficient. + /// A statistics about the group sizes can be utilized to decide upon which approach to use, or even set the + /// step_size. + /// We will also need a benchmark implementation to prove the efficiency. + fn find_next_group_and_start_index( + item_columns: &[ArrayRef], + current_row_values: &[ScalarValue], + idx: usize, + step_size: Option, + ) -> Result, usize)>> { + let step_size = if let Some(n) = step_size { n } else { 100 }; + if step_size == 0 { + return Err(DataFusionError::Internal( + "Step size cannot be 0".to_string(), + )); + } + let data_size: usize = item_columns + .get(0) + .ok_or_else(|| { + DataFusionError::Internal("Column array shouldn't be empty".to_string()) + })? + .len(); + let mut low: usize = idx; + let mut high: usize = cmp::min(low + step_size, data_size); + while high < data_size { + let val = item_columns + .iter() + .map(|arr| ScalarValue::try_from_array(arr, high)) + .collect::>>()?; + if val == current_row_values { + low = high; + high = cmp::min(low + step_size, data_size); + } else { + break; + } + } + low = find_bisect_point( + item_columns, + current_row_values, + |current, to_compare| Ok(current == to_compare), + low, + high, + )?; + if low == data_size { + return Ok(None); + } + let val = item_columns + .iter() + .map(|arr| ScalarValue::try_from_array(arr, low)) + .collect::>>()?; + Ok(Some((val, low))) + } +} + +#[cfg(test)] +mod tests { + use arrow::array::Float64Array; + use datafusion_common::ScalarValue; + use std::sync::Arc; + + use crate::from_slice::FromSlice; + + use super::*; + + #[test] + fn test_find_next_group_and_start_index() { + const NUM_ROWS: usize = 6; + const NEXT_INDICES: [usize; 5] = [1, 2, 4, 4, 5]; + const STEP_SIZE: usize = 1; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 3., 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 8., 5., 0.0])), + ]; + for (current_idx, next_idx) in NEXT_INDICES.iter().enumerate() { + let current_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, current_idx)) + .collect::>>() + .unwrap(); + let next_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, *next_idx)) + .collect::>>() + .unwrap(); + let res = WindowFrameGroups::find_next_group_and_start_index( + &arrays, + ¤t_row_values, + current_idx, + None, + ) + .unwrap(); + assert_eq!(res, Some((next_row_values.clone(), *next_idx))); + let res = WindowFrameGroups::find_next_group_and_start_index( + &arrays, + ¤t_row_values, + current_idx, + Some(STEP_SIZE), + ) + .unwrap(); + assert_eq!(res, Some((next_row_values, *next_idx))); + } + let current_idx = NUM_ROWS - 1; + let current_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, current_idx)) + .collect::>>() + .unwrap(); + let res = WindowFrameGroups::find_next_group_and_start_index( + &arrays, + ¤t_row_values, + current_idx, + None, + ) + .unwrap(); + assert_eq!(res, None); + let res = WindowFrameGroups::find_next_group_and_start_index( + &arrays, + ¤t_row_values, + current_idx, + Some(STEP_SIZE), + ) + .unwrap(); + assert_eq!(res, None); + } + + #[test] + fn test_window_frame_groups_preceding_huge_delta() { + const START: bool = true; + const END: bool = false; + const PRECEDING: bool = true; + const DELTA: u64 = 10; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 0); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + window_frame_groups + .extend_window_frame_if_necessary::(&arrays, DELTA) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 0); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + for idx in 0..NUM_ROWS { + let current_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, idx)) + .collect::>>() + .unwrap(); + let start = window_frame_groups + .process::( + ¤t_row_values, + DELTA, + &arrays, + NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, 0); + let end = window_frame_groups + .process::(¤t_row_values, DELTA, &arrays, NUM_ROWS) + .unwrap(); + assert_eq!(end, 0); + } + } + + #[test] + fn test_window_frame_groups_following_huge_delta() { + const START: bool = true; + const END: bool = false; + const FOLLOWING: bool = false; + const DELTA: u64 = 10; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, DELTA); + assert_eq!(window_frame_groups.window_frame_end_idx, DELTA); + assert!(window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + window_frame_groups + .extend_window_frame_if_necessary::(&arrays, DELTA) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, DELTA); + assert_eq!(window_frame_groups.window_frame_end_idx, DELTA); + assert!(window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + for idx in 0..NUM_ROWS { + let current_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, idx)) + .collect::>>() + .unwrap(); + let start = window_frame_groups + .process::( + ¤t_row_values, + DELTA, + &arrays, + NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, NUM_ROWS); + let end = window_frame_groups + .process::(¤t_row_values, DELTA, &arrays, NUM_ROWS) + .unwrap(); + assert_eq!(end, NUM_ROWS); + } + } + + #[test] + fn test_window_frame_groups_preceding_and_following_huge_delta() { + const START: bool = true; + const END: bool = false; + const FOLLOWING: bool = false; + const PRECEDING: bool = true; + const DELTA: u64 = 10; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 0); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + window_frame_groups + .extend_window_frame_if_necessary::(&arrays, DELTA) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, NUM_ROWS as u64); + assert!(window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), NUM_ROWS); + + for idx in 0..NUM_ROWS { + let current_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, idx)) + .collect::>>() + .unwrap(); + let start = window_frame_groups + .process::( + ¤t_row_values, + DELTA, + &arrays, + NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, 0); + let end = window_frame_groups + .process::(¤t_row_values, DELTA, &arrays, NUM_ROWS) + .unwrap(); + assert_eq!(end, NUM_ROWS); + } + } + + #[test] + fn test_window_frame_groups_preceding_and_following_1() { + const START: bool = true; + const END: bool = false; + const FOLLOWING: bool = false; + const PRECEDING: bool = true; + const DELTA: u64 = 1; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 0); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + window_frame_groups + .extend_window_frame_if_necessary::(&arrays, DELTA) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 2 * DELTA + 1); + assert!(!window_frame_groups.reached_end); + assert_eq!( + window_frame_groups.group_start_indices.len(), + 2 * DELTA as usize + 1 + ); + + for idx in 0..NUM_ROWS { + let current_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, idx)) + .collect::>>() + .unwrap(); + let start_idx = if idx < DELTA as usize { + 0 + } else { + idx - DELTA as usize + }; + let start = window_frame_groups + .process::( + ¤t_row_values, + DELTA, + &arrays, + NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, start_idx); + let end_idx = if idx + 1 + DELTA as usize > NUM_ROWS { + NUM_ROWS + } else { + idx + 1 + DELTA as usize + }; + let end = window_frame_groups + .process::(¤t_row_values, DELTA, &arrays, NUM_ROWS) + .unwrap(); + assert_eq!(end, end_idx); + } + } + + #[test] + fn test_window_frame_groups_preceding_1_and_unbounded_following() { + const START: bool = true; + const PRECEDING: bool = true; + const DELTA: u64 = 1; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 0); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + for idx in 0..NUM_ROWS { + let current_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, idx)) + .collect::>>() + .unwrap(); + let start_idx = if idx < DELTA as usize { + 0 + } else { + idx - DELTA as usize + }; + let start = window_frame_groups + .process::( + ¤t_row_values, + DELTA, + &arrays, + NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, start_idx); + } + } + + #[test] + fn test_window_frame_groups_current_and_unbounded_following() { + const START: bool = true; + const PRECEDING: bool = true; + const DELTA: u64 = 0; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 1); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 1); + + for idx in 0..NUM_ROWS { + let current_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, idx)) + .collect::>>() + .unwrap(); + + let start = window_frame_groups + .process::( + ¤t_row_values, + DELTA, + &arrays, + NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, idx); + } + } +} diff --git a/datafusion/physical-expr/src/window/mod.rs b/datafusion/physical-expr/src/window/mod.rs index 059f8b88637c..7de388b621cc 100644 --- a/datafusion/physical-expr/src/window/mod.rs +++ b/datafusion/physical-expr/src/window/mod.rs @@ -19,6 +19,7 @@ mod aggregate; mod built_in; mod built_in_window_function_expr; pub(crate) mod cume_dist; +mod groups; pub(crate) mod lead_lag; pub(crate) mod nth_value; pub(crate) mod partition_evaluator; diff --git a/datafusion/physical-expr/src/window/window_expr.rs b/datafusion/physical-expr/src/window/window_expr.rs index 9c4b1b17970d..86010349c171 100644 --- a/datafusion/physical-expr/src/window/window_expr.rs +++ b/datafusion/physical-expr/src/window/window_expr.rs @@ -31,6 +31,8 @@ use std::sync::Arc; use datafusion_expr::WindowFrameBound; use datafusion_expr::{WindowFrame, WindowFrameUnits}; +use crate::window::groups::WindowFrameGroups; + /// A window expression that: /// * knows its resulting field pub trait WindowExpr: Send + Sync + Debug { @@ -117,10 +119,11 @@ pub trait WindowExpr: Send + Sync + Debug { } /// We use start and end bounds to calculate current row's starting and ending range. - /// This function supports different modes, but we currently do not support window calculation for GROUPS inside window frames. + /// This function supports different modes. fn calculate_range( &self, window_frame: &Option>, + window_frame_groups: &mut WindowFrameGroups, range_columns: &[ArrayRef], sort_options: &[SortOptions], length: usize, @@ -212,11 +215,6 @@ pub trait WindowExpr: Send + Sync + Debug { 0 } } - WindowFrameBound::Preceding(_) => { - return Err(DataFusionError::Internal( - "Rows should be Uint".to_string(), - )) - } WindowFrameBound::CurrentRow => idx, // UNBOUNDED FOLLOWING WindowFrameBound::Following(ScalarValue::UInt64(None)) => { @@ -228,7 +226,9 @@ pub trait WindowExpr: Send + Sync + Debug { WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { min(idx + n as usize, length) } - WindowFrameBound::Following(_) => { + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) + | WindowFrameBound::Following(_) => { return Err(DataFusionError::Internal( "Rows should be Uint".to_string(), )) @@ -249,18 +249,15 @@ pub trait WindowExpr: Send + Sync + Debug { 0 } } - WindowFrameBound::Preceding(_) => { - return Err(DataFusionError::Internal( - "Rows should be Uint".to_string(), - )) - } WindowFrameBound::CurrentRow => idx + 1, // UNBOUNDED FOLLOWING WindowFrameBound::Following(ScalarValue::UInt64(None)) => length, WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { min(idx + n as usize + 1, length) } - WindowFrameBound::Following(_) => { + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) + | WindowFrameBound::Following(_) => { return Err(DataFusionError::Internal( "Rows should be Uint".to_string(), )) @@ -268,9 +265,104 @@ pub trait WindowExpr: Send + Sync + Debug { }; Ok((start, end)) } - WindowFrameUnits::Groups => Err(DataFusionError::NotImplemented( - "Window frame for groups is not implemented".to_string(), - )), + WindowFrameUnits::Groups => { + if range_columns.is_empty() { + return Err(DataFusionError::Execution( + "GROUPS mode requires an ORDER BY clause".to_string(), + )); + } + let start = match window_frame.start_bound { + // UNBOUNDED PRECEDING + WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0, + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { + calculate_index_of_group::( + range_columns, + window_frame_groups, + idx, + n, + length, + )? + } + WindowFrameBound::CurrentRow => { + calculate_index_of_group::( + range_columns, + window_frame_groups, + idx, + 0, + length, + )? + } + WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { + calculate_index_of_group::( + range_columns, + window_frame_groups, + idx, + n, + length, + )? + } + // UNBOUNDED FOLLOWING + WindowFrameBound::Following(ScalarValue::UInt64(None)) => { + return Err(DataFusionError::Internal(format!( + "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'", + window_frame + ))) + } + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) + | WindowFrameBound::Following(_) => { + return Err(DataFusionError::Internal( + "Groups should be Uint".to_string(), + )) + } + }; + let end = match window_frame.end_bound { + // UNBOUNDED PRECEDING + WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => { + return Err(DataFusionError::Internal(format!( + "Frame end cannot be UNBOUNDED PRECEDING '{:?}'", + window_frame + ))) + } + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { + calculate_index_of_group::( + range_columns, + window_frame_groups, + idx, + n, + length, + )? + } + WindowFrameBound::CurrentRow => { + calculate_index_of_group::( + range_columns, + window_frame_groups, + idx, + 0, + length, + )? + } + WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { + calculate_index_of_group::( + range_columns, + window_frame_groups, + idx, + n, + length, + )? + } + // UNBOUNDED FOLLOWING + WindowFrameBound::Following(ScalarValue::UInt64(None)) => length, + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) + | WindowFrameBound::Following(_) => { + return Err(DataFusionError::Internal( + "Groups should be Uint".to_string(), + )) + } + }; + Ok((start, end)) + } } } else { Ok((0, length)) @@ -320,3 +412,22 @@ fn calculate_index_of_row( // `BISECT_SIDE` true means bisect_left, false means bisect_right bisect::(range_columns, &end_range, sort_options) } + +fn calculate_index_of_group( + range_columns: &[ArrayRef], + window_frame_groups: &mut WindowFrameGroups, + idx: usize, + delta: u64, + length: usize, +) -> Result { + let current_row_values = range_columns + .iter() + .map(|col| ScalarValue::try_from_array(col, idx)) + .collect::>>()?; + window_frame_groups.process::( + ¤t_row_values, + delta, + range_columns, + length, + ) +} diff --git a/integration-tests/sqls/simple_window_groups.sql b/integration-tests/sqls/simple_window_groups.sql new file mode 100644 index 000000000000..dd369c28ca4d --- /dev/null +++ b/integration-tests/sqls/simple_window_groups.sql @@ -0,0 +1,71 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at + +-- http://www.apache.org/licenses/LICENSE-2.0 + +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +SELECT + SUM(c4) OVER(ORDER BY c3 DESC GROUPS BETWEEN 3 PRECEDING AND 1 FOLLOWING) as summation1, + SUM(c4) OVER(ORDER BY c3 DESC GROUPS BETWEEN 3 PRECEDING AND 2 PRECEDING) as summation2, + SUM(c5) OVER(ORDER BY c4 DESC GROUPS BETWEEN 2 PRECEDING AND 2 PRECEDING) as summation3, + SUM(c4) OVER(ORDER BY c3 DESC GROUPS BETWEEN 1 FOLLOWING AND 3 FOLLOWING) as summation4, + SUM(c3) OVER(ORDER BY c4 DESC GROUPS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) as summation5, + SUM(c5) OVER(ORDER BY c3 DESC GROUPS 2 PRECEDING) as summation6, + SUM(c5) OVER(ORDER BY c3 DESC GROUPS BETWEEN CURRENT ROW AND 3 FOLLOWING) as summation7, + SUM(c5) OVER(ORDER BY c4 DESC GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as summation8, + SUM(c4) OVER(ORDER BY c4 DESC GROUPS UNBOUNDED PRECEDING) as summation9, + SUM(c5) OVER(ORDER BY c4 DESC GROUPS CURRENT ROW) as summation10, + SUM(c5) OVER(ORDER BY c4 DESC GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as summation11, + SUM(c5) OVER(ORDER BY c4 DESC GROUPS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) as summation12, + SUM(c5) OVER(ORDER BY c4 DESC GROUPS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) as summation13, + SUM(c4) OVER(PARTITION BY c4 ORDER BY c3 GROUPS BETWEEN 3 PRECEDING AND 1 FOLLOWING) as summation21, + SUM(c4) OVER(PARTITION BY c4 ORDER BY c3 GROUPS BETWEEN 3 PRECEDING AND 2 PRECEDING) as summation22, + SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN 2 PRECEDING AND 2 PRECEDING) as summation23, + SUM(c4) OVER(PARTITION BY c4 ORDER BY c3 GROUPS BETWEEN 1 FOLLOWING AND 3 FOLLOWING) as summation24, + SUM(c3) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) as summation25, + SUM(c5) OVER(PARTITION BY c4 ORDER BY c3 GROUPS 2 PRECEDING) as summation26, + SUM(c5) OVER(PARTITION BY c4 ORDER BY c3 GROUPS BETWEEN CURRENT ROW AND 3 FOLLOWING) as summation27, + SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as summation28, + SUM(c4) OVER(PARTITION BY c4 ORDER BY c4 GROUPS UNBOUNDED PRECEDING) as summation29, + SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS CURRENT ROW) as summation30, + SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as summation31, + SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) as summation32, + SUM(c5) OVER(PARTITION BY c4 ORDER BY c4 GROUPS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) as summation33, + SUM(c4) OVER(ORDER BY c5, c3 GROUPS BETWEEN 3 PRECEDING AND 1 FOLLOWING) as summation41, + SUM(c4) OVER(ORDER BY c5, c3 GROUPS BETWEEN 3 PRECEDING AND 2 PRECEDING) as summation42, + SUM(c5) OVER(ORDER BY c5, c4 GROUPS BETWEEN 2 PRECEDING AND 2 PRECEDING) as summation43, + SUM(c4) OVER(ORDER BY c5, c3 GROUPS BETWEEN 1 FOLLOWING AND 3 FOLLOWING) as summation44, + SUM(c3) OVER(ORDER BY c5, c4 GROUPS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) as summation45, + SUM(c5) OVER(ORDER BY c5, c3 GROUPS 2 PRECEDING) as summation46, + SUM(c5) OVER(ORDER BY c5, c3 GROUPS BETWEEN CURRENT ROW AND 3 FOLLOWING) as summation47, + SUM(c5) OVER(ORDER BY c5, c4 GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as summation48, + SUM(c4) OVER(ORDER BY c5, c4 GROUPS UNBOUNDED PRECEDING) as summation49, + SUM(c5) OVER(ORDER BY c5, c4 GROUPS CURRENT ROW) as summation50, + SUM(c5) OVER(ORDER BY c5, c4 GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as summation51, + SUM(c5) OVER(ORDER BY c5, c4 GROUPS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) as summation52, + SUM(c5) OVER(ORDER BY c5, c4 GROUPS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) as summation53, + SUM(c4) OVER(ORDER BY c3 GROUPS BETWEEN 3 PRECEDING AND 1 FOLLOWING) as summation61, + SUM(c4) OVER(ORDER BY c3 GROUPS BETWEEN 3 PRECEDING AND 2 PRECEDING) as summation62, + SUM(c5) OVER(ORDER BY c4 GROUPS BETWEEN 2 PRECEDING AND 2 PRECEDING) as summation63, + SUM(c4) OVER(ORDER BY c3 GROUPS BETWEEN 1 FOLLOWING AND 3 FOLLOWING) as summation64, + SUM(c3) OVER(ORDER BY c4 GROUPS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) as summation65, + SUM(c5) OVER(ORDER BY c3 GROUPS 2 PRECEDING) as summation66, + SUM(c5) OVER(ORDER BY c3 GROUPS BETWEEN CURRENT ROW AND 3 FOLLOWING) as summation67, + SUM(c5) OVER(ORDER BY c4 GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as summation68, + SUM(c4) OVER(ORDER BY c4 GROUPS UNBOUNDED PRECEDING) as summation69, + SUM(c5) OVER(ORDER BY c4 GROUPS CURRENT ROW) as summation70, + SUM(c5) OVER(ORDER BY c4 GROUPS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as summation71, + SUM(c5) OVER(ORDER BY c4 GROUPS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) as summation72, + SUM(c5) OVER(ORDER BY c4 GROUPS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) as summation73 +FROM test +ORDER BY c9; diff --git a/integration-tests/test_psql_parity.py b/integration-tests/test_psql_parity.py index be197eb612ff..e4c2bb17edb6 100644 --- a/integration-tests/test_psql_parity.py +++ b/integration-tests/test_psql_parity.py @@ -82,11 +82,10 @@ def generate_csv_from_psql(fname: str): class TestPsqlParity: def test_tests_count(self): - assert len(test_files) == 25, "tests are missed" + assert len(test_files) == 26, "tests are missed" @pytest.mark.parametrize("fname", test_files, ids=str) def test_sql_file(self, fname): datafusion_output = pd.read_csv(io.BytesIO(generate_csv_from_datafusion(fname))) psql_output = pd.read_csv(io.BytesIO(generate_csv_from_psql(fname))) np.testing.assert_allclose(datafusion_output, psql_output, equal_nan=True, verbose=True) - From 6c8e2bb3f0794576091b417f0c5024fb683b892b Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Mon, 7 Nov 2022 08:45:29 +0300 Subject: [PATCH 2/7] Break down/flatten some functions, move comments inline --- datafusion/physical-expr/src/window/groups.rs | 205 ++++++++---------- .../physical-expr/src/window/window_expr.rs | 2 +- 2 files changed, 95 insertions(+), 112 deletions(-) diff --git a/datafusion/physical-expr/src/window/groups.rs b/datafusion/physical-expr/src/window/groups.rs index d1d79ba8700d..06f5695298ec 100644 --- a/datafusion/physical-expr/src/window/groups.rs +++ b/datafusion/physical-expr/src/window/groups.rs @@ -15,7 +15,28 @@ // specific language governing permissions and limitations // under the License. -//! This module provides the utilities for the GROUPS window frame index calculations. +//! This module provides utilities for GROUPS mode window frame index calculations. +// In GROUPS mode, rows with duplicate sorting values are grouped together. +// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode. +// The syntax is as follows: +// GROUPS frame_start [ frame_exclusion ] +// GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ] +// The optional frame_exclusion specifier is not yet supported. +// The frame_start and frame_end parameters allow us to specify which rows the window +// frame starts and ends with. They accept the following values: +// - UNBOUNDED PRECEDING: Start with the first row of the partition. Possible only in frame_start. +// - offset PRECEDING: When used in frame_start, it refers to the first row of the group +// that comes "offset" groups before the current group (i.e. the group +// containing the current row). When used in frame_end, it refers to the +// last row of the group that comes "offset" groups before the current group. +// - CURRENT ROW: When used in frame_start, it refers to the first row of the group containing +// the current row. When used in frame_end, it refers to the last row of the group +// containing the current row. +// - offset FOLLOWING: When used in frame_start, it refers to the first row of the group +// that comes "offset" groups after the current group (i.e. the group +// containing the current row). When used in frame_end, it refers to the +// last row of the group that comes "offset" groups after the current group. +// - UNBOUNDED FOLLOWING: End with the last row of the partition. Possible only in frame_end. use arrow::array::ArrayRef; use std::cmp; @@ -24,44 +45,8 @@ use std::collections::VecDeque; use datafusion_common::bisect::find_bisect_point; use datafusion_common::{DataFusionError, Result, ScalarValue}; -/// In the GROUPS mode, the rows with duplicate sorting values are grouped together. -/// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode. -/// The syntax is as follows: -/// GROUPS frame_start [ frame_exclusion ] -/// GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ] -/// The frame_exclusion is not yet supported. -/// The frame_start and frame_end parameters allow us to specify which rows the window frame starts and ends with. -/// They accept the following values: -/// - UNBOUNDED PRECEDING: (possible only in frame_start) start with the first row of the partition -/// - offset PRECEDING: When used as frame_start, it means the first row of the group which is a given number of -/// groups before the current group (the group containing the current row). -/// When used as frame_end, it means the last row of the group which is a given number of groups -/// before the current group. -/// - CURRENT ROW: When used as frame_start, it means the first row in a group containing the current row. -/// When used as frame_end, it means the last row in a group containing the current row. -/// - offset FOLLOWING: When used as frame_start, it means the first row of the group which is a given number of -/// groups after the current group (the group containing the current row). -/// When used as frame_end, it means the last row of the group which is a given number of groups -/// after the current group. -/// - UNBOUNDED FOLLOWING: (possible only as a frame_end) end with the last row of the partition -/// -/// In the following implementation, 'process' is the only public interface of the WindowFrameGroups. It is called for -/// frame_start and frame_end of each row, consecutively. -/// The call for frame_start, first, tries to initialize the WindowFrameGroups handler if it has not already been -/// initialized. The initialization means the setting of the window frame start index. The window frame start index can -/// only be set, if the current row's window frame start is greater or equal to zero depending on the offset. If the -/// window frame start index is greater or equal to zero, it means the current row can be stored in the -/// WindowFrameGroups handler's window frame (group_start_indices), the row values as the group identifier and the row -/// index as the start index of the group. Then, it keeps track of the previous row values, so that a group change can -/// be identified. If there is a group change, the WindowFrameGroups handler's window frame is proceeded by one group. -/// The call for frame_end, first, calculates the current row's window frame end index from the offset. If the current -/// row's window frame end index is greater than zero, the WindowFrameGroups handler's window frame is extended -/// one-by-one until the current row's window frame end index is reached by finding the next group. -/// The call to 'process' for frame_start returns the index of the the WindowFrameGroups handler's window frame's first -/// entry, if there is a window frame, otherwise it returns 0. -/// The call to 'process' for frame_end returns the index of the the WindowFrameGroups handler's window frame's last -/// entry, if there is a window frame, otherwise it returns the last index of the partition (length). - +/// This structure encapsulates all the state information we require as we +/// scan groups of data while processing window frames. #[derive(Debug, Default)] pub struct WindowFrameGroups { current_group_idx: u64, @@ -73,17 +58,11 @@ pub struct WindowFrameGroups { } impl WindowFrameGroups { - fn extend_window_frame_if_necessary< - const BISECT_SIDE: bool, - const SEARCH_SIDE: bool, - >( + fn extend_window_frame_if_necessary( &mut self, item_columns: &[ArrayRef], delta: u64, ) -> Result<()> { - if BISECT_SIDE || self.reached_end { - return Ok(()); - } let current_window_frame_end_idx = if !SEARCH_SIDE { self.current_group_idx + delta + 1 } else if self.current_group_idx >= delta { @@ -95,33 +74,32 @@ impl WindowFrameGroups { // the end index of the window frame is still before the first index return Ok(()); } + if self.group_start_indices.is_empty() { + // TODO: Do we need to check for self.window_frame_end_idx <= current_window_frame_end_idx? + self.initialize_window_frame_start(item_columns)?; + } while !self.reached_end - && current_window_frame_end_idx >= self.window_frame_end_idx + && self.window_frame_end_idx <= current_window_frame_end_idx { - if self.group_start_indices.is_empty() { - self.initialize_window_frame_start(item_columns)?; - continue; - } - self.proceed_one_group::(item_columns, delta)?; + self.advance_one_group::(item_columns)?; } Ok(()) } - fn initialize( + fn initialize( &mut self, delta: u64, item_columns: &[ArrayRef], ) -> Result<()> { - if BISECT_SIDE { - if !SEARCH_SIDE { - self.window_frame_start_idx = self.current_group_idx + delta; - return self.initialize_window_frame_start(item_columns); - } else if self.current_group_idx >= delta { - self.window_frame_start_idx = self.current_group_idx - delta; - return self.initialize_window_frame_start(item_columns); - } + if !SEARCH_SIDE { + self.window_frame_start_idx = self.current_group_idx + delta; + self.initialize_window_frame_start(item_columns) + } else if self.current_group_idx >= delta { + self.window_frame_start_idx = self.current_group_idx - delta; + self.initialize_window_frame_start(item_columns) + } else { + Ok(()) } - Ok(()) } fn initialize_window_frame_start(&mut self, item_columns: &[ArrayRef]) -> Result<()> { @@ -161,15 +139,10 @@ impl WindowFrameGroups { self.reached_end || !self.group_start_indices.is_empty() } - /// This function proceeds the window frame by one group. - /// First, it extends the window frame by adding one next group as the last entry. - /// Then, if this function is called due to a group change (with BISECT_SIDE true), it pops the front entry. - /// If this function is called with BISECT_SIDE false, it means we are trying to extend the window frame to reach the - /// window frame size, so no popping of the front entry. - fn proceed_one_group( + /// This function advances the window frame by one group. + fn advance_one_group( &mut self, item_columns: &[ArrayRef], - delta: u64, ) -> Result<()> { let last_group_values = self.group_start_indices.back(); let last_group_values = if let Some(values) = last_group_values { @@ -194,22 +167,27 @@ impl WindowFrameGroups { self.reached_end = true; } } - if BISECT_SIDE { - let current_window_frame_start_idx = if !SEARCH_SIDE { - self.current_group_idx + delta - } else if self.current_group_idx >= delta { - self.current_group_idx - delta - } else { - 0 - }; - if current_window_frame_start_idx > self.window_frame_start_idx { - self.group_start_indices.pop_front(); - self.window_frame_start_idx += 1; - } - } Ok(()) } + /// This function drops the oldest group from the window frame. + fn shift_one_group(&mut self, delta: u64) { + let current_window_frame_start_idx = if !SEARCH_SIDE { + self.current_group_idx + delta + } else if self.current_group_idx >= delta { + self.current_group_idx - delta + } else { + 0 + }; + if current_window_frame_start_idx > self.window_frame_start_idx { + self.group_start_indices.pop_front(); + self.window_frame_start_idx += 1; + } + } + + /// This function is the main public interface of WindowFrameGroups. It is meant to be + /// called twice, in succession, to get window frame start and end indices (with `BISECT_SIDE` + /// supplied as false and true, respectively). pub fn process( &mut self, current_row_values: &[ScalarValue], @@ -217,13 +195,23 @@ impl WindowFrameGroups { item_columns: &[ArrayRef], length: usize, ) -> Result { - if !self.initialized() { - self.initialize::(delta, item_columns)?; + if BISECT_SIDE { + // When we call this function to get the window frame start index, it tries to initialize + // the internal grouping state if this is not already done before. This initialization takes + // place only when the window frame start index is greater than or equal to zero. In this + // case, the current row is stored in group_start_indices, with row values as the group + // identifier and row index as the start index of the group. + if !self.initialized() { + self.initialize::(delta, item_columns)?; + } + } else if !self.reached_end { + // When we call this function to get the window frame end index, it extends the window + // frame one by one until the current row's window frame end index is reached by finding + // the next group. + self.extend_window_frame_if_necessary::(item_columns, delta)?; } - self.extend_window_frame_if_necessary::( - item_columns, - delta, - )?; + // We keep track of previous row values, so that a group change can be identified. + // If there is a group change, the window frame is advanced and shifted by one group. let group_change = match &self.previous_row_values { None => false, Some(values) => current_row_values != values, @@ -233,7 +221,8 @@ impl WindowFrameGroups { } if group_change { self.current_group_idx += 1; - self.proceed_one_group::(item_columns, delta)?; + self.advance_one_group::(item_columns)?; + self.shift_one_group::(delta); } Ok(if self.group_start_indices.is_empty() { if self.reached_end { @@ -255,18 +244,12 @@ impl WindowFrameGroups { } /// This function finds the next group and its start index for a given group and start index. - /// It uses the bisect function, thus it should work on a bounded frame with a start and an end point. - /// To find the end point, a step_size is used with a default value of 100. After proceeding the end point, - /// if the entry at the end point still has the same group identifier (row values), the start point is set as the - /// end point, and the end point is proceeded one step size more. The end point is found when the entry at the end - /// point has a different group identifier (different row values), or the end of the partition is reached. - /// Then, the bisect function can be applied with low as the start point, and high as the end point. - /// - /// Improvement: - /// For small group sizes, proceeding one-by-one to find the group change can be more efficient. - /// A statistics about the group sizes can be utilized to decide upon which approach to use, or even set the - /// step_size. - /// We will also need a benchmark implementation to prove the efficiency. + /// TODO: Explain the algorithm clearly. + // FUTURE WORK/ENHANCEMENTS: + // For small group sizes, proceeding one-by-one to find the group change can be more efficient. + // Statistics about previous group sizes can be utilized to choose the approach to use, or even + // to set the base step_size. We can also create a benchmark implementation to get insights about + // the crossover point. fn find_next_group_and_start_index( item_columns: &[ArrayRef], current_row_values: &[ScalarValue], @@ -408,7 +391,7 @@ mod tests { let mut window_frame_groups = WindowFrameGroups::default(); window_frame_groups - .initialize::(DELTA, &arrays) + .initialize::(DELTA, &arrays) .unwrap(); assert_eq!(window_frame_groups.window_frame_start_idx, 0); assert_eq!(window_frame_groups.window_frame_end_idx, 0); @@ -416,7 +399,7 @@ mod tests { assert_eq!(window_frame_groups.group_start_indices.len(), 0); window_frame_groups - .extend_window_frame_if_necessary::(&arrays, DELTA) + .extend_window_frame_if_necessary::(&arrays, DELTA) .unwrap(); assert_eq!(window_frame_groups.window_frame_start_idx, 0); assert_eq!(window_frame_groups.window_frame_end_idx, 0); @@ -462,7 +445,7 @@ mod tests { let mut window_frame_groups = WindowFrameGroups::default(); window_frame_groups - .initialize::(DELTA, &arrays) + .initialize::(DELTA, &arrays) .unwrap(); assert_eq!(window_frame_groups.window_frame_start_idx, DELTA); assert_eq!(window_frame_groups.window_frame_end_idx, DELTA); @@ -470,7 +453,7 @@ mod tests { assert_eq!(window_frame_groups.group_start_indices.len(), 0); window_frame_groups - .extend_window_frame_if_necessary::(&arrays, DELTA) + .extend_window_frame_if_necessary::(&arrays, DELTA) .unwrap(); assert_eq!(window_frame_groups.window_frame_start_idx, DELTA); assert_eq!(window_frame_groups.window_frame_end_idx, DELTA); @@ -517,7 +500,7 @@ mod tests { let mut window_frame_groups = WindowFrameGroups::default(); window_frame_groups - .initialize::(DELTA, &arrays) + .initialize::(DELTA, &arrays) .unwrap(); assert_eq!(window_frame_groups.window_frame_start_idx, 0); assert_eq!(window_frame_groups.window_frame_end_idx, 0); @@ -525,7 +508,7 @@ mod tests { assert_eq!(window_frame_groups.group_start_indices.len(), 0); window_frame_groups - .extend_window_frame_if_necessary::(&arrays, DELTA) + .extend_window_frame_if_necessary::(&arrays, DELTA) .unwrap(); assert_eq!(window_frame_groups.window_frame_start_idx, 0); assert_eq!(window_frame_groups.window_frame_end_idx, NUM_ROWS as u64); @@ -572,7 +555,7 @@ mod tests { let mut window_frame_groups = WindowFrameGroups::default(); window_frame_groups - .initialize::(DELTA, &arrays) + .initialize::(DELTA, &arrays) .unwrap(); assert_eq!(window_frame_groups.window_frame_start_idx, 0); assert_eq!(window_frame_groups.window_frame_end_idx, 0); @@ -580,7 +563,7 @@ mod tests { assert_eq!(window_frame_groups.group_start_indices.len(), 0); window_frame_groups - .extend_window_frame_if_necessary::(&arrays, DELTA) + .extend_window_frame_if_necessary::(&arrays, DELTA) .unwrap(); assert_eq!(window_frame_groups.window_frame_start_idx, 0); assert_eq!(window_frame_groups.window_frame_end_idx, 2 * DELTA + 1); @@ -638,7 +621,7 @@ mod tests { let mut window_frame_groups = WindowFrameGroups::default(); window_frame_groups - .initialize::(DELTA, &arrays) + .initialize::(DELTA, &arrays) .unwrap(); assert_eq!(window_frame_groups.window_frame_start_idx, 0); assert_eq!(window_frame_groups.window_frame_end_idx, 0); @@ -684,7 +667,7 @@ mod tests { let mut window_frame_groups = WindowFrameGroups::default(); window_frame_groups - .initialize::(DELTA, &arrays) + .initialize::(DELTA, &arrays) .unwrap(); assert_eq!(window_frame_groups.window_frame_start_idx, 0); assert_eq!(window_frame_groups.window_frame_end_idx, 1); diff --git a/datafusion/physical-expr/src/window/window_expr.rs b/datafusion/physical-expr/src/window/window_expr.rs index 86010349c171..45352c0409d9 100644 --- a/datafusion/physical-expr/src/window/window_expr.rs +++ b/datafusion/physical-expr/src/window/window_expr.rs @@ -133,9 +133,9 @@ pub trait WindowExpr: Send + Sync + Debug { match window_frame.units { WindowFrameUnits::Range => { let start = match &window_frame.start_bound { - // UNBOUNDED PRECEDING WindowFrameBound::Preceding(n) => { if n.is_null() { + // UNBOUNDED PRECEDING 0 } else { calculate_index_of_row::( From 6bc0d7266eb2d43783cbd9857e6d9285d5d942a3 Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Mon, 7 Nov 2022 18:29:41 +0300 Subject: [PATCH 3/7] Change find_next_group_and_start_index to use an exponentially growing search algorithm --- datafusion/physical-expr/src/window/groups.rs | 78 ++++++------------- 1 file changed, 22 insertions(+), 56 deletions(-) diff --git a/datafusion/physical-expr/src/window/groups.rs b/datafusion/physical-expr/src/window/groups.rs index 06f5695298ec..90920513975b 100644 --- a/datafusion/physical-expr/src/window/groups.rs +++ b/datafusion/physical-expr/src/window/groups.rs @@ -114,19 +114,14 @@ impl WindowFrameGroups { item_columns, &group_values, start_idx, - None, )?; - match next_group_and_start_index { - Some((group, index)) => { - group_values = group; - start_idx = index; - } + if let Some(entry) = next_group_and_start_index { + (group_values, start_idx) = entry; + } else { // not enough groups to generate a window frame - None => { - self.reached_end = true; - self.window_frame_end_idx = self.window_frame_start_idx; - return Ok(()); - } + self.window_frame_end_idx = self.window_frame_start_idx; + self.reached_end = true; + return Ok(()); } } self.group_start_indices @@ -155,17 +150,13 @@ impl WindowFrameGroups { item_columns, &last_group_values.0, last_group_values.1, - None, )?; - match next_group_and_start_index { - Some((group, index)) => { - self.group_start_indices.push_back((group, index)); - self.window_frame_end_idx += 1; - } + if let Some(entry) = next_group_and_start_index { + self.group_start_indices.push_back(entry); + self.window_frame_end_idx += 1; + } else { // not enough groups to proceed - None => { - self.reached_end = true; - } + self.reached_end = true; } Ok(()) } @@ -244,32 +235,25 @@ impl WindowFrameGroups { } /// This function finds the next group and its start index for a given group and start index. - /// TODO: Explain the algorithm clearly. - // FUTURE WORK/ENHANCEMENTS: - // For small group sizes, proceeding one-by-one to find the group change can be more efficient. - // Statistics about previous group sizes can be utilized to choose the approach to use, or even - // to set the base step_size. We can also create a benchmark implementation to get insights about - // the crossover point. + /// It utilizes an exponentially growing step size to find the group boundary. + // TODO: For small group sizes, proceeding one-by-one to find the group change can be more efficient. + // Statistics about previous group sizes can be used to choose one-by-one vs. exponentially growing, + // or even to set the base step_size when exponentially growing. We can also create a benchmark + // implementation to get insights about the crossover point. fn find_next_group_and_start_index( item_columns: &[ArrayRef], current_row_values: &[ScalarValue], idx: usize, - step_size: Option, ) -> Result, usize)>> { - let step_size = if let Some(n) = step_size { n } else { 100 }; - if step_size == 0 { - return Err(DataFusionError::Internal( - "Step size cannot be 0".to_string(), - )); - } + let mut step_size: usize = 1; let data_size: usize = item_columns .get(0) .ok_or_else(|| { DataFusionError::Internal("Column array shouldn't be empty".to_string()) })? .len(); - let mut low: usize = idx; - let mut high: usize = cmp::min(low + step_size, data_size); + let mut low = idx; + let mut high = idx + step_size; while high < data_size { let val = item_columns .iter() @@ -277,7 +261,8 @@ impl WindowFrameGroups { .collect::>>()?; if val == current_row_values { low = high; - high = cmp::min(low + step_size, data_size); + step_size *= 2; + high += step_size; } else { break; } @@ -287,7 +272,7 @@ impl WindowFrameGroups { current_row_values, |current, to_compare| Ok(current == to_compare), low, - high, + cmp::min(high, data_size), )?; if low == data_size { return Ok(None); @@ -314,7 +299,6 @@ mod tests { fn test_find_next_group_and_start_index() { const NUM_ROWS: usize = 6; const NEXT_INDICES: [usize; 5] = [1, 2, 4, 4, 5]; - const STEP_SIZE: usize = 1; let arrays: Vec = vec![ Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 9., 10.])), @@ -337,15 +321,6 @@ mod tests { &arrays, ¤t_row_values, current_idx, - None, - ) - .unwrap(); - assert_eq!(res, Some((next_row_values.clone(), *next_idx))); - let res = WindowFrameGroups::find_next_group_and_start_index( - &arrays, - ¤t_row_values, - current_idx, - Some(STEP_SIZE), ) .unwrap(); assert_eq!(res, Some((next_row_values, *next_idx))); @@ -360,15 +335,6 @@ mod tests { &arrays, ¤t_row_values, current_idx, - None, - ) - .unwrap(); - assert_eq!(res, None); - let res = WindowFrameGroups::find_next_group_and_start_index( - &arrays, - ¤t_row_values, - current_idx, - Some(STEP_SIZE), ) .unwrap(); assert_eq!(res, None); From 81caf4be6a90b11d820cf0306400e4990c578304 Mon Sep 17 00:00:00 2001 From: Zeynep Arikoglu Date: Tue, 8 Nov 2022 14:27:20 +0300 Subject: [PATCH 4/7] Removed the TODO after verification of correct implementation --- datafusion/physical-expr/src/window/groups.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/physical-expr/src/window/groups.rs b/datafusion/physical-expr/src/window/groups.rs index 90920513975b..8e0a28f1df64 100644 --- a/datafusion/physical-expr/src/window/groups.rs +++ b/datafusion/physical-expr/src/window/groups.rs @@ -75,7 +75,6 @@ impl WindowFrameGroups { return Ok(()); } if self.group_start_indices.is_empty() { - // TODO: Do we need to check for self.window_frame_end_idx <= current_window_frame_end_idx? self.initialize_window_frame_start(item_columns)?; } while !self.reached_end From 0d809f6a2220ae5a4d1daea48c5d4f96c8af57e7 Mon Sep 17 00:00:00 2001 From: Zeynep Arikoglu Date: Wed, 9 Nov 2022 15:57:01 +0300 Subject: [PATCH 5/7] Window frame state capturing the state of the window frame calculations --- .../physical-expr/src/window/aggregate.rs | 9 +- .../physical-expr/src/window/built_in.rs | 8 +- datafusion/physical-expr/src/window/groups.rs | 660 ----------- datafusion/physical-expr/src/window/mod.rs | 2 +- .../physical-expr/src/window/window_expr.rs | 322 +---- .../src/window/window_frame_state.rs | 1033 +++++++++++++++++ 6 files changed, 1043 insertions(+), 991 deletions(-) delete mode 100644 datafusion/physical-expr/src/window/groups.rs create mode 100644 datafusion/physical-expr/src/window/window_frame_state.rs diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs index 1c41f2bb2ed7..0fc13c1f4683 100644 --- a/datafusion/physical-expr/src/window/aggregate.rs +++ b/datafusion/physical-expr/src/window/aggregate.rs @@ -31,7 +31,9 @@ use datafusion_common::ScalarValue; use datafusion_expr::WindowFrame; use crate::{expressions::PhysicalSortExpr, PhysicalExpr}; -use crate::{window::groups::WindowFrameGroups, window::WindowExpr, AggregateExpr}; +use crate::{window::WindowExpr, AggregateExpr}; + +use super::window_frame_state::{WindowFrameState, WindowFrameStateImpl}; /// A window expr that takes the form of an aggregate function #[derive(Debug)] @@ -114,15 +116,14 @@ impl WindowExpr for AggregateWindowExpr { .map(|v| v.slice(partition_range.start, length)) .collect::>(); - let mut window_frame_groups = WindowFrameGroups::default(); + let mut window_frame_state = WindowFrameState::new(&window_frame); let mut last_range: (usize, usize) = (0, 0); // We iterate on each row to perform a running calculation. // First, cur_range is calculated, then it is compared with last_range. for i in 0..length { - let cur_range = self.calculate_range( + let cur_range = window_frame_state.calculate_range( &window_frame, - &mut window_frame_groups, &slice_order_bys, &sort_options, length, diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs index 698bffdf92bd..04cbf4f523ad 100644 --- a/datafusion/physical-expr/src/window/built_in.rs +++ b/datafusion/physical-expr/src/window/built_in.rs @@ -17,6 +17,7 @@ //! Physical exec for built-in window function expressions. +use super::window_frame_state::{WindowFrameState, WindowFrameStateImpl}; use super::BuiltInWindowFunctionExpr; use super::WindowExpr; use crate::{expressions::PhysicalSortExpr, PhysicalExpr}; @@ -31,8 +32,6 @@ use std::any::Any; use std::ops::Range; use std::sync::Arc; -use crate::window::groups::WindowFrameGroups; - /// A window expr that takes the form of a built in window function #[derive(Debug)] pub struct BuiltInWindowExpr { @@ -115,12 +114,11 @@ impl WindowExpr for BuiltInWindowExpr { .iter() .map(|v| v.slice(partition_range.start, length)) .collect::>(); - let mut window_frame_groups = WindowFrameGroups::default(); + let mut window_frame_state = WindowFrameState::new(&window_frame); // We iterate on each row to calculate window frame range and and window function result for idx in 0..length { - let range = self.calculate_range( + let range = window_frame_state.calculate_range( &window_frame, - &mut window_frame_groups, &slice_order_bys, &sort_options, num_rows, diff --git a/datafusion/physical-expr/src/window/groups.rs b/datafusion/physical-expr/src/window/groups.rs deleted file mode 100644 index 8e0a28f1df64..000000000000 --- a/datafusion/physical-expr/src/window/groups.rs +++ /dev/null @@ -1,660 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! This module provides utilities for GROUPS mode window frame index calculations. -// In GROUPS mode, rows with duplicate sorting values are grouped together. -// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode. -// The syntax is as follows: -// GROUPS frame_start [ frame_exclusion ] -// GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ] -// The optional frame_exclusion specifier is not yet supported. -// The frame_start and frame_end parameters allow us to specify which rows the window -// frame starts and ends with. They accept the following values: -// - UNBOUNDED PRECEDING: Start with the first row of the partition. Possible only in frame_start. -// - offset PRECEDING: When used in frame_start, it refers to the first row of the group -// that comes "offset" groups before the current group (i.e. the group -// containing the current row). When used in frame_end, it refers to the -// last row of the group that comes "offset" groups before the current group. -// - CURRENT ROW: When used in frame_start, it refers to the first row of the group containing -// the current row. When used in frame_end, it refers to the last row of the group -// containing the current row. -// - offset FOLLOWING: When used in frame_start, it refers to the first row of the group -// that comes "offset" groups after the current group (i.e. the group -// containing the current row). When used in frame_end, it refers to the -// last row of the group that comes "offset" groups after the current group. -// - UNBOUNDED FOLLOWING: End with the last row of the partition. Possible only in frame_end. - -use arrow::array::ArrayRef; -use std::cmp; -use std::collections::VecDeque; - -use datafusion_common::bisect::find_bisect_point; -use datafusion_common::{DataFusionError, Result, ScalarValue}; - -/// This structure encapsulates all the state information we require as we -/// scan groups of data while processing window frames. -#[derive(Debug, Default)] -pub struct WindowFrameGroups { - current_group_idx: u64, - group_start_indices: VecDeque<(Vec, usize)>, - previous_row_values: Option>, - reached_end: bool, - window_frame_end_idx: u64, - window_frame_start_idx: u64, -} - -impl WindowFrameGroups { - fn extend_window_frame_if_necessary( - &mut self, - item_columns: &[ArrayRef], - delta: u64, - ) -> Result<()> { - let current_window_frame_end_idx = if !SEARCH_SIDE { - self.current_group_idx + delta + 1 - } else if self.current_group_idx >= delta { - self.current_group_idx - delta + 1 - } else { - 0 - }; - if current_window_frame_end_idx == 0 { - // the end index of the window frame is still before the first index - return Ok(()); - } - if self.group_start_indices.is_empty() { - self.initialize_window_frame_start(item_columns)?; - } - while !self.reached_end - && self.window_frame_end_idx <= current_window_frame_end_idx - { - self.advance_one_group::(item_columns)?; - } - Ok(()) - } - - fn initialize( - &mut self, - delta: u64, - item_columns: &[ArrayRef], - ) -> Result<()> { - if !SEARCH_SIDE { - self.window_frame_start_idx = self.current_group_idx + delta; - self.initialize_window_frame_start(item_columns) - } else if self.current_group_idx >= delta { - self.window_frame_start_idx = self.current_group_idx - delta; - self.initialize_window_frame_start(item_columns) - } else { - Ok(()) - } - } - - fn initialize_window_frame_start(&mut self, item_columns: &[ArrayRef]) -> Result<()> { - let mut group_values = item_columns - .iter() - .map(|col| ScalarValue::try_from_array(col, 0)) - .collect::>>()?; - let mut start_idx: usize = 0; - for _ in 0..self.window_frame_start_idx { - let next_group_and_start_index = - WindowFrameGroups::find_next_group_and_start_index( - item_columns, - &group_values, - start_idx, - )?; - if let Some(entry) = next_group_and_start_index { - (group_values, start_idx) = entry; - } else { - // not enough groups to generate a window frame - self.window_frame_end_idx = self.window_frame_start_idx; - self.reached_end = true; - return Ok(()); - } - } - self.group_start_indices - .push_back((group_values, start_idx)); - self.window_frame_end_idx = self.window_frame_start_idx + 1; - Ok(()) - } - - fn initialized(&self) -> bool { - self.reached_end || !self.group_start_indices.is_empty() - } - - /// This function advances the window frame by one group. - fn advance_one_group( - &mut self, - item_columns: &[ArrayRef], - ) -> Result<()> { - let last_group_values = self.group_start_indices.back(); - let last_group_values = if let Some(values) = last_group_values { - values - } else { - return Ok(()); - }; - let next_group_and_start_index = - WindowFrameGroups::find_next_group_and_start_index( - item_columns, - &last_group_values.0, - last_group_values.1, - )?; - if let Some(entry) = next_group_and_start_index { - self.group_start_indices.push_back(entry); - self.window_frame_end_idx += 1; - } else { - // not enough groups to proceed - self.reached_end = true; - } - Ok(()) - } - - /// This function drops the oldest group from the window frame. - fn shift_one_group(&mut self, delta: u64) { - let current_window_frame_start_idx = if !SEARCH_SIDE { - self.current_group_idx + delta - } else if self.current_group_idx >= delta { - self.current_group_idx - delta - } else { - 0 - }; - if current_window_frame_start_idx > self.window_frame_start_idx { - self.group_start_indices.pop_front(); - self.window_frame_start_idx += 1; - } - } - - /// This function is the main public interface of WindowFrameGroups. It is meant to be - /// called twice, in succession, to get window frame start and end indices (with `BISECT_SIDE` - /// supplied as false and true, respectively). - pub fn process( - &mut self, - current_row_values: &[ScalarValue], - delta: u64, - item_columns: &[ArrayRef], - length: usize, - ) -> Result { - if BISECT_SIDE { - // When we call this function to get the window frame start index, it tries to initialize - // the internal grouping state if this is not already done before. This initialization takes - // place only when the window frame start index is greater than or equal to zero. In this - // case, the current row is stored in group_start_indices, with row values as the group - // identifier and row index as the start index of the group. - if !self.initialized() { - self.initialize::(delta, item_columns)?; - } - } else if !self.reached_end { - // When we call this function to get the window frame end index, it extends the window - // frame one by one until the current row's window frame end index is reached by finding - // the next group. - self.extend_window_frame_if_necessary::(item_columns, delta)?; - } - // We keep track of previous row values, so that a group change can be identified. - // If there is a group change, the window frame is advanced and shifted by one group. - let group_change = match &self.previous_row_values { - None => false, - Some(values) => current_row_values != values, - }; - if self.previous_row_values.is_none() || group_change { - self.previous_row_values = Some(current_row_values.to_vec()); - } - if group_change { - self.current_group_idx += 1; - self.advance_one_group::(item_columns)?; - self.shift_one_group::(delta); - } - Ok(if self.group_start_indices.is_empty() { - if self.reached_end { - length - } else { - 0 - } - } else if BISECT_SIDE { - match self.group_start_indices.get(0) { - Some(&(_, idx)) => idx, - None => 0, - } - } else { - match (self.reached_end, self.group_start_indices.back()) { - (false, Some(&(_, idx))) => idx, - _ => length, - } - }) - } - - /// This function finds the next group and its start index for a given group and start index. - /// It utilizes an exponentially growing step size to find the group boundary. - // TODO: For small group sizes, proceeding one-by-one to find the group change can be more efficient. - // Statistics about previous group sizes can be used to choose one-by-one vs. exponentially growing, - // or even to set the base step_size when exponentially growing. We can also create a benchmark - // implementation to get insights about the crossover point. - fn find_next_group_and_start_index( - item_columns: &[ArrayRef], - current_row_values: &[ScalarValue], - idx: usize, - ) -> Result, usize)>> { - let mut step_size: usize = 1; - let data_size: usize = item_columns - .get(0) - .ok_or_else(|| { - DataFusionError::Internal("Column array shouldn't be empty".to_string()) - })? - .len(); - let mut low = idx; - let mut high = idx + step_size; - while high < data_size { - let val = item_columns - .iter() - .map(|arr| ScalarValue::try_from_array(arr, high)) - .collect::>>()?; - if val == current_row_values { - low = high; - step_size *= 2; - high += step_size; - } else { - break; - } - } - low = find_bisect_point( - item_columns, - current_row_values, - |current, to_compare| Ok(current == to_compare), - low, - cmp::min(high, data_size), - )?; - if low == data_size { - return Ok(None); - } - let val = item_columns - .iter() - .map(|arr| ScalarValue::try_from_array(arr, low)) - .collect::>>()?; - Ok(Some((val, low))) - } -} - -#[cfg(test)] -mod tests { - use arrow::array::Float64Array; - use datafusion_common::ScalarValue; - use std::sync::Arc; - - use crate::from_slice::FromSlice; - - use super::*; - - #[test] - fn test_find_next_group_and_start_index() { - const NUM_ROWS: usize = 6; - const NEXT_INDICES: [usize; 5] = [1, 2, 4, 4, 5]; - - let arrays: Vec = vec![ - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 9., 10.])), - Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 3., 4.0, 5.0])), - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 10., 11.0])), - Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 8., 5., 0.0])), - ]; - for (current_idx, next_idx) in NEXT_INDICES.iter().enumerate() { - let current_row_values = arrays - .iter() - .map(|col| ScalarValue::try_from_array(col, current_idx)) - .collect::>>() - .unwrap(); - let next_row_values = arrays - .iter() - .map(|col| ScalarValue::try_from_array(col, *next_idx)) - .collect::>>() - .unwrap(); - let res = WindowFrameGroups::find_next_group_and_start_index( - &arrays, - ¤t_row_values, - current_idx, - ) - .unwrap(); - assert_eq!(res, Some((next_row_values, *next_idx))); - } - let current_idx = NUM_ROWS - 1; - let current_row_values = arrays - .iter() - .map(|col| ScalarValue::try_from_array(col, current_idx)) - .collect::>>() - .unwrap(); - let res = WindowFrameGroups::find_next_group_and_start_index( - &arrays, - ¤t_row_values, - current_idx, - ) - .unwrap(); - assert_eq!(res, None); - } - - #[test] - fn test_window_frame_groups_preceding_huge_delta() { - const START: bool = true; - const END: bool = false; - const PRECEDING: bool = true; - const DELTA: u64 = 10; - const NUM_ROWS: usize = 5; - - let arrays: Vec = vec![ - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), - Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), - Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), - ]; - - let mut window_frame_groups = WindowFrameGroups::default(); - window_frame_groups - .initialize::(DELTA, &arrays) - .unwrap(); - assert_eq!(window_frame_groups.window_frame_start_idx, 0); - assert_eq!(window_frame_groups.window_frame_end_idx, 0); - assert!(!window_frame_groups.reached_end); - assert_eq!(window_frame_groups.group_start_indices.len(), 0); - - window_frame_groups - .extend_window_frame_if_necessary::(&arrays, DELTA) - .unwrap(); - assert_eq!(window_frame_groups.window_frame_start_idx, 0); - assert_eq!(window_frame_groups.window_frame_end_idx, 0); - assert!(!window_frame_groups.reached_end); - assert_eq!(window_frame_groups.group_start_indices.len(), 0); - - for idx in 0..NUM_ROWS { - let current_row_values = arrays - .iter() - .map(|col| ScalarValue::try_from_array(col, idx)) - .collect::>>() - .unwrap(); - let start = window_frame_groups - .process::( - ¤t_row_values, - DELTA, - &arrays, - NUM_ROWS, - ) - .unwrap(); - assert_eq!(start, 0); - let end = window_frame_groups - .process::(¤t_row_values, DELTA, &arrays, NUM_ROWS) - .unwrap(); - assert_eq!(end, 0); - } - } - - #[test] - fn test_window_frame_groups_following_huge_delta() { - const START: bool = true; - const END: bool = false; - const FOLLOWING: bool = false; - const DELTA: u64 = 10; - const NUM_ROWS: usize = 5; - - let arrays: Vec = vec![ - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), - Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), - Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), - ]; - - let mut window_frame_groups = WindowFrameGroups::default(); - window_frame_groups - .initialize::(DELTA, &arrays) - .unwrap(); - assert_eq!(window_frame_groups.window_frame_start_idx, DELTA); - assert_eq!(window_frame_groups.window_frame_end_idx, DELTA); - assert!(window_frame_groups.reached_end); - assert_eq!(window_frame_groups.group_start_indices.len(), 0); - - window_frame_groups - .extend_window_frame_if_necessary::(&arrays, DELTA) - .unwrap(); - assert_eq!(window_frame_groups.window_frame_start_idx, DELTA); - assert_eq!(window_frame_groups.window_frame_end_idx, DELTA); - assert!(window_frame_groups.reached_end); - assert_eq!(window_frame_groups.group_start_indices.len(), 0); - - for idx in 0..NUM_ROWS { - let current_row_values = arrays - .iter() - .map(|col| ScalarValue::try_from_array(col, idx)) - .collect::>>() - .unwrap(); - let start = window_frame_groups - .process::( - ¤t_row_values, - DELTA, - &arrays, - NUM_ROWS, - ) - .unwrap(); - assert_eq!(start, NUM_ROWS); - let end = window_frame_groups - .process::(¤t_row_values, DELTA, &arrays, NUM_ROWS) - .unwrap(); - assert_eq!(end, NUM_ROWS); - } - } - - #[test] - fn test_window_frame_groups_preceding_and_following_huge_delta() { - const START: bool = true; - const END: bool = false; - const FOLLOWING: bool = false; - const PRECEDING: bool = true; - const DELTA: u64 = 10; - const NUM_ROWS: usize = 5; - - let arrays: Vec = vec![ - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), - Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), - Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), - ]; - - let mut window_frame_groups = WindowFrameGroups::default(); - window_frame_groups - .initialize::(DELTA, &arrays) - .unwrap(); - assert_eq!(window_frame_groups.window_frame_start_idx, 0); - assert_eq!(window_frame_groups.window_frame_end_idx, 0); - assert!(!window_frame_groups.reached_end); - assert_eq!(window_frame_groups.group_start_indices.len(), 0); - - window_frame_groups - .extend_window_frame_if_necessary::(&arrays, DELTA) - .unwrap(); - assert_eq!(window_frame_groups.window_frame_start_idx, 0); - assert_eq!(window_frame_groups.window_frame_end_idx, NUM_ROWS as u64); - assert!(window_frame_groups.reached_end); - assert_eq!(window_frame_groups.group_start_indices.len(), NUM_ROWS); - - for idx in 0..NUM_ROWS { - let current_row_values = arrays - .iter() - .map(|col| ScalarValue::try_from_array(col, idx)) - .collect::>>() - .unwrap(); - let start = window_frame_groups - .process::( - ¤t_row_values, - DELTA, - &arrays, - NUM_ROWS, - ) - .unwrap(); - assert_eq!(start, 0); - let end = window_frame_groups - .process::(¤t_row_values, DELTA, &arrays, NUM_ROWS) - .unwrap(); - assert_eq!(end, NUM_ROWS); - } - } - - #[test] - fn test_window_frame_groups_preceding_and_following_1() { - const START: bool = true; - const END: bool = false; - const FOLLOWING: bool = false; - const PRECEDING: bool = true; - const DELTA: u64 = 1; - const NUM_ROWS: usize = 5; - - let arrays: Vec = vec![ - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), - Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), - Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), - ]; - - let mut window_frame_groups = WindowFrameGroups::default(); - window_frame_groups - .initialize::(DELTA, &arrays) - .unwrap(); - assert_eq!(window_frame_groups.window_frame_start_idx, 0); - assert_eq!(window_frame_groups.window_frame_end_idx, 0); - assert!(!window_frame_groups.reached_end); - assert_eq!(window_frame_groups.group_start_indices.len(), 0); - - window_frame_groups - .extend_window_frame_if_necessary::(&arrays, DELTA) - .unwrap(); - assert_eq!(window_frame_groups.window_frame_start_idx, 0); - assert_eq!(window_frame_groups.window_frame_end_idx, 2 * DELTA + 1); - assert!(!window_frame_groups.reached_end); - assert_eq!( - window_frame_groups.group_start_indices.len(), - 2 * DELTA as usize + 1 - ); - - for idx in 0..NUM_ROWS { - let current_row_values = arrays - .iter() - .map(|col| ScalarValue::try_from_array(col, idx)) - .collect::>>() - .unwrap(); - let start_idx = if idx < DELTA as usize { - 0 - } else { - idx - DELTA as usize - }; - let start = window_frame_groups - .process::( - ¤t_row_values, - DELTA, - &arrays, - NUM_ROWS, - ) - .unwrap(); - assert_eq!(start, start_idx); - let end_idx = if idx + 1 + DELTA as usize > NUM_ROWS { - NUM_ROWS - } else { - idx + 1 + DELTA as usize - }; - let end = window_frame_groups - .process::(¤t_row_values, DELTA, &arrays, NUM_ROWS) - .unwrap(); - assert_eq!(end, end_idx); - } - } - - #[test] - fn test_window_frame_groups_preceding_1_and_unbounded_following() { - const START: bool = true; - const PRECEDING: bool = true; - const DELTA: u64 = 1; - const NUM_ROWS: usize = 5; - - let arrays: Vec = vec![ - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), - Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), - Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), - ]; - - let mut window_frame_groups = WindowFrameGroups::default(); - window_frame_groups - .initialize::(DELTA, &arrays) - .unwrap(); - assert_eq!(window_frame_groups.window_frame_start_idx, 0); - assert_eq!(window_frame_groups.window_frame_end_idx, 0); - assert!(!window_frame_groups.reached_end); - assert_eq!(window_frame_groups.group_start_indices.len(), 0); - - for idx in 0..NUM_ROWS { - let current_row_values = arrays - .iter() - .map(|col| ScalarValue::try_from_array(col, idx)) - .collect::>>() - .unwrap(); - let start_idx = if idx < DELTA as usize { - 0 - } else { - idx - DELTA as usize - }; - let start = window_frame_groups - .process::( - ¤t_row_values, - DELTA, - &arrays, - NUM_ROWS, - ) - .unwrap(); - assert_eq!(start, start_idx); - } - } - - #[test] - fn test_window_frame_groups_current_and_unbounded_following() { - const START: bool = true; - const PRECEDING: bool = true; - const DELTA: u64 = 0; - const NUM_ROWS: usize = 5; - - let arrays: Vec = vec![ - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), - Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), - Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), - Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), - ]; - - let mut window_frame_groups = WindowFrameGroups::default(); - window_frame_groups - .initialize::(DELTA, &arrays) - .unwrap(); - assert_eq!(window_frame_groups.window_frame_start_idx, 0); - assert_eq!(window_frame_groups.window_frame_end_idx, 1); - assert!(!window_frame_groups.reached_end); - assert_eq!(window_frame_groups.group_start_indices.len(), 1); - - for idx in 0..NUM_ROWS { - let current_row_values = arrays - .iter() - .map(|col| ScalarValue::try_from_array(col, idx)) - .collect::>>() - .unwrap(); - - let start = window_frame_groups - .process::( - ¤t_row_values, - DELTA, - &arrays, - NUM_ROWS, - ) - .unwrap(); - assert_eq!(start, idx); - } - } -} diff --git a/datafusion/physical-expr/src/window/mod.rs b/datafusion/physical-expr/src/window/mod.rs index 7de388b621cc..40ed658ee38a 100644 --- a/datafusion/physical-expr/src/window/mod.rs +++ b/datafusion/physical-expr/src/window/mod.rs @@ -19,13 +19,13 @@ mod aggregate; mod built_in; mod built_in_window_function_expr; pub(crate) mod cume_dist; -mod groups; pub(crate) mod lead_lag; pub(crate) mod nth_value; pub(crate) mod partition_evaluator; pub(crate) mod rank; pub(crate) mod row_number; mod window_expr; +mod window_frame_state; pub use aggregate::AggregateWindowExpr; pub use built_in::BuiltInWindowExpr; diff --git a/datafusion/physical-expr/src/window/window_expr.rs b/datafusion/physical-expr/src/window/window_expr.rs index 45352c0409d9..67caba51dcab 100644 --- a/datafusion/physical-expr/src/window/window_expr.rs +++ b/datafusion/physical-expr/src/window/window_expr.rs @@ -20,19 +20,12 @@ use arrow::compute::kernels::partition::lexicographical_partition_ranges; use arrow::compute::kernels::sort::{SortColumn, SortOptions}; use arrow::record_batch::RecordBatch; use arrow::{array::ArrayRef, datatypes::Field}; -use datafusion_common::bisect::bisect; -use datafusion_common::{DataFusionError, Result, ScalarValue}; +use datafusion_common::{DataFusionError, Result}; use std::any::Any; -use std::cmp::min; use std::fmt::Debug; use std::ops::Range; use std::sync::Arc; -use datafusion_expr::WindowFrameBound; -use datafusion_expr::{WindowFrame, WindowFrameUnits}; - -use crate::window::groups::WindowFrameGroups; - /// A window expression that: /// * knows its resulting field pub trait WindowExpr: Send + Sync + Debug { @@ -117,317 +110,4 @@ pub trait WindowExpr: Send + Sync + Debug { sort_columns.extend(order_by_columns); Ok(sort_columns) } - - /// We use start and end bounds to calculate current row's starting and ending range. - /// This function supports different modes. - fn calculate_range( - &self, - window_frame: &Option>, - window_frame_groups: &mut WindowFrameGroups, - range_columns: &[ArrayRef], - sort_options: &[SortOptions], - length: usize, - idx: usize, - ) -> Result<(usize, usize)> { - if let Some(window_frame) = window_frame { - match window_frame.units { - WindowFrameUnits::Range => { - let start = match &window_frame.start_bound { - WindowFrameBound::Preceding(n) => { - if n.is_null() { - // UNBOUNDED PRECEDING - 0 - } else { - calculate_index_of_row::( - range_columns, - sort_options, - idx, - Some(n), - )? - } - } - WindowFrameBound::CurrentRow => { - if range_columns.is_empty() { - 0 - } else { - calculate_index_of_row::( - range_columns, - sort_options, - idx, - None, - )? - } - } - WindowFrameBound::Following(n) => { - calculate_index_of_row::( - range_columns, - sort_options, - idx, - Some(n), - )? - } - }; - let end = match &window_frame.end_bound { - WindowFrameBound::Preceding(n) => { - calculate_index_of_row::( - range_columns, - sort_options, - idx, - Some(n), - )? - } - WindowFrameBound::CurrentRow => { - if range_columns.is_empty() { - length - } else { - calculate_index_of_row::( - range_columns, - sort_options, - idx, - None, - )? - } - } - WindowFrameBound::Following(n) => { - if n.is_null() { - // UNBOUNDED FOLLOWING - length - } else { - calculate_index_of_row::( - range_columns, - sort_options, - idx, - Some(n), - )? - } - } - }; - Ok((start, end)) - } - WindowFrameUnits::Rows => { - let start = match window_frame.start_bound { - // UNBOUNDED PRECEDING - WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0, - WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { - if idx >= n as usize { - idx - n as usize - } else { - 0 - } - } - WindowFrameBound::CurrentRow => idx, - // UNBOUNDED FOLLOWING - WindowFrameBound::Following(ScalarValue::UInt64(None)) => { - return Err(DataFusionError::Internal(format!( - "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'", - window_frame - ))) - } - WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { - min(idx + n as usize, length) - } - // ERRONEOUS FRAMES - WindowFrameBound::Preceding(_) - | WindowFrameBound::Following(_) => { - return Err(DataFusionError::Internal( - "Rows should be Uint".to_string(), - )) - } - }; - let end = match window_frame.end_bound { - // UNBOUNDED PRECEDING - WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => { - return Err(DataFusionError::Internal(format!( - "Frame end cannot be UNBOUNDED PRECEDING '{:?}'", - window_frame - ))) - } - WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { - if idx >= n as usize { - idx - n as usize + 1 - } else { - 0 - } - } - WindowFrameBound::CurrentRow => idx + 1, - // UNBOUNDED FOLLOWING - WindowFrameBound::Following(ScalarValue::UInt64(None)) => length, - WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { - min(idx + n as usize + 1, length) - } - // ERRONEOUS FRAMES - WindowFrameBound::Preceding(_) - | WindowFrameBound::Following(_) => { - return Err(DataFusionError::Internal( - "Rows should be Uint".to_string(), - )) - } - }; - Ok((start, end)) - } - WindowFrameUnits::Groups => { - if range_columns.is_empty() { - return Err(DataFusionError::Execution( - "GROUPS mode requires an ORDER BY clause".to_string(), - )); - } - let start = match window_frame.start_bound { - // UNBOUNDED PRECEDING - WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0, - WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { - calculate_index_of_group::( - range_columns, - window_frame_groups, - idx, - n, - length, - )? - } - WindowFrameBound::CurrentRow => { - calculate_index_of_group::( - range_columns, - window_frame_groups, - idx, - 0, - length, - )? - } - WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { - calculate_index_of_group::( - range_columns, - window_frame_groups, - idx, - n, - length, - )? - } - // UNBOUNDED FOLLOWING - WindowFrameBound::Following(ScalarValue::UInt64(None)) => { - return Err(DataFusionError::Internal(format!( - "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'", - window_frame - ))) - } - // ERRONEOUS FRAMES - WindowFrameBound::Preceding(_) - | WindowFrameBound::Following(_) => { - return Err(DataFusionError::Internal( - "Groups should be Uint".to_string(), - )) - } - }; - let end = match window_frame.end_bound { - // UNBOUNDED PRECEDING - WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => { - return Err(DataFusionError::Internal(format!( - "Frame end cannot be UNBOUNDED PRECEDING '{:?}'", - window_frame - ))) - } - WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { - calculate_index_of_group::( - range_columns, - window_frame_groups, - idx, - n, - length, - )? - } - WindowFrameBound::CurrentRow => { - calculate_index_of_group::( - range_columns, - window_frame_groups, - idx, - 0, - length, - )? - } - WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { - calculate_index_of_group::( - range_columns, - window_frame_groups, - idx, - n, - length, - )? - } - // UNBOUNDED FOLLOWING - WindowFrameBound::Following(ScalarValue::UInt64(None)) => length, - // ERRONEOUS FRAMES - WindowFrameBound::Preceding(_) - | WindowFrameBound::Following(_) => { - return Err(DataFusionError::Internal( - "Groups should be Uint".to_string(), - )) - } - }; - Ok((start, end)) - } - } - } else { - Ok((0, length)) - } - } -} - -fn calculate_index_of_row( - range_columns: &[ArrayRef], - sort_options: &[SortOptions], - idx: usize, - delta: Option<&ScalarValue>, -) -> Result { - let current_row_values = range_columns - .iter() - .map(|col| ScalarValue::try_from_array(col, idx)) - .collect::>>()?; - let end_range = if let Some(delta) = delta { - let is_descending: bool = sort_options - .first() - .ok_or_else(|| DataFusionError::Internal("Array is empty".to_string()))? - .descending; - - current_row_values - .iter() - .map(|value| { - if value.is_null() { - return Ok(value.clone()); - } - if SEARCH_SIDE == is_descending { - // TODO: Handle positive overflows - value.add(delta) - } else if value.is_unsigned() && value < delta { - // NOTE: This gets a polymorphic zero without having long coercion code for ScalarValue. - // If we decide to implement a "default" construction mechanism for ScalarValue, - // change the following statement to use that. - value.sub(value) - } else { - // TODO: Handle negative overflows - value.sub(delta) - } - }) - .collect::>>()? - } else { - current_row_values - }; - // `BISECT_SIDE` true means bisect_left, false means bisect_right - bisect::(range_columns, &end_range, sort_options) -} - -fn calculate_index_of_group( - range_columns: &[ArrayRef], - window_frame_groups: &mut WindowFrameGroups, - idx: usize, - delta: u64, - length: usize, -) -> Result { - let current_row_values = range_columns - .iter() - .map(|col| ScalarValue::try_from_array(col, idx)) - .collect::>>()?; - window_frame_groups.process::( - ¤t_row_values, - delta, - range_columns, - length, - ) } diff --git a/datafusion/physical-expr/src/window/window_frame_state.rs b/datafusion/physical-expr/src/window/window_frame_state.rs new file mode 100644 index 000000000000..1e3ba4310bc2 --- /dev/null +++ b/datafusion/physical-expr/src/window/window_frame_state.rs @@ -0,0 +1,1033 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This module provides utilities for window frame index calculations depending on the window frame mode: +//! RANGE, ROWS, GROUPS. + +use arrow::array::ArrayRef; +use arrow::compute::kernels::sort::SortOptions; +use datafusion_common::bisect::{bisect, find_bisect_point}; +use datafusion_common::{DataFusionError, Result, ScalarValue}; +use datafusion_expr::{WindowFrame, WindowFrameBound, WindowFrameUnits}; +use std::cmp::min; +use std::collections::VecDeque; +use std::fmt::Debug; +use std::sync::Arc; + +pub trait WindowFrameStateImpl: Debug { + /// We use start and end bounds to calculate current row's starting and ending range. + fn calculate_range( + &mut self, + window_frame: &Option>, + range_columns: &[ArrayRef], + sort_options: &[SortOptions], + length: usize, + idx: usize, + ) -> Result<(usize, usize)>; +} + +#[derive(Debug)] +pub enum WindowFrameState { + Rows(WindowFrameStateRows), + Range(WindowFrameStateRange), + Groups(WindowFrameStateGroups), + NoFrame(WindowFrameStateNoFrame), +} + +impl WindowFrameStateImpl for WindowFrameState { + fn calculate_range( + &mut self, + window_frame: &Option>, + range_columns: &[ArrayRef], + sort_options: &[SortOptions], + length: usize, + idx: usize, + ) -> Result<(usize, usize)> { + match *self { + WindowFrameState::Rows(ref mut frame) => frame.calculate_range( + window_frame, + range_columns, + sort_options, + length, + idx, + ), + WindowFrameState::Range(ref mut frame) => frame.calculate_range( + window_frame, + range_columns, + sort_options, + length, + idx, + ), + WindowFrameState::Groups(ref mut frame) => frame.calculate_range( + window_frame, + range_columns, + sort_options, + length, + idx, + ), + WindowFrameState::NoFrame(ref mut frame) => frame.calculate_range( + window_frame, + range_columns, + sort_options, + length, + idx, + ), + } + } +} + +impl WindowFrameState { + pub fn new(window_frame: &Option>) -> WindowFrameState { + if let Some(window_frame) = window_frame { + match window_frame.units { + WindowFrameUnits::Rows => { + WindowFrameState::Rows(WindowFrameStateRows::default()) + } + WindowFrameUnits::Range => { + WindowFrameState::Range(WindowFrameStateRange::default()) + } + WindowFrameUnits::Groups => { + WindowFrameState::Groups(WindowFrameStateGroups::default()) + } + } + } else { + WindowFrameState::NoFrame(WindowFrameStateNoFrame::default()) + } + } +} + +#[derive(Debug, Default)] +pub struct WindowFrameStateNoFrame {} + +impl WindowFrameStateImpl for WindowFrameStateNoFrame { + fn calculate_range( + &mut self, + _window_frame: &Option>, + _range_columns: &[ArrayRef], + _sort_options: &[SortOptions], + length: usize, + _idx: usize, + ) -> Result<(usize, usize)> { + Ok((0, length)) + } +} + +#[derive(Debug, Default)] +pub struct WindowFrameStateRows {} + +impl WindowFrameStateImpl for WindowFrameStateRows { + fn calculate_range( + &mut self, + window_frame: &Option>, + _range_columns: &[ArrayRef], + _sort_options: &[SortOptions], + length: usize, + idx: usize, + ) -> Result<(usize, usize)> { + if let Some(window_frame) = window_frame { + let start = match window_frame.start_bound { + // UNBOUNDED PRECEDING + WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0, + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { + if idx >= n as usize { + idx - n as usize + } else { + 0 + } + } + WindowFrameBound::CurrentRow => idx, + // UNBOUNDED FOLLOWING + WindowFrameBound::Following(ScalarValue::UInt64(None)) => { + return Err(DataFusionError::Internal(format!( + "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'", + window_frame + ))) + } + WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { + min(idx + n as usize, length) + } + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { + return Err(DataFusionError::Internal( + "Rows should be Uint".to_string(), + )) + } + }; + let end = match window_frame.end_bound { + // UNBOUNDED PRECEDING + WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => { + return Err(DataFusionError::Internal(format!( + "Frame end cannot be UNBOUNDED PRECEDING '{:?}'", + window_frame + ))) + } + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { + if idx >= n as usize { + idx - n as usize + 1 + } else { + 0 + } + } + WindowFrameBound::CurrentRow => idx + 1, + // UNBOUNDED FOLLOWING + WindowFrameBound::Following(ScalarValue::UInt64(None)) => length, + WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { + min(idx + n as usize + 1, length) + } + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { + return Err(DataFusionError::Internal( + "Rows should be Uint".to_string(), + )) + } + }; + Ok((start, end)) + } else { + Ok((0, length)) + } + } +} + +/// This structure encapsulates all the state information we require as we +/// scan ranges of data while processing window frames. +#[derive(Debug, Default)] +pub struct WindowFrameStateRange {} + +impl WindowFrameStateImpl for WindowFrameStateRange { + fn calculate_range( + &mut self, + window_frame: &Option>, + range_columns: &[ArrayRef], + sort_options: &[SortOptions], + length: usize, + idx: usize, + ) -> Result<(usize, usize)> { + if let Some(window_frame) = window_frame { + let start = match &window_frame.start_bound { + WindowFrameBound::Preceding(n) => { + if n.is_null() { + // UNBOUNDED PRECEDING + 0 + } else { + self.calculate_index_of_row::( + range_columns, + sort_options, + idx, + Some(n), + )? + } + } + WindowFrameBound::CurrentRow => { + if range_columns.is_empty() { + 0 + } else { + self.calculate_index_of_row::( + range_columns, + sort_options, + idx, + None, + )? + } + } + WindowFrameBound::Following(n) => self + .calculate_index_of_row::( + range_columns, + sort_options, + idx, + Some(n), + )?, + }; + let end = match &window_frame.end_bound { + WindowFrameBound::Preceding(n) => self + .calculate_index_of_row::( + range_columns, + sort_options, + idx, + Some(n), + )?, + WindowFrameBound::CurrentRow => { + if range_columns.is_empty() { + length + } else { + self.calculate_index_of_row::( + range_columns, + sort_options, + idx, + None, + )? + } + } + WindowFrameBound::Following(n) => { + if n.is_null() { + // UNBOUNDED FOLLOWING + length + } else { + self.calculate_index_of_row::( + range_columns, + sort_options, + idx, + Some(n), + )? + } + } + }; + Ok((start, end)) + } else { + Ok((0, length)) + } + } +} + +impl WindowFrameStateRange { + fn calculate_index_of_row( + &mut self, + range_columns: &[ArrayRef], + sort_options: &[SortOptions], + idx: usize, + delta: Option<&ScalarValue>, + ) -> Result { + let current_row_values = range_columns + .iter() + .map(|col| ScalarValue::try_from_array(col, idx)) + .collect::>>()?; + let end_range = if let Some(delta) = delta { + let is_descending: bool = sort_options + .first() + .ok_or_else(|| DataFusionError::Internal("Array is empty".to_string()))? + .descending; + + current_row_values + .iter() + .map(|value| { + if value.is_null() { + return Ok(value.clone()); + } + if SEARCH_SIDE == is_descending { + // TODO: Handle positive overflows + value.add(delta) + } else if value.is_unsigned() && value < delta { + // NOTE: This gets a polymorphic zero without having long coercion code for ScalarValue. + // If we decide to implement a "default" construction mechanism for ScalarValue, + // change the following statement to use that. + value.sub(value) + } else { + // TODO: Handle negative overflows + value.sub(delta) + } + }) + .collect::>>()? + } else { + current_row_values + }; + // `BISECT_SIDE` true means bisect_left, false means bisect_right + bisect::(range_columns, &end_range, sort_options) + } +} + +/// In GROUPS mode, rows with duplicate sorting values are grouped together. +/// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode. +/// The syntax is as follows: +/// GROUPS frame_start [ frame_exclusion ] +/// GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ] +/// The optional frame_exclusion specifier is not yet supported. +/// The frame_start and frame_end parameters allow us to specify which rows the window +/// frame starts and ends with. They accept the following values: +/// - UNBOUNDED PRECEDING: Start with the first row of the partition. Possible only in frame_start. +/// - offset PRECEDING: When used in frame_start, it refers to the first row of the group +/// that comes "offset" groups before the current group (i.e. the group +/// containing the current row). When used in frame_end, it refers to the +/// last row of the group that comes "offset" groups before the current group. +/// - CURRENT ROW: When used in frame_start, it refers to the first row of the group containing +/// the current row. When used in frame_end, it refers to the last row of the group +/// containing the current row. +/// - offset FOLLOWING: When used in frame_start, it refers to the first row of the group +/// that comes "offset" groups after the current group (i.e. the group +/// containing the current row). When used in frame_end, it refers to the +/// last row of the group that comes "offset" groups after the current group. +/// - UNBOUNDED FOLLOWING: End with the last row of the partition. Possible only in frame_end. + +/// This structure encapsulates all the state information we require as we +/// scan groups of data while processing window frames. + +#[derive(Debug, Default)] +pub struct WindowFrameStateGroups { + current_group_idx: u64, + group_start_indices: VecDeque<(Vec, usize)>, + previous_row_values: Option>, + reached_end: bool, + window_frame_end_idx: u64, + window_frame_start_idx: u64, +} + +impl WindowFrameStateImpl for WindowFrameStateGroups { + fn calculate_range( + &mut self, + window_frame: &Option>, + range_columns: &[ArrayRef], + _sort_options: &[SortOptions], + length: usize, + idx: usize, + ) -> Result<(usize, usize)> { + if range_columns.is_empty() { + return Err(DataFusionError::Execution( + "GROUPS mode requires an ORDER BY clause".to_string(), + )); + } + if let Some(window_frame) = window_frame { + let start = match window_frame.start_bound { + // UNBOUNDED PRECEDING + WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0, + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self + .calculate_index_of_group::( + range_columns, + idx, + n, + length, + )?, + WindowFrameBound::CurrentRow => self + .calculate_index_of_group::( + range_columns, + idx, + 0, + length, + )?, + WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self + .calculate_index_of_group::( + range_columns, + idx, + n, + length, + )?, + // UNBOUNDED FOLLOWING + WindowFrameBound::Following(ScalarValue::UInt64(None)) => { + return Err(DataFusionError::Internal(format!( + "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'", + window_frame + ))) + } + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { + return Err(DataFusionError::Internal( + "Groups should be Uint".to_string(), + )) + } + }; + let end = match window_frame.end_bound { + // UNBOUNDED PRECEDING + WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => { + return Err(DataFusionError::Internal(format!( + "Frame end cannot be UNBOUNDED PRECEDING '{:?}'", + window_frame + ))) + } + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self + .calculate_index_of_group::( + range_columns, + idx, + n, + length, + )?, + WindowFrameBound::CurrentRow => self + .calculate_index_of_group::( + range_columns, + idx, + 0, + length, + )?, + WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self + .calculate_index_of_group::( + range_columns, + idx, + n, + length, + )?, + // UNBOUNDED FOLLOWING + WindowFrameBound::Following(ScalarValue::UInt64(None)) => length, + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { + return Err(DataFusionError::Internal( + "Groups should be Uint".to_string(), + )) + } + }; + Ok((start, end)) + } else { + Ok((0, length)) + } + } +} + +impl WindowFrameStateGroups { + /// This function is the main public interface of WindowFrameStateGroups. It is meant to be + /// called twice, in succession, to get window frame start and end indices (with `BISECT_SIDE` + /// supplied as false and true, respectively). + pub fn calculate_index_of_group( + &mut self, + range_columns: &[ArrayRef], + idx: usize, + delta: u64, + length: usize, + ) -> Result { + let current_row_values = range_columns + .iter() + .map(|col| ScalarValue::try_from_array(col, idx)) + .collect::>>()?; + + if BISECT_SIDE { + // When we call this function to get the window frame start index, it tries to initialize + // the internal grouping state if this is not already done before. This initialization takes + // place only when the window frame start index is greater than or equal to zero. In this + // case, the current row is stored in group_start_indices, with row values as the group + // identifier and row index as the start index of the group. + if !self.initialized() { + self.initialize::(delta, range_columns)?; + } + } else if !self.reached_end { + // When we call this function to get the window frame end index, it extends the window + // frame one by one until the current row's window frame end index is reached by finding + // the next group. + self.extend_window_frame_if_necessary::(range_columns, delta)?; + } + // We keep track of previous row values, so that a group change can be identified. + // If there is a group change, the window frame is advanced and shifted by one group. + let group_change = match &self.previous_row_values { + None => false, + Some(values) => ¤t_row_values != values, + }; + if self.previous_row_values.is_none() || group_change { + self.previous_row_values = Some(current_row_values); + } + if group_change { + self.current_group_idx += 1; + self.advance_one_group::(range_columns)?; + self.shift_one_group::(delta); + } + Ok(if self.group_start_indices.is_empty() { + if self.reached_end { + length + } else { + 0 + } + } else if BISECT_SIDE { + match self.group_start_indices.get(0) { + Some(&(_, idx)) => idx, + None => 0, + } + } else { + match (self.reached_end, self.group_start_indices.back()) { + (false, Some(&(_, idx))) => idx, + _ => length, + } + }) + } + + fn extend_window_frame_if_necessary( + &mut self, + range_columns: &[ArrayRef], + delta: u64, + ) -> Result<()> { + let current_window_frame_end_idx = if !SEARCH_SIDE { + self.current_group_idx + delta + 1 + } else if self.current_group_idx >= delta { + self.current_group_idx - delta + 1 + } else { + 0 + }; + if current_window_frame_end_idx == 0 { + // the end index of the window frame is still before the first index + return Ok(()); + } + if self.group_start_indices.is_empty() { + self.initialize_window_frame_start(range_columns)?; + } + while !self.reached_end + && self.window_frame_end_idx <= current_window_frame_end_idx + { + self.advance_one_group::(range_columns)?; + } + Ok(()) + } + + fn initialize( + &mut self, + delta: u64, + range_columns: &[ArrayRef], + ) -> Result<()> { + if !SEARCH_SIDE { + self.window_frame_start_idx = self.current_group_idx + delta; + self.initialize_window_frame_start(range_columns) + } else if self.current_group_idx >= delta { + self.window_frame_start_idx = self.current_group_idx - delta; + self.initialize_window_frame_start(range_columns) + } else { + Ok(()) + } + } + + fn initialize_window_frame_start( + &mut self, + range_columns: &[ArrayRef], + ) -> Result<()> { + let mut group_values = range_columns + .iter() + .map(|col| ScalarValue::try_from_array(col, 0)) + .collect::>>()?; + let mut start_idx: usize = 0; + for _ in 0..self.window_frame_start_idx { + let next_group_and_start_index = + WindowFrameStateGroups::find_next_group_and_start_index( + range_columns, + &group_values, + start_idx, + )?; + if let Some(entry) = next_group_and_start_index { + (group_values, start_idx) = entry; + } else { + // not enough groups to generate a window frame + self.window_frame_end_idx = self.window_frame_start_idx; + self.reached_end = true; + return Ok(()); + } + } + self.group_start_indices + .push_back((group_values, start_idx)); + self.window_frame_end_idx = self.window_frame_start_idx + 1; + Ok(()) + } + + fn initialized(&self) -> bool { + self.reached_end || !self.group_start_indices.is_empty() + } + + /// This function advances the window frame by one group. + fn advance_one_group( + &mut self, + range_columns: &[ArrayRef], + ) -> Result<()> { + let last_group_values = self.group_start_indices.back(); + let last_group_values = if let Some(values) = last_group_values { + values + } else { + return Ok(()); + }; + let next_group_and_start_index = + WindowFrameStateGroups::find_next_group_and_start_index( + range_columns, + &last_group_values.0, + last_group_values.1, + )?; + if let Some(entry) = next_group_and_start_index { + self.group_start_indices.push_back(entry); + self.window_frame_end_idx += 1; + } else { + // not enough groups to proceed + self.reached_end = true; + } + Ok(()) + } + + /// This function drops the oldest group from the window frame. + fn shift_one_group(&mut self, delta: u64) { + let current_window_frame_start_idx = if !SEARCH_SIDE { + self.current_group_idx + delta + } else if self.current_group_idx >= delta { + self.current_group_idx - delta + } else { + 0 + }; + if current_window_frame_start_idx > self.window_frame_start_idx { + self.group_start_indices.pop_front(); + self.window_frame_start_idx += 1; + } + } + + /// This function finds the next group and its start index for a given group and start index. + /// It utilizes an exponentially growing step size to find the group boundary. + // TODO: For small group sizes, proceeding one-by-one to find the group change can be more efficient. + // Statistics about previous group sizes can be used to choose one-by-one vs. exponentially growing, + // or even to set the base step_size when exponentially growing. We can also create a benchmark + // implementation to get insights about the crossover point. + fn find_next_group_and_start_index( + range_columns: &[ArrayRef], + current_row_values: &[ScalarValue], + idx: usize, + ) -> Result, usize)>> { + let mut step_size: usize = 1; + let data_size: usize = range_columns + .get(0) + .ok_or_else(|| { + DataFusionError::Internal("Column array shouldn't be empty".to_string()) + })? + .len(); + let mut low = idx; + let mut high = idx + step_size; + while high < data_size { + let val = range_columns + .iter() + .map(|arr| ScalarValue::try_from_array(arr, high)) + .collect::>>()?; + if val == current_row_values { + low = high; + step_size *= 2; + high += step_size; + } else { + break; + } + } + low = find_bisect_point( + range_columns, + current_row_values, + |current, to_compare| Ok(current == to_compare), + low, + min(high, data_size), + )?; + if low == data_size { + return Ok(None); + } + let val = range_columns + .iter() + .map(|arr| ScalarValue::try_from_array(arr, low)) + .collect::>>()?; + Ok(Some((val, low))) + } +} + +#[cfg(test)] +mod tests { + use arrow::array::Float64Array; + use datafusion_common::ScalarValue; + use std::sync::Arc; + + use crate::from_slice::FromSlice; + + use super::*; + + #[test] + fn test_find_next_group_and_start_index() { + const NUM_ROWS: usize = 6; + const NEXT_INDICES: [usize; 5] = [1, 2, 4, 4, 5]; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 3., 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 8., 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 8., 5., 0.0])), + ]; + for (current_idx, next_idx) in NEXT_INDICES.iter().enumerate() { + let current_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, current_idx)) + .collect::>>() + .unwrap(); + let next_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, *next_idx)) + .collect::>>() + .unwrap(); + let res = WindowFrameStateGroups::find_next_group_and_start_index( + &arrays, + ¤t_row_values, + current_idx, + ) + .unwrap(); + assert_eq!(res, Some((next_row_values, *next_idx))); + } + let current_idx = NUM_ROWS - 1; + let current_row_values = arrays + .iter() + .map(|col| ScalarValue::try_from_array(col, current_idx)) + .collect::>>() + .unwrap(); + let res = WindowFrameStateGroups::find_next_group_and_start_index( + &arrays, + ¤t_row_values, + current_idx, + ) + .unwrap(); + assert_eq!(res, None); + } + + #[test] + fn test_window_frame_groups_preceding_huge_delta() { + const START: bool = true; + const END: bool = false; + const PRECEDING: bool = true; + const DELTA: u64 = 10; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameStateGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 0); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + window_frame_groups + .extend_window_frame_if_necessary::(&arrays, DELTA) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 0); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + for idx in 0..NUM_ROWS { + let start = window_frame_groups + .calculate_index_of_group::( + &arrays, idx, DELTA, NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, 0); + let end = window_frame_groups + .calculate_index_of_group::(&arrays, idx, DELTA, NUM_ROWS) + .unwrap(); + assert_eq!(end, 0); + } + } + + #[test] + fn test_window_frame_groups_following_huge_delta() { + const START: bool = true; + const END: bool = false; + const FOLLOWING: bool = false; + const DELTA: u64 = 10; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameStateGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, DELTA); + assert_eq!(window_frame_groups.window_frame_end_idx, DELTA); + assert!(window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + window_frame_groups + .extend_window_frame_if_necessary::(&arrays, DELTA) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, DELTA); + assert_eq!(window_frame_groups.window_frame_end_idx, DELTA); + assert!(window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + for idx in 0..NUM_ROWS { + let start = window_frame_groups + .calculate_index_of_group::( + &arrays, idx, DELTA, NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, NUM_ROWS); + let end = window_frame_groups + .calculate_index_of_group::(&arrays, idx, DELTA, NUM_ROWS) + .unwrap(); + assert_eq!(end, NUM_ROWS); + } + } + + #[test] + fn test_window_frame_groups_preceding_and_following_huge_delta() { + const START: bool = true; + const END: bool = false; + const FOLLOWING: bool = false; + const PRECEDING: bool = true; + const DELTA: u64 = 10; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameStateGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 0); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + window_frame_groups + .extend_window_frame_if_necessary::(&arrays, DELTA) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, NUM_ROWS as u64); + assert!(window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), NUM_ROWS); + + for idx in 0..NUM_ROWS { + let start = window_frame_groups + .calculate_index_of_group::( + &arrays, idx, DELTA, NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, 0); + let end = window_frame_groups + .calculate_index_of_group::(&arrays, idx, DELTA, NUM_ROWS) + .unwrap(); + assert_eq!(end, NUM_ROWS); + } + } + + #[test] + fn test_window_frame_groups_preceding_and_following_1() { + const START: bool = true; + const END: bool = false; + const FOLLOWING: bool = false; + const PRECEDING: bool = true; + const DELTA: u64 = 1; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameStateGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 0); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + window_frame_groups + .extend_window_frame_if_necessary::(&arrays, DELTA) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 2 * DELTA + 1); + assert!(!window_frame_groups.reached_end); + assert_eq!( + window_frame_groups.group_start_indices.len(), + 2 * DELTA as usize + 1 + ); + + for idx in 0..NUM_ROWS { + let start_idx = if idx < DELTA as usize { + 0 + } else { + idx - DELTA as usize + }; + let start = window_frame_groups + .calculate_index_of_group::( + &arrays, idx, DELTA, NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, start_idx); + let end_idx = if idx + 1 + DELTA as usize > NUM_ROWS { + NUM_ROWS + } else { + idx + 1 + DELTA as usize + }; + let end = window_frame_groups + .calculate_index_of_group::(&arrays, idx, DELTA, NUM_ROWS) + .unwrap(); + assert_eq!(end, end_idx); + } + } + + #[test] + fn test_window_frame_groups_preceding_1_and_unbounded_following() { + const START: bool = true; + const PRECEDING: bool = true; + const DELTA: u64 = 1; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameStateGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 0); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 0); + + for idx in 0..NUM_ROWS { + let start_idx = if idx < DELTA as usize { + 0 + } else { + idx - DELTA as usize + }; + let start = window_frame_groups + .calculate_index_of_group::( + &arrays, idx, DELTA, NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, start_idx); + } + } + + #[test] + fn test_window_frame_groups_current_and_unbounded_following() { + const START: bool = true; + const PRECEDING: bool = true; + const DELTA: u64 = 0; + const NUM_ROWS: usize = 5; + + let arrays: Vec = vec![ + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 9., 10.])), + Arc::new(Float64Array::from_slice([2.0, 3.0, 3.0, 4.0, 5.0])), + Arc::new(Float64Array::from_slice([5.0, 7.0, 8.0, 10., 11.0])), + Arc::new(Float64Array::from_slice([15.0, 13.0, 8.0, 5., 0.0])), + ]; + + let mut window_frame_groups = WindowFrameStateGroups::default(); + window_frame_groups + .initialize::(DELTA, &arrays) + .unwrap(); + assert_eq!(window_frame_groups.window_frame_start_idx, 0); + assert_eq!(window_frame_groups.window_frame_end_idx, 1); + assert!(!window_frame_groups.reached_end); + assert_eq!(window_frame_groups.group_start_indices.len(), 1); + + for idx in 0..NUM_ROWS { + let start = window_frame_groups + .calculate_index_of_group::( + &arrays, idx, DELTA, NUM_ROWS, + ) + .unwrap(); + assert_eq!(start, idx); + } + } +} From b757d626c7dcc2bf16d1ec3d5526ebaa93eacd8b Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Wed, 9 Nov 2022 16:39:39 +0300 Subject: [PATCH 6/7] Do not use unnecessary traits and structs for window frame states --- .../physical-expr/src/window/aggregate.rs | 2 +- .../physical-expr/src/window/built_in.rs | 2 +- .../src/window/window_frame_state.rs | 124 +++++++----------- 3 files changed, 49 insertions(+), 79 deletions(-) diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs index 0fc13c1f4683..672bd539036d 100644 --- a/datafusion/physical-expr/src/window/aggregate.rs +++ b/datafusion/physical-expr/src/window/aggregate.rs @@ -33,7 +33,7 @@ use datafusion_expr::WindowFrame; use crate::{expressions::PhysicalSortExpr, PhysicalExpr}; use crate::{window::WindowExpr, AggregateExpr}; -use super::window_frame_state::{WindowFrameState, WindowFrameStateImpl}; +use super::window_frame_state::WindowFrameState; /// A window expr that takes the form of an aggregate function #[derive(Debug)] diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs index 04cbf4f523ad..126b77fcf2f6 100644 --- a/datafusion/physical-expr/src/window/built_in.rs +++ b/datafusion/physical-expr/src/window/built_in.rs @@ -17,7 +17,7 @@ //! Physical exec for built-in window function expressions. -use super::window_frame_state::{WindowFrameState, WindowFrameStateImpl}; +use super::window_frame_state::WindowFrameState; use super::BuiltInWindowFunctionExpr; use super::WindowExpr; use crate::{expressions::PhysicalSortExpr, PhysicalExpr}; diff --git a/datafusion/physical-expr/src/window/window_frame_state.rs b/datafusion/physical-expr/src/window/window_frame_state.rs index 1e3ba4310bc2..931ee232aac3 100644 --- a/datafusion/physical-expr/src/window/window_frame_state.rs +++ b/datafusion/physical-expr/src/window/window_frame_state.rs @@ -28,28 +28,18 @@ use std::collections::VecDeque; use std::fmt::Debug; use std::sync::Arc; -pub trait WindowFrameStateImpl: Debug { - /// We use start and end bounds to calculate current row's starting and ending range. - fn calculate_range( - &mut self, - window_frame: &Option>, - range_columns: &[ArrayRef], - sort_options: &[SortOptions], - length: usize, - idx: usize, - ) -> Result<(usize, usize)>; -} - +/// This object stores the window frame state for use in incremental calculations. #[derive(Debug)] pub enum WindowFrameState { Rows(WindowFrameStateRows), Range(WindowFrameStateRange), Groups(WindowFrameStateGroups), - NoFrame(WindowFrameStateNoFrame), + Default, } -impl WindowFrameStateImpl for WindowFrameState { - fn calculate_range( +impl WindowFrameState { + /// This function calculates beginning/ending indices for the frame of the current row. + pub fn calculate_range( &mut self, window_frame: &Option>, range_columns: &[ArrayRef], @@ -79,19 +69,12 @@ impl WindowFrameStateImpl for WindowFrameState { length, idx, ), - WindowFrameState::NoFrame(ref mut frame) => frame.calculate_range( - window_frame, - range_columns, - sort_options, - length, - idx, - ), + WindowFrameState::Default => Ok((0, length)), } } -} -impl WindowFrameState { - pub fn new(window_frame: &Option>) -> WindowFrameState { + /// Create a new default state for the given window frame. + pub fn new(window_frame: &Option>) -> Self { if let Some(window_frame) = window_frame { match window_frame.units { WindowFrameUnits::Rows => { @@ -105,31 +88,17 @@ impl WindowFrameState { } } } else { - WindowFrameState::NoFrame(WindowFrameStateNoFrame::default()) + WindowFrameState::Default } } } -#[derive(Debug, Default)] -pub struct WindowFrameStateNoFrame {} - -impl WindowFrameStateImpl for WindowFrameStateNoFrame { - fn calculate_range( - &mut self, - _window_frame: &Option>, - _range_columns: &[ArrayRef], - _sort_options: &[SortOptions], - length: usize, - _idx: usize, - ) -> Result<(usize, usize)> { - Ok((0, length)) - } -} - +/// This empty struct reflects the stateless nature of row-based window frames. #[derive(Debug, Default)] pub struct WindowFrameStateRows {} -impl WindowFrameStateImpl for WindowFrameStateRows { +impl WindowFrameStateRows { + /// This function calculates beginning/ending indices for the frame of the current row. fn calculate_range( &mut self, window_frame: &Option>, @@ -203,11 +172,13 @@ impl WindowFrameStateImpl for WindowFrameStateRows { } /// This structure encapsulates all the state information we require as we -/// scan ranges of data while processing window frames. +/// scan ranges of data while processing window frames. Currently we calculate +/// things from scratch every time, but we will make this incremental in the future. #[derive(Debug, Default)] pub struct WindowFrameStateRange {} -impl WindowFrameStateImpl for WindowFrameStateRange { +impl WindowFrameStateRange { + /// This function calculates beginning/ending indices for the frame of the current row. fn calculate_range( &mut self, window_frame: &Option>, @@ -290,9 +261,10 @@ impl WindowFrameStateImpl for WindowFrameStateRange { Ok((0, length)) } } -} -impl WindowFrameStateRange { + /// This function does the heavy lifting when finding range boundaries. It is meant to be + /// called twice, in succession, to get window frame start and end indices (with `BISECT_SIDE` + /// supplied as false and true, respectively). fn calculate_index_of_row( &mut self, range_columns: &[ArrayRef], @@ -338,31 +310,30 @@ impl WindowFrameStateRange { } } -/// In GROUPS mode, rows with duplicate sorting values are grouped together. -/// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode. -/// The syntax is as follows: -/// GROUPS frame_start [ frame_exclusion ] -/// GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ] -/// The optional frame_exclusion specifier is not yet supported. -/// The frame_start and frame_end parameters allow us to specify which rows the window -/// frame starts and ends with. They accept the following values: -/// - UNBOUNDED PRECEDING: Start with the first row of the partition. Possible only in frame_start. -/// - offset PRECEDING: When used in frame_start, it refers to the first row of the group -/// that comes "offset" groups before the current group (i.e. the group -/// containing the current row). When used in frame_end, it refers to the -/// last row of the group that comes "offset" groups before the current group. -/// - CURRENT ROW: When used in frame_start, it refers to the first row of the group containing -/// the current row. When used in frame_end, it refers to the last row of the group -/// containing the current row. -/// - offset FOLLOWING: When used in frame_start, it refers to the first row of the group -/// that comes "offset" groups after the current group (i.e. the group -/// containing the current row). When used in frame_end, it refers to the -/// last row of the group that comes "offset" groups after the current group. -/// - UNBOUNDED FOLLOWING: End with the last row of the partition. Possible only in frame_end. - -/// This structure encapsulates all the state information we require as we -/// scan groups of data while processing window frames. - +// In GROUPS mode, rows with duplicate sorting values are grouped together. +// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode. +// The syntax is as follows: +// GROUPS frame_start [ frame_exclusion ] +// GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ] +// The optional frame_exclusion specifier is not yet supported. +// The frame_start and frame_end parameters allow us to specify which rows the window +// frame starts and ends with. They accept the following values: +// - UNBOUNDED PRECEDING: Start with the first row of the partition. Possible only in frame_start. +// - offset PRECEDING: When used in frame_start, it refers to the first row of the group +// that comes "offset" groups before the current group (i.e. the group +// containing the current row). When used in frame_end, it refers to the +// last row of the group that comes "offset" groups before the current group. +// - CURRENT ROW: When used in frame_start, it refers to the first row of the group containing +// the current row. When used in frame_end, it refers to the last row of the group +// containing the current row. +// - offset FOLLOWING: When used in frame_start, it refers to the first row of the group +// that comes "offset" groups after the current group (i.e. the group +// containing the current row). When used in frame_end, it refers to the +// last row of the group that comes "offset" groups after the current group. +// - UNBOUNDED FOLLOWING: End with the last row of the partition. Possible only in frame_end. + +// This structure encapsulates all the state information we require as we +// scan groups of data while processing window frames. #[derive(Debug, Default)] pub struct WindowFrameStateGroups { current_group_idx: u64, @@ -373,7 +344,8 @@ pub struct WindowFrameStateGroups { window_frame_start_idx: u64, } -impl WindowFrameStateImpl for WindowFrameStateGroups { +impl WindowFrameStateGroups { + /// This function calculates beginning/ending indices for the frame of the current row. fn calculate_range( &mut self, window_frame: &Option>, @@ -469,13 +441,11 @@ impl WindowFrameStateImpl for WindowFrameStateGroups { Ok((0, length)) } } -} -impl WindowFrameStateGroups { - /// This function is the main public interface of WindowFrameStateGroups. It is meant to be + /// This function does the heavy lifting when finding group boundaries. It is meant to be /// called twice, in succession, to get window frame start and end indices (with `BISECT_SIDE` /// supplied as false and true, respectively). - pub fn calculate_index_of_group( + fn calculate_index_of_group( &mut self, range_columns: &[ArrayRef], idx: usize, From 1e952890b33d90b3bfad9ea019b9a6eb5af13750 Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Thu, 10 Nov 2022 10:50:55 +0300 Subject: [PATCH 7/7] Refactor to avoid carrying the window frame object around --- .../physical-expr/src/window/aggregate.rs | 7 +- .../physical-expr/src/window/built_in.rs | 7 +- .../src/window/window_frame_state.rs | 469 ++++++++---------- 3 files changed, 225 insertions(+), 258 deletions(-) diff --git a/datafusion/physical-expr/src/window/aggregate.rs b/datafusion/physical-expr/src/window/aggregate.rs index 672bd539036d..69b0812c1aa6 100644 --- a/datafusion/physical-expr/src/window/aggregate.rs +++ b/datafusion/physical-expr/src/window/aggregate.rs @@ -33,7 +33,7 @@ use datafusion_expr::WindowFrame; use crate::{expressions::PhysicalSortExpr, PhysicalExpr}; use crate::{window::WindowExpr, AggregateExpr}; -use super::window_frame_state::WindowFrameState; +use super::window_frame_state::WindowFrameContext; /// A window expr that takes the form of an aggregate function #[derive(Debug)] @@ -116,14 +116,13 @@ impl WindowExpr for AggregateWindowExpr { .map(|v| v.slice(partition_range.start, length)) .collect::>(); - let mut window_frame_state = WindowFrameState::new(&window_frame); + let mut window_frame_ctx = WindowFrameContext::new(&window_frame); let mut last_range: (usize, usize) = (0, 0); // We iterate on each row to perform a running calculation. // First, cur_range is calculated, then it is compared with last_range. for i in 0..length { - let cur_range = window_frame_state.calculate_range( - &window_frame, + let cur_range = window_frame_ctx.calculate_range( &slice_order_bys, &sort_options, length, diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs index 126b77fcf2f6..da551d5347d4 100644 --- a/datafusion/physical-expr/src/window/built_in.rs +++ b/datafusion/physical-expr/src/window/built_in.rs @@ -17,7 +17,7 @@ //! Physical exec for built-in window function expressions. -use super::window_frame_state::WindowFrameState; +use super::window_frame_state::WindowFrameContext; use super::BuiltInWindowFunctionExpr; use super::WindowExpr; use crate::{expressions::PhysicalSortExpr, PhysicalExpr}; @@ -114,11 +114,10 @@ impl WindowExpr for BuiltInWindowExpr { .iter() .map(|v| v.slice(partition_range.start, length)) .collect::>(); - let mut window_frame_state = WindowFrameState::new(&window_frame); + let mut window_frame_ctx = WindowFrameContext::new(&window_frame); // We iterate on each row to calculate window frame range and and window function result for idx in 0..length { - let range = window_frame_state.calculate_range( - &window_frame, + let range = window_frame_ctx.calculate_range( &slice_order_bys, &sort_options, num_rows, diff --git a/datafusion/physical-expr/src/window/window_frame_state.rs b/datafusion/physical-expr/src/window/window_frame_state.rs index 931ee232aac3..6ccab1d8df48 100644 --- a/datafusion/physical-expr/src/window/window_frame_state.rs +++ b/datafusion/physical-expr/src/window/window_frame_state.rs @@ -30,144 +30,137 @@ use std::sync::Arc; /// This object stores the window frame state for use in incremental calculations. #[derive(Debug)] -pub enum WindowFrameState { - Rows(WindowFrameStateRows), - Range(WindowFrameStateRange), - Groups(WindowFrameStateGroups), +pub enum WindowFrameContext<'a> { + // ROWS-frames are inherently stateless: + Rows(&'a Arc), + // RANGE-frames will soon have a stateful implementation that is more efficient than a stateless one: + Range { + window_frame: &'a Arc, + state: WindowFrameStateRange, + }, + // GROUPS-frames have a stateful implementation that is more efficient than a stateless one: + Groups { + window_frame: &'a Arc, + state: WindowFrameStateGroups, + }, Default, } -impl WindowFrameState { +impl<'a> WindowFrameContext<'a> { + /// Create a new default state for the given window frame. + pub fn new(window_frame: &'a Option>) -> Self { + if let Some(window_frame) = window_frame { + match window_frame.units { + WindowFrameUnits::Rows => WindowFrameContext::Rows(window_frame), + WindowFrameUnits::Range => WindowFrameContext::Range { + window_frame, + state: WindowFrameStateRange::default(), + }, + WindowFrameUnits::Groups => WindowFrameContext::Groups { + window_frame, + state: WindowFrameStateGroups::default(), + }, + } + } else { + WindowFrameContext::Default + } + } + /// This function calculates beginning/ending indices for the frame of the current row. pub fn calculate_range( &mut self, - window_frame: &Option>, range_columns: &[ArrayRef], sort_options: &[SortOptions], length: usize, idx: usize, ) -> Result<(usize, usize)> { match *self { - WindowFrameState::Rows(ref mut frame) => frame.calculate_range( + WindowFrameContext::Rows(window_frame) => { + Self::calculate_range_rows(window_frame, length, idx) + } + WindowFrameContext::Range { window_frame, - range_columns, - sort_options, - length, - idx, - ), - WindowFrameState::Range(ref mut frame) => frame.calculate_range( + ref mut state, + } => state.calculate_range( window_frame, range_columns, sort_options, length, idx, ), - WindowFrameState::Groups(ref mut frame) => frame.calculate_range( + WindowFrameContext::Groups { + window_frame, + ref mut state, + } => state.calculate_range( window_frame, range_columns, sort_options, length, idx, ), - WindowFrameState::Default => Ok((0, length)), + WindowFrameContext::Default => Ok((0, length)), } } - /// Create a new default state for the given window frame. - pub fn new(window_frame: &Option>) -> Self { - if let Some(window_frame) = window_frame { - match window_frame.units { - WindowFrameUnits::Rows => { - WindowFrameState::Rows(WindowFrameStateRows::default()) - } - WindowFrameUnits::Range => { - WindowFrameState::Range(WindowFrameStateRange::default()) - } - WindowFrameUnits::Groups => { - WindowFrameState::Groups(WindowFrameStateGroups::default()) - } - } - } else { - WindowFrameState::Default - } - } -} - -/// This empty struct reflects the stateless nature of row-based window frames. -#[derive(Debug, Default)] -pub struct WindowFrameStateRows {} - -impl WindowFrameStateRows { /// This function calculates beginning/ending indices for the frame of the current row. - fn calculate_range( - &mut self, - window_frame: &Option>, - _range_columns: &[ArrayRef], - _sort_options: &[SortOptions], + fn calculate_range_rows( + window_frame: &Arc, length: usize, idx: usize, ) -> Result<(usize, usize)> { - if let Some(window_frame) = window_frame { - let start = match window_frame.start_bound { - // UNBOUNDED PRECEDING - WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0, - WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { - if idx >= n as usize { - idx - n as usize - } else { - 0 - } - } - WindowFrameBound::CurrentRow => idx, - // UNBOUNDED FOLLOWING - WindowFrameBound::Following(ScalarValue::UInt64(None)) => { - return Err(DataFusionError::Internal(format!( - "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'", - window_frame - ))) - } - WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { - min(idx + n as usize, length) - } - // ERRONEOUS FRAMES - WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { - return Err(DataFusionError::Internal( - "Rows should be Uint".to_string(), - )) + let start = match window_frame.start_bound { + // UNBOUNDED PRECEDING + WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0, + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { + if idx >= n as usize { + idx - n as usize + } else { + 0 } - }; - let end = match window_frame.end_bound { - // UNBOUNDED PRECEDING - WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => { - return Err(DataFusionError::Internal(format!( - "Frame end cannot be UNBOUNDED PRECEDING '{:?}'", - window_frame - ))) - } - WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { - if idx >= n as usize { - idx - n as usize + 1 - } else { - 0 - } - } - WindowFrameBound::CurrentRow => idx + 1, - // UNBOUNDED FOLLOWING - WindowFrameBound::Following(ScalarValue::UInt64(None)) => length, - WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { - min(idx + n as usize + 1, length) - } - // ERRONEOUS FRAMES - WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { - return Err(DataFusionError::Internal( - "Rows should be Uint".to_string(), - )) + } + WindowFrameBound::CurrentRow => idx, + // UNBOUNDED FOLLOWING + WindowFrameBound::Following(ScalarValue::UInt64(None)) => { + return Err(DataFusionError::Internal(format!( + "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'", + window_frame + ))) + } + WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { + min(idx + n as usize, length) + } + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { + return Err(DataFusionError::Internal("Rows should be Uint".to_string())) + } + }; + let end = match window_frame.end_bound { + // UNBOUNDED PRECEDING + WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => { + return Err(DataFusionError::Internal(format!( + "Frame end cannot be UNBOUNDED PRECEDING '{:?}'", + window_frame + ))) + } + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => { + if idx >= n as usize { + idx - n as usize + 1 + } else { + 0 } - }; - Ok((start, end)) - } else { - Ok((0, length)) - } + } + WindowFrameBound::CurrentRow => idx + 1, + // UNBOUNDED FOLLOWING + WindowFrameBound::Following(ScalarValue::UInt64(None)) => length, + WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => { + min(idx + n as usize + 1, length) + } + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { + return Err(DataFusionError::Internal("Rows should be Uint".to_string())) + } + }; + Ok((start, end)) } } @@ -181,85 +174,81 @@ impl WindowFrameStateRange { /// This function calculates beginning/ending indices for the frame of the current row. fn calculate_range( &mut self, - window_frame: &Option>, + window_frame: &Arc, range_columns: &[ArrayRef], sort_options: &[SortOptions], length: usize, idx: usize, ) -> Result<(usize, usize)> { - if let Some(window_frame) = window_frame { - let start = match &window_frame.start_bound { - WindowFrameBound::Preceding(n) => { - if n.is_null() { - // UNBOUNDED PRECEDING - 0 - } else { - self.calculate_index_of_row::( - range_columns, - sort_options, - idx, - Some(n), - )? - } + let start = match window_frame.start_bound { + WindowFrameBound::Preceding(ref n) => { + if n.is_null() { + // UNBOUNDED PRECEDING + 0 + } else { + self.calculate_index_of_row::( + range_columns, + sort_options, + idx, + Some(n), + )? } - WindowFrameBound::CurrentRow => { - if range_columns.is_empty() { - 0 - } else { - self.calculate_index_of_row::( - range_columns, - sort_options, - idx, - None, - )? - } + } + WindowFrameBound::CurrentRow => { + if range_columns.is_empty() { + 0 + } else { + self.calculate_index_of_row::( + range_columns, + sort_options, + idx, + None, + )? } - WindowFrameBound::Following(n) => self - .calculate_index_of_row::( + } + WindowFrameBound::Following(ref n) => self + .calculate_index_of_row::( + range_columns, + sort_options, + idx, + Some(n), + )?, + }; + let end = match window_frame.end_bound { + WindowFrameBound::Preceding(ref n) => self + .calculate_index_of_row::( + range_columns, + sort_options, + idx, + Some(n), + )?, + WindowFrameBound::CurrentRow => { + if range_columns.is_empty() { + length + } else { + self.calculate_index_of_row::( range_columns, sort_options, idx, - Some(n), - )?, - }; - let end = match &window_frame.end_bound { - WindowFrameBound::Preceding(n) => self - .calculate_index_of_row::( + None, + )? + } + } + WindowFrameBound::Following(ref n) => { + if n.is_null() { + // UNBOUNDED FOLLOWING + length + } else { + self.calculate_index_of_row::( range_columns, sort_options, idx, Some(n), - )?, - WindowFrameBound::CurrentRow => { - if range_columns.is_empty() { - length - } else { - self.calculate_index_of_row::( - range_columns, - sort_options, - idx, - None, - )? - } - } - WindowFrameBound::Following(n) => { - if n.is_null() { - // UNBOUNDED FOLLOWING - length - } else { - self.calculate_index_of_row::( - range_columns, - sort_options, - idx, - Some(n), - )? - } + )? } - }; - Ok((start, end)) - } else { - Ok((0, length)) - } + } + }; + Ok((start, end)) } /// This function does the heavy lifting when finding range boundaries. It is meant to be @@ -348,7 +337,7 @@ impl WindowFrameStateGroups { /// This function calculates beginning/ending indices for the frame of the current row. fn calculate_range( &mut self, - window_frame: &Option>, + window_frame: &Arc, range_columns: &[ArrayRef], _sort_options: &[SortOptions], length: usize, @@ -359,87 +348,67 @@ impl WindowFrameStateGroups { "GROUPS mode requires an ORDER BY clause".to_string(), )); } - if let Some(window_frame) = window_frame { - let start = match window_frame.start_bound { - // UNBOUNDED PRECEDING - WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0, - WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self - .calculate_index_of_group::( - range_columns, - idx, - n, - length, - )?, - WindowFrameBound::CurrentRow => self - .calculate_index_of_group::( - range_columns, - idx, - 0, - length, - )?, - WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self - .calculate_index_of_group::( - range_columns, - idx, - n, - length, - )?, - // UNBOUNDED FOLLOWING - WindowFrameBound::Following(ScalarValue::UInt64(None)) => { - return Err(DataFusionError::Internal(format!( - "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'", - window_frame - ))) - } - // ERRONEOUS FRAMES - WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { - return Err(DataFusionError::Internal( - "Groups should be Uint".to_string(), - )) - } - }; - let end = match window_frame.end_bound { - // UNBOUNDED PRECEDING - WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => { - return Err(DataFusionError::Internal(format!( - "Frame end cannot be UNBOUNDED PRECEDING '{:?}'", - window_frame - ))) - } - WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self - .calculate_index_of_group::( - range_columns, - idx, - n, - length, - )?, - WindowFrameBound::CurrentRow => self - .calculate_index_of_group::( - range_columns, - idx, - 0, - length, - )?, - WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self - .calculate_index_of_group::( - range_columns, - idx, - n, - length, - )?, - // UNBOUNDED FOLLOWING - WindowFrameBound::Following(ScalarValue::UInt64(None)) => length, - // ERRONEOUS FRAMES - WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { - return Err(DataFusionError::Internal( - "Groups should be Uint".to_string(), - )) - } - }; - Ok((start, end)) - } else { - Ok((0, length)) - } + let start = match window_frame.start_bound { + // UNBOUNDED PRECEDING + WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0, + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self + .calculate_index_of_group::(range_columns, idx, n, length)?, + WindowFrameBound::CurrentRow => self.calculate_index_of_group::( + range_columns, + idx, + 0, + length, + )?, + WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self + .calculate_index_of_group::(range_columns, idx, n, length)?, + // UNBOUNDED FOLLOWING + WindowFrameBound::Following(ScalarValue::UInt64(None)) => { + return Err(DataFusionError::Internal(format!( + "Frame start cannot be UNBOUNDED FOLLOWING '{:?}'", + window_frame + ))) + } + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { + return Err(DataFusionError::Internal( + "Groups should be Uint".to_string(), + )) + } + }; + let end = match window_frame.end_bound { + // UNBOUNDED PRECEDING + WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => { + return Err(DataFusionError::Internal(format!( + "Frame end cannot be UNBOUNDED PRECEDING '{:?}'", + window_frame + ))) + } + WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => self + .calculate_index_of_group::(range_columns, idx, n, length)?, + WindowFrameBound::CurrentRow => self + .calculate_index_of_group::( + range_columns, + idx, + 0, + length, + )?, + WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => self + .calculate_index_of_group::( + range_columns, + idx, + n, + length, + )?, + // UNBOUNDED FOLLOWING + WindowFrameBound::Following(ScalarValue::UInt64(None)) => length, + // ERRONEOUS FRAMES + WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => { + return Err(DataFusionError::Internal( + "Groups should be Uint".to_string(), + )) + } + }; + Ok((start, end)) } /// This function does the heavy lifting when finding group boundaries. It is meant to be