From e99e02b9b9093ceb0c13a2dd32a2a89beba47930 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 24 Dec 2024 16:06:04 -0500 Subject: [PATCH] Fix `recursive-protection` feature flag (#13887) * Fix recursive-protection feature flag * rename feature flag to be consistent * Make default * taplo format --- README.md | 2 +- datafusion-cli/Cargo.lock | 1 + datafusion-cli/Cargo.toml | 1 + datafusion/common/Cargo.toml | 3 +- datafusion/common/src/tree_node.rs | 14 ++--- datafusion/core/Cargo.toml | 8 +++ datafusion/expr/Cargo.toml | 3 +- datafusion/expr/src/expr_schema.rs | 2 +- datafusion/expr/src/logical_plan/tree_node.rs | 12 ++-- datafusion/optimizer/Cargo.toml | 3 +- datafusion/optimizer/src/analyzer/subquery.rs | 2 +- .../optimizer/src/common_subexpr_eliminate.rs | 2 +- .../optimizer/src/eliminate_cross_join.rs | 2 +- .../optimizer/src/optimize_projections/mod.rs | 2 +- datafusion/physical-optimizer/Cargo.toml | 3 +- .../src/aggregate_statistics.rs | 2 +- datafusion/sql/Cargo.toml | 4 +- datafusion/sql/src/expr/mod.rs | 2 +- datafusion/sql/src/lib.rs | 1 + datafusion/sql/src/query.rs | 10 +-- datafusion/sql/src/set_expr.rs | 2 +- datafusion/sql/src/stack.rs | 63 +++++++++++++++++++ 22 files changed, 108 insertions(+), 36 deletions(-) create mode 100644 datafusion/sql/src/stack.rs diff --git a/README.md b/README.md index c2ede4833e9b..e0fc6854ecff 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,7 @@ Default features: - `regex_expressions`: regular expression functions, such as `regexp_match` - `unicode_expressions`: Include unicode aware functions such as `character_length` - `unparser`: enables support to reverse LogicalPlans back into SQL -- `recursive-protection`: uses [recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow protection. +- `recursive_protection`: uses [recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow protection. Optional features: diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 34505bee2e13..1a42673cd31f 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1543,6 +1543,7 @@ dependencies = [ "indexmap", "itertools", "log", + "recursive", "regex", "regex-syntax", ] diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 4cdc2120a029..e0192037dedc 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -45,6 +45,7 @@ datafusion = { path = "../datafusion/core", version = "43.0.0", features = [ "datetime_expressions", "encoding_expressions", "parquet", + "recursive_protection", "regex_expressions", "unicode_expressions", "compression", diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 918f0cd583f7..b331a55a98d0 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -36,12 +36,11 @@ name = "datafusion_common" path = "src/lib.rs" [features] -default = ["recursive-protection"] avro = ["apache-avro"] backtrace = [] pyarrow = ["pyo3", "arrow/pyarrow", "parquet"] force_hash_collisions = [] -recursive-protection = ["dep:recursive"] +recursive_protection = ["dep:recursive"] [dependencies] ahash = { workspace = true } diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index 9c59652e0d70..c70389b63177 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -124,7 +124,7 @@ pub trait TreeNode: Sized { /// TreeNodeVisitor::f_up(ChildNode2) /// TreeNodeVisitor::f_up(ParentNode) /// ``` - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn visit<'n, V: TreeNodeVisitor<'n, Node = Self>>( &'n self, visitor: &mut V, @@ -174,7 +174,7 @@ pub trait TreeNode: Sized { /// TreeNodeRewriter::f_up(ChildNode2) /// TreeNodeRewriter::f_up(ParentNode) /// ``` - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn rewrite>( self, rewriter: &mut R, @@ -197,7 +197,7 @@ pub trait TreeNode: Sized { &'n self, mut f: F, ) -> Result { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn apply_impl<'n, N: TreeNode, F: FnMut(&'n N) -> Result>( node: &'n N, f: &mut F, @@ -232,7 +232,7 @@ pub trait TreeNode: Sized { self, mut f: F, ) -> Result> { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_down_impl Result>>( node: N, f: &mut F, @@ -256,7 +256,7 @@ pub trait TreeNode: Sized { self, mut f: F, ) -> Result> { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_up_impl Result>>( node: N, f: &mut F, @@ -371,7 +371,7 @@ pub trait TreeNode: Sized { mut f_down: FD, mut f_up: FU, ) -> Result> { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_down_up_impl< N: TreeNode, FD: FnMut(N) -> Result>, @@ -2349,7 +2349,7 @@ pub(crate) mod tests { Ok(()) } - #[cfg(feature = "recursive-protection")] + #[cfg(feature = "recursive_protection")] #[test] fn test_large_tree() { let mut item = TestTreeNode::new_leaf("initial".to_string()); diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index dca40ab3d67a..64ad8f2ba152 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -59,6 +59,7 @@ default = [ "unicode_expressions", "compression", "parquet", + "recursive_protection", ] encoding_expressions = ["datafusion-functions/encoding_expressions"] # Used for testing ONLY: causes all values to hash to the same value (test for collisions) @@ -69,6 +70,13 @@ pyarrow = ["datafusion-common/pyarrow", "parquet"] regex_expressions = [ "datafusion-functions/regex_expressions", ] +recursive_protection = [ + "datafusion-common/recursive_protection", + "datafusion-expr/recursive_protection", + "datafusion-optimizer/recursive_protection", + "datafusion-physical-optimizer/recursive_protection", + "datafusion-sql/recursive_protection", +] serde = ["arrow-schema/serde"] string_expressions = ["datafusion-functions/string_expressions"] unicode_expressions = [ diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 403a80972c3b..b4f3f7fb680f 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -36,8 +36,7 @@ name = "datafusion_expr" path = "src/lib.rs" [features] -default = ["recursive-protection"] -recursive-protection = ["dep:recursive"] +recursive_protection = ["dep:recursive"] [dependencies] arrow = { workspace = true } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index e61904e24918..d5c2ac396eb9 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -99,7 +99,7 @@ impl ExprSchemable for Expr { /// expression refers to a column that does not exist in the /// schema, or when the expression is incorrectly typed /// (e.g. `[utf8] + [bool]`). - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn get_type(&self, schema: &dyn ExprSchema) -> Result { match self { Expr::Alias(Alias { expr, name, .. }) => match &**expr { diff --git a/datafusion/expr/src/logical_plan/tree_node.rs b/datafusion/expr/src/logical_plan/tree_node.rs index cdc95b84d837..9a6103afd4b4 100644 --- a/datafusion/expr/src/logical_plan/tree_node.rs +++ b/datafusion/expr/src/logical_plan/tree_node.rs @@ -668,7 +668,7 @@ impl LogicalPlan { /// Visits a plan similarly to [`Self::visit`], including subqueries that /// may appear in expressions such as `IN (SELECT ...)`. - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] pub fn visit_with_subqueries TreeNodeVisitor<'n, Node = Self>>( &self, visitor: &mut V, @@ -687,7 +687,7 @@ impl LogicalPlan { /// Similarly to [`Self::rewrite`], rewrites this node and its inputs using `f`, /// including subqueries that may appear in expressions such as `IN (SELECT /// ...)`. - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] pub fn rewrite_with_subqueries>( self, rewriter: &mut R, @@ -706,7 +706,7 @@ impl LogicalPlan { &self, mut f: F, ) -> Result { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn apply_with_subqueries_impl< F: FnMut(&LogicalPlan) -> Result, >( @@ -741,7 +741,7 @@ impl LogicalPlan { self, mut f: F, ) -> Result> { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_down_with_subqueries_impl< F: FnMut(LogicalPlan) -> Result>, >( @@ -766,7 +766,7 @@ impl LogicalPlan { self, mut f: F, ) -> Result> { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_up_with_subqueries_impl< F: FnMut(LogicalPlan) -> Result>, >( @@ -794,7 +794,7 @@ impl LogicalPlan { mut f_down: FD, mut f_up: FU, ) -> Result> { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn transform_down_up_with_subqueries_impl< FD: FnMut(LogicalPlan) -> Result>, FU: FnMut(LogicalPlan) -> Result>, diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index 3032c67682b1..ba0dedc57675 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -36,8 +36,7 @@ name = "datafusion_optimizer" path = "src/lib.rs" [features] -default = ["recursive-protection"] -recursive-protection = ["dep:recursive"] +recursive_protection = ["dep:recursive"] [dependencies] arrow = { workspace = true } diff --git a/datafusion/optimizer/src/analyzer/subquery.rs b/datafusion/optimizer/src/analyzer/subquery.rs index 0d04efbcf36a..7129da85f375 100644 --- a/datafusion/optimizer/src/analyzer/subquery.rs +++ b/datafusion/optimizer/src/analyzer/subquery.rs @@ -128,7 +128,7 @@ fn check_correlations_in_subquery(inner_plan: &LogicalPlan) -> Result<()> { } // Recursively check the unsupported outer references in the sub query plan. -#[cfg_attr(feature = "recursive-protection", recursive::recursive)] +#[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn check_inner_plan(inner_plan: &LogicalPlan, can_contain_outer_ref: bool) -> Result<()> { if !can_contain_outer_ref && inner_plan.contains_outer_reference() { return plan_err!("Accessing outer reference columns is not allowed in the plan"); diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 92e6dd1ad4d9..4b9a83fd3e4c 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -531,7 +531,7 @@ impl OptimizerRule for CommonSubexprEliminate { None } - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn rewrite( &self, plan: LogicalPlan, diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs b/datafusion/optimizer/src/eliminate_cross_join.rs index 64d24016f425..d35572e6d34a 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -79,7 +79,7 @@ impl OptimizerRule for EliminateCrossJoin { true } - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn rewrite( &self, plan: LogicalPlan, diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index f6e3eec6743c..b7dd391586a1 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -109,7 +109,7 @@ impl OptimizerRule for OptimizeProjections { /// columns. /// - `Ok(None)`: Signal that the given logical plan did not require any change. /// - `Err(error)`: An error occurred during the optimization process. -#[cfg_attr(feature = "recursive-protection", recursive::recursive)] +#[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn optimize_projections( plan: LogicalPlan, config: &dyn OptimizerConfig, diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index c964ca47e6a0..3454209445dc 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -32,8 +32,7 @@ rust-version = { workspace = true } workspace = true [features] -default = ["recursive-protection"] -recursive-protection = ["dep:recursive"] +recursive_protection = ["dep:recursive"] [dependencies] arrow = { workspace = true } diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs index 0849a3d97a83..a00bc4b1d571 100644 --- a/datafusion/physical-optimizer/src/aggregate_statistics.rs +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -41,7 +41,7 @@ impl AggregateStatistics { } impl PhysicalOptimizerRule for AggregateStatistics { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn optimize( &self, plan: Arc, diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index c6500e974206..224c7cb191a3 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -36,10 +36,10 @@ name = "datafusion_sql" path = "src/lib.rs" [features] -default = ["unicode_expressions", "unparser", "recursive-protection"] +default = ["unicode_expressions", "unparser"] unicode_expressions = [] unparser = [] -recursive-protection = ["dep:recursive"] +recursive_protection = ["dep:recursive"] [dependencies] arrow = { workspace = true } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 7c4d8dd21d66..9b40ebdaf6a5 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -196,7 +196,7 @@ impl SqlToRel<'_, S> { /// Internal implementation. Use /// [`Self::sql_expr_to_logical_expr`] to plan exprs. - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] fn sql_expr_to_logical_expr_internal( &self, sql: SQLExpr, diff --git a/datafusion/sql/src/lib.rs b/datafusion/sql/src/lib.rs index a5d538989453..f20560fe7c90 100644 --- a/datafusion/sql/src/lib.rs +++ b/datafusion/sql/src/lib.rs @@ -43,6 +43,7 @@ mod query; mod relation; mod select; mod set_expr; +mod stack; mod statement; #[cfg(feature = "unparser")] pub mod unparser; diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs index 2e115d140ea8..9d5a54d90b2c 100644 --- a/datafusion/sql/src/query.rs +++ b/datafusion/sql/src/query.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; +use crate::stack::StackGuard; use datafusion_common::{not_impl_err, Constraints, DFSchema, Result}; use datafusion_expr::expr::Sort; use datafusion_expr::{ @@ -62,10 +63,11 @@ impl SqlToRel<'_, S> { // The functions called from `set_expr_to_plan()` need more than 128KB // stack in debug builds as investigated in: // https://github.com/apache/datafusion/pull/13310#discussion_r1836813902 - let min_stack_size = recursive::get_minimum_stack_size(); - recursive::set_minimum_stack_size(256 * 1024); - let plan = self.set_expr_to_plan(other, planner_context)?; - recursive::set_minimum_stack_size(min_stack_size); + let plan = { + // scope for dropping _guard + let _guard = StackGuard::new(256 * 1024); + self.set_expr_to_plan(other, planner_context) + }?; let oby_exprs = to_order_by_exprs(query.order_by)?; let order_by_rex = self.order_by_to_sort_expr( oby_exprs, diff --git a/datafusion/sql/src/set_expr.rs b/datafusion/sql/src/set_expr.rs index d1569c81d350..75fdbd20e840 100644 --- a/datafusion/sql/src/set_expr.rs +++ b/datafusion/sql/src/set_expr.rs @@ -21,7 +21,7 @@ use datafusion_expr::{LogicalPlan, LogicalPlanBuilder}; use sqlparser::ast::{SetExpr, SetOperator, SetQuantifier}; impl SqlToRel<'_, S> { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + #[cfg_attr(feature = "recursive_protection", recursive::recursive)] pub(super) fn set_expr_to_plan( &self, set_expr: SetExpr, diff --git a/datafusion/sql/src/stack.rs b/datafusion/sql/src/stack.rs new file mode 100644 index 000000000000..b7d5eebdd718 --- /dev/null +++ b/datafusion/sql/src/stack.rs @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub use inner::StackGuard; + +/// A guard that sets the minimum stack size for the current thread to `min_stack_size` bytes. +#[cfg(feature = "recursive_protection")] +mod inner { + /// Sets the stack size to `min_stack_size` bytes on call to `new()` and + /// resets to the previous value when this structure is dropped. + pub struct StackGuard { + previous_stack_size: usize, + } + + impl StackGuard { + /// Sets the stack size to `min_stack_size` bytes on call to `new()` and + /// resets to the previous value when this structure is dropped. + pub fn new(min_stack_size: usize) -> Self { + let previous_stack_size = recursive::get_minimum_stack_size(); + recursive::set_minimum_stack_size(min_stack_size); + Self { + previous_stack_size, + } + } + } + + impl Drop for StackGuard { + fn drop(&mut self) { + recursive::set_minimum_stack_size(self.previous_stack_size); + } + } +} + +/// A stub implementation of the stack guard when the recursive protection +/// feature is not enabled +#[cfg(not(feature = "recursive_protection"))] +mod inner { + /// A stub implementation of the stack guard when the recursive protection + /// feature is not enabled that does nothing + pub struct StackGuard; + + impl StackGuard { + /// A stub implementation of the stack guard when the recursive protection + /// feature is not enabled + pub fn new(_min_stack_size: usize) -> Self { + Self + } + } +}