From c6790f5011d7bfeb07020161beb21b6efddc924b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 17 Dec 2024 11:37:39 -0500 Subject: [PATCH 1/3] Rename `TypeSignature::NullAry` --> `TypeSignature::Nullary` and improve comments --- datafusion/expr-common/src/signature.rs | 76 +++++++++++++------ .../expr-common/src/type_coercion/binary.rs | 2 +- .../expr/src/type_coercion/functions.rs | 4 +- datafusion/functions-aggregate/src/count.rs | 2 +- datafusion/functions-nested/src/make_array.rs | 2 +- 5 files changed, 57 insertions(+), 29 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 148ddac73a57..97b6d9d90f7e 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -74,6 +74,8 @@ pub enum Volatility { /// adds a cast such as `cos(CAST int_column AS DOUBLE)` during planning. /// /// # Data Types +/// +/// ## Timestamps /// Types to match are represented using Arrow's [`DataType`]. [`DataType::Timestamp`] has an optional variable /// timezone specification. To specify a function can handle a timestamp with *ANY* timezone, use /// the [`TIMEZONE_WILDCARD`]. For example: @@ -93,6 +95,8 @@ pub enum Volatility { pub enum TypeSignature { /// One or more arguments of a common type out of a list of valid types. /// + /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). + /// /// # Examples /// A function such as `concat` is `Variadic(vec![DataType::Utf8, DataType::LargeUtf8])` Variadic(Vec), @@ -102,51 +106,75 @@ pub enum TypeSignature { UserDefined, /// One or more arguments with arbitrary types VariadicAny, - /// Fixed number of arguments of an arbitrary but equal type out of a list of valid types. + /// One or more arguments of an arbitrary but equal type out of a list of valid types. /// /// # Examples /// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])` /// 2. 
A function of one argument of f64 or f32 is `Uniform(1, vec![DataType::Float32, DataType::Float64])` Uniform(usize, Vec), - /// Exact number of arguments of an exact type + /// One or more arguments with exactly the specified types in order. + /// + /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). Exact(Vec), - /// The number of arguments that can be coerced to in order + /// One or more arguments belonging to the [`TypeSignatureClass`], in order. + /// /// For example, `Coercible(vec![logical_float64()])` accepts /// arguments like `vec![DataType::Int32]` or `vec![DataType::Float32]` - /// since i32 and f32 can be casted to f64 + /// since i32 and f32 can be cast to f64 + /// + /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). Coercible(Vec), - /// The arguments will be coerced to a single type based on the comparison rules. - /// For example, i32 and i64 has coerced type Int64. + /// One or more arguments that can be "compared" + /// + /// Each argument will be coerced to a single type based on comparison rules. + /// For example a function called with `i32` and `i64` has coerced type `Int64` so + /// each argument will be coerced to `Int64` before the function is invoked. /// /// Note: - /// - If compares with numeric and string, numeric is preferred for numeric string cases. For example, nullif('2', 1) has coerced types Int64. + /// - If compares with numeric and string, numeric is preferred for numeric string cases. For example, `nullif('2', 1)` has coerced types `Int64`. /// - If the result is Null, it will be coerced to String (Utf8View). + /// - See [`comparison_coercion`] for more details. + /// - For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). /// - /// See `comparison_coercion_numeric` for more details. 
+ /// [`comparison_coercion`]: crate::type_coercion::binary::comparison_coercion Comparable(usize), - /// Fixed number of arguments of arbitrary types, number should be larger than 0 + /// One or more arguments of arbitrary types. + /// + /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). Any(usize), - /// Matches exactly one of a list of [`TypeSignature`]s. Coercion is attempted to match - /// the signatures in order, and stops after the first success, if any. + /// Matches exactly one of a list of [`TypeSignature`]s. + /// + /// Coercion is attempted to match the signatures in order, and stops after + /// the first success, if any. /// /// # Examples - /// Function `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature` + /// + /// Since `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature` /// is `OneOf(vec![Any(0), VariadicAny])`. OneOf(Vec), - /// Specifies Signatures for array functions + /// A function that has an [`ArrayFunctionSignature`] ArraySignature(ArrayFunctionSignature), - /// Fixed number of arguments of numeric types. + /// One or more arguments of numeric types. + /// /// See [`NativeType::is_numeric`] to know which type is considered numeric /// - /// [`NativeType::is_numeric`]: datafusion_common + /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). + /// + /// [`NativeType::is_numeric`]: datafusion_common::types::NativeType::is_numeric Numeric(usize), - /// Fixed number of arguments of all the same string types. + /// One or more arguments of all the same string types. + /// /// The precedence of type from high to low is Utf8View, LargeUtf8 and Utf8. /// Null is considered as `Utf8` by default /// Dictionary with string value type is also handled. 
+ /// + /// For example, if a function is called with (utf8, large_utf8), all + /// arguments will be coerced to `LargeUtf8` + /// + /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). String(usize), - /// Zero argument - NullAry, + /// No arguments + Nullary, } impl TypeSignature { @@ -237,7 +265,7 @@ impl Display for ArrayFunctionSignature { impl TypeSignature { pub fn to_string_repr(&self) -> Vec { match self { - TypeSignature::NullAry => { + TypeSignature::Nullary => { vec!["NullAry()".to_string()] } TypeSignature::Variadic(types) => { @@ -296,7 +324,7 @@ impl TypeSignature { pub fn supports_zero_argument(&self) -> bool { match &self { TypeSignature::Exact(vec) => vec.is_empty(), - TypeSignature::NullAry => true, + TypeSignature::Nullary => true, TypeSignature::OneOf(types) => types .iter() .any(|type_sig| type_sig.supports_zero_argument()), @@ -362,7 +390,7 @@ impl TypeSignature { // TODO: Implement for other types TypeSignature::Any(_) | TypeSignature::Comparable(_) - | TypeSignature::NullAry + | TypeSignature::Nullary | TypeSignature::VariadicAny | TypeSignature::ArraySignature(_) | TypeSignature::UserDefined => vec![], @@ -496,7 +524,7 @@ impl Signature { pub fn nullary(volatility: Volatility) -> Self { Signature { - type_signature: TypeSignature::NullAry, + type_signature: TypeSignature::Nullary, volatility, } } @@ -573,10 +601,10 @@ mod tests { TypeSignature::Exact(vec![]), TypeSignature::OneOf(vec![ TypeSignature::Exact(vec![DataType::Int8]), - TypeSignature::NullAry, + TypeSignature::Nullary, TypeSignature::Uniform(1, vec![DataType::Int8]), ]), - TypeSignature::NullAry, + TypeSignature::Nullary, ]; for case in positive_cases { diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 7a6e9841e22c..49c1ccff3814 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -642,7 +642,7 @@ 
pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option bool { | TypeSignature::String(_) | TypeSignature::Coercible(_) | TypeSignature::Any(_) - | TypeSignature::NullAry + | TypeSignature::Nullary | TypeSignature::Comparable(_) ) } @@ -698,7 +698,7 @@ fn get_valid_types( } } }, - TypeSignature::NullAry => { + TypeSignature::Nullary => { if !current_types.is_empty() { return plan_err!( "The function expected zero argument but received {}", diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index b4164c211c35..f2621b48be5c 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -123,7 +123,7 @@ impl Count { pub fn new() -> Self { Self { signature: Signature::one_of( - vec![TypeSignature::VariadicAny, TypeSignature::NullAry], + vec![TypeSignature::VariadicAny, TypeSignature::Nullary], Volatility::Immutable, ), } diff --git a/datafusion/functions-nested/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs index 22870dd85f0c..efedd897de87 100644 --- a/datafusion/functions-nested/src/make_array.rs +++ b/datafusion/functions-nested/src/make_array.rs @@ -64,7 +64,7 @@ impl MakeArray { pub fn new() -> Self { Self { signature: Signature::one_of( - vec![TypeSignature::NullAry, TypeSignature::UserDefined], + vec![TypeSignature::Nullary, TypeSignature::UserDefined], Volatility::Immutable, ), aliases: vec![String::from("make_list")], From f8b78a982706429c39352268b49541935158b9b6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 18 Dec 2024 18:51:21 -0500 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Piotr Findeisen --- datafusion/expr-common/src/signature.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 97b6d9d90f7e..b2225465b349 100644 --- a/datafusion/expr-common/src/signature.rs +++ 
b/datafusion/expr-common/src/signature.rs @@ -114,7 +114,7 @@ pub enum TypeSignature { Uniform(usize, Vec), /// One or more arguments with exactly the specified types in order. /// - /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). + /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`]. Exact(Vec), /// One or more arguments belonging to the [`TypeSignatureClass`], in order. /// @@ -122,7 +122,7 @@ pub enum TypeSignature { /// arguments like `vec![DataType::Int32]` or `vec![DataType::Float32]` /// since i32 and f32 can be cast to f64 /// - /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). + /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`]. Coercible(Vec), /// One or more arguments that can be "compared" /// @@ -140,7 +140,7 @@ pub enum TypeSignature { Comparable(usize), /// One or more arguments of arbitrary types. /// - /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). + /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`]. Any(usize), /// Matches exactly one of a list of [`TypeSignature`]s. /// @@ -158,7 +158,7 @@ pub enum TypeSignature { /// /// See [`NativeType::is_numeric`] to know which type is considered numeric /// - /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). + /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`]. 
/// /// [`NativeType::is_numeric`]: datafusion_common::types::NativeType::is_numeric Numeric(usize), From c000cf8409c03c6a80124494e88c1c367bfb47ed Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 18 Dec 2024 18:51:33 -0500 Subject: [PATCH 3/3] improve docs --- datafusion/expr-common/src/signature.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 97b6d9d90f7e..a8c9c3cd24b1 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -76,6 +76,7 @@ pub enum Volatility { /// # Data Types /// /// ## Timestamps +/// /// Types to match are represented using Arrow's [`DataType`]. [`DataType::Timestamp`] has an optional variable /// timezone specification. To specify a function can handle a timestamp with *ANY* timezone, use /// the [`TIMEZONE_WILDCARD`]. For example: @@ -98,7 +99,9 @@ pub enum TypeSignature { /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). /// /// # Examples - /// A function such as `concat` is `Variadic(vec![DataType::Utf8, DataType::LargeUtf8])` + /// + /// A function such as `concat` is `Variadic(vec![DataType::Utf8, + /// DataType::LargeUtf8])` Variadic(Vec), /// The acceptable signature and coercions rules to coerce arguments to this /// signature are special for this function. If this signature is specified, @@ -109,6 +112,7 @@ pub enum TypeSignature { /// One or more arguments of an arbitrary but equal type out of a list of valid types. /// /// # Examples + /// /// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])` /// 2. 
A function of one argument of f64 or f32 is `Uniform(1, vec![DataType::Float32, DataType::Float64])` Uniform(usize, Vec), @@ -171,7 +175,7 @@ pub enum TypeSignature { /// For example, if a function is called with (utf8, large_utf8), all /// arguments will be coerced to `LargeUtf8` /// - /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]). + /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`]. String(usize), /// No arguments Nullary,