Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port the whole crypto_expressions #9588

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 1 addition & 58 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ pub enum BuiltinScalarFunction {
Cosh,
/// degrees
Degrees,
/// Digest
Digest,
/// exp
Exp,
/// factorial
Expand Down Expand Up @@ -204,8 +202,6 @@ pub enum BuiltinScalarFunction {
Lower,
/// ltrim
Ltrim,
/// md5
MD5,
/// octet_length
OctetLength,
/// random
Expand All @@ -222,14 +218,6 @@ pub enum BuiltinScalarFunction {
Rpad,
/// rtrim
Rtrim,
/// sha224
SHA224,
/// sha256
SHA256,
/// sha384
SHA384,
/// sha512
SHA512,
/// split_part
SplitPart,
/// string_to_array
Expand Down Expand Up @@ -400,7 +388,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Lpad => Volatility::Immutable,
BuiltinScalarFunction::Lower => Volatility::Immutable,
BuiltinScalarFunction::Ltrim => Volatility::Immutable,
BuiltinScalarFunction::MD5 => Volatility::Immutable,
BuiltinScalarFunction::OctetLength => Volatility::Immutable,
BuiltinScalarFunction::Radians => Volatility::Immutable,
BuiltinScalarFunction::Repeat => Volatility::Immutable,
Expand All @@ -409,11 +396,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Right => Volatility::Immutable,
BuiltinScalarFunction::Rpad => Volatility::Immutable,
BuiltinScalarFunction::Rtrim => Volatility::Immutable,
BuiltinScalarFunction::SHA224 => Volatility::Immutable,
BuiltinScalarFunction::SHA256 => Volatility::Immutable,
BuiltinScalarFunction::SHA384 => Volatility::Immutable,
BuiltinScalarFunction::SHA512 => Volatility::Immutable,
BuiltinScalarFunction::Digest => Volatility::Immutable,
BuiltinScalarFunction::SplitPart => Volatility::Immutable,
BuiltinScalarFunction::StringToArray => Volatility::Immutable,
BuiltinScalarFunction::StartsWith => Volatility::Immutable,
Expand Down Expand Up @@ -649,7 +631,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Ltrim => {
utf8_to_str_type(&input_expr_types[0], "ltrim")
}
BuiltinScalarFunction::MD5 => utf8_to_str_type(&input_expr_types[0], "md5"),
BuiltinScalarFunction::OctetLength => {
utf8_to_int_type(&input_expr_types[0], "octet_length")
}
Expand All @@ -672,21 +653,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Rtrim => {
utf8_to_str_type(&input_expr_types[0], "rtrim")
}
BuiltinScalarFunction::SHA224 => {
utf8_or_binary_to_binary_type(&input_expr_types[0], "sha224")
}
BuiltinScalarFunction::SHA256 => {
utf8_or_binary_to_binary_type(&input_expr_types[0], "sha256")
}
BuiltinScalarFunction::SHA384 => {
utf8_or_binary_to_binary_type(&input_expr_types[0], "sha384")
}
BuiltinScalarFunction::SHA512 => {
utf8_or_binary_to_binary_type(&input_expr_types[0], "sha512")
}
BuiltinScalarFunction::Digest => {
utf8_or_binary_to_binary_type(&input_expr_types[0], "digest")
}
BuiltinScalarFunction::SplitPart => {
utf8_to_str_type(&input_expr_types[0], "split_part")
}
Expand Down Expand Up @@ -882,15 +848,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Coalesce => {
Signature::variadic_equal(self.volatility())
}
BuiltinScalarFunction::SHA224
| BuiltinScalarFunction::SHA256
| BuiltinScalarFunction::SHA384
| BuiltinScalarFunction::SHA512
| BuiltinScalarFunction::MD5 => Signature::uniform(
1,
vec![Utf8, LargeUtf8, Binary, LargeBinary],
self.volatility(),
),
BuiltinScalarFunction::Ascii
| BuiltinScalarFunction::BitLength
| BuiltinScalarFunction::CharacterLength
Expand Down Expand Up @@ -968,15 +925,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::FromUnixtime => {
Signature::uniform(1, vec![Int64], self.volatility())
}
BuiltinScalarFunction::Digest => Signature::one_of(
vec![
Exact(vec![Utf8, Utf8]),
Exact(vec![LargeUtf8, Utf8]),
Exact(vec![Binary, Utf8]),
Exact(vec![LargeBinary, Utf8]),
],
self.volatility(),
),
BuiltinScalarFunction::DateTrunc => Signature::one_of(
vec![
Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
Expand Down Expand Up @@ -1374,12 +1322,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::FromUnixtime => &["from_unixtime"],

// hashing functions
BuiltinScalarFunction::Digest => &["digest"],
BuiltinScalarFunction::MD5 => &["md5"],
BuiltinScalarFunction::SHA224 => &["sha224"],
BuiltinScalarFunction::SHA256 => &["sha256"],
BuiltinScalarFunction::SHA384 => &["sha384"],
BuiltinScalarFunction::SHA512 => &["sha512"],

// other functions
BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"],
Expand Down Expand Up @@ -1517,6 +1459,7 @@ get_optimal_return_type!(utf8_to_str_type, DataType::LargeUtf8, DataType::Utf8);
// `utf8_to_int_type`: returns either a Int32 or Int64 based on the input type size.
get_optimal_return_type!(utf8_to_int_type, DataType::Int64, DataType::Int32);

#[warn(dead_code)]
fn utf8_or_binary_to_binary_type(arg_type: &DataType, name: &str) -> Result<DataType> {
Ok(match arg_type {
DataType::LargeUtf8
Expand Down
21 changes: 0 additions & 21 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -760,7 +760,6 @@ scalar_expr!(
code_point,
"converts the Unicode code point to a UTF8 character"
);
scalar_expr!(Digest, digest, input algorithm, "compute the binary hash of `input`, using the `algorithm`");
scalar_expr!(InitCap, initcap, string, "converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase");
scalar_expr!(Left, left, string n, "returns the first `n` characters in the `string`");
scalar_expr!(Lower, lower, string, "convert the string to lower case");
Expand All @@ -770,7 +769,6 @@ scalar_expr!(
string,
"removes all characters, spaces by default, from the beginning of a string"
);
scalar_expr!(MD5, md5, string, "returns the MD5 hash of a string");
scalar_expr!(
OctetLength,
octet_length,
Expand All @@ -787,10 +785,6 @@ scalar_expr!(
string,
"removes all characters, spaces by default, from the end of a string"
);
scalar_expr!(SHA224, sha224, string, "SHA-224 hash");
scalar_expr!(SHA256, sha256, string, "SHA-256 hash");
scalar_expr!(SHA384, sha384, string, "SHA-384 hash");
scalar_expr!(SHA512, sha512, string, "SHA-512 hash");
scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index.");
scalar_expr!(StringToArray, string_to_array, string delimiter null_string, "splits a `string` based on a `delimiter` and returns an array of parts. Any parts matching the optional `null_string` will be replaced with `NULL`");
scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` starts with the `prefix`");
Expand Down Expand Up @@ -1371,19 +1365,4 @@ mod test {
unreachable!();
}
}

/// Verifies that `digest(col("tableA.a"), lit("md5"))` produces a built-in
/// scalar function expression whose function is `BuiltinScalarFunction::Digest`
/// and which carries exactly two arguments.
#[test]
fn digest_function_definitions() {
    // Guard clause: any other expression shape is treated as impossible here.
    let Expr::ScalarFunction(ScalarFunction {
        func_def: ScalarFunctionDefinition::BuiltIn(fun),
        args,
    }) = digest(col("tableA.a"), lit("md5"))
    else {
        unreachable!();
    };

    let name = BuiltinScalarFunction::Digest;
    assert_eq!(name, fun);
    assert_eq!(2, args.len());
}
}
9 changes: 6 additions & 3 deletions datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ core_expressions = []
# enable datetime functions
datetime_expressions = []
# Enable encoding by default so the doctests work. In general don't automatically enable all packages.
default = ["core_expressions", "datetime_expressions", "encoding_expressions", "math_expressions", "regex_expressions"]
default = ["core_expressions", "datetime_expressions", "encoding_expressions", "math_expressions", "regex_expressions", "crypto_expressions"]
# enable encode/decode functions
encoding_expressions = ["base64", "hex"]
# enable math functions
math_expressions = []
# enable regular expressions
regex_expressions = ["regex"]

crypto_expressions = ["md-5", "sha2", "blake2", "blake3"]
[lib]
name = "datafusion_functions"
path = "src/lib.rs"
Expand All @@ -61,7 +61,10 @@ hex = { version = "0.4", optional = true }
itertools = { workspace = true }
log = { workspace = true }
regex = { version = "1.8", optional = true }

blake2 = { version = "^0.10.2", optional = true }
blake3 = { version = "1.0", optional = true }
md-5 = { version = "^0.10.0", optional = true }
sha2 = { version = "^0.10.1", optional = true }
[dev-dependencies]
criterion = "0.5"
rand = { workspace = true }
Expand Down
Loading