From f2d9ad42d6643f970a3e7cc9d0698a28457f888e Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Wed, 30 Oct 2024 15:50:11 -0700 Subject: [PATCH] add cory tests --- src/daft-sql/src/modules/hashing.rs | 7 +++++-- tests/sql/test_exprs.py | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/daft-sql/src/modules/hashing.rs b/src/daft-sql/src/modules/hashing.rs index 37fbceb3a0..da5da1e66c 100644 --- a/src/daft-sql/src/modules/hashing.rs +++ b/src/daft-sql/src/modules/hashing.rs @@ -115,8 +115,11 @@ impl SQLFunction for SQLMinhash { match inputs { [input, args @ ..] => { let input = planner.plan_function_arg(input)?; - let args: MinHashFunction = - planner.plan_function_args(args, &["num_hashes", "ngram_size", "seed"], 0)?; + let args: MinHashFunction = planner.plan_function_args( + args, + &["num_hashes", "ngram_size", "seed", "hash_function"], + 0, + )?; Ok(minhash( input, diff --git a/tests/sql/test_exprs.py b/tests/sql/test_exprs.py index 595a486a31..9ae7d43870 100644 --- a/tests/sql/test_exprs.py +++ b/tests/sql/test_exprs.py @@ -45,6 +45,7 @@ def test_hash_exprs(): hash(a, seed:=0) as hash_a_seed_0, minhash(a, num_hashes:=10, ngram_size:= 100, seed:=10) as minhash_a, minhash(a, num_hashes:=10, ngram_size:= 100) as minhash_a_no_seed, + minhash(a, num_hashes:=10, ngram_size:= 100, seed:=10, hash_function:='xxhash') as minhash_a_xxhash, FROM df """) .collect() @@ -58,6 +59,7 @@ def test_hash_exprs(): col("a").hash(seed=0).alias("hash_a_seed_0"), col("a").minhash(num_hashes=10, ngram_size=100, seed=10).alias("minhash_a"), col("a").minhash(num_hashes=10, ngram_size=100).alias("minhash_a_no_seed"), + col("a").minhash(num_hashes=10, ngram_size=100, seed=10, hash_function="xxhash").alias("minhash_a_xxhash"), ) .collect() .to_pydict()