From 6887833d73d09cc3990c93175d10eddce68c3218 Mon Sep 17 00:00:00 2001 From: parkma99 Date: Tue, 9 May 2023 20:37:48 +0800 Subject: [PATCH 1/5] Port some tests in joins.rs to sqllogictest --- datafusion/core/tests/sql/joins.rs | 415 ------------ datafusion/core/tests/sql/mod.rs | 94 --- .../tests/sqllogictests/test_files/joins.slt | 617 ++++++++++++++++++ 3 files changed, 617 insertions(+), 509 deletions(-) create mode 100644 datafusion/core/tests/sqllogictests/test_files/joins.slt diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs index 3213ae3e0595..54ecfe4951dc 100644 --- a/datafusion/core/tests/sql/joins.rs +++ b/datafusion/core/tests/sql/joins.rs @@ -18,392 +18,6 @@ use super::*; use datafusion::from_slice::FromSlice; -#[tokio::test] -async fn equijoin() -> Result<()> { - let ctx = create_join_context_qualified("t1", "t2")?; - let equivalent_sql = [ - "SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t1.a = t2.a ORDER BY t1.a", - "SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t2.a = t1.a ORDER BY t1.a", - ]; - let expected = vec![ - "+---+-----+", - "| a | b |", - "+---+-----+", - "| 1 | 100 |", - "| 2 | 200 |", - "| 4 | 400 |", - "+---+-----+", - ]; - for sql in equivalent_sql.iter() { - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - } - - Ok(()) -} - -#[tokio::test] -async fn left_join_unbalanced() -> Result<()> { - // the t1_id is larger than t2_id so the join_selection optimizer should kick in - let ctx = create_join_context_unbalanced("t1_id", "t2_id")?; - let equivalent_sql = [ - "SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id ORDER BY t1_id", - "SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t2_id = t1_id ORDER BY t1_id", - ]; - let expected = vec![ - "+-------+---------+---------+", - "| t1_id | t1_name | t2_name |", - "+-------+---------+---------+", - "| 11 | a | z |", - "| 22 | b | y |", - "| 33 | c | |", - "| 44 | d | x |", - "| 77 | e | |", - "+-------+---------+---------+", - ]; - for sql in equivalent_sql.iter() { - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - } - Ok(()) -} - -#[tokio::test] -async fn left_join_null_filter() -> Result<()> { - // Since t2 is the non-preserved side of the join, we cannot push down a NULL filter. - // Note that this is only true because IS NULL does not remove nulls. For filters that - // remove nulls, we can rewrite the join as an inner join and then push down the filter. - let ctx = create_join_context_with_nulls()?; - let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_name IS NULL ORDER BY t1_id"; - let expected = vec![ - "+-------+-------+---------+", - "| t1_id | t2_id | t2_name |", - "+-------+-------+---------+", - "| 22 | 22 | |", - "| 33 | | |", - "| 77 | | |", - "| 88 | | |", - "+-------+-------+---------+", - ]; - - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn left_join_null_filter_on_join_column() -> Result<()> { - // Again, since t2 is the non-preserved side of the join, we cannot push down a NULL filter. - let ctx = create_join_context_with_nulls()?; - let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_id IS NULL ORDER BY t1_id"; - let expected = vec![ - "+-------+-------+---------+", - "| t1_id | t2_id | t2_name |", - "+-------+-------+---------+", - "| 33 | | |", - "| 77 | | |", - "| 88 | | |", - "+-------+-------+---------+", - ]; - - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn left_join_not_null_filter() -> Result<()> { - let ctx = create_join_context_with_nulls()?; - let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_name IS NOT NULL ORDER BY t1_id"; - let expected = vec![ - "+-------+-------+---------+", - "| t1_id | t2_id | t2_name |", - "+-------+-------+---------+", - "| 11 | 11 | z |", - "| 44 | 44 | x |", - "| 99 | 99 | u |", - "+-------+-------+---------+", - ]; - - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn left_join_not_null_filter_on_join_column() -> Result<()> { - let ctx = create_join_context_with_nulls()?; - let sql = "SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_id IS NOT NULL ORDER BY t1_id"; - let expected = vec![ - "+-------+-------+---------+", - "| t1_id | t2_id | t2_name |", - "+-------+-------+---------+", - "| 11 | 11 | z |", - "| 22 | 22 | |", - "| 44 | 44 | x |", - "| 99 | 99 | u |", - "+-------+-------+---------+", - ]; - - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn self_join_non_equijoin() -> Result<()> { - let ctx = create_join_context_with_nulls()?; - let sql = - "SELECT x.t1_id, y.t1_id FROM t1 x JOIN t1 y ON x.t1_id = 11 AND y.t1_id = 44"; - let expected = vec![ - "+-------+-------+", - "| t1_id | t1_id |", - "+-------+-------+", - "| 11 | 44 |", - "+-------+-------+", - ]; - - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn right_join_null_filter() -> Result<()> { - let ctx = create_join_context_with_nulls()?; - let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE t1_name IS NULL ORDER BY t2_id"; - let expected = vec![ - "+-------+---------+-------+", - "| t1_id | t1_name | t2_id |", - "+-------+---------+-------+", - "| | | 55 |", - "| 99 | | 99 |", - "+-------+---------+-------+", - ]; - - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn right_join_null_filter_on_join_column() -> Result<()> { - let ctx = create_join_context_with_nulls()?; - let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE t1_id IS NULL ORDER BY t2_id"; - let expected = vec![ - "+-------+---------+-------+", - "| t1_id | t1_name | t2_id |", - "+-------+---------+-------+", - "| | | 55 |", - "+-------+---------+-------+", - ]; - - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn right_join_not_null_filter() -> Result<()> { - let ctx = create_join_context_with_nulls()?; - let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE t1_name IS NOT NULL ORDER BY t2_id"; - let expected = vec![ - "+-------+---------+-------+", - "| t1_id | t1_name | t2_id |", - "+-------+---------+-------+", - "| 11 | a | 11 |", - "| 22 | b | 22 |", - "| 44 | d | 44 |", - "+-------+---------+-------+", - ]; - - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn right_join_not_null_filter_on_join_column() -> Result<()> { - let ctx = create_join_context_with_nulls()?; - let sql = "SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE t1_id IS NOT NULL ORDER BY t2_id"; - let expected = vec![ - "+-------+---------+-------+", - "| t1_id | t1_name | t2_id |", - "+-------+---------+-------+", - "| 11 | a | 11 |", - "| 22 | b | 22 |", - "| 44 | d | 44 |", - "| 99 | | 99 |", - "+-------+---------+-------+", - ]; - - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn full_join_null_filter() -> Result<()> { - let ctx = create_join_context_with_nulls()?; - let sql = "SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON t1_id = t2_id WHERE t1_name IS NULL ORDER BY t1_id"; - let expected = vec![ - "+-------+---------+-------+", - "| t1_id | t1_name | t2_id |", - "+-------+---------+-------+", - "| 88 | | |", - "| 99 | | 99 |", - "| | | 55 |", - "+-------+---------+-------+", - ]; - - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn full_join_not_null_filter() -> Result<()> { - let ctx = create_join_context_with_nulls()?; - let sql = "SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON t1_id = t2_id WHERE t1_name IS NOT NULL ORDER BY t1_id"; - let expected = vec![ - "+-------+---------+-------+", - "| t1_id | t1_name | t2_id |", - "+-------+---------+-------+", - "| 11 | a | 11 |", - "| 22 | b | 22 |", - "| 33 | c | |", - "| 44 | d | 44 |", - "| 77 | e | |", - "+-------+---------+-------+", - ]; - - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn left_join_using() -> Result<()> { - let test_repartition_joins = vec![true, false]; - for repartition_joins in test_repartition_joins { - let ctx = create_join_context("id", "id", repartition_joins)?; - let sql = - "SELECT id, t1_name, t2_name FROM t1 LEFT JOIN t2 USING (id) ORDER BY id"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+----+---------+---------+", - "| id | t1_name | t2_name |", - "+----+---------+---------+", - "| 11 | a | z |", - "| 22 | b | y |", - "| 33 | c | |", - "| 44 | d | x |", - "+----+---------+---------+", - ]; - assert_batches_eq!(expected, &actual); - } - Ok(()) -} - -#[tokio::test] -async fn cross_join() { - let test_repartition_joins = vec![true, false]; - for repartition_joins in test_repartition_joins { - let ctx = create_join_context("t1_id", "t2_id", repartition_joins).unwrap(); - - let sql = "SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id"; - let actual = execute(&ctx, sql).await; - - assert_eq!(4 * 4, actual.len()); - - let sql = "SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER BY t1_id"; - let actual = execute(&ctx, sql).await; - - assert_eq!(4 * 4, actual.len()); - - let sql = "SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id"; - - let actual = execute(&ctx, sql).await; - assert_eq!(4 * 4, actual.len()); - - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+-------+---------+---------+", - "| t1_id | t1_name | t2_name |", - "+-------+---------+---------+", - "| 11 | a | z |", - "| 11 | a | y |", - "| 11 | a | x |", - "| 11 | a | w |", - "| 22 | b | z |", - "| 22 | b | y |", - "| 22 | b | x |", - "| 22 | b | w |", - "| 33 | c | z |", - "| 33 | c | y |", - "| 33 | c | x |", - "| 33 | c | w |", - "| 44 | d | z |", - "| 44 | d | y |", - "| 44 | d | x |", - "| 44 | d | w |", - "+-------+---------+---------+", - ]; - - assert_batches_eq!(expected, &actual); - - // Two partitions (from UNION) on the left - let sql = "SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL SELECT t1_id, t1_name FROM t1) AS t1 CROSS JOIN t2"; - let actual = execute(&ctx, sql).await; - - assert_eq!(4 * 4 * 2, actual.len()); - - // Two partitions (from UNION) on the right - let sql = "SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN (SELECT t2_name FROM t2 UNION ALL SELECT t2_name FROM t2) AS t2"; - let actual = execute(&ctx, sql).await; - - assert_eq!(4 * 4 * 2, actual.len()); - } -} - -#[tokio::test] -async fn cross_join_unbalanced() { - // the t1_id is larger than t2_id so the join_selection optimizer should kick in - let ctx = create_join_context_unbalanced("t1_id", "t2_id").unwrap(); - - // the order of the values is not determinisitic, so we need to sort to check the values - let sql = - "SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id, t1_name, t2_name"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+-------+---------+---------+", - "| t1_id | t1_name | t2_name |", - "+-------+---------+---------+", - "| 11 | a | w |", - "| 11 | a | x |", - "| 11 | a | y |", - "| 11 | a | z |", - "| 22 | b | w |", - "| 22 | b | x |", - "| 22 | b | y |", - "| 22 | b | z |", - "| 33 | c | w |", - "| 33 | c | x |", - "| 33 | c | y |", - "| 33 | c | z |", - "| 44 | d | w |", - "| 44 | d | x |", - "| 44 | d | y |", - "| 44 | d | z |", - "| 77 | e | w |", - "| 77 | e | x |", - "| 77 | e | y |", - "| 77 | e | z |", - "+-------+---------+---------+", - ]; - assert_batches_eq!(expected, &actual); -} - #[tokio::test] async fn test_join_timestamp() -> Result<()> { let ctx = SessionContext::new(); @@ -595,35 +209,6 @@ async fn nestedjoin_without_alias() -> Result<()> { Ok(()) } -#[tokio::test] -async fn issue_3002() -> Result<()> { - // repro case for https://github.com/apache/arrow-datafusion/issues/3002 - let sql = "select a.a, b.b from a join b on a.a = b.b"; - let expected = vec!["++", "++"]; - let ctx = create_join_context_qualified("a", "b")?; - let actual = execute_to_batches(&ctx, sql).await; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn inner_join_nulls() { - let sql = "SELECT * FROM (SELECT null AS id1) t1 - INNER JOIN (SELECT null AS id2) t2 ON id1 = id2"; - - #[rustfmt::skip] - let expected = vec![ - "++", - "++", - ]; - - let ctx = create_join_context_qualified("t1", "t2").unwrap(); - let actual = execute_to_batches(&ctx, sql).await; - - // left and right shouldn't match anything - assert_batches_eq!(expected, &actual); -} - #[tokio::test] async fn join_tables_with_duplicated_column_name_not_in_on_constraint() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index fba64e8b0124..7088f045d6d7 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -522,100 +522,6 @@ fn create_hashjoin_datatype_context() -> Result { Ok(ctx) } -/// the table column_left has more rows than the table column_right -fn create_join_context_unbalanced( - column_left: &str, - column_right: &str, -) -> Result { - let ctx = SessionContext::new(); - - let t1_schema = Arc::new(Schema::new(vec![ - Field::new(column_left, DataType::UInt32, true), - Field::new("t1_name", DataType::Utf8, true), - ])); - let t1_data = RecordBatch::try_new( - t1_schema, - vec![ - Arc::new(UInt32Array::from_slice([11, 22, 33, 44, 77])), - Arc::new(StringArray::from(vec![ - Some("a"), - Some("b"), - Some("c"), - Some("d"), - Some("e"), - ])), - ], - )?; - ctx.register_batch("t1", t1_data)?; - - let t2_schema = Arc::new(Schema::new(vec![ - Field::new(column_right, DataType::UInt32, true), - Field::new("t2_name", DataType::Utf8, true), - ])); - let t2_data = RecordBatch::try_new( - t2_schema, - vec![ - Arc::new(UInt32Array::from_slice([11, 22, 44, 55])), - Arc::new(StringArray::from(vec![ - Some("z"), - Some("y"), - Some("x"), - Some("w"), - ])), - ], - )?; - ctx.register_batch("t2", t2_data)?; - - Ok(ctx) -} - -// Create memory tables with nulls -fn create_join_context_with_nulls() -> Result { - let ctx = SessionContext::new(); - - let t1_schema = Arc::new(Schema::new(vec![ - Field::new("t1_id", DataType::UInt32, true), - Field::new("t1_name", DataType::Utf8, true), - ])); - let t1_data = RecordBatch::try_new( - t1_schema, - vec![ - Arc::new(UInt32Array::from(vec![11, 22, 33, 44, 77, 88, 99])), - Arc::new(StringArray::from(vec![ - Some("a"), - Some("b"), - Some("c"), - Some("d"), - Some("e"), - None, - None, - ])), - ], - )?; - ctx.register_batch("t1", t1_data)?; - - let t2_schema = Arc::new(Schema::new(vec![ - Field::new("t2_id", DataType::UInt32, true), - Field::new("t2_name", DataType::Utf8, true), - ])); - let t2_data = RecordBatch::try_new( - t2_schema, - vec![ - Arc::new(UInt32Array::from(vec![11, 22, 44, 55, 99])), - Arc::new(StringArray::from(vec![ - Some("z"), - None, - Some("x"), - Some("w"), - Some("u"), - ])), - ], - )?; - ctx.register_batch("t2", t2_data)?; - - Ok(ctx) -} - fn create_sort_merge_join_context( column_left: &str, column_right: &str, diff --git a/datafusion/core/tests/sqllogictests/test_files/joins.slt b/datafusion/core/tests/sqllogictests/test_files/joins.slt new file mode 100644 index 000000000000..acf4b7934efe --- /dev/null +++ b/datafusion/core/tests/sqllogictests/test_files/joins.slt @@ -0,0 +1,617 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +########## +## Joins Tests +########## + +# create table t1 +statement ok +CREATE TABLE t1(a INT, b INT, c INT) AS VALUES +(1, 10, 50), +(2, 20, 60), +(3, 30, 70), +(4, 40, 80) + +# create table t2 +statement ok +CREATE TABLE t2(a INT, b INT, c INT) AS VALUES +(1, 100, 500), +(2, 200, 600), +(9, 300, 700), +(4, 400, 800) + +# equijoin +query II nosort +SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t1.a = t2.a ORDER BY t1.a +---- +1 100 +2 200 +4 400 + +query II nosort +SELECT t1.a, t2.b FROM t1 INNER JOIN t2 ON t2.a = t1.a ORDER BY t1.a +---- +1 100 +2 200 +4 400 + +# inner_join_nulls +query ?? +SELECT * FROM (SELECT null AS id1) t1 +INNER JOIN (SELECT null AS id2) t2 ON id1 = id2 +---- + +statement ok +DROP TABLE t1 + +statement ok +DROP TABLE t2 + + +# create table a +statement ok +CREATE TABLE a(a INT, b INT, c INT) AS VALUES +(1, 10, 50), +(2, 20, 60), +(3, 30, 70), +(4, 40, 80) + +# create table b +statement ok +CREATE TABLE b(a INT, b INT, c INT) AS VALUES +(1, 100, 500), +(2, 200, 600), +(9, 300, 700), +(4, 400, 800) + +# issue_3002 +query II +select a.a, b.b from a join b on a.a = b.b +---- + +statement ok +DROP TABLE a + +statement ok +DROP TABLE b + +# create table t1 +statement ok +CREATE TABLE t1(t1_id INT, t1_name VARCHAR) AS VALUES +(11, 'a'), +(22, 'b'), +(33, 'c'), +(44, 'd'), +(77, 'e') + +# create table t2 +statement ok +CREATE TABLE t2(t2_id INT, t2_name VARCHAR) AS VALUES +(11, 'z'), +(22, 'y'), +(44, 'x'), +(55, 'w') + +# left_join_unbalanced +query ITT nosort +SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id ORDER BY t1_id +---- +11 a z +22 b y +33 c NULL +44 d x +77 e NULL + +query ITT nosort +SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t2_id = t1_id ORDER BY t1_id +---- +11 a z +22 b y +33 c NULL +44 d x +77 e NULL + + +# cross_join_unbalanced +query ITT nosort +SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id, t1_name, t2_name +---- +11 a w +11 a x +11 a y +11 a z +22 b w +22 b x +22 b y +22 b z +33 c w +33 c x +33 c y +33 c z +44 d w +44 d x +44 d y +44 d z +77 e w +77 e x +77 e y +77 e z + +statement ok +DROP TABLE t1 + +statement ok +DROP TABLE t2 + +# create table t1 +statement ok +CREATE TABLE t1(t1_id INT, t1_name VARCHAR) AS VALUES +(11, 'a'), +(22, 'b'), +(33, 'c'), +(44, 'd'), +(77, 'e'), +(88, NULL), +(99, NULL) + +# create table t2 +statement ok +CREATE TABLE t2(t2_id INT, t2_name VARCHAR) AS VALUES +(11, 'z'), +(22, NULL), +(44, 'x'), +(55, 'w'), +(99, 'u') + +# left_join_null_filter +query IIT nosort +SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_name IS NULL ORDER BY t1_id +---- +22 22 NULL +33 NULL NULL +77 NULL NULL +88 NULL NULL + +# left_join_null_filter_on_join_column +query IIT nosort +SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_id IS NULL ORDER BY t1_id +---- +33 NULL NULL +77 NULL NULL +88 NULL NULL + +# left_join_not_null_filter +query IIT nosort +SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_name IS NOT NULL ORDER BY t1_id +---- +11 11 z +44 44 x +99 99 u + +# left_join_not_null_filter_on_join_column +query IIT nosort +SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_id IS NOT NULL ORDER BY t1_id +---- +11 11 z +22 22 NULL +44 44 x +99 99 u + +# self_join_non_equijoin +query II nosort +SELECT x.t1_id, y.t1_id FROM t1 x JOIN t1 y ON x.t1_id = 11 AND y.t1_id = 44 +---- +11 44 + +# right_join_null_filter +query ITI nosort +SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE t1_name IS NULL ORDER BY t2_id +---- +NULL NULL 55 +99 NULL 99 + +# right_join_null_filter_on_join_column +query ITI nosort +SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE t1_id IS NULL ORDER BY t2_id +---- +NULL NULL 55 + +# right_join_not_null_filter +query ITI nosort +SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE t1_name IS NOT NULL ORDER BY t2_id +---- +11 a 11 +22 b 22 +44 d 44 + +# right_join_not_null_filter_on_join_column +query ITI nosort +SELECT t1_id, t1_name, t2_id FROM t1 RIGHT JOIN t2 ON t1_id = t2_id WHERE t1_id IS NOT NULL ORDER BY t2_id +---- +11 a 11 +22 b 22 +44 d 44 +99 NULL 99 + +# full_join_null_filter +query ITI nosort +SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON t1_id = t2_id WHERE t1_name IS NULL ORDER BY t1_id +---- +88 NULL NULL +99 NULL 99 +NULL NULL 55 + +# full_join_not_null_filter +query ITI nosort +SELECT t1_id, t1_name, t2_id FROM t1 FULL OUTER JOIN t2 ON t1_id = t2_id WHERE t1_name IS NOT NULL ORDER BY t1_id +---- +11 a 11 +22 b 22 +33 c NULL +44 d 44 +77 e NULL + +statement ok +DROP TABLE t1 + +statement ok +DROP TABLE t2 + +# create table t1 +statement ok +CREATE TABLE t1(id INT, t1_name VARCHAR, t1_int INT) AS VALUES +(11, 'a', 1), +(22, 'b', 2), +(33, 'c', 3), +(44, 'd', 4) + +# create table t2 +statement ok +CREATE TABLE t2(id INT, t2_name VARCHAR, t2_int INT) AS VALUES +(11, 'z', 3), +(22, 'y', 1), +(44, 'x', 3), +(55, 'w', 3) + +# left_join_using + +# set repartition_joins to true +statement ok +set datafusion.optimizer.repartition_joins = true + +query ITT nosort +SELECT id, t1_name, t2_name FROM t1 LEFT JOIN t2 USING (id) ORDER BY id +---- +11 a z +22 b y +33 c NULL +44 d x + +# set repartition_joins to false +statement ok +set datafusion.optimizer.repartition_joins = false + +query ITT nosort +SELECT id, t1_name, t2_name FROM t1 LEFT JOIN t2 USING (id) ORDER BY id +---- +11 a z +22 b y +33 c NULL +44 d x + +statement ok +DROP TABLE t1 + +statement ok +DROP TABLE t2 + +# create table t1 +statement ok +CREATE TABLE t1(t1_id INT, t1_name VARCHAR, t1_int INT) AS VALUES +(11, 'a', 1), +(22, 'b', 2), +(33, 'c', 3), +(44, 'd', 4) + +# create table t2 +statement ok +CREATE TABLE t2(t2_id INT, t2_name VARCHAR, t2_int INT) AS VALUES +(11, 'z', 3), +(22, 'y', 1), +(44, 'x', 3), +(55, 'w', 3) + +# cross_join + +# set repartition_joins to true +statement ok +set datafusion.optimizer.repartition_joins = true + +query ITT nosort +SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id +---- +11 a z +11 a y +11 a x +11 a w +22 b z +22 b y +22 b x +22 b w +33 c z +33 c y +33 c x +33 c w +44 d z +44 d y +44 d x +44 d w + +query ITT nosort +SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER BY t1_id +---- +11 a z +11 a y +11 a x +11 a w +22 b z +22 b y +22 b x +22 b w +33 c z +33 c y +33 c x +33 c w +44 d z +44 d y +44 d x +44 d w + +query ITT nosort +SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id +---- +11 a z +11 a y +11 a x +11 a w +22 b z +22 b y +22 b x +22 b w +33 c z +33 c y +33 c x +33 c w +44 d z +44 d y +44 d x +44 d w + +query ITITI rowsort +SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL SELECT t1_id, t1_name FROM t1) AS t1 CROSS JOIN t2 +---- +11 a 11 z 3 +11 a 11 z 3 +11 a 22 y 1 +11 a 22 y 1 +11 a 44 x 3 +11 a 44 x 3 +11 a 55 w 3 +11 a 55 w 3 +22 b 11 z 3 +22 b 11 z 3 +22 b 22 y 1 +22 b 22 y 1 +22 b 44 x 3 +22 b 44 x 3 +22 b 55 w 3 +22 b 55 w 3 +33 c 11 z 3 +33 c 11 z 3 +33 c 22 y 1 +33 c 22 y 1 +33 c 44 x 3 +33 c 44 x 3 +33 c 55 w 3 +33 c 55 w 3 +44 d 11 z 3 +44 d 11 z 3 +44 d 22 y 1 +44 d 22 y 1 +44 d 44 x 3 +44 d 44 x 3 +44 d 55 w 3 +44 d 55 w 3 + +query ITT rowsort +SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN (SELECT t2_name FROM t2 UNION ALL SELECT t2_name FROM t2) AS t2_data +---- +11 a w +11 a w +11 a x +11 a x +11 a y +11 a y +11 a z +11 a z +22 b w +22 b w +22 b x +22 b x +22 b y +22 b y +22 b z +22 b z +33 c w +33 c w +33 c x +33 c x +33 c y +33 c y +33 c z +33 c z +44 d w +44 d w +44 d x +44 d x +44 d y +44 d y +44 d z +44 d z + +# set repartition_joins to true +statement ok +set datafusion.optimizer.repartition_joins = false + +query ITT nosort +SELECT t1_id, t1_name, t2_name FROM t1, t2 ORDER BY t1_id +---- +11 a z +11 a y +11 a x +11 a w +22 b z +22 b y +22 b x +22 b w +33 c z +33 c y +33 c x +33 c w +44 d z +44 d y +44 d x +44 d w + +query ITT nosort +SELECT t1_id, t1_name, t2_name FROM t1, t2 WHERE 1=1 ORDER BY t1_id +---- +11 a z +11 a y +11 a x +11 a w +22 b z +22 b y +22 b x +22 b w +33 c z +33 c y +33 c x +33 c w +44 d z +44 d y +44 d x +44 d w + +query ITT nosort +SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN t2 ORDER BY t1_id +---- +11 a z +11 a y +11 a x +11 a w +22 b z +22 b y +22 b x +22 b w +33 c z +33 c y +33 c x +33 c w +44 d z +44 d y +44 d x +44 d w + +query ITITI rowsort +SELECT * FROM (SELECT t1_id, t1_name FROM t1 UNION ALL SELECT t1_id, t1_name FROM t1) AS t1 CROSS JOIN t2 +---- +11 a 11 z 3 +11 a 11 z 3 +11 a 22 y 1 +11 a 22 y 1 +11 a 44 x 3 +11 a 44 x 3 +11 a 55 w 3 +11 a 55 w 3 +22 b 11 z 3 +22 b 11 z 3 +22 b 22 y 1 +22 b 22 y 1 +22 b 44 x 3 +22 b 44 x 3 +22 b 55 w 3 +22 b 55 w 3 +33 c 11 z 3 +33 c 11 z 3 +33 c 22 y 1 +33 c 22 y 1 +33 c 44 x 3 +33 c 44 x 3 +33 c 55 w 3 +33 c 55 w 3 +44 d 11 z 3 +44 d 11 z 3 +44 d 22 y 1 +44 d 22 y 1 +44 d 44 x 3 +44 d 44 x 3 +44 d 55 w 3 +44 d 55 w 3 + +query ITT rowsort +SELECT t1_id, t1_name, t2_name FROM t1 CROSS JOIN (SELECT t2_name FROM t2 UNION ALL SELECT t2_name FROM t2) AS t2_data +---- +11 a w +11 a w +11 a x +11 a x +11 a y +11 a y +11 a z +11 a z +22 b w +22 b w +22 b x +22 b x +22 b y +22 b y +22 b z +22 b z +33 c w +33 c w +33 c x +33 c x +33 c y +33 c y +33 c z +33 c z +44 d w +44 d w +44 d x +44 d x +44 d y +44 d y +44 d z +44 d z + +statement ok +DROP TABLE t1 + +statement ok +DROP TABLE t2 From cdf24d55fd9f2e6bc00478a5d4a5f2e95b736157 Mon Sep 17 00:00:00 2001 From: parkma99 <84610851+parkma99@users.noreply.github.com> Date: Fri, 12 May 2023 08:46:42 +0800 Subject: [PATCH 2/5] Update datafusion/core/tests/sqllogictests/test_files/joins.slt Co-authored-by: Andrew Lamb --- datafusion/core/tests/sqllogictests/test_files/joins.slt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/core/tests/sqllogictests/test_files/joins.slt b/datafusion/core/tests/sqllogictests/test_files/joins.slt index acf4b7934efe..a05680dc5fef 100644 --- a/datafusion/core/tests/sqllogictests/test_files/joins.slt +++ b/datafusion/core/tests/sqllogictests/test_files/joins.slt @@ -179,6 +179,9 @@ CREATE TABLE t2(t2_id INT, t2_name VARCHAR) AS VALUES (99, 'u') # left_join_null_filter +# // Since t2 is the non-preserved side of the join, we cannot push down a NULL filter. +# // Note that this is only true because IS NULL does not remove nulls. For filters that +# // remove nulls, we can rewrite the join as an inner join and then push down the filter. query IIT nosort SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_name IS NULL ORDER BY t1_id ---- From 411daed7e1488fdb30828845253055ca034040ed Mon Sep 17 00:00:00 2001 From: parkma99 <84610851+parkma99@users.noreply.github.com> Date: Fri, 12 May 2023 08:46:49 +0800 Subject: [PATCH 3/5] Update datafusion/core/tests/sqllogictests/test_files/joins.slt Co-authored-by: Andrew Lamb --- datafusion/core/tests/sqllogictests/test_files/joins.slt | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/core/tests/sqllogictests/test_files/joins.slt b/datafusion/core/tests/sqllogictests/test_files/joins.slt index a05680dc5fef..821ab8ebc30a 100644 --- a/datafusion/core/tests/sqllogictests/test_files/joins.slt +++ b/datafusion/core/tests/sqllogictests/test_files/joins.slt @@ -108,6 +108,7 @@ CREATE TABLE t2(t2_id INT, t2_name VARCHAR) AS VALUES (55, 'w') # left_join_unbalanced +# // the t1_id is larger than t2_id so the join_selection optimizer should kick in query ITT nosort SELECT t1_id, t1_name, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id ORDER BY t1_id ---- From f7f7893bbbeac5d2cea84d3506761f219da484ea Mon Sep 17 00:00:00 2001 From: parkma99 <84610851+parkma99@users.noreply.github.com> Date: Fri, 12 May 2023 08:46:57 +0800 Subject: [PATCH 4/5] Update datafusion/core/tests/sqllogictests/test_files/joins.slt Co-authored-by: Andrew Lamb --- datafusion/core/tests/sqllogictests/test_files/joins.slt | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/core/tests/sqllogictests/test_files/joins.slt b/datafusion/core/tests/sqllogictests/test_files/joins.slt index 821ab8ebc30a..7717da9e641f 100644 --- a/datafusion/core/tests/sqllogictests/test_files/joins.slt +++ b/datafusion/core/tests/sqllogictests/test_files/joins.slt @@ -192,6 +192,7 @@ SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_name 88 NULL NULL # left_join_null_filter_on_join_column +# // Again, since t2 is the non-preserved side of the join, we cannot push down a NULL filter. query IIT nosort SELECT t1_id, t2_id, t2_name FROM t1 LEFT JOIN t2 ON t1_id = t2_id WHERE t2_id IS NULL ORDER BY t1_id ---- From 9fcfb4e47c3b85707e9733b8863e7230ae62a59d Mon Sep 17 00:00:00 2001 From: parkma99 <84610851+parkma99@users.noreply.github.com> Date: Fri, 12 May 2023 08:47:06 +0800 Subject: [PATCH 5/5] Update datafusion/core/tests/sqllogictests/test_files/joins.slt Co-authored-by: Andrew Lamb --- datafusion/core/tests/sqllogictests/test_files/joins.slt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/core/tests/sqllogictests/test_files/joins.slt b/datafusion/core/tests/sqllogictests/test_files/joins.slt index 7717da9e641f..eb8f72470c6c 100644 --- a/datafusion/core/tests/sqllogictests/test_files/joins.slt +++ b/datafusion/core/tests/sqllogictests/test_files/joins.slt @@ -80,6 +80,8 @@ CREATE TABLE b(a INT, b INT, c INT) AS VALUES (4, 400, 800) # issue_3002 +# // repro case for https://github.com/apache/arrow-datafusion/issues/3002 + query II select a.a, b.b from a join b on a.a = b.b ----