From f957a07468df3e9a30393cbcb9baafcb41ad0bc6 Mon Sep 17 00:00:00 2001 From: tobymao Date: Thu, 15 Jun 2023 15:47:22 -0700 Subject: [PATCH] Fix: overly aggressive cross join removal --- sqlglot/optimizer/optimize_joins.py | 3 +++ tests/fixtures/optimizer/tpc-ds/tpc-ds.sql | 8 +++---- tests/fixtures/optimizer/tpc-h/tpc-h.sql | 26 +++++++++++----------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/sqlglot/optimizer/optimize_joins.py b/sqlglot/optimizer/optimize_joins.py index cfbd866b1f..d51276f021 100644 --- a/sqlglot/optimizer/optimize_joins.py +++ b/sqlglot/optimizer/optimize_joins.py @@ -36,6 +36,9 @@ def optimize_joins(expression): on = dep.args["on"] if isinstance(on, exp.Connector): + if len(other_table_names(dep)) < 2: + continue + for predicate in on.flatten(): if name in exp.column_table_names(predicate): predicate.replace(exp.true()) diff --git a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql index 16978c0a19..7ef7a6dbab 100644 --- a/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql +++ b/tests/fixtures/optimizer/tpc-ds/tpc-ds.sql @@ -12325,7 +12325,10 @@ SELECT "call_center"."cc_manager" AS "manager", SUM("catalog_returns"."cr_net_loss") AS "returns_loss" FROM "call_center" AS "call_center" -CROSS JOIN "customer" AS "customer" +JOIN "household_demographics" AS "household_demographics" + ON "household_demographics"."hd_buy_potential" LIKE 'Unknown%' +JOIN "customer" AS "customer" + ON "household_demographics"."hd_demo_sk" = "customer"."c_current_hdemo_sk" JOIN "catalog_returns" AS "catalog_returns" ON "catalog_returns"."cr_call_center_sk" = "call_center"."cc_call_center_sk" AND "catalog_returns"."cr_returning_customer_sk" = "customer"."c_customer_sk" @@ -12350,9 +12353,6 @@ JOIN "customer_demographics" AS "customer_demographics" "customer_demographics"."cd_marital_status" = 'M' OR "customer_demographics"."cd_marital_status" = 'W' ) -JOIN "household_demographics" AS "household_demographics" - ON "household_demographics"."hd_buy_potential" LIKE 'Unknown%' - AND "household_demographics"."hd_demo_sk" = "customer"."c_current_hdemo_sk" JOIN "date_dim" AS "date_dim" ON "catalog_returns"."cr_returned_date_sk" = "date_dim"."d_date_sk" AND "date_dim"."d_moy" = 12 diff --git a/tests/fixtures/optimizer/tpc-h/tpc-h.sql b/tests/fixtures/optimizer/tpc-h/tpc-h.sql index cd4cace4db..942295e1a0 100644 --- a/tests/fixtures/optimizer/tpc-h/tpc-h.sql +++ b/tests/fixtures/optimizer/tpc-h/tpc-h.sql @@ -99,19 +99,19 @@ order by p_partkey limit 100; -WITH "partsupp_2" AS ( - SELECT - "partsupp"."ps_partkey" AS "ps_partkey", - "partsupp"."ps_suppkey" AS "ps_suppkey", - "partsupp"."ps_supplycost" AS "ps_supplycost" - FROM "partsupp" AS "partsupp" -), "region_2" AS ( +WITH "region_2" AS ( SELECT "region"."r_regionkey" AS "r_regionkey", "region"."r_name" AS "r_name" FROM "region" AS "region" WHERE "region"."r_name" = 'EUROPE' +), "partsupp_2" AS ( + SELECT + "partsupp"."ps_partkey" AS "ps_partkey", + "partsupp"."ps_suppkey" AS "ps_suppkey", + "partsupp"."ps_supplycost" AS "ps_supplycost" + FROM "partsupp" AS "partsupp" ), "_u_0" AS ( SELECT MIN("partsupp"."ps_supplycost") AS "_col_0", @@ -136,13 +136,13 @@ SELECT "supplier"."s_phone" AS "s_phone", "supplier"."s_comment" AS "s_comment" FROM "part" AS "part" -CROSS JOIN "nation" AS "nation" +CROSS JOIN "region_2" AS "region" LEFT JOIN "_u_0" AS "_u_0" ON "part"."p_partkey" = "_u_0"."_u_1" +JOIN "nation" AS "nation" + ON "nation"."n_regionkey" = "region"."r_regionkey" JOIN "partsupp_2" AS "partsupp" ON "part"."p_partkey" = "partsupp"."ps_partkey" -JOIN "region_2" AS "region" - ON "nation"."n_regionkey" = "region"."r_regionkey" JOIN "supplier" AS "supplier" ON "supplier"."s_nationkey" = "nation"."n_nationkey" AND "supplier"."s_suppkey" = "partsupp"."ps_suppkey" @@ -380,7 +380,6 @@ SELECT 1 - "lineitem"."l_discount" )) AS "revenue" FROM "supplier" AS "supplier" -CROSS JOIN "customer" AS "customer" JOIN "lineitem" AS "lineitem" ON "supplier"."s_suppkey" = "lineitem"."l_suppkey" AND CAST("lineitem"."l_shipdate" AS DATE) <= CAST('1996-12-31' AS DATE) @@ -391,8 +390,7 @@ JOIN "nation" AS "n1" ) AND "supplier"."s_nationkey" = "n1"."n_nationkey" JOIN "nation" AS "n2" - ON "customer"."c_nationkey" = "n2"."n_nationkey" - AND ( + ON ( "n1"."n_name" = 'FRANCE' OR "n2"."n_name" = 'FRANCE' ) AND ( @@ -401,6 +399,8 @@ JOIN "nation" AS "n2" AND ( "n2"."n_name" = 'FRANCE' OR "n2"."n_name" = 'GERMANY' ) +JOIN "customer" AS "customer" + ON "customer"."c_nationkey" = "n2"."n_nationkey" JOIN "orders" AS "orders" ON "customer"."c_custkey" = "orders"."o_custkey" AND "orders"."o_orderkey" = "lineitem"."l_orderkey"