diff --git a/pkg/planner/core/casetest/rule/rule_outer2inner_test.go b/pkg/planner/core/casetest/rule/rule_outer2inner_test.go new file mode 100644 index 0000000000000..308dfa24790af --- /dev/null +++ b/pkg/planner/core/casetest/rule/rule_outer2inner_test.go @@ -0,0 +1,57 @@ +// Copyright 2024 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package rule + +import ( + "testing" + + "github.com/pingcap/tidb/pkg/testkit" + "github.com/pingcap/tidb/pkg/testkit/testdata" +) + +func TestOuter2Inner(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t1(a1 int, b1 int, c1 int)") + tk.MustExec("create table t2(a2 int, b2 int, c2 int)") + tk.MustExec("create table t3(a3 int, b3 int, c3 int)") + tk.MustExec("create table t4(a4 int, b4 int, c4 int)") + tk.MustExec("create table ti(i int)") + tk.MustExec("CREATE TABLE lineitem (L_PARTKEY INTEGER ,L_QUANTITY DECIMAL(15,2),L_EXTENDEDPRICE DECIMAL(15,2))") + tk.MustExec("CREATE TABLE part(P_PARTKEY INTEGER,P_BRAND CHAR(10),P_CONTAINER CHAR(10))") + tk.MustExec("CREATE TABLE d (pk int, col_blob blob, col_blob_key blob, col_varchar_key varchar(1) , col_date date, col_int_key int)") + tk.MustExec("CREATE TABLE dd (pk int, col_blob blob, col_blob_key blob, col_date date, col_int_key int)") + tk.MustExec("create table t0 (a0 int, b0 char, c0 char(2))") + tk.MustExec("create 
table t11 (a1 int, b1 char, c1 char)") + + var input Input + var output []struct { + SQL string + Plan []string + } + suiteData := GetOuter2InnerSuiteData() + suiteData.LoadTestCases(t, &input, &output) + for i, sql := range input { + plan := tk.MustQuery("explain format = 'brief' " + sql) + testdata.OnRecord(func() { + output[i].SQL = sql + output[i].Plan = testdata.ConvertRowsToStrings(plan.Rows()) + }) + plan.Check(testkit.Rows(output[i].Plan...)) + } +} diff --git a/pkg/planner/core/casetest/rule/testdata/outer2inner_in.json b/pkg/planner/core/casetest/rule/testdata/outer2inner_in.json new file mode 100644 index 0000000000000..2e332efb168fc --- /dev/null +++ b/pkg/planner/core/casetest/rule/testdata/outer2inner_in.json @@ -0,0 +1,53 @@ +[ + { + "name": "TestOuter2Inner", + "cases": [ + "select * from t1 left outer join t2 on a1=a2 where b2 < 1 -- basic case of outer to inner join conversion", + "select * from t1 left outer join t2 on a1=a2 where b2 is not null -- basic case of not null", + "select * from t1 left outer join t2 on a1=a2 where not(b2 is null) -- another form of basic case of not null", + "select * from t1 left outer join t2 on a1=a2 where c2 = 5 OR b2 < 55 -- case with A OR B (Both A and B are null filtering)", + "select * from t1 left outer join t2 on a1=a2 where c2 = 5 AND b2 is null -- case with A AND B (A is null filtering and B is not)", + "select * from t1 left outer join t2 on a1=a2 where b2 is NULL AND c2 = 5 -- case with A AND B (A is null filtering and B is not)", + "select * from t1 left outer join t2 on a1=a2 where not (b2 is NULL OR c2 = 5) -- NOT case ", + "select * from t1 left outer join t2 on a1=a2 where not (b2 is NULL AND c2 = 5) -- NOT case ", + "select * from t2 left outer join t1 on a1=a2 where b1+b1 > 2; -- expression evaluates to UNKNOWN/FALSE", + "select * from t2 left outer join t1 on a1=a2 where coalesce(b1,2) > 2; -- false condition for b1=NULL", + "select * from t2 left outer join t1 on a1=a2 where true and b1 = 5; -- 
AND with one branch is null filtering", + "select * from t2 left outer join t1 on a1=a2 where false OR b1 = 5; -- OR with both branches are null filtering", + "select * from t3 as t1 left join t3 as t2 on t1.c3 = t2.c3 where t2.b3 != NULL; -- self join", + "select * from t1 ta left outer join (t1 tb left outer join t1 tc on tb.b1 = tc.b1) on ta.a1=tc.a1; -- nested join. On clause is null filtering on tc.", + "select * from t1 ta left outer join (t1 tb left outer join t1 tc on tb.b1 = tc.b1) on ta.a1=tc.a1 where tb.a1 > 5; -- nested join. On clause and WHERE clause are filters", + "select * from (t2 left join t1 on a1=a2) join t3 on b1=b3 -- on clause applied nested join", + "select * from ((t1 left join t2 on a1=a2) left join t3 on b2=b3) join t4 on b3=b4 -- nested and propagation of null filtering", + "select * from t1 right join t2 on a1=a2 where exists (select 1 from t3 where b1=b3) -- semi join is null filtering on the outer join", + "select sum(l_extendedprice) / 7.0 as avg_yearly from lineitem, part where p_partkey = l_partkey and p_brand = 'Brand#44' and p_container = 'WRAP PKG' and l_quantity < ( select 0.2 * avg(l_quantity) from lineitem where l_partkey = p_partkey) -- Q17 in TPCH. 
null filter on derived outer join", + "WITH cte AS ( SELECT alias1.col_date AS field1 FROM d AS alias1 LEFT JOIN dd AS alias2 ON alias1.col_blob_key=alias2.col_blob_key WHERE alias1.col_varchar_key IS NULL OR alias1.col_blob_key >= 'a') DELETE FROM outr1.*, outr2.* USING d AS outr1 LEFT OUTER JOIN dd AS outr2 ON (outr1.col_date=outr2.col_date) JOIN cte AS outrcte ON outr2.col_blob_key=outrcte.field1 -- nested complex case", + "with cte as (select count(a2) as cnt,b2-5 as b3 from t1 left outer join t2 on a1=a2 group by b3) select * from cte where b3 > 1 -- aggregate case.", + "select * from dd as outr1 WHERE outr1.col_blob IN (SELECT DISTINCT innr1.col_blob_key AS y FROM d AS innrcte left outer join dd AS innr1 ON innr1.pk = innrcte.col_date WHERE outr1.col_int_key > 6)", + "select * from t0 left outer join t11 on a0=a1 where t0.b0 in (t11.b1, t11.c1) -- each = in the in list is null filtering", + "select * from t1 left outer join t2 on a1=a2 where b2 is null -- negative case with single predicate which is not null filtering", + "select * from t1 left outer join t2 on a1=a2 where c2 = 5 OR b2 is null -- negative case with A OR B (A is null filtering and B is not)", + "select * from t1 left outer join t2 on a1=a2 where not(b2 is not null) -- nested 'not' negative case", + "select * from t1 left outer join t2 on a1=a2 where not(not(b2 is null)) -- nested 'not' negative case", + "select * from t1 left outer join t2 on a1=a2 where b1 is not null -- negative case with condition on outer table.", + "select * from t2 left outer join t1 on a1=a2 where coalesce(b1,2) = 2; -- true condition for b1=NULL", + "select * from t2 left outer join t1 on a1=a2 where true OR b1 = 5; -- negative case with OR and one branch is TRUE", + "select * from t3 as t1 left join t3 as t2 on t1.c3 = t2.c3 where t1.b3 != NULL -- negative case with self join", + "select * from (t1 left outer join t2 on a1=a2) left outer join t3 on a2=a3 and b2 = 5 -- negative case. 
inner side is not a join", + "select * from t1 ta right outer join (t1 tb right outer join t1 tc on tb.b1 = tc.b1) on ta.a1=tc.a1; -- negative case. inner side is not a join", + "select * from t1 ta right outer join (t1 tb right outer join t1 tc on tb.b1 = tc.b1) on ta.a1=tc.a1 where tc.a1 > 5; -- negative case. inner side is not a join and WHERE clause on outer table", + "select * from (t2 left join t1 on a1=a2) join t3 on b2=b3 -- negative case, on clause on outer table in nested join", + "select t1.c1 in (select count(s.b1) from t1 s where s.a1 = t1.a1) from t1 -- subquery test that generates outer join and not converted", + "SELECT * FROM ti LEFT JOIN (SELECT i FROM ti WHERE FALSE) AS d1 ON ti.i = d1.i WHERE NOT EXISTS (SELECT 1 FROM ti AS inner_t1 WHERE i = d1.i) -- anti semi join", + "select count(*) from t1 where t1.a1+100 > ( select count(*) from t2 where t1.a1=t2.a2 and t1.b1=t2.b2) group by t1.b1 -- filter not filtering over derived outer join", + "with cte as (select count(a2) as cnt,ifnull(b2,5) as b2 from t1 left outer join t2 on a1=a2 group by b2) select * from cte where b2 > 1 -- non null filter on group by", + "with cte as (select count(a2) as cnt,ifnull(b2,5) as b2 from t1 left outer join t2 on a1=a2 group by b2) select * from cte where cnt > 1 -- filter on aggregates not applicable", + "select * from t0 left outer join t11 on a0=a1 where t0.b0 in (t0.b0, t11.b1)", + "select * from t0 left outer join t11 on a0=a1 where '5' not in (t0.b0, t11.b1)", + "select * from t0 left outer join t11 on a0=a1 where '1' in (t0.b0, t11.b1)", + "select * from t0 left outer join t11 on a0=a1 where t0.b0 in ('5', t11.b1) -- some = in the in list is not null filtering", + "select * from t0 left outer join t11 on a0=a1 where '5' in (t0.b0, t11.b1) -- some = in the in list is not null filtering", + "select * from t1 left outer join t2 on a1=a2 where not (b2 is NOT NULL AND c2 = 5) -- NOT case " + ] + } +] diff --git 
a/pkg/planner/core/casetest/rule/testdata/outer2inner_out.json b/pkg/planner/core/casetest/rule/testdata/outer2inner_out.json new file mode 100644 index 0000000000000..42dc0d93a4847 --- /dev/null +++ b/pkg/planner/core/casetest/rule/testdata/outer2inner_out.json @@ -0,0 +1,655 @@ +[ + { + "Name": "TestOuter2Inner", + "Cases": [ + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where b2 < 1 -- basic case of outer to inner join conversion", + "Plan": [ + "Projection 4150.01 root test.t1.a1, test.t1.b1, test.t1.c1, test.t2.a2, test.t2.b2, test.t2.c2", + "└─HashJoin 4150.01 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 3320.01 root data:Selection", + " │ └─Selection 3320.01 cop[tikv] lt(test.t2.b2, 1), not(isnull(test.t2.a2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where b2 is not null -- basic case of not null", + "Plan": [ + "Projection 12475.01 root test.t1.a1, test.t1.b1, test.t1.c1, test.t2.a2, test.t2.b2, test.t2.c2", + "└─HashJoin 12475.01 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 9980.01 root data:Selection", + " │ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a2)), not(isnull(test.t2.b2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where not(b2 is null) -- another form of basic case of not null", + "Plan": [ + "Projection 12475.01 root test.t1.a1, test.t1.b1, test.t1.c1, 
test.t2.a2, test.t2.b2, test.t2.c2", + "└─HashJoin 12475.01 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 9980.01 root data:Selection", + " │ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a2)), not(isnull(test.t2.b2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where c2 = 5 OR b2 < 55 -- case with A OR B (Both A and B are null filtering)", + "Plan": [ + "Projection 4158.35 root test.t1.a1, test.t1.b1, test.t1.c1, test.t2.a2, test.t2.b2, test.t2.c2", + "└─HashJoin 4158.35 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 3326.68 root data:Selection", + " │ └─Selection 3326.68 cop[tikv] not(isnull(test.t2.a2)), or(eq(test.t2.c2, 5), lt(test.t2.b2, 55))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where c2 = 5 AND b2 is null -- case with A AND B (A is null filtering and B is not)", + "Plan": [ + "Projection 0.01 root test.t1.a1, test.t1.b1, test.t1.c1, test.t2.a2, test.t2.b2, test.t2.c2", + "└─HashJoin 0.01 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 0.01 root data:Selection", + " │ └─Selection 0.01 cop[tikv] eq(test.t2.c2, 5), isnull(test.t2.b2), not(isnull(test.t2.a2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " 
└─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where b2 is NULL AND c2 = 5 -- case with A AND B (A is null filtering and B is not)", + "Plan": [ + "Projection 0.01 root test.t1.a1, test.t1.b1, test.t1.c1, test.t2.a2, test.t2.b2, test.t2.c2", + "└─HashJoin 0.01 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 0.01 root data:Selection", + " │ └─Selection 0.01 cop[tikv] eq(test.t2.c2, 5), isnull(test.t2.b2), not(isnull(test.t2.a2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where not (b2 is NULL OR c2 = 5) -- NOT case ", + "Plan": [ + "Projection 9990.00 root test.t1.a1, test.t1.b1, test.t1.c1, test.t2.a2, test.t2.b2, test.t2.c2", + "└─HashJoin 9990.00 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 7992.00 root data:Selection", + " │ └─Selection 7992.00 cop[tikv] and(not(isnull(test.t2.b2)), ne(test.t2.c2, 5)), not(isnull(test.t2.a2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where not (b2 is NULL AND c2 = 5) -- NOT case ", + "Plan": [ + "Projection 12483.33 root test.t1.a1, test.t1.b1, test.t1.c1, test.t2.a2, test.t2.b2, test.t2.c2", + "└─HashJoin 12483.33 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 9986.66 root data:Selection", + " │ └─Selection 9986.66 cop[tikv] 
not(isnull(test.t2.a2)), or(not(isnull(test.t2.b2)), ne(test.t2.c2, 5))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t2 left outer join t1 on a1=a2 where b1+b1 > 2; -- expression evaluates to UNKNOWN/FALSE", + "Plan": [ + "Projection 9990.00 root test.t2.a2, test.t2.b2, test.t2.c2, test.t1.a1, test.t1.b1, test.t1.c1", + "└─HashJoin 9990.00 root inner join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 7992.00 root data:Selection", + " │ └─Selection 7992.00 cop[tikv] gt(plus(test.t1.b1, test.t1.b1), 2), not(isnull(test.t1.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t2 left outer join t1 on a1=a2 where coalesce(b1,2) > 2; -- false condition for b1=NULL", + "Plan": [ + "Projection 9990.00 root test.t2.a2, test.t2.b2, test.t2.c2, test.t1.a1, test.t1.b1, test.t1.c1", + "└─HashJoin 9990.00 root inner join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 7992.00 root data:Selection", + " │ └─Selection 7992.00 cop[tikv] gt(coalesce(test.t1.b1, 2), 2), not(isnull(test.t1.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t2 left outer join t1 on a1=a2 where true and b1 = 5; -- AND with one branch is null filtering", + "Plan": [ + 
"Projection 12.49 root test.t2.a2, test.t2.b2, test.t2.c2, test.t1.a1, test.t1.b1, test.t1.c1", + "└─HashJoin 12.49 root inner join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 9.99 root data:Selection", + " │ └─Selection 9.99 cop[tikv] eq(test.t1.b1, 5), not(isnull(test.t1.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t2 left outer join t1 on a1=a2 where false OR b1 = 5; -- OR with both branches are null filtering", + "Plan": [ + "Projection 12.49 root test.t2.a2, test.t2.b2, test.t2.c2, test.t1.a1, test.t1.b1, test.t1.c1", + "└─HashJoin 12.49 root inner join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 9.99 root data:Selection", + " │ └─Selection 9.99 cop[tikv] not(isnull(test.t1.a1)), or(0, eq(test.t1.b1, 5))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t3 as t1 left join t3 as t2 on t1.c3 = t2.c3 where t2.b3 != NULL; -- self join", + "Plan": [ + "Projection 0.00 root test.t3.a3, test.t3.b3, test.t3.c3, test.t3.a3, test.t3.b3, test.t3.c3", + "└─HashJoin 0.00 root inner join, equal:[eq(test.t3.c3, test.t3.c3)]", + " ├─TableReader(Build) 0.00 root data:Selection", + " │ └─Selection 0.00 cop[tikv] ne(test.t3.b3, NULL), not(isnull(test.t3.c3))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t3.c3))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep 
order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 ta left outer join (t1 tb left outer join t1 tc on tb.b1 = tc.b1) on ta.a1=tc.a1; -- nested join. On clause is null filtering on tc.", + "Plan": [ + "HashJoin 15593.77 root left outer join, equal:[eq(test.t1.a1, test.t1.a1)]", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:ta keep order:false, stats:pseudo", + "└─Projection(Probe) 12475.01 root test.t1.a1, test.t1.b1, test.t1.c1, test.t1.a1, test.t1.b1, test.t1.c1", + " └─HashJoin 12475.01 root inner join, equal:[eq(test.t1.b1, test.t1.b1)]", + " ├─TableReader(Build) 9980.01 root data:Selection", + " │ └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a1)), not(isnull(test.t1.b1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:tc keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b1))", + " └─TableFullScan 10000.00 cop[tikv] table:tb keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 ta left outer join (t1 tb left outer join t1 tc on tb.b1 = tc.b1) on ta.a1=tc.a1 where tb.a1 > 5; -- nested join. 
On clause and WHERE clause are filters", + "Plan": [ + "Projection 5203.12 root test.t1.a1, test.t1.b1, test.t1.c1, test.t1.a1, test.t1.b1, test.t1.c1, test.t1.a1, test.t1.b1, test.t1.c1", + "└─HashJoin 5203.12 root inner join, equal:[eq(test.t1.a1, test.t1.a1)]", + " ├─HashJoin(Build) 4162.50 root inner join, equal:[eq(test.t1.b1, test.t1.b1)]", + " │ ├─TableReader(Build) 3330.00 root data:Selection", + " │ │ └─Selection 3330.00 cop[tikv] gt(test.t1.a1, 5), not(isnull(test.t1.b1))", + " │ │ └─TableFullScan 10000.00 cop[tikv] table:tb keep order:false, stats:pseudo", + " │ └─TableReader(Probe) 9980.01 root data:Selection", + " │ └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a1)), not(isnull(test.t1.b1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:tc keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " └─TableFullScan 10000.00 cop[tikv] table:ta keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from (t2 left join t1 on a1=a2) join t3 on b1=b3 -- on clause applied nested join", + "Plan": [ + "Projection 15593.77 root test.t2.a2, test.t2.b2, test.t2.c2, test.t1.a1, test.t1.b1, test.t1.c1, test.t3.a3, test.t3.b3, test.t3.c3", + "└─HashJoin 15593.77 root inner join, equal:[eq(test.t1.b1, test.t3.b3)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t3.b3))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo", + " └─HashJoin(Probe) 12475.01 root inner join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 9980.01 root data:Selection", + " │ └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a1)), not(isnull(test.t1.b1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " └─TableFullScan 10000.00 
cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from ((t1 left join t2 on a1=a2) left join t3 on b2=b3) join t4 on b3=b4 -- nested and propagation of null filtering", + "Plan": [ + "Projection 19492.21 root test.t1.a1, test.t1.b1, test.t1.c1, test.t2.a2, test.t2.b2, test.t2.c2, test.t3.a3, test.t3.b3, test.t3.c3, test.t4.a4, test.t4.b4, test.t4.c4", + "└─HashJoin 19492.21 root inner join, equal:[eq(test.t3.b3, test.t4.b4)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t4.b4))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t4 keep order:false, stats:pseudo", + " └─HashJoin(Probe) 15593.77 root inner join, equal:[eq(test.t2.b2, test.t3.b3)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t3.b3))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo", + " └─HashJoin(Probe) 12475.01 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 9980.01 root data:Selection", + " │ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a2)), not(isnull(test.t2.b2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 right join t2 on a1=a2 where exists (select 1 from t3 where b1=b3) -- semi join is null filtering on the outer join", + "Plan": [ + "HashJoin 9980.01 root semi join, equal:[eq(test.t1.b1, test.t3.b3)]", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t3.b3))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo", + "└─HashJoin(Probe) 12475.01 root inner join, equal:[eq(test.t1.a1, test.t2.a2)]", + " 
├─TableReader(Build) 9980.01 root data:Selection", + " │ └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a1)), not(isnull(test.t1.b1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select sum(l_extendedprice) / 7.0 as avg_yearly from lineitem, part where p_partkey = l_partkey and p_brand = 'Brand#44' and p_container = 'WRAP PKG' and l_quantity < ( select 0.2 * avg(l_quantity) from lineitem where l_partkey = p_partkey) -- Q17 in TPCH. null filter on derived outer join", + "Plan": [ + "Projection 1.00 root div(Column#15, 7.0)->Column#16", + "└─StreamAgg 1.00 root funcs:sum(test.lineitem.l_extendedprice)->Column#15", + " └─HashJoin 0.01 root inner join, equal:[eq(test.part.p_partkey, test.lineitem.l_partkey)], other cond:lt(test.lineitem.l_quantity, mul(0.2, Column#13))", + " ├─HashJoin(Build) 0.01 root inner join, equal:[eq(test.part.p_partkey, test.lineitem.l_partkey)]", + " │ ├─TableReader(Build) 0.01 root data:Selection", + " │ │ └─Selection 0.01 cop[tikv] eq(test.part.p_brand, \"Brand#44\"), eq(test.part.p_container, \"WRAP PKG\"), not(isnull(test.part.p_partkey))", + " │ │ └─TableFullScan 10000.00 cop[tikv] table:part keep order:false, stats:pseudo", + " │ └─TableReader(Probe) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.lineitem.l_partkey))", + " │ └─TableFullScan 10000.00 cop[tikv] table:lineitem keep order:false, stats:pseudo", + " └─HashAgg(Probe) 7992.00 root group by:test.lineitem.l_partkey, funcs:avg(Column#19, Column#20)->Column#13, funcs:firstrow(test.lineitem.l_partkey)->test.lineitem.l_partkey", + " └─TableReader 7992.00 root data:HashAgg", + " └─HashAgg 7992.00 cop[tikv] group by:test.lineitem.l_partkey, funcs:count(test.lineitem.l_quantity)->Column#19, 
funcs:sum(test.lineitem.l_quantity)->Column#20", + " └─Selection 9990.00 cop[tikv] not(isnull(test.lineitem.l_partkey))", + " └─TableFullScan 10000.00 cop[tikv] table:lineitem keep order:false, stats:pseudo" + ] + }, + { + "SQL": "WITH cte AS ( SELECT alias1.col_date AS field1 FROM d AS alias1 LEFT JOIN dd AS alias2 ON alias1.col_blob_key=alias2.col_blob_key WHERE alias1.col_varchar_key IS NULL OR alias1.col_blob_key >= 'a') DELETE FROM outr1.*, outr2.* USING d AS outr1 LEFT OUTER JOIN dd AS outr2 ON (outr1.col_date=outr2.col_date) JOIN cte AS outrcte ON outr2.col_blob_key=outrcte.field1 -- nested complex case", + "Plan": [ + "Delete N/A root N/A", + "└─Projection 6523.44 root test.d.pk, test.d.col_blob, test.d.col_blob_key, test.d.col_varchar_key, test.d.col_date, test.d.col_int_key, test.d._tidb_rowid, test.dd.pk, test.dd.col_blob, test.dd.col_blob_key, test.dd.col_date, test.dd.col_int_key, test.dd._tidb_rowid, test.d.col_date", + " └─HashJoin 6523.44 root inner join, equal:[eq(test.d.col_date, Column#41)]", + " ├─HashJoin(Build) 4175.00 root left outer join, equal:[eq(test.d.col_blob_key, test.dd.col_blob_key)]", + " │ ├─TableReader(Build) 3340.00 root data:Selection", + " │ │ └─Selection 3340.00 cop[tikv] or(isnull(test.d.col_varchar_key), ge(test.d.col_blob_key, \"a\"))", + " │ │ └─TableFullScan 10000.00 cop[tikv] table:alias1 keep order:false, stats:pseudo", + " │ └─TableReader(Probe) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.dd.col_blob_key))", + " │ └─TableFullScan 10000.00 cop[tikv] table:alias2 keep order:false, stats:pseudo", + " └─Projection(Probe) 12487.50 root test.d.pk, test.d.col_blob, test.d.col_blob_key, test.d.col_varchar_key, test.d.col_date, test.d.col_int_key, test.d._tidb_rowid, test.dd.pk, test.dd.col_blob, test.dd.col_blob_key, test.dd.col_date, test.dd.col_int_key, test.dd._tidb_rowid, cast(test.dd.col_blob_key, datetime(6) BINARY)->Column#41", + " └─HashJoin 12487.50 root inner join, 
equal:[eq(test.d.col_date, test.dd.col_date)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.dd.col_date))", + " │ └─TableFullScan 10000.00 cop[tikv] table:outr2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.d.col_date))", + " └─TableFullScan 10000.00 cop[tikv] table:outr1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "with cte as (select count(a2) as cnt,b2-5 as b3 from t1 left outer join t2 on a1=a2 group by b3) select * from cte where b3 > 1 -- aggregate case.", + "Plan": [ + "Projection 6393.60 root Column#21, minus(test.t2.b2, 5)->Column#22", + "└─Selection 6393.60 root gt(minus(test.t2.b2, 5), 1)", + " └─HashAgg 7992.00 root group by:Column#26, funcs:count(Column#24)->Column#21, funcs:firstrow(Column#25)->test.t2.b2", + " └─Projection 12487.50 root test.t2.a2->Column#24, test.t2.b2->Column#25, minus(test.t2.b2, 5)->Column#26", + " └─HashJoin 12487.50 root inner join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from dd as outr1 WHERE outr1.col_blob IN (SELECT DISTINCT innr1.col_blob_key AS y FROM d AS innrcte left outer join dd AS innr1 ON innr1.pk = innrcte.col_date WHERE outr1.col_int_key > 6)", + "Plan": [ + "Apply 9990.00 root semi join, equal:[eq(test.dd.col_blob, test.dd.col_blob_key)]", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.dd.col_blob))", + "│ └─TableFullScan 10000.00 cop[tikv] table:outr1 keep order:false, 
stats:pseudo", + "└─HashAgg(Probe) 63872064.00 root group by:test.dd.col_blob_key, funcs:firstrow(test.dd.col_blob_key)->test.dd.col_blob_key", + " └─HashJoin 99800100.00 root inner join, equal:[eq(Column#21, Column#20)]", + " ├─Projection(Build) 79920000.00 root cast(test.d.col_date, double BINARY)->Column#20", + " │ └─TableReader 79920000.00 root data:Selection", + " │ └─Selection 79920000.00 cop[tikv] gt(test.dd.col_int_key, 6)", + " │ └─TableFullScan 99900000.00 cop[tikv] table:innrcte keep order:false, stats:pseudo", + " └─Projection(Probe) 79840080.00 root test.dd.col_blob_key, cast(test.dd.pk, double BINARY)->Column#21", + " └─TableReader 79840080.00 root data:Selection", + " └─Selection 79840080.00 cop[tikv] gt(test.dd.col_int_key, 6), not(isnull(test.dd.col_blob_key))", + " └─TableFullScan 99900000.00 cop[tikv] table:innr1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t0 left outer join t11 on a0=a1 where t0.b0 in (t11.b1, t11.c1) -- each = in the in list is null filtering", + "Plan": [ + "HashJoin 12487.50 root inner join, equal:[eq(test.t0.a0, test.t11.a1)], other cond:in(test.t0.b0, test.t11.b1, test.t11.c1)", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t0.a0))", + " └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where b2 is null -- negative case with single predicate which is not null filtering", + "Plan": [ + "Selection 9990.00 root isnull(test.t2.b2)", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " │ └─TableFullScan 10000.00 
cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where c2 = 5 OR b2 is null -- negative case with A OR B (A is null filtering and B is not)", + "Plan": [ + "Selection 9990.00 root or(eq(test.t2.c2, 5), isnull(test.t2.b2))", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where not(b2 is not null) -- nested 'not' negative case", + "Plan": [ + "Selection 9990.00 root not(not(isnull(test.t2.b2)))", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where not(not(b2 is null)) -- nested 'not' negative case", + "Plan": [ + "Selection 9990.00 root not(not(isnull(test.t2.b2)))", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root 
data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where b1 is not null -- negative case with condition on outer table.", + "Plan": [ + "HashJoin 12487.50 root left outer join, equal:[eq(test.t1.a1, test.t2.a2)]", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b1))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t2 left outer join t1 on a1=a2 where coalesce(b1,2) = 2; -- true condition for b1=NULL", + "Plan": [ + "Selection 9990.00 root eq(coalesce(test.t1.b1, 2), 2)", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t2 left outer join t1 on a1=a2 where true OR b1 = 5; -- negative case with OR and one branch is TRUE", + "Plan": [ + "Selection 9990.00 root or(1, eq(test.t1.b1, 5))", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select 
* from t3 as t1 left join t3 as t2 on t1.c3 = t2.c3 where t1.b3 != NULL -- negative case with self join", + "Plan": [ + "HashJoin 0.00 root left outer join, equal:[eq(test.t3.c3, test.t3.c3)]", + "├─TableReader(Build) 0.00 root data:Selection", + "│ └─Selection 0.00 cop[tikv] ne(test.t3.b3, NULL)", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t3.c3))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from (t1 left outer join t2 on a1=a2) left outer join t3 on a2=a3 and b2 = 5 -- negative case. inner side is not a join", + "Plan": [ + "HashJoin 15609.38 root left outer join, equal:[eq(test.t1.a1, test.t2.a2)]", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─HashJoin(Probe) 12487.50 root left outer join, equal:[eq(test.t2.a2, test.t3.a3)], left cond:[eq(test.t2.b2, 5)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t3.a3))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 ta right outer join (t1 tb right outer join t1 tc on tb.b1 = tc.b1) on ta.a1=tc.a1; -- negative case. 
inner side is not a join", + "Plan": [ + "Projection 15593.77 root test.t1.a1, test.t1.b1, test.t1.c1, test.t1.a1, test.t1.b1, test.t1.c1, test.t1.a1, test.t1.b1, test.t1.c1", + "└─HashJoin 15593.77 root right outer join, equal:[eq(test.t1.b1, test.t1.b1)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:tb keep order:false, stats:pseudo", + " └─HashJoin(Probe) 12487.50 root right outer join, equal:[eq(test.t1.a1, test.t1.a1)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:ta keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:tc keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 ta right outer join (t1 tb right outer join t1 tc on tb.b1 = tc.b1) on ta.a1=tc.a1 where tc.a1 > 5; -- negative case. 
inner side is not a join and WHERE clause on outer table", + "Plan": [ + "Projection 5208.33 root test.t1.a1, test.t1.b1, test.t1.c1, test.t1.a1, test.t1.b1, test.t1.c1, test.t1.a1, test.t1.b1, test.t1.c1", + "└─HashJoin 5208.33 root right outer join, equal:[eq(test.t1.b1, test.t1.b1)]", + " ├─HashJoin(Build) 4166.67 root right outer join, equal:[eq(test.t1.a1, test.t1.a1)]", + " │ ├─TableReader(Build) 3333.33 root data:Selection", + " │ │ └─Selection 3333.33 cop[tikv] gt(test.t1.a1, 5)", + " │ │ └─TableFullScan 10000.00 cop[tikv] table:tc keep order:false, stats:pseudo", + " │ └─TableReader(Probe) 3333.33 root data:Selection", + " │ └─Selection 3333.33 cop[tikv] gt(test.t1.a1, 5), not(isnull(test.t1.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:ta keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b1))", + " └─TableFullScan 10000.00 cop[tikv] table:tb keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from (t2 left join t1 on a1=a2) join t3 on b2=b3 -- negative case, on clause on outer table in nested join", + "Plan": [ + "HashJoin 15609.38 root inner join, equal:[eq(test.t2.b2, test.t3.b3)]", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t3.b3))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo", + "└─HashJoin(Probe) 12487.50 root left outer join, equal:[eq(test.t2.a2, test.t1.a1)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t2.b2))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select t1.c1 in (select count(s.b1) from t1 s where s.a1 = t1.a1) from 
t1 -- subquery test that generates outer join and not converted", + "Plan": [ + "Projection 10000.00 root Column#14", + "└─Apply 10000.00 root CARTESIAN left outer semi join, other cond:eq(test.t1.c1, Column#13)", + " ├─TableReader(Build) 10000.00 root data:TableFullScan", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─StreamAgg(Probe) 10000.00 root funcs:count(Column#16)->Column#13", + " └─TableReader 10000.00 root data:StreamAgg", + " └─StreamAgg 10000.00 cop[tikv] funcs:count(test.t1.b1)->Column#16", + " └─Selection 100000.00 cop[tikv] eq(test.t1.a1, test.t1.a1)", + " └─TableFullScan 100000000.00 cop[tikv] table:s keep order:false, stats:pseudo" + ] + }, + { + "SQL": "SELECT * FROM ti LEFT JOIN (SELECT i FROM ti WHERE FALSE) AS d1 ON ti.i = d1.i WHERE NOT EXISTS (SELECT 1 FROM ti AS inner_t1 WHERE i = d1.i) -- anti semi join", + "Plan": [ + "HashJoin 8000.00 root anti semi join, equal:[eq(test.ti.i, test.ti.i)]", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:inner_t1 keep order:false, stats:pseudo", + "└─HashJoin(Probe) 10000.00 root left outer join, equal:[eq(test.ti.i, test.ti.i)]", + " ├─Selection(Build) 0.00 root not(isnull(test.ti.i))", + " │ └─TableDual 0.00 root rows:0", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:ti keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select count(*) from t1 where t1.a1+100 > ( select count(*) from t2 where t1.a1=t2.a2 and t1.b1=t2.b2) group by t1.b1 -- filter not filtering over derived outer join", + "Plan": [ + "HashAgg 6400.00 root group by:test.t1.b1, funcs:count(1)->Column#10", + "└─Selection 8000.00 root gt(plus(test.t1.a1, 100), ifnull(Column#9, 0))", + " └─HashJoin 10000.00 root left outer join, equal:[eq(test.t1.a1, test.t2.a2) eq(test.t1.b1, test.t2.b2)]", + " ├─HashAgg(Build) 7984.01 root group by:test.t2.a2, test.t2.b2, 
funcs:count(Column#11)->Column#9, funcs:firstrow(test.t2.a2)->test.t2.a2, funcs:firstrow(test.t2.b2)->test.t2.b2", + " │ └─TableReader 7984.01 root data:HashAgg", + " │ └─HashAgg 7984.01 cop[tikv] group by:test.t2.a2, test.t2.b2, funcs:count(1)->Column#11", + " │ └─Selection 9980.01 cop[tikv] not(isnull(test.t2.a2)), not(isnull(test.t2.b2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "with cte as (select count(a2) as cnt,ifnull(b2,5) as b2 from t1 left outer join t2 on a1=a2 group by b2) select * from cte where b2 > 1 -- non null filter on group by", + "Plan": [ + "Projection 6393.60 root Column#21, ifnull(test.t2.b2, 5)->Column#22", + "└─HashAgg 6393.60 root group by:test.t2.b2, funcs:count(test.t2.a2)->Column#21, funcs:firstrow(test.t2.b2)->test.t2.b2", + " └─Selection 9990.00 root gt(ifnull(test.t2.b2, 5), 1)", + " └─HashJoin 12487.50 root left outer join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 10000.00 root data:TableFullScan", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "with cte as (select count(a2) as cnt,ifnull(b2,5) as b2 from t1 left outer join t2 on a1=a2 group by b2) select * from cte where cnt > 1 -- filter on aggregates not applicable", + "Plan": [ + "Projection 6393.60 root Column#21, ifnull(test.t2.b2, 5)->Column#22", + "└─Selection 6393.60 root gt(Column#21, 1)", + " └─HashAgg 7992.00 root group by:test.t2.b2, funcs:count(test.t2.a2)->Column#21, funcs:firstrow(test.t2.b2)->test.t2.b2", + " └─HashJoin 12487.50 root left outer join, equal:[eq(test.t1.a1, test.t2.a2)]", + " 
├─TableReader(Build) 10000.00 root data:TableFullScan", + " │ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + " └─TableReader(Probe) 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t0 left outer join t11 on a0=a1 where t0.b0 in (t0.b0, t11.b1)", + "Plan": [ + "Selection 9990.00 root in(test.t0.b0, test.t0.b0, test.t11.b1)", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t0.a0, test.t11.a1)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t0 left outer join t11 on a0=a1 where '5' not in (t0.b0, t11.b1)", + "Plan": [ + "Selection 9990.00 root not(in(\"5\", test.t0.b0, test.t11.b1))", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t0.a0, test.t11.a1)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t0 left outer join t11 on a0=a1 where '1' in (t0.b0, t11.b1)", + "Plan": [ + "Selection 9990.00 root in(\"1\", test.t0.b0, test.t11.b1)", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t0.a0, test.t11.a1)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, 
stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t0 left outer join t11 on a0=a1 where t0.b0 in ('5', t11.b1) -- some = in the in list is not null filtering", + "Plan": [ + "Selection 9990.00 root in(test.t0.b0, \"5\", test.t11.b1)", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t0.a0, test.t11.a1)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t0 left outer join t11 on a0=a1 where '5' in (t0.b0, t11.b1) -- some = in the in list is not null filtering", + "Plan": [ + "Selection 9990.00 root in(\"5\", test.t0.b0, test.t11.b1)", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t0.a0, test.t11.a1)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo" + ] + }, + { + "SQL": "select * from t1 left outer join t2 on a1=a2 where not (b2 is NOT NULL AND c2 = 5) -- NOT case ", + "Plan": [ + "Selection 9990.00 root not(and(not(isnull(test.t2.b2)), eq(test.t2.c2, 5)))", + "└─HashJoin 12487.50 root left outer join, equal:[eq(test.t1.a1, test.t2.a2)]", + " ├─TableReader(Build) 9990.00 root data:Selection", + " │ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))", + " │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + " └─TableReader(Probe) 10000.00 root 
data:TableFullScan",
+          " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
+        ]
+      }
+    ]
+  }
+]
diff --git a/pkg/planner/core/rule_outer_to_inner_join.go b/pkg/planner/core/rule_outer_to_inner_join.go
new file mode 100644
index 0000000000000..83a8244993a12
--- /dev/null
+++ b/pkg/planner/core/rule_outer_to_inner_join.go
@@ -0,0 +1,150 @@
+// Copyright 2024 PingCAP, Inc.
+
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package core
+
+import (
+	"context"
+
+	"github.com/pingcap/tidb/pkg/expression"
+	"github.com/pingcap/tidb/pkg/planner/core/base"
+	"github.com/pingcap/tidb/pkg/planner/util"
+	"github.com/pingcap/tidb/pkg/planner/util/optimizetrace"
+)
+
+// mergeOnClausePredicates returns a new slice holding the ON-clause conditions of p
+// (left, right, equal, other, in that order) followed by the given predicates.
+func mergeOnClausePredicates(p *LogicalJoin, predicates []expression.Expression) []expression.Expression {
+	total := len(p.LeftConditions) + len(p.RightConditions) +
+		len(p.EqualConditions) + len(p.OtherConditions) + len(predicates)
+	merged := make([]expression.Expression, 0, total)
+	merged = append(merged, p.LeftConditions...)
+	merged = append(merged, p.RightConditions...)
+	merged = append(merged, expression.ScalarFuncs2Exprs(p.EqualConditions)...)
+	merged = append(merged, p.OtherConditions...)
+	return append(merged, predicates...)
+}
+
+// convertOuterToInnerJoin converts outer to inner joins if the unmatched rows are filtered out.
+type convertOuterToInnerJoin struct{}
+
+// optimize converts outer joins to inner joins in the plan rooted at p; the returned bool is
+// always false. This is a refactoring of the outer to inner join logic that used to be part of
+// predicate push down. The rewrite passes down predicates from selections (WHERE clause) and
+// join predicates (ON clause). Nodes without their own ConvertOuterToInnerJoin only forward
+// the predicates to their children. The main logic is applied for joins:
+//  1. Traversal is preorder: the passed down predicates are checked against the join itself
+//     before its children are visited.
+//  2. The ON clause and the passed down predicates are combined and applied to the join
+//     children, depending on the join type: for left/right outer joins only on the inner
+//     (null producing) side, for inner/semi joins on both children, for anti semi joins on
+//     one child only, and for all other join types not at all.
+func (*convertOuterToInnerJoin) optimize(_ context.Context, p base.LogicalPlan, _ *optimizetrace.LogicalOptimizeOp) (base.LogicalPlan, bool, error) {
+	return p.ConvertOuterToInnerJoin(nil), false, nil
+}
+
+// LogicalAggregation just works since schema = child + aggregate expressions. No need to map
+// predicates. Also, predicates involving aggregate expressions are not null filtering:
+// IsNullRejected always returns false for those cases.
+
+// ConvertOuterToInnerJoin implements base.LogicalPlan ConvertOuterToInnerJoin interface.
+// It is the default: predicates are forwarded unchanged to every child of the node.
+// NOTE(review): confirm forwarding is valid for every operator that relies on this default.
+func (s *baseLogicalPlan) ConvertOuterToInnerJoin(predicates []expression.Expression) base.LogicalPlan {
+	plan := s.self
+	children := plan.Children()
+	for idx := range children {
+		plan.SetChild(idx, children[idx].ConvertOuterToInnerJoin(predicates))
+	}
+	return plan
+}
+
+// ConvertOuterToInnerJoin implements base.LogicalPlan ConvertOuterToInnerJoin interface.
+func (p *LogicalJoin) ConvertOuterToInnerJoin(predicates []expression.Expression) base.LogicalPlan {
+	// innerTable is the null producing side of an outer join: the left child for a right
+	// outer join and, after the swap below, the right child for a left outer join.
+	innerTable, outerTable := p.Children()[0], p.Children()[1]
+	// Remember the swap now: JoinType may be changed to InnerJoin below.
+	swapped := p.JoinType == LeftOuterJoin
+	if swapped {
+		innerTable, outerTable = outerTable, innerTable
+	}
+
+	// First, simplify this join: an outer join becomes an inner join as soon as one of the
+	// passed down predicates is null rejected on the inner side.
+	if p.JoinType == LeftOuterJoin || p.JoinType == RightOuterJoin {
+		for _, expr := range predicates {
+			if util.IsNullRejected(p.SCtx(), innerTable.Schema(), expr) {
+				p.JoinType = InnerJoin
+				break
+			}
+		}
+	}
+
+	// Next, simplify the join children. By default a child only sees the passed down
+	// predicates; depending on the (possibly updated) join type it also sees the ON clause.
+	combinedCond := mergeOnClausePredicates(p, predicates)
+	innerPreds, outerPreds := predicates, predicates
+	switch p.JoinType {
+	case LeftOuterJoin, RightOuterJoin:
+		innerPreds = combinedCond
+	case InnerJoin, SemiJoin:
+		innerPreds, outerPreds = combinedCond, combinedCond
+	case AntiSemiJoin:
+		// Children are never swapped for this join type, so outerTable is the second child.
+		outerPreds = combinedCond
+	}
+	innerTable = innerTable.ConvertOuterToInnerJoin(innerPreds)
+	outerTable = outerTable.ConvertOuterToInnerJoin(outerPreds)
+
+	// Put the converted children back into their original positions.
+	left, right := innerTable, outerTable
+	if swapped {
+		left, right = outerTable, innerTable
+	}
+	p.SetChild(0, left)
+	p.SetChild(1, right)
+	return p
+}
+
+// name returns the name of this rule.
+func (*convertOuterToInnerJoin) name() string {
+	return "convert_outer_to_inner_joins"
+}
+
+// ConvertOuterToInnerJoin implements base.LogicalPlan ConvertOuterToInnerJoin interface.
+// The selection conditions are added to the passed down predicates for the child.
+func (s *LogicalSelection) ConvertOuterToInnerJoin(predicates []expression.Expression) base.LogicalPlan {
+	p := s.self.(*LogicalSelection)
+	// Use a fresh slice: appending to predicates directly could write into spare capacity
+	// of the caller's backing array.
+	combinedCond := make([]expression.Expression, 0, len(predicates)+len(p.Conditions))
+	combinedCond = append(combinedCond, predicates...)
+	combinedCond = append(combinedCond, p.Conditions...)
+	child := p.Children()[0].ConvertOuterToInnerJoin(combinedCond)
+	p.SetChildren(child)
+	return p
+}
+
+// ConvertOuterToInnerJoin implements base.LogicalPlan ConvertOuterToInnerJoin interface.
+// Only the predicates returned first by BreakDownPredicates (canBePushed) reach the child.
+func (s *LogicalProjection) ConvertOuterToInnerJoin(predicates []expression.Expression) base.LogicalPlan {
+	p := s.self.(*LogicalProjection)
+	// The second result of BreakDownPredicates is intentionally ignored.
+	canBePushed, _ := BreakDownPredicates(p, predicates)
+	child := p.Children()[0].ConvertOuterToInnerJoin(canBePushed)
+	p.SetChildren(child)
+	return p
+}
diff --git a/pkg/planner/util/BUILD.bazel b/pkg/planner/util/BUILD.bazel
index 5c07c753230cd..5337badb5bd1e 100644
--- a/pkg/planner/util/BUILD.bazel
+++ b/pkg/planner/util/BUILD.bazel
@@ -14,7 +14,12 @@ go_library(
         "//pkg/kv",
         "//pkg/parser/ast",
         "//pkg/parser/model",
+        "//pkg/parser/mysql",
+        "//pkg/planner/context",
+        "//pkg/planner/core/base",
         "//pkg/sessionctx",
+        "//pkg/sessionctx/stmtctx",
+        "//pkg/tablecodec",
         "//pkg/types",
         "//pkg/util/collate",
         "//pkg/util/ranger",
diff --git a/pkg/planner/util/null_misc.go b/pkg/planner/util/null_misc.go
new file mode 100644
index 0000000000000..dbf5ee1393b01
--- /dev/null
+++ b/pkg/planner/util/null_misc.go
@@ -0,0 +1,117 @@
+// Copyright 2024 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util
+
+import (
+	"github.com/pingcap/tidb/pkg/expression"
+	"github.com/pingcap/tidb/pkg/parser/ast"
+	"github.com/pingcap/tidb/pkg/planner/context"
+	"github.com/pingcap/tidb/pkg/planner/core/base"
+)
+
+// allConstants reports whether expr consists only of constants: it is either a constant
+// or a scalar function whose arguments all satisfy allConstants. It reports false when
+// MaybeOverOptimized4PlanCache flags the expression.
+func allConstants(ctx expression.BuildContext, expr expression.Expression) bool {
+	if expression.MaybeOverOptimized4PlanCache(ctx, []expression.Expression{expr}) {
+		return false // expression contains non-deterministic parameter
+	}
+	switch e := expr.(type) {
+	case *expression.Constant:
+		return true
+	case *expression.ScalarFunction:
+		for _, operand := range e.GetArgs() {
+			if !allConstants(ctx, operand) {
+				return false
+			}
+		}
+		return true
+	default:
+		return false
+	}
+}
+
+// isNullRejectedInList checks null filtering for an IN list using OR logic: every
+// "first argument = list element" equality must be null rejected on its own.
+// Reason is that null filtering through evaluation by isNullRejectedSimpleExpr has
+// problems with IN list. For example, constant in (outer-table.col1, inner-table.col2)
+// is not null rejecting since constant in (outer-table.col1, NULL) is not false/unknown.
+func isNullRejectedInList(ctx base.PlanContext, expr *expression.ScalarFunction, innerSchema *expression.Schema) bool {
+	inArgs := expr.GetArgs()
+	for idx := 1; idx < len(inArgs); idx++ {
+		// Check the equality between the first argument and this list element on its own.
+		pair := []expression.Expression{inArgs[0], inArgs[idx]}
+		eqCond, err := expression.NewFunction(ctx.GetExprCtx(), ast.EQ, expr.GetType(), pair...)
+		if err != nil || !isNullRejectedSimpleExpr(ctx, innerSchema, eqCond) {
+			return false
+		}
+	}
+	return true
+}
+
+// IsNullRejected reports whether predicate is null rejected with respect to innerSchema.
+// It takes care of complex predicates like this:
+// IsNullRejected(A OR B) = IsNullRejected(A) AND IsNullRejected(B)
+// IsNullRejected(A AND B) = IsNullRejected(A) OR IsNullRejected(B)
+func IsNullRejected(ctx base.PlanContext, innerSchema *expression.Schema, predicate expression.Expression) bool {
+	predicate = expression.PushDownNot(ctx.GetNullRejectCheckExprCtx(), predicate)
+	if expression.ContainOuterNot(predicate) {
+		return false
+	}
+
+	// Anything that is not a scalar function is checked by plain evaluation.
+	fn, isFunc := predicate.(*expression.ScalarFunction)
+	if !isFunc {
+		return isNullRejectedSimpleExpr(ctx, innerSchema, predicate)
+	}
+	switch fn.FuncName.L {
+	case ast.LogicAnd:
+		// A AND B is null rejected if either side is.
+		return IsNullRejected(ctx, innerSchema, fn.GetArgs()[0]) ||
+			IsNullRejected(ctx, innerSchema, fn.GetArgs()[1])
+	case ast.LogicOr:
+		// A OR B is null rejected only if both sides are.
+		return IsNullRejected(ctx, innerSchema, fn.GetArgs()[0]) &&
+			IsNullRejected(ctx, innerSchema, fn.GetArgs()[1])
+	case ast.In:
+		return isNullRejectedInList(ctx, fn, innerSchema)
+	default:
+		return isNullRejectedSimpleExpr(ctx, innerSchema, fn)
+	}
+}
+
+// isNullRejectedSimpleExpr checks whether a condition is null-rejected.
+// A condition is null-rejected if it is a predicate containing a reference to an inner
+// table (null producing side) that evaluates to UNKNOWN or FALSE when one of its
+// arguments is NULL.
+func isNullRejectedSimpleExpr(ctx context.PlanContext, schema *expression.Schema, expr expression.Expression) bool {
+	// The expression should reference at least one field in innerSchema or all constants.
+	if !(expression.ExprReferenceSchema(expr, schema) || allConstants(ctx.GetExprCtx(), expr)) {
+		return false
+	}
+	exprCtx := ctx.GetNullRejectCheckExprCtx()
+	sc := ctx.GetSessionVars().StmtCtx
+	// The condition is null-rejected when EvaluateExprWithNull yields a constant that is
+	// NULL or converts to false; any other result is treated as not null-rejected.
+	folded, isConst := expression.EvaluateExprWithNull(exprCtx, schema, expr).(*expression.Constant)
+	if !isConst {
+		return false
+	}
+	if folded.Value.IsNull() {
+		return true
+	}
+	isTrue, err := folded.Value.ToBool(sc.TypeCtxOrDefault())
+	return err == nil && isTrue == 0
+}