From 179464d557614131434ecdef68fae023d0e29f4e Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Tue, 14 May 2024 16:28:12 +0800 Subject: [PATCH 1/4] This is an automated cherry-pick of #52801 Signed-off-by: ti-chi-bot --- cmd/explaintest/r/select.result | 5 ++++ pkg/planner/core/casetest/BUILD.bazel | 33 +++++++++++++++++++++++++++ planner/core/casetest/plan_test.go | 22 ++++++++++++++++++ planner/core/expression_rewriter.go | 30 +++++++++++++++++++++--- 4 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 pkg/planner/core/casetest/BUILD.bazel diff --git a/cmd/explaintest/r/select.result b/cmd/explaintest/r/select.result index 70101f0218ca8..95566f9de7a2d 100644 --- a/cmd/explaintest/r/select.result +++ b/cmd/explaintest/r/select.result @@ -413,8 +413,13 @@ explain format = 'brief' select a = all (select a from t t2) from t t1; id estRows task access object operator info Projection 10000.00 root or(and(and(le(Column#11, 1), eq(test.t.a, Column#10)), if(ne(Column#12, 0), , 1)), or(eq(Column#13, 0), if(isnull(test.t.a), , 0)))->Column#14 └─HashJoin 10000.00 root CARTESIAN inner join +<<<<<<< HEAD:cmd/explaintest/r/select.result ├─StreamAgg(Build) 1.00 root funcs:firstrow(Column#16)->Column#10, funcs:count(distinct Column#17)->Column#11, funcs:sum(Column#18)->Column#12, funcs:count(1)->Column#13 │ └─Projection 10000.00 root test.t.a, test.t.a, cast(isnull(test.t.a), decimal(20,0) BINARY)->Column#18 +======= + ├─StreamAgg(Build) 1.00 root funcs:max(Column#16)->Column#10, funcs:count(distinct Column#17)->Column#11, funcs:sum(Column#18)->Column#12, funcs:count(1)->Column#13 + │ └─Projection 10000.00 root select.t.a->Column#16, select.t.a->Column#17, cast(isnull(select.t.a), decimal(20,0) BINARY)->Column#18 +>>>>>>> 98e5cfbd1c5 (planner: fix wrong behavior for = all() (#52801)):tests/integrationtest/r/select.result │ └─TableReader 10000.00 root data:TableFullScan │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo └─TableReader(Probe) 10000.00 root data:TableFullScan diff --git a/pkg/planner/core/casetest/BUILD.bazel b/pkg/planner/core/casetest/BUILD.bazel new file mode 100644 index 0000000000000..6c62171aef90d --- /dev/null +++ b/pkg/planner/core/casetest/BUILD.bazel @@ -0,0 +1,33 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_test") + +go_test( + name = "casetest_test", + timeout = "moderate", + srcs = [ + "integration_test.go", + "main_test.go", + "plan_test.go", + "stats_test.go", + "tiflash_selection_late_materialization_test.go", + ], + data = glob(["testdata/**"]), + flaky = True, + shard_count = 24, + deps = [ + "//pkg/domain", + "//pkg/parser", + "//pkg/parser/model", + "//pkg/planner/core", + "//pkg/planner/core/base", + "//pkg/planner/property", + "//pkg/testkit", + "//pkg/testkit/testdata", + "//pkg/testkit/testmain", + "//pkg/testkit/testsetup", + "//pkg/util/hint", + "//pkg/util/plancodec", + "@com_github_pingcap_failpoint//:failpoint", + "@com_github_stretchr_testify//require", + "@org_uber_go_goleak//:goleak", + ], +) diff --git a/planner/core/casetest/plan_test.go b/planner/core/casetest/plan_test.go index 3c2814d8b6721..decff150c9341 100644 --- a/planner/core/casetest/plan_test.go +++ b/planner/core/casetest/plan_test.go @@ -270,3 +270,25 @@ func TestJSONPlanInExplain(t *testing.T) { } } } + +func TestHandleEQAll(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("CREATE TABLE t1 (c1 int, c2 int, UNIQUE i1 (c1, c2));") + tk.MustExec("INSERT INTO t1 VALUES (7, null),(5,1);") + tk.MustQuery("SELECT c1 FROM t1 WHERE ('m' = ALL (SELECT /*+ IGNORE_INDEX(t1, i1) */ c2 FROM t1)) IS NOT UNKNOWN; ").Check(testkit.Rows("5", "7")) + tk.MustQuery("SELECT c1 FROM t1 WHERE ('m' = ALL (SELECT /*+ use_INDEX(t1, i1) */ c2 FROM t1)) IS NOT UNKNOWN; ").Check(testkit.Rows("5", "7")) + tk.MustQuery("select (null = ALL (SELECT /*+ NO_INDEX() */ c2 FROM t1)) IS NOT UNKNOWN").Check(testkit.Rows("0")) + tk.MustExec("CREATE TABLE t2 (c1 int, c2 int, UNIQUE i1 (c1, c2));") + tk.MustExec("INSERT INTO t2 VALUES (7, null),(5,null);") + tk.MustQuery("select (null = ALL (SELECT /*+ NO_INDEX() */ c2 FROM t2)) IS NOT UNKNOWN").Check(testkit.Rows("0")) + tk.MustQuery("SELECT c1 FROM t2 WHERE ('m' = ALL (SELECT /*+ IGNORE_INDEX(t2, i1) */ c2 FROM t2)) IS NOT UNKNOWN; ").Check(testkit.Rows()) + tk.MustQuery("SELECT c1 FROM t2 WHERE ('m' = ALL (SELECT /*+ use_INDEX(t2, i1) */ c2 FROM t2)) IS NOT UNKNOWN; ").Check(testkit.Rows()) + tk.MustExec("truncate table t2") + tk.MustExec("INSERT INTO t2 VALUES (7, null),(7,null);") + tk.MustQuery("select c1 from t2 where (c1 = all (select /*+ IGNORE_INDEX(t2, i1) */ c1 from t2))").Check(testkit.Rows("7", "7")) + tk.MustQuery("select c1 from t2 where (c1 = all (select /*+ use_INDEX(t2, i1) */ c1 from t2))").Check(testkit.Rows("7", "7")) + tk.MustQuery("select c2 from t2 where (c2 = all (select /*+ IGNORE_INDEX(t2, i1) */ c2 from t2))").Check(testkit.Rows()) + tk.MustQuery("select c2 from t2 where (c2 = all (select /*+ use_INDEX(t2, i1) */ c2 from t2))").Check(testkit.Rows()) +} diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 80c310dedde8a..1c151220ca6c8 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -777,8 +777,18 @@ func (er *expressionRewriter) handleNEAny(lexpr, rexpr expression.Expression, np // handleEQAll handles the case of = all. For example, if the query is t.id = all (select s.id from s), it will be rewrote to // t.id = (select s.id from s having count(distinct s.id) <= 1 and [all checker]). +<<<<<<< HEAD:planner/core/expression_rewriter.go func (er *expressionRewriter) handleEQAll(lexpr, rexpr expression.Expression, np LogicalPlan, markNoDecorrelate bool) { firstRowFunc, err := aggregation.NewAggFuncDesc(er.sctx, ast.AggFuncFirstRow, []expression.Expression{rexpr}, false) +======= +func (er *expressionRewriter) handleEQAll(planCtx *exprRewriterPlanCtx, lexpr, rexpr expression.Expression, np base.LogicalPlan, markNoDecorrelate bool) { + intest.AssertNotNil(planCtx) + sctx := planCtx.builder.ctx + exprCtx := sctx.GetExprCtx() + // If there is NULL in s.id column, s.id should be the value that isn't null in condition t.id == s.id. + // So use function max to filter NULL. + maxFunc, err := aggregation.NewAggFuncDesc(exprCtx, ast.AggFuncMax, []expression.Expression{rexpr}, false) +>>>>>>> 98e5cfbd1c5 (planner: fix wrong behavior for = all() (#52801)):pkg/planner/core/expression_rewriter.go if err != nil { er.err = err return @@ -789,14 +799,23 @@ func (er *expressionRewriter) handleEQAll(lexpr, rexpr expression.Expression, np return } plan4Agg := LogicalAggregation{ +<<<<<<< HEAD:planner/core/expression_rewriter.go AggFuncs: []*aggregation.AggFuncDesc{firstRowFunc, countFunc}, }.Init(er.sctx, er.b.getSelectOffset()) if hint := er.b.TableHints(); hint != nil { plan4Agg.aggHints = hint.aggHints +======= + AggFuncs: []*aggregation.AggFuncDesc{maxFunc, countFunc}, + }.Init(sctx, planCtx.builder.getSelectOffset()) + if hintinfo := planCtx.builder.TableHints(); hintinfo != nil { + plan4Agg.PreferAggType = hintinfo.PreferAggType + plan4Agg.PreferAggToCop = hintinfo.PreferAggToCop +>>>>>>> 98e5cfbd1c5 (planner: fix wrong behavior for = all() (#52801)):pkg/planner/core/expression_rewriter.go } plan4Agg.SetChildren(np) plan4Agg.names = append(plan4Agg.names, types.EmptyName) +<<<<<<< HEAD:planner/core/expression_rewriter.go // Currently, firstrow agg function is treated like the exact representation of aggregate group key, // so the data type is the same with group key, even if the group key is not null. // However, the return type of firstrow should be nullable, we clear the null flag here instead of @@ -810,16 +829,21 @@ func (er *expressionRewriter) handleEQAll(lexpr, rexpr expression.Expression, np firstRowResultCol := &expression.Column{ UniqueID: er.sctx.GetSessionVars().AllocPlanColumnID(), RetType: firstRowFunc.RetTp, +======= + maxResultCol := &expression.Column{ + UniqueID: sctx.GetSessionVars().AllocPlanColumnID(), + RetType: maxFunc.RetTp, +>>>>>>> 98e5cfbd1c5 (planner: fix wrong behavior for = all() (#52801)):pkg/planner/core/expression_rewriter.go } - firstRowResultCol.SetCoercibility(rexpr.Coercibility()) + maxResultCol.SetCoercibility(rexpr.Coercibility()) plan4Agg.names = append(plan4Agg.names, types.EmptyName) count := &expression.Column{ UniqueID: er.sctx.GetSessionVars().AllocPlanColumnID(), RetType: countFunc.RetTp, } - plan4Agg.SetSchema(expression.NewSchema(firstRowResultCol, count)) + plan4Agg.SetSchema(expression.NewSchema(maxResultCol, count)) leFunc := expression.NewFunctionInternal(er.sctx, ast.LE, types.NewFieldType(mysql.TypeTiny), count, expression.NewOne()) - eqCond := expression.NewFunctionInternal(er.sctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), lexpr, firstRowResultCol) + eqCond := expression.NewFunctionInternal(er.sctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), lexpr, maxResultCol) cond := expression.ComposeCNFCondition(er.sctx, leFunc, eqCond) er.buildQuantifierPlan(plan4Agg, cond, lexpr, rexpr, true, markNoDecorrelate) } From 8497cfb4f480d46cb95260fea77a7253099c5afe Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Mon, 2 Sep 2024 17:11:34 +0800 Subject: [PATCH 2/4] update Signed-off-by: Weizhen Wang --- cmd/explaintest/r/select.result | 5 ---- pkg/planner/core/casetest/BUILD.bazel | 33 ----------------------- planner/core/expression_rewriter.go | 38 +++------------------------ 3 files changed, 3 insertions(+), 73 deletions(-) delete mode 100644 pkg/planner/core/casetest/BUILD.bazel diff --git a/cmd/explaintest/r/select.result b/cmd/explaintest/r/select.result index 95566f9de7a2d..90fb8a1c4fe93 100644 --- a/cmd/explaintest/r/select.result +++ b/cmd/explaintest/r/select.result @@ -413,13 +413,8 @@ explain format = 'brief' select a = all (select a from t t2) from t t1; id estRows task access object operator info Projection 10000.00 root or(and(and(le(Column#11, 1), eq(test.t.a, Column#10)), if(ne(Column#12, 0), , 1)), or(eq(Column#13, 0), if(isnull(test.t.a), , 0)))->Column#14 └─HashJoin 10000.00 root CARTESIAN inner join -<<<<<<< HEAD:cmd/explaintest/r/select.result - ├─StreamAgg(Build) 1.00 root funcs:firstrow(Column#16)->Column#10, funcs:count(distinct Column#17)->Column#11, funcs:sum(Column#18)->Column#12, funcs:count(1)->Column#13 - │ └─Projection 10000.00 root test.t.a, test.t.a, cast(isnull(test.t.a), decimal(20,0) BINARY)->Column#18 -======= ├─StreamAgg(Build) 1.00 root funcs:max(Column#16)->Column#10, funcs:count(distinct Column#17)->Column#11, funcs:sum(Column#18)->Column#12, funcs:count(1)->Column#13 │ └─Projection 10000.00 root select.t.a->Column#16, select.t.a->Column#17, cast(isnull(select.t.a), decimal(20,0) BINARY)->Column#18 ->>>>>>> 98e5cfbd1c5 (planner: fix wrong behavior for = all() (#52801)):tests/integrationtest/r/select.result │ └─TableReader 10000.00 root data:TableFullScan │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo └─TableReader(Probe) 10000.00 root data:TableFullScan diff --git a/pkg/planner/core/casetest/BUILD.bazel b/pkg/planner/core/casetest/BUILD.bazel deleted file mode 100644 index 6c62171aef90d..0000000000000 --- a/pkg/planner/core/casetest/BUILD.bazel +++ /dev/null @@ -1,33 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_test") - -go_test( - name = "casetest_test", - timeout = "moderate", - srcs = [ - "integration_test.go", - "main_test.go", - "plan_test.go", - "stats_test.go", - "tiflash_selection_late_materialization_test.go", - ], - data = glob(["testdata/**"]), - flaky = True, - shard_count = 24, - deps = [ - "//pkg/domain", - "//pkg/parser", - "//pkg/parser/model", - "//pkg/planner/core", - "//pkg/planner/core/base", - "//pkg/planner/property", - "//pkg/testkit", - "//pkg/testkit/testdata", - "//pkg/testkit/testmain", - "//pkg/testkit/testsetup", - "//pkg/util/hint", - "//pkg/util/plancodec", - "@com_github_pingcap_failpoint//:failpoint", - "@com_github_stretchr_testify//require", - "@org_uber_go_goleak//:goleak", - ], -) diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 1c151220ca6c8..28be5f3a85316 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -777,18 +777,10 @@ func (er *expressionRewriter) handleNEAny(lexpr, rexpr expression.Expression, np // handleEQAll handles the case of = all. For example, if the query is t.id = all (select s.id from s), it will be rewrote to // t.id = (select s.id from s having count(distinct s.id) <= 1 and [all checker]). -<<<<<<< HEAD:planner/core/expression_rewriter.go func (er *expressionRewriter) handleEQAll(lexpr, rexpr expression.Expression, np LogicalPlan, markNoDecorrelate bool) { - firstRowFunc, err := aggregation.NewAggFuncDesc(er.sctx, ast.AggFuncFirstRow, []expression.Expression{rexpr}, false) -======= -func (er *expressionRewriter) handleEQAll(planCtx *exprRewriterPlanCtx, lexpr, rexpr expression.Expression, np base.LogicalPlan, markNoDecorrelate bool) { - intest.AssertNotNil(planCtx) - sctx := planCtx.builder.ctx - exprCtx := sctx.GetExprCtx() // If there is NULL in s.id column, s.id should be the value that isn't null in condition t.id == s.id. // So use function max to filter NULL. - maxFunc, err := aggregation.NewAggFuncDesc(exprCtx, ast.AggFuncMax, []expression.Expression{rexpr}, false) ->>>>>>> 98e5cfbd1c5 (planner: fix wrong behavior for = all() (#52801)):pkg/planner/core/expression_rewriter.go + maxFunc, err := aggregation.NewAggFuncDesc(er.sctx, ast.AggFuncMax, []expression.Expression{rexpr}, false) if err != nil { er.err = err return @@ -799,41 +791,17 @@ func (er *expressionRewriter) handleEQAll(planCtx *exprRewriterPlanCtx, lexpr, r return } plan4Agg := LogicalAggregation{ -<<<<<<< HEAD:planner/core/expression_rewriter.go - AggFuncs: []*aggregation.AggFuncDesc{firstRowFunc, countFunc}, + AggFuncs: []*aggregation.AggFuncDesc{maxFunc, countFunc}, }.Init(er.sctx, er.b.getSelectOffset()) if hint := er.b.TableHints(); hint != nil { plan4Agg.aggHints = hint.aggHints -======= - AggFuncs: []*aggregation.AggFuncDesc{maxFunc, countFunc}, - }.Init(sctx, planCtx.builder.getSelectOffset()) - if hintinfo := planCtx.builder.TableHints(); hintinfo != nil { - plan4Agg.PreferAggType = hintinfo.PreferAggType - plan4Agg.PreferAggToCop = hintinfo.PreferAggToCop ->>>>>>> 98e5cfbd1c5 (planner: fix wrong behavior for = all() (#52801)):pkg/planner/core/expression_rewriter.go } plan4Agg.SetChildren(np) plan4Agg.names = append(plan4Agg.names, types.EmptyName) -<<<<<<< HEAD:planner/core/expression_rewriter.go - // Currently, firstrow agg function is treated like the exact representation of aggregate group key, - // so the data type is the same with group key, even if the group key is not null. - // However, the return type of firstrow should be nullable, we clear the null flag here instead of - // during invoking NewAggFuncDesc, in order to keep compatibility with the existing presumption - // that the return type firstrow does not change nullability, whatsoever. - // Cloning it because the return type is the same object with argument's data type. - newRetTp := firstRowFunc.RetTp.Clone() - newRetTp.DelFlag(mysql.NotNullFlag) - firstRowFunc.RetTp = newRetTp - - firstRowResultCol := &expression.Column{ - UniqueID: er.sctx.GetSessionVars().AllocPlanColumnID(), - RetType: firstRowFunc.RetTp, -======= maxResultCol := &expression.Column{ - UniqueID: sctx.GetSessionVars().AllocPlanColumnID(), + UniqueID: er.sctx.GetSessionVars().AllocPlanColumnID(), RetType: maxFunc.RetTp, ->>>>>>> 98e5cfbd1c5 (planner: fix wrong behavior for = all() (#52801)):pkg/planner/core/expression_rewriter.go } maxResultCol.SetCoercibility(rexpr.Coercibility()) plan4Agg.names = append(plan4Agg.names, types.EmptyName) From e6f8f9d68259727a41e286be2112ea0790a8107f Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Mon, 2 Sep 2024 17:52:28 +0800 Subject: [PATCH 3/4] update Signed-off-by: Weizhen Wang --- cmd/explaintest/r/select.result | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cmd/explaintest/r/select.result b/cmd/explaintest/r/select.result index 90fb8a1c4fe93..61c065be102fc 100644 --- a/cmd/explaintest/r/select.result +++ b/cmd/explaintest/r/select.result @@ -414,7 +414,7 @@ id estRows task access object operator info Projection 10000.00 root or(and(and(le(Column#11, 1), eq(test.t.a, Column#10)), if(ne(Column#12, 0), , 1)), or(eq(Column#13, 0), if(isnull(test.t.a), , 0)))->Column#14 └─HashJoin 10000.00 root CARTESIAN inner join ├─StreamAgg(Build) 1.00 root funcs:max(Column#16)->Column#10, funcs:count(distinct Column#17)->Column#11, funcs:sum(Column#18)->Column#12, funcs:count(1)->Column#13 - │ └─Projection 10000.00 root select.t.a->Column#16, select.t.a->Column#17, cast(isnull(select.t.a), decimal(20,0) BINARY)->Column#18 + │ └─Projection 10000.00 root test.t.a, test.t.a, cast(isnull(test.t.a), decimal(20,0) BINARY)->Column#18 │ └─TableReader 10000.00 root data:TableFullScan │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo └─TableReader(Probe) 10000.00 root data:TableFullScan @@ -495,10 +495,11 @@ PRIMARY KEY (`id`) explain format = 'brief' select row_number() over( partition by i ) - x as rnk from t; id estRows task access object operator info Projection 10000.00 root minus(Column#5, test.t.x)->Column#7 -└─Window 10000.00 root row_number()->Column#5 over(partition by test.t.i rows between current row and current row) - └─Sort 10000.00 root test.t.i - └─TableReader 10000.00 root data:TableFullScan - └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +└─Shuffle 10000.00 root execution info: concurrency:4, data sources:[TableReader] + └─Window 10000.00 root row_number()->Column#5 over(partition by test.t.i rows between current row and current row) + └─Sort 10000.00 root test.t.i + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo create table precise_types ( a BIGINT UNSIGNED NOT NULL, b BIGINT NOT NULL, From 8b0b8dfb8ae0e28a765b37a6e0ae14604a202196 Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Mon, 2 Sep 2024 23:25:37 +0800 Subject: [PATCH 4/4] *: fix flaky test TestColumnTable Signed-off-by: Weizhen Wang --- cmd/explaintest/r/select.result | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cmd/explaintest/r/select.result b/cmd/explaintest/r/select.result index 61c065be102fc..49bfabb96052c 100644 --- a/cmd/explaintest/r/select.result +++ b/cmd/explaintest/r/select.result @@ -495,11 +495,10 @@ PRIMARY KEY (`id`) explain format = 'brief' select row_number() over( partition by i ) - x as rnk from t; id estRows task access object operator info Projection 10000.00 root minus(Column#5, test.t.x)->Column#7 -└─Shuffle 10000.00 root execution info: concurrency:4, data sources:[TableReader] - └─Window 10000.00 root row_number()->Column#5 over(partition by test.t.i rows between current row and current row) - └─Sort 10000.00 root test.t.i - └─TableReader 10000.00 root data:TableFullScan - └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +└─Window 10000.00 root row_number()->Column#5 over(partition by test.t.i rows between current row and current row) + └─Sort 10000.00 root test.t.i + └─TableReader 10000.00 root data:TableFullScan + └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo create table precise_types ( a BIGINT UNSIGNED NOT NULL, b BIGINT NOT NULL,