Skip to content

Commit

Permalink
planner: support hint for IndexHashJoin and IndexMergeJoin (pingcap#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
XuHuaiyu authored and XiaTianliang committed Dec 21, 2019
1 parent 443866b commit ae7900d
Show file tree
Hide file tree
Showing 14 changed files with 349 additions and 103 deletions.
2 changes: 1 addition & 1 deletion cmd/explaintest/r/generated_columns.result
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ VALUES ('{"a": 1}', '{"1": "1"}');
ANALYZE TABLE sgc1, sgc2;
EXPLAIN SELECT /*+ TIDB_INLJ(sgc1, sgc2) */ * from sgc1 join sgc2 on sgc1.a=sgc2.a;
id count task operator info
IndexHashJoin_35 5.00 root inner join, inner:IndexLookUp_25, outer key:Column#8, inner key:Column#3
IndexJoin_26 5.00 root inner join, inner:IndexLookUp_25, outer key:Column#8, inner key:Column#3
├─IndexLookUp_25 5.00 root
│ ├─Selection_24 5.00 cop[tikv] not(isnull(Column#3))
│ │ └─IndexScan_22 5.00 cop[tikv] table:sgc1, index:a, range: decided by [eq(Column#3, Column#8)], keep order:false
Expand Down
8 changes: 4 additions & 4 deletions cmd/explaintest/r/index_join.result
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ set session tidb_hashagg_partial_concurrency = 1;
set session tidb_hashagg_final_concurrency = 1;
explain select /*+ TIDB_INLJ(t1, t2) */ * from t1 join t2 on t1.a=t2.a;
id count task operator info
IndexHashJoin_34 5.00 root inner join, inner:IndexLookUp_24, outer key:Column#4, inner key:Column#1
IndexJoin_25 5.00 root inner join, inner:IndexLookUp_24, outer key:Column#4, inner key:Column#1
├─IndexLookUp_24 5.00 root
│ ├─Selection_23 5.00 cop[tikv] not(isnull(Column#1))
│ │ └─IndexScan_21 5.00 cop[tikv] table:t1, index:a, range: decided by [eq(Column#1, Column#4)], keep order:false
Expand All @@ -32,11 +32,11 @@ create table t2(a int not null, b int not null, key a(a));
set @@tidb_opt_insubq_to_join_and_agg=0;
explain select /*+ TIDB_INLJ(t2@sel_2) */ * from t1 where t1.a in (select t2.a from t2);
id count task operator info
IndexMergeJoin_14 8000.00 root semi join, inner:IndexReader_12, outer key:Column#1, inner key:Column#4
IndexJoin_10 8000.00 root semi join, inner:IndexReader_9, outer key:Column#1, inner key:Column#4
├─TableReader_18 10000.00 root data:TableScan_17
│ └─TableScan_17 10000.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
└─IndexReader_12 1.25 root index:IndexScan_11
└─IndexScan_11 1.25 cop[tikv] table:t2, index:a, range: decided by [eq(Column#4, Column#1)], keep order:true, stats:pseudo
└─IndexReader_9 1.25 root index:IndexScan_8
└─IndexScan_8 1.25 cop[tikv] table:t2, index:a, range: decided by [eq(Column#4, Column#1)], keep order:false, stats:pseudo
show warnings;
Level Code Message
set @@tidb_opt_insubq_to_join_and_agg=1;
Expand Down
12 changes: 6 additions & 6 deletions cmd/explaintest/r/topn_push_down.result
Original file line number Diff line number Diff line change
Expand Up @@ -217,20 +217,20 @@ create table t(a int not null, index idx(a));
explain select /*+ TIDB_INLJ(t2) */ * from t t1 join t t2 on t1.a = t2.a limit 5;
id count task operator info
Limit_11 5.00 root offset:0, count:5
└─IndexMergeJoin_19 5.00 root inner join, inner:IndexReader_17, outer key:Column#1, inner key:Column#3
└─IndexJoin_15 5.00 root inner join, inner:IndexReader_14, outer key:Column#1, inner key:Column#3
├─TableReader_23 4.00 root data:TableScan_22
│ └─TableScan_22 4.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
└─IndexReader_17 1.25 root index:IndexScan_16
└─IndexScan_16 1.25 cop[tikv] table:t2, index:a, range: decided by [eq(Column#3, Column#1)], keep order:true, stats:pseudo
└─IndexReader_14 1.25 root index:IndexScan_13
└─IndexScan_13 1.25 cop[tikv] table:t2, index:a, range: decided by [eq(Column#3, Column#1)], keep order:false, stats:pseudo
explain select /*+ TIDB_INLJ(t2) */ * from t t1 left join t t2 on t1.a = t2.a where t2.a is null limit 5;
id count task operator info
Limit_12 5.00 root offset:0, count:5
└─Selection_13 5.00 root isnull(Column#3)
└─IndexMergeJoin_21 5.00 root left outer join, inner:IndexReader_19, outer key:Column#1, inner key:Column#3
└─IndexJoin_17 5.00 root left outer join, inner:IndexReader_16, outer key:Column#1, inner key:Column#3
├─TableReader_25 4.00 root data:TableScan_24
│ └─TableScan_24 4.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
└─IndexReader_19 1.25 root index:IndexScan_18
└─IndexScan_18 1.25 cop[tikv] table:t2, index:a, range: decided by [eq(Column#3, Column#1)], keep order:true, stats:pseudo
└─IndexReader_16 1.25 root index:IndexScan_15
└─IndexScan_15 1.25 cop[tikv] table:t2, index:a, range: decided by [eq(Column#3, Column#1)], keep order:false, stats:pseudo
explain select /*+ TIDB_SMJ(t1, t2) */ * from t t1 join t t2 on t1.a = t2.a limit 5;
id count task operator info
Limit_11 5.00 root offset:0, count:5
Expand Down
20 changes: 20 additions & 0 deletions executor/index_lookup_join_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,33 @@ func (s *testSuite1) TestInapplicableIndexJoinHint(c *C) {
tk.MustQuery(`select /*+ TIDB_INLJ(t1, t2) */ * from t1 join t2 on t1.a=t2.a;`).Check(testkit.Rows())
tk.MustQuery(`show warnings;`).Check(testkit.Rows(`Warning 1815 Optimizer Hint /*+ INL_JOIN(t1, t2) */ or /*+ TIDB_INLJ(t1, t2) */ is inapplicable`))

tk.MustQuery(`select /*+ INL_HASH_JOIN(t1, t2) */ * from t1, t2;`).Check(testkit.Rows())
tk.MustQuery(`show warnings;`).Check(testkit.Rows(`Warning 1815 Optimizer Hint /*+ INL_HASH_JOIN(t1, t2) */ is inapplicable without column equal ON condition`))
tk.MustQuery(`select /*+ INL_HASH_JOIN(t1, t2) */ * from t1 join t2 on t1.a=t2.a;`).Check(testkit.Rows())
tk.MustQuery(`show warnings;`).Check(testkit.Rows(`Warning 1815 Optimizer Hint /*+ INL_HASH_JOIN(t1, t2) */ is inapplicable`))

tk.MustQuery(`select /*+ INL_MERGE_JOIN(t1, t2) */ * from t1, t2;`).Check(testkit.Rows())
tk.MustQuery(`show warnings;`).Check(testkit.Rows(`Warning 1815 Optimizer Hint /*+ INL_MERGE_JOIN(t1, t2) */ is inapplicable without column equal ON condition`))
tk.MustQuery(`select /*+ INL_MERGE_JOIN(t1, t2) */ * from t1 join t2 on t1.a=t2.a;`).Check(testkit.Rows())
tk.MustQuery(`show warnings;`).Check(testkit.Rows(`Warning 1815 Optimizer Hint /*+ INL_MERGE_JOIN(t1, t2) */ is inapplicable`))

tk.MustExec(`drop table if exists t1, t2;`)
tk.MustExec(`create table t1(a bigint, b bigint, index idx_a(a));`)
tk.MustExec(`create table t2(a bigint, b bigint);`)
tk.MustQuery(`select /*+ TIDB_INLJ(t1) */ * from t1 left join t2 on t1.a=t2.a;`).Check(testkit.Rows())
tk.MustQuery(`show warnings;`).Check(testkit.Rows(`Warning 1815 Optimizer Hint /*+ INL_JOIN(t1) */ or /*+ TIDB_INLJ(t1) */ is inapplicable`))
tk.MustQuery(`select /*+ TIDB_INLJ(t2) */ * from t1 right join t2 on t1.a=t2.a;`).Check(testkit.Rows())
tk.MustQuery(`show warnings;`).Check(testkit.Rows(`Warning 1815 Optimizer Hint /*+ INL_JOIN(t2) */ or /*+ TIDB_INLJ(t2) */ is inapplicable`))

tk.MustQuery(`select /*+ INL_HASH_JOIN(t1) */ * from t1 left join t2 on t1.a=t2.a;`).Check(testkit.Rows())
tk.MustQuery(`show warnings;`).Check(testkit.Rows(`Warning 1815 Optimizer Hint /*+ INL_HASH_JOIN(t1) */ is inapplicable`))
tk.MustQuery(`select /*+ INL_HASH_JOIN(t2) */ * from t1 right join t2 on t1.a=t2.a;`).Check(testkit.Rows())
tk.MustQuery(`show warnings;`).Check(testkit.Rows(`Warning 1815 Optimizer Hint /*+ INL_HASH_JOIN(t2) */ is inapplicable`))

tk.MustQuery(`select /*+ INL_MERGE_JOIN(t1) */ * from t1 left join t2 on t1.a=t2.a;`).Check(testkit.Rows())
tk.MustQuery(`show warnings;`).Check(testkit.Rows(`Warning 1815 Optimizer Hint /*+ INL_MERGE_JOIN(t1) */ is inapplicable`))
tk.MustQuery(`select /*+ INL_MERGE_JOIN(t2) */ * from t1 right join t2 on t1.a=t2.a;`).Check(testkit.Rows())
tk.MustQuery(`show warnings;`).Check(testkit.Rows(`Warning 1815 Optimizer Hint /*+ INL_MERGE_JOIN(t2) */ is inapplicable`))
}

func (s *testSuite) TestIndexJoinOverflow(c *C) {
Expand Down
14 changes: 7 additions & 7 deletions executor/join_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,9 @@ func (s *testSuiteJoin2) TestJoin(c *C) {
tk.MustExec("create table t1(a int, b int)")
tk.MustExec("insert into t values(1, 3), (2, 2), (3, 1)")
tk.MustExec("insert into t1 values(0, 0), (1, 2), (1, 3), (3, 4)")
tk.MustQuery("select /*+ TIDB_INLJ(t1) */ * from t join t1 on t.a=t1.a order by t.b").Check(testkit.Rows("3 1 3 4", "1 3 1 2", "1 3 1 3"))
tk.MustQuery("select /*+ TIDB_INLJ(t1) */ * from t join t1 on t.a=t1.a order by t.b").Sort().Check(testkit.Rows("1 3 1 2", "1 3 1 3", "3 1 3 4"))
tk.MustQuery("select /*+ TIDB_INLJ(t) */ t.a, t.b from t join t1 on t.a=t1.a where t1.b = 4 limit 1").Check(testkit.Rows("3 1"))
tk.MustQuery("select /*+ TIDB_INLJ(t, t1) */ * from t right join t1 on t.a=t1.a order by t.b").Check(testkit.Rows("<nil> <nil> 0 0", "3 1 3 4", "1 3 1 2", "1 3 1 3"))
tk.MustQuery("select /*+ TIDB_INLJ(t, t1) */ * from t right join t1 on t.a=t1.a order by t.b").Sort().Check(testkit.Rows("1 3 1 2", "1 3 1 3", "3 1 3 4", "<nil> <nil> 0 0"))

// join reorder will disorganize the resulting schema
tk.MustExec("drop table if exists t, t1")
Expand Down Expand Up @@ -1020,16 +1020,16 @@ func (s *testSuiteJoin1) TestIndexLookupJoin(c *C) {
tk.MustExec("insert into t1 values(1, 0), (2, null)")
tk.MustExec("create table t2(a int primary key)")
tk.MustExec("insert into t2 values(0)")
tk.MustQuery("select /*+ TIDB_INLJ(t2)*/ * from t1 left join t2 on t1.b = t2.a;").Check(testkit.Rows(
`2 <nil> <nil>`,
tk.MustQuery("select /*+ TIDB_INLJ(t2)*/ * from t1 left join t2 on t1.b = t2.a;").Sort().Check(testkit.Rows(
`1 0 0`,
`2 <nil> <nil>`,
))

tk.MustExec("create table t3(a int, key(a))")
tk.MustExec("insert into t3 values(0)")
tk.MustQuery("select /*+ TIDB_INLJ(t3)*/ * from t1 left join t3 on t1.b = t3.a;").Check(testkit.Rows(
`2 <nil> <nil>`,
`1 0 0`,
`2 <nil> <nil>`,
))
}

Expand All @@ -1054,14 +1054,14 @@ func (s *testSuiteJoin1) TestIndexNestedLoopHashJoin(c *C) {
tk.MustExec("analyze table t")
tk.MustExec("analyze table s")
// Test IndexNestedLoopHashJoin keepOrder.
tk.MustQuery("explain select /*+ TIDB_INLJ(s) */ * from t left join s on t.a=s.a order by t.pk").Check(testkit.Rows(
tk.MustQuery("explain select /*+ INL_HASH_JOIN(s) */ * from t left join s on t.a=s.a order by t.pk").Check(testkit.Rows(
"IndexHashJoin_28 100.00 root left outer join, inner:TableReader_22, outer key:Column#2, inner key:Column#3",
"├─TableReader_30 100.00 root data:TableScan_29",
"│ └─TableScan_29 100.00 cop[tikv] table:t, range:[-inf,+inf], keep order:true",
"└─TableReader_22 1.00 root data:TableScan_21",
" └─TableScan_21 1.00 cop[tikv] table:s, range: decided by [Column#2], keep order:false",
))
rs := tk.MustQuery("select /*+ TIDB_INLJ(s) */ * from t left join s on t.a=s.a order by t.pk")
rs := tk.MustQuery("select /*+ INL_HASH_JOIN(s) */ * from t left join s on t.a=s.a order by t.pk")
for i, row := range rs.Rows() {
c.Assert(row[0].(string), Equals, fmt.Sprintf("%d", i))
}
Expand Down
144 changes: 114 additions & 30 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -1157,18 +1157,45 @@ func (ijHelper *indexJoinBuildHelper) buildTemplateRange(matchedKeyCnt int, eqAn
// tryToGetIndexJoin tries to build index joins according to hints. If a valid index join can be generated from a hint, the second return value
// is true, which means this index join is forced. Otherwise the join algorithm with the minimum cost is selected.
func (p *LogicalJoin) tryToGetIndexJoin(prop *property.PhysicalProperty) (indexJoins []PhysicalPlan, forced bool) {
rightOuter := (p.preferJoinType & preferLeftAsIndexInner) > 0
leftOuter := (p.preferJoinType & preferRightAsIndexInner) > 0
hasIndexJoinHint := leftOuter || rightOuter
inljRightOuter := (p.preferJoinType & preferLeftAsINLJInner) > 0
inljLeftOuter := (p.preferJoinType & preferRightAsINLJInner) > 0
hasINLJHint := inljLeftOuter || inljRightOuter

inlhjRightOuter := (p.preferJoinType & preferLeftAsINLHJInner) > 0
inlhjLeftOuter := (p.preferJoinType & preferRightAsINLHJInner) > 0
hasINLHJHint := inlhjLeftOuter || inlhjRightOuter

inlmjRightOuter := (p.preferJoinType & preferLeftAsINLMJInner) > 0
inlmjLeftOuter := (p.preferJoinType & preferRightAsINLMJInner) > 0
hasINLMJHint := inlmjLeftOuter || inlmjRightOuter

forceLeftOuter := inljLeftOuter || inlhjLeftOuter || inlmjLeftOuter
forceRightOuter := inljRightOuter || inlhjRightOuter || inlmjRightOuter

defer func() {
if !forced && hasIndexJoinHint {
// refine error message
if !forced && (hasINLJHint || hasINLHJHint || hasINLMJHint) {
// Construct warning message prefix.
errMsg := "Optimizer Hint INL_JOIN or TIDB_INLJ is inapplicable"
var errMsg string
switch {
case hasINLJHint:
errMsg = "Optimizer Hint INL_JOIN or TIDB_INLJ is inapplicable"
case hasINLHJHint:
errMsg = "Optimizer Hint INL_HASH_JOIN is inapplicable"
case hasINLMJHint:
errMsg = "Optimizer Hint INL_MERGE_JOIN is inapplicable"
}
if p.hintInfo != nil {
errMsg = fmt.Sprintf("Optimizer Hint %s or %s is inapplicable",
restore2JoinHint(HintINLJ, p.hintInfo.indexNestedLoopJoinTables),
restore2JoinHint(TiDBIndexNestedLoopJoin, p.hintInfo.indexNestedLoopJoinTables))
t := p.hintInfo.indexNestedLoopJoinTables
switch {
case len(t.inljTables) != 0:
errMsg = fmt.Sprintf("Optimizer Hint %s or %s is inapplicable",
restore2JoinHint(HintINLJ, t.inljTables), restore2JoinHint(TiDBIndexNestedLoopJoin, t.inljTables))
case len(t.inlhjTables) != 0:
errMsg = fmt.Sprintf("Optimizer Hint %s is inapplicable", restore2JoinHint(HintINLHJ, t.inlhjTables))
case len(t.inlmjTables) != 0:
errMsg = fmt.Sprintf("Optimizer Hint %s is inapplicable", restore2JoinHint(HintINLMJ, t.inlmjTables))
}
}

// Append inapplicable reason.
Expand All @@ -1182,36 +1209,93 @@ func (p *LogicalJoin) tryToGetIndexJoin(prop *property.PhysicalProperty) (indexJ
}
}()

// supportLeftOuter and supportRightOuter indicate whether this type of join
// supports the left side or the right side as the outer side.
var supportLeftOuter, supportRightOuter bool
switch p.JoinType {
case SemiJoin, AntiSemiJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin, LeftOuterJoin:
join := p.getIndexJoinByOuterIdx(prop, 0)
return join, join != nil && leftOuter
supportLeftOuter = true
case RightOuterJoin:
join := p.getIndexJoinByOuterIdx(prop, 1)
return join, join != nil && rightOuter
supportRightOuter = true
case InnerJoin:
lhsCardinality := p.Children()[0].statsInfo().Count()
rhsCardinality := p.Children()[1].statsInfo().Count()

leftJoins := p.getIndexJoinByOuterIdx(prop, 0)
if leftJoins != nil && (leftOuter && !rightOuter || lhsCardinality < rhsCardinality) {
return leftJoins, leftOuter
supportLeftOuter, supportRightOuter = true, true
}

var allLeftOuterJoins, allRightOuterJoins, forcedLeftOuterJoins, forcedRightOuterJoins []PhysicalPlan
if supportLeftOuter {
allLeftOuterJoins = p.getIndexJoinByOuterIdx(prop, 0)
forcedLeftOuterJoins = make([]PhysicalPlan, 0, len(allLeftOuterJoins))
for _, j := range allLeftOuterJoins {
switch j.(type) {
case *PhysicalIndexJoin:
if hasINLJHint {
forcedLeftOuterJoins = append(forcedLeftOuterJoins, j)
}
case *PhysicalIndexHashJoin:
if hasINLHJHint {
forcedLeftOuterJoins = append(forcedLeftOuterJoins, j)
}
case *PhysicalIndexMergeJoin:
if hasINLMJHint {
forcedLeftOuterJoins = append(forcedLeftOuterJoins, j)
}
}
}

rightJoins := p.getIndexJoinByOuterIdx(prop, 1)
if rightJoins != nil && (rightOuter && !leftOuter || rhsCardinality < lhsCardinality) {
return rightJoins, rightOuter
switch {
case p.JoinType == InnerJoin && p.Children()[0].statsInfo().Count() < p.Children()[1].statsInfo().Count():
if len(forcedLeftOuterJoins) != 0 {
return forcedLeftOuterJoins, forceLeftOuter
}
if len(allLeftOuterJoins) != 0 {
return allLeftOuterJoins, forceLeftOuter
}
case len(forcedLeftOuterJoins) == 0 && !supportRightOuter:
return allLeftOuterJoins, false
case len(forcedLeftOuterJoins) != 0 && (!supportRightOuter || forceLeftOuter && !forceRightOuter):
return forcedLeftOuterJoins, forceLeftOuter
}
}
if supportRightOuter {
allRightOuterJoins = p.getIndexJoinByOuterIdx(prop, 1)
forcedRightOuterJoins = make([]PhysicalPlan, 0, len(allRightOuterJoins))
for _, j := range allRightOuterJoins {
switch j.(type) {
case *PhysicalIndexJoin:
if hasINLJHint {
forcedRightOuterJoins = append(forcedRightOuterJoins, j)
}
case *PhysicalIndexHashJoin:
if hasINLHJHint {
forcedRightOuterJoins = append(forcedRightOuterJoins, j)
}
case *PhysicalIndexMergeJoin:
if hasINLMJHint {
forcedRightOuterJoins = append(forcedRightOuterJoins, j)
}
}
}
switch {
case p.JoinType == InnerJoin && p.Children()[0].statsInfo().Count() > p.Children()[1].statsInfo().Count():
if len(forcedRightOuterJoins) != 0 {
return forcedRightOuterJoins, forceRightOuter
}
if len(allRightOuterJoins) != 0 {
return allRightOuterJoins, forceRightOuter
}
case len(forcedRightOuterJoins) == 0 && !supportLeftOuter:
return allRightOuterJoins, false
case len(forcedRightOuterJoins) != 0 && (!supportLeftOuter || forceRightOuter && !forceLeftOuter):
return forcedRightOuterJoins, forceRightOuter
}

canForceLeft := leftJoins != nil && leftOuter
canForceRight := rightJoins != nil && rightOuter
forced = canForceLeft || canForceRight

joins := append(leftJoins, rightJoins...)
return joins, forced
}

return nil, false
canForceLeft := len(forcedLeftOuterJoins) != 0 && forceLeftOuter
canForceRight := len(forcedRightOuterJoins) != 0 && forceRightOuter
forced = canForceLeft || canForceRight
if forced {
return append(forcedLeftOuterJoins, forcedRightOuterJoins...), forced
}
return append(allLeftOuterJoins, allRightOuterJoins...), forced
}

// LogicalJoin can generate hash join, index join and sort merge join.
Expand Down
4 changes: 3 additions & 1 deletion planner/core/hints.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,9 @@ func genHintsFromPhysicalPlan(p PhysicalPlan, nodeType nodeType) (res []*ast.Tab
case *PhysicalIndexJoin:
res = append(res, getJoinHints(p.SCtx(), HintINLJ, p.SelectBlockOffset(), nodeType, pp.children[pp.InnerChildIdx])...)
case *PhysicalIndexMergeJoin:
res = append(res, getJoinHints(p.SCtx(), HintINLJ, p.SelectBlockOffset(), nodeType, pp.children[pp.InnerChildIdx])...)
res = append(res, getJoinHints(p.SCtx(), HintINLMJ, p.SelectBlockOffset(), nodeType, pp.children[pp.InnerChildIdx])...)
case *PhysicalIndexHashJoin:
res = append(res, getJoinHints(p.SCtx(), HintINLHJ, p.SelectBlockOffset(), nodeType, pp.children[pp.InnerChildIdx])...)
}
return res
}
Loading

0 comments on commit ae7900d

Please sign in to comment.