From 12f15f5db4e094c03c81544834c1388148d4eb2d Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Thu, 30 Aug 2018 17:15:09 +0800 Subject: [PATCH 1/7] plan: change the logic of converting to inner join --- expression/util.go | 17 +++++++++++++++++ plan/exhaust_physical_plans.go | 28 ++++++++++++++++++++-------- plan/physical_plan_test.go | 4 ++++ 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/expression/util.go b/expression/util.go index 45928e279ae71..5ae4e69d2bd85 100644 --- a/expression/util.go +++ b/expression/util.go @@ -19,6 +19,7 @@ import ( "time" "unicode" + "github.com/cznic/mathutil" "github.com/juju/errors" "github.com/pingcap/tidb/ast" "github.com/pingcap/tidb/mysql" @@ -449,3 +450,19 @@ func (s *exprStack) push(expr Expression) { func (s *exprStack) len() int { return len(s.stack) } + +// ColumnSliceIntersect intersects two column slice. +// You need to make sure that at least each element in s2 is unique. +func ColumnSliceIntersect(s1, s2 []*Column) []*Column { + intSet := map[int]struct{}{} + for _, col := range s1 { + intSet[col.UniqueID] = struct{}{} + } + result := make([]*Column, 0, mathutil.Min(len(s1), len(s2))) + for _, col := range s2 { + if _, ok := intSet[col.UniqueID]; ok { + result = append(result, col) + } + } + return result +} diff --git a/plan/exhaust_physical_plans.go b/plan/exhaust_physical_plans.go index c23c742510bf9..3d5a18b24ebd1 100644 --- a/plan/exhaust_physical_plans.go +++ b/plan/exhaust_physical_plans.go @@ -518,7 +518,7 @@ func (p *LogicalJoin) buildRangeForIndexJoin(indexInfo *model.IndexInfo, innerPl return nil, nil, nil } - conds, eqConds, keyOff2IdxOff := p.buildFakeEqCondsForIndexJoin(innerJoinKeys, idxCols, colLengths, innerPlan) + access, eqConds, remained, keyOff2IdxOff := p.buildFakeEqCondsForIndexJoin(innerJoinKeys, idxCols, colLengths, innerPlan.pushedDownConds) if len(keyOff2IdxOff) == 0 { return nil, nil, nil @@ -527,7 +527,7 @@ func (p *LogicalJoin) buildRangeForIndexJoin(indexInfo *model.IndexInfo, innerPl // After constant propagation, there won'be cases that t1.a=t2.a and t2.a=1 occur in the same time. // And if there're cases like t1.a=t2.a and t1.a > 1, we can also guarantee that t1.a > 1 won't be chosen as access condition. // So DetachCondAndBuildRangeForIndex won't miss the equal conditions we generate. - ranges, accesses, remained, _, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, conds, idxCols, colLengths) + ranges, accesses, moreRemained, _, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, access, idxCols, colLengths) if err != nil { terror.Log(errors.Trace(err)) return nil, nil, nil @@ -540,27 +540,30 @@ func (p *LogicalJoin) buildRangeForIndexJoin(indexInfo *model.IndexInfo, innerPl } } - return ranges, remained, keyOff2IdxOff + return ranges, append(remained, moreRemained...), keyOff2IdxOff } func (p *LogicalJoin) buildFakeEqCondsForIndexJoin(keys, idxCols []*expression.Column, colLengths []int, - innerPlan *DataSource) (accesses, eqConds []expression.Expression, keyOff2IdxOff []int) { + innerFilters []expression.Expression) (accesses, eqConds, remained []expression.Expression, keyOff2IdxOff []int) { // Check whether all join keys match one column from index. keyOff2IdxOff = joinKeysMatchIndex(keys, idxCols, colLengths) if keyOff2IdxOff == nil { - return nil, nil, nil + return nil, nil, nil, nil } + usableKeys := make([]*expression.Column, 0, len(keys)) + // After predicate push down, the one side conditions of join must be the conditions that cannot be pushed down and // cannot calculate range either. So we only need the innerPlan.pushedDownConds and the eq conditions that we generate. // TODO: There may be a selection that block the index join. - conds := make([]expression.Expression, 0, len(keys)+len(innerPlan.pushedDownConds)) + conds := make([]expression.Expression, 0, len(keys)+len(innerFilters)) eqConds = make([]expression.Expression, 0, len(keys)) // Construct a fake equal expression for calculating the range. for i, key := range keys { if keyOff2IdxOff[i] < 0 { continue } + usableKeys = append(usableKeys, key) // Int datum 1 can convert to all column's type(numeric type, string type, json, time type, enum, set) safely. fakeConstant := &expression.Constant{Value: types.NewIntDatum(1), RetType: key.GetType()} eqFunc := expression.NewFunctionInternal(p.ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), key, fakeConstant) @@ -568,8 +571,17 @@ func (p *LogicalJoin) buildFakeEqCondsForIndexJoin(keys, idxCols []*expression.C eqConds = append(eqConds, eqFunc) } - conds = append(conds, innerPlan.pushedDownConds...) - return conds, eqConds, keyOff2IdxOff + remained = make([]expression.Expression, 0, len(innerFilters)) + for _, filter := range innerFilters { + affectedCols := expression.ExtractColumns(filter) + if len(expression.ColumnSliceIntersect(affectedCols, usableKeys)) > 0 { + remained = append(remained, filter) + continue + } + conds = append(conds, filter) + } + + return conds, eqConds, remained, keyOff2IdxOff } // tryToGetIndexJoin will get index join by hints. If we can generate a valid index join by hint, the second return value diff --git a/plan/physical_plan_test.go b/plan/physical_plan_test.go index 8fc193f08b381..71219839c9321 100644 --- a/plan/physical_plan_test.go +++ b/plan/physical_plan_test.go @@ -407,6 +407,10 @@ func (s *testPlanSuite) TestDAGPlanBuilderJoin(c *C) { sql: "select /*+ TIDB_INLJ(t1) */ * from t t1 join t t2 where t1.f=t2.f and t1.a=t2.a", best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(t1.a,t2.a)", }, + { + sql: "select /*+ TIDB_INLJ(t1) */ * from t t1 join t t2 where t1.a=t2.a and t1.a in(1, 2) and t2.a in (1, 2)", + best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t)->Sel([in(t2.a, 1, 2)]))}(t1.a,t2.a)", + }, } for i, tt := range tests { comment := Commentf("case:%v sql:%s", i, tt.sql) From 023f2288de4231f6d618f9c0753741e826af5319 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Fri, 31 Aug 2018 14:36:11 +0800 Subject: [PATCH 2/7] fix merge error --- expression/util.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expression/util.go b/expression/util.go index 5ae4e69d2bd85..4eec1f636421f 100644 --- a/expression/util.go +++ b/expression/util.go @@ -454,7 +454,7 @@ func (s *exprStack) len() int { // ColumnSliceIntersect intersects two column slice. // You need to make sure that at least each element in s2 is unique. func ColumnSliceIntersect(s1, s2 []*Column) []*Column { - intSet := map[int]struct{}{} + intSet := map[int64]struct{}{} for _, col := range s1 { intSet[col.UniqueID] = struct{}{} } From 968f34731d37da894bb99347c9e352b712859007 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Fri, 31 Aug 2018 15:22:42 +0800 Subject: [PATCH 3/7] tiny change on test. --- plan/physical_plan_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plan/physical_plan_test.go b/plan/physical_plan_test.go index cde495ddfdddf..f3a87b9ee643d 100644 --- a/plan/physical_plan_test.go +++ b/plan/physical_plan_test.go @@ -408,7 +408,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderJoin(c *C) { best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(t1.a,t2.a)", }, { - sql: "select /*+ TIDB_INLJ(t1) */ * from t t1 join t t2 where t1.a=t2.a and t1.a in(1, 2) and t2.a in (1, 2)", + sql: "select /*+ TIDB_INLJ(t1) */ * from t t1 join t t2 where t1.a=t2.a and t2.a in (1, 2)", best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t)->Sel([in(t2.a, 1, 2)]))}(t1.a,t2.a)", }, } From 5c9e133e62e5e22b35b99afc72ee1d71744db317 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Fri, 31 Aug 2018 16:04:45 +0800 Subject: [PATCH 4/7] address comment --- expression/util.go | 11 ++++------- plan/exhaust_physical_plans.go | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/expression/util.go b/expression/util.go index 4eec1f636421f..4bd4794873e69 100644 --- a/expression/util.go +++ b/expression/util.go @@ -19,7 +19,6 @@ import ( "time" "unicode" - "github.com/cznic/mathutil" "github.com/juju/errors" "github.com/pingcap/tidb/ast" "github.com/pingcap/tidb/mysql" @@ -451,18 +450,16 @@ func (s *exprStack) len() int { return len(s.stack) } -// ColumnSliceIntersect intersects two column slice. -// You need to make sure that at least each element in s2 is unique. -func ColumnSliceIntersect(s1, s2 []*Column) []*Column { +// ColumnSliceIsIntersect checks whether two column slice is intersected. +func ColumnSliceIsIntersect(s1, s2 []*Column) bool { intSet := map[int64]struct{}{} for _, col := range s1 { intSet[col.UniqueID] = struct{}{} } - result := make([]*Column, 0, mathutil.Min(len(s1), len(s2))) for _, col := range s2 { if _, ok := intSet[col.UniqueID]; ok { - result = append(result, col) + return true } } - return result + return false } diff --git a/plan/exhaust_physical_plans.go b/plan/exhaust_physical_plans.go index 3d5a18b24ebd1..472deb41ed7f7 100644 --- a/plan/exhaust_physical_plans.go +++ b/plan/exhaust_physical_plans.go @@ -574,7 +574,7 @@ func (p *LogicalJoin) buildFakeEqCondsForIndexJoin(keys, idxCols []*expression.C remained = make([]expression.Expression, 0, len(innerFilters)) for _, filter := range innerFilters { affectedCols := expression.ExtractColumns(filter) - if len(expression.ColumnSliceIntersect(affectedCols, usableKeys)) > 0 { + if expression.ColumnSliceIsIntersect(affectedCols, usableKeys) { remained = append(remained, filter) continue } From 0ed34a284bbe707f9b33baafd549369988f6faa0 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Tue, 4 Sep 2018 13:22:35 +0800 Subject: [PATCH 5/7] add comment. --- plan/exhaust_physical_plans.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/plan/exhaust_physical_plans.go b/plan/exhaust_physical_plans.go index 472deb41ed7f7..3eb0c28fa34eb 100644 --- a/plan/exhaust_physical_plans.go +++ b/plan/exhaust_physical_plans.go @@ -553,12 +553,9 @@ func (p *LogicalJoin) buildFakeEqCondsForIndexJoin(keys, idxCols []*expression.C usableKeys := make([]*expression.Column, 0, len(keys)) - // After predicate push down, the one side conditions of join must be the conditions that cannot be pushed down and - // cannot calculate range either. So we only need the innerPlan.pushedDownConds and the eq conditions that we generate. - // TODO: There may be a selection that block the index join. conds := make([]expression.Expression, 0, len(keys)+len(innerFilters)) eqConds = make([]expression.Expression, 0, len(keys)) - // Construct a fake equal expression for calculating the range. + // Construct a fake equal expression for every join key for calculating the range. for i, key := range keys { if keyOff2IdxOff[i] < 0 { continue @@ -571,6 +568,7 @@ func (p *LogicalJoin) buildFakeEqCondsForIndexJoin(keys, idxCols []*expression.C eqConds = append(eqConds, eqFunc) } + // Look into every `innerFilter`, if it contains join keys' column, put this filter into `remained` part directly. remained = make([]expression.Expression, 0, len(innerFilters)) for _, filter := range innerFilters { affectedCols := expression.ExtractColumns(filter) From e486247538136c8236bd5b6a03e99e48f7f7f3b6 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Tue, 4 Sep 2018 17:17:25 +0800 Subject: [PATCH 6/7] update comment. --- plan/exhaust_physical_plans.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plan/exhaust_physical_plans.go b/plan/exhaust_physical_plans.go index 3eb0c28fa34eb..6c3a726dc3711 100644 --- a/plan/exhaust_physical_plans.go +++ b/plan/exhaust_physical_plans.go @@ -518,15 +518,15 @@ func (p *LogicalJoin) buildRangeForIndexJoin(indexInfo *model.IndexInfo, innerPl return nil, nil, nil } + // Extract the filter to calculate access and the filters that must be remained ones. access, eqConds, remained, keyOff2IdxOff := p.buildFakeEqCondsForIndexJoin(innerJoinKeys, idxCols, colLengths, innerPlan.pushedDownConds) if len(keyOff2IdxOff) == 0 { return nil, nil, nil } - // After constant propagation, there won'be cases that t1.a=t2.a and t2.a=1 occur in the same time. - // And if there're cases like t1.a=t2.a and t1.a > 1, we can also guarantee that t1.a > 1 won't be chosen as access condition. - // So DetachCondAndBuildRangeForIndex won't miss the equal conditions we generate. + // In `buildFakeEqCondsForIndexJoin`, we construct the equal cond for join keys and remove filters that containing the join keys' column. + // So the eq cond we built can be successfully used to build range if it can be used. Won't be affected by the existing filters. ranges, accesses, moreRemained, _, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, access, idxCols, colLengths) if err != nil { terror.Log(errors.Trace(err)) From 90b6937d81366e531825e4beae95332092c97478 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 5 Sep 2018 10:50:57 +0800 Subject: [PATCH 7/7] address comment. --- plan/exhaust_physical_plans.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plan/exhaust_physical_plans.go b/plan/exhaust_physical_plans.go index 6c3a726dc3711..2d5f1ff2ba9a1 100644 --- a/plan/exhaust_physical_plans.go +++ b/plan/exhaust_physical_plans.go @@ -525,8 +525,9 @@ func (p *LogicalJoin) buildRangeForIndexJoin(indexInfo *model.IndexInfo, innerPl return nil, nil, nil } - // In `buildFakeEqCondsForIndexJoin`, we construct the equal cond for join keys and remove filters that containing the join keys' column. - // So the eq cond we built can be successfully used to build range if it can be used. Won't be affected by the existing filters. + // In `buildFakeEqCondsForIndexJoin`, we construct the equal conditions for join keys and remove filters that contain the join keys' column. + // When t1.a = t2.a and t1.a > 1, we can also guarantee that t1.a > 1 won't be chosen as the access condition. + // So the equal conditions we built can be successfully used to build a range if they can be used. They won't be affected by the existing filters. ranges, accesses, moreRemained, _, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, access, idxCols, colLengths) if err != nil { terror.Log(errors.Trace(err))