Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: improve skyline pruning #26271

Merged
merged 14 commits into from
Aug 2, 2021
2 changes: 1 addition & 1 deletion planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -1251,7 +1251,7 @@ func (ijHelper *indexJoinBuildHelper) findUsefulEqAndInFilters(innerPlan *DataSo
var remainedEqOrIn []expression.Expression
// Extract the eq/in functions of possible join key.
// you can see the comment of ExtractEqAndInCondition to get the meaning of the second return value.
usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _ = ranger.ExtractEqAndInCondition(
usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _, _ = ranger.ExtractEqAndInCondition(
innerPlan.ctx, innerPlan.pushedDownConds,
ijHelper.curNotUsedIndexCols,
ijHelper.curNotUsedColLens,
Expand Down
102 changes: 68 additions & 34 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,10 +415,11 @@ func (ds *DataSource) tryToGetDualTask() (task, error) {

// candidatePath is used to maintain required info for skyline pruning.
type candidatePath struct {
path *util.AccessPath
columnSet *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions.
isSingleScan bool
isMatchProp bool
path *util.AccessPath
accessCondsColSet *intsets.Sparse // accessCondsColSet is the set of columns that occurred in the access conditions.
indexFiltersColSet *intsets.Sparse // indexFiltersColSet is the set of columns that occurred in the index filters.
isSingleScan bool
isMatchProp bool
}

// compareColumnSet compares the two sets. The last return value is used to indicate
Expand Down Expand Up @@ -451,18 +452,31 @@ func compareBool(l, r bool) int {
return 1
}

// compareIndexBack compares two candidate paths on whether they have to access the table after the index scan.
// It first compares the isSingleScan flags of lhs and rhs with compareBool. When the flags tie and neither
// candidate is a single scan, it falls back to compareColumnSet on the columns referenced by their index filters.
// The first return value is the comparison result. The second is always true on the compareBool path; on the
// fallback path it is whatever compareColumnSet reports
// (NOTE(review): presumably "comparable" — compareColumnSet's contract is only partly visible here, confirm).
func compareIndexBack(lhs, rhs *candidatePath) (int, bool) {
result := compareBool(lhs.isSingleScan, rhs.isSingleScan)
if result == 0 && !lhs.isSingleScan {
// if both lhs and rhs need to access table after IndexScan, we use the set of columns that occurred in IndexFilters
// to compare how many table rows will be accessed.
return compareColumnSet(lhs.indexFiltersColSet, rhs.indexFiltersColSet)
}
// Either the flags differ or both candidates are single scans: the boolean comparison alone decides.
return result, true
}
Comment on lines +454 to +462
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any test case that can cover this rule?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


// compareCandidates is the core of skyline pruning. It compares the two candidate paths on three dimensions:
// (1): the set of columns that occurred in the access condition,
// (2): whether or not it matches the physical property
// (3): does it require a double scan.
// If `x` is not worse than `y` at all factors,
// and there exists one factor that `x` is better than `y`, then `x` is better than `y`.
func compareCandidates(lhs, rhs *candidatePath) int {
setsResult, comparable := compareColumnSet(lhs.columnSet, rhs.columnSet)
setsResult, comparable := compareColumnSet(lhs.accessCondsColSet, rhs.accessCondsColSet)
if !comparable {
return 0
}
scanResult, comparable := compareIndexBack(lhs, rhs)
if !comparable {
return 0
}
scanResult := compareBool(lhs.isSingleScan, rhs.isSingleScan)
matchResult := compareBool(lhs.isMatchProp, rhs.isMatchProp)
sum := setsResult + scanResult + matchResult
if setsResult >= 0 && scanResult >= 0 && matchResult >= 0 && sum > 0 {
Expand All @@ -474,52 +488,72 @@ func compareCandidates(lhs, rhs *candidatePath) int {
return 0
}

func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath {
candidate := &candidatePath{path: path}
func (ds *DataSource) isMatchProp(path *util.AccessPath, prop *property.PhysicalProperty) bool {
var isMatchProp bool
if path.IsIntHandlePath {
pkCol := ds.getPKIsHandleCol()
if len(prop.SortItems) == 1 && pkCol != nil {
candidate.isMatchProp = prop.SortItems[0].Col.Equal(nil, pkCol)
isMatchProp = prop.SortItems[0].Col.Equal(nil, pkCol)
if path.StoreType == kv.TiFlash {
candidate.isMatchProp = candidate.isMatchProp && !prop.SortItems[0].Desc
isMatchProp = isMatchProp && !prop.SortItems[0].Desc
}
}
} else {
all, _ := prop.AllSameOrder()
// When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because
// it needs not to keep order for index scan.
if !prop.IsEmpty() && all {
for i, col := range path.IdxCols {
if col.Equal(nil, prop.SortItems[0].Col) {
candidate.isMatchProp = matchIndicesProp(path.IdxCols[i:], path.IdxColLens[i:], prop.SortItems)
return isMatchProp
}
// TODO: do we need to consider TiFlash here?
// TODO: check whether it is ok to cache the optimization.
all, _ := prop.AllSameOrder()
// When the prop is empty or `all` is false, it is better for `isMatchProp` to be `false`, because
// the index scan then does not need to keep order.

// Basically, if `prop.SortItems` is the prefix of `path.IdxCols`, then `isMatchProp` is true. However, we need to consider
// the situations when some columns of `path.IdxCols` are evaluated as constant. For example:
// ```
// create table t(a int, b int, c int, d int, index idx_a_b_c(a, b, c), index idx_d_c_b_a(d, c, b, a));
// select * from t where a = 1 order by b, c;
// select * from t where b = 1 order by a, c;
// select * from t where d = 1 and b = 2 order by c, a;
// select * from t where d = 1 and b = 2 order by c, b, a;
// ```
// In the first two `SELECT` statements, `idx_a_b_c` matches the sort order. In the last two `SELECT` statements, `idx_d_c_b_a`
// matches the sort order. Hence, we use `path.ConstantCols` to deal with the above situations.
if !prop.IsEmpty() && all && len(path.IdxCols) >= len(prop.SortItems) {
isMatchProp = true
i := 0
for _, sortItem := range prop.SortItems {
found := false
for ; i < len(path.IdxCols); i++ {
if path.IdxColLens[i] == types.UnspecifiedLength && sortItem.Col.Equal(nil, path.IdxCols[i]) {
found = true
i++
break
} else if i >= path.EqCondCount {
}
if path.ConstantCols == nil || !path.ConstantCols[i] {
break
}
}
if !found {
isMatchProp = false
break
}
}
}
candidate.columnSet = expression.ExtractColumnSet(path.AccessConds)
return isMatchProp
}

// getTableCandidate builds the candidatePath for a table access path.
// It records whether the path matches the required physical property (computed by ds.isMatchProp) and the set of
// columns extracted from the path's access conditions. isSingleScan is always set to true here, and
// indexFiltersColSet is left at its zero value.
func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath {
candidate := &candidatePath{path: path}
candidate.isMatchProp = ds.isMatchProp(path, prop)
candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
candidate.isSingleScan = true
return candidate
}

func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty, isSingleScan bool) *candidatePath {
candidate := &candidatePath{path: path}
all, _ := prop.AllSameOrder()
// When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because
// it needs not to keep order for index scan.
if !prop.IsEmpty() && all {
for i, col := range path.IdxCols {
if col.Equal(nil, prop.SortItems[0].Col) {
candidate.isMatchProp = matchIndicesProp(path.IdxCols[i:], path.IdxColLens[i:], prop.SortItems)
break
} else if i >= path.EqCondCount {
break
}
}
}
candidate.columnSet = expression.ExtractColumnSet(path.AccessConds)
candidate.isMatchProp = ds.isMatchProp(path, prop)
candidate.accessCondsColSet = expression.ExtractColumnSet(path.AccessConds)
candidate.indexFiltersColSet = expression.ExtractColumnSet(path.IndexFilters)
candidate.isSingleScan = isSingleScan
return candidate
}
Expand Down
24 changes: 24 additions & 0 deletions planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3952,3 +3952,27 @@ func (s *testIntegrationSerialSuite) TestSelectIgnoreTemporaryTableInView(c *C)
tk.MustQuery("select * from v5").Check(testkit.Rows("1 2", "3 4"))

}

// TestIsMatchProp is used to test https://github.com/pingcap/tidb/issues/26017.
// It creates two tables with multi-column indexes, then for every SQL statement in the suite's test data it runs
// `explain format = 'brief'` and checks the resulting plan rows against the stored expected plan.
func (s *testIntegrationSuite) TestIsMatchProp(c *C) {
tk := testkit.NewTestKit(c, s.store)

tk.MustExec("use test")
tk.MustExec("drop table if exists t1, t2")
tk.MustExec("create table t1(a int, b int, c int, d int, index idx_a_b_c(a, b, c))")
tk.MustExec("create table t2(a int, b int, c int, d int, index idx_a_b_c_d(a, b, c, d))")

var input []string
var output []struct {
SQL string
Plan []string
}
// Load the input statements and their expected plans for this test.
s.testData.GetTestCases(c, &input, &output)
for i, tt := range input {
// The callback overwrites the stored SQL and plan with the current result.
// NOTE(review): presumably OnRecord only invokes it in record mode — confirm against testData.OnRecord.
s.testData.OnRecord(func() {
output[i].SQL = tt
output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery("explain format = 'brief' " + tt).Rows())
})
// Verify that the plan produced now matches the expected plan.
tk.MustQuery("explain format = 'brief' " + tt).Check(testkit.Rows(output[i].Plan...))
}
}
18 changes: 17 additions & 1 deletion planner/core/logical_plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1694,12 +1694,28 @@ func (s *testPlanSuite) TestSkylinePruning(c *C) {
},
{
sql: "select * from t where f > 1 and g > 1",
result: "PRIMARY_KEY,f,g,f_g",
result: "PRIMARY_KEY,g,f_g",
},
{
sql: "select count(1) from t",
result: "PRIMARY_KEY,c_d_e,f,g,f_g,c_d_e_str,e_d_c_str_prefix",
},
{
sql: "select * from t where f > 3 and g = 5",
result: "PRIMARY_KEY,g,f_g",
},
{
sql: "select * from t where g = 5 order by f",
result: "PRIMARY_KEY,g,f_g",
},
{
sql: "select * from t where d = 3 order by c, e",
result: "PRIMARY_KEY,c_d_e",
},
{
sql: "select * from t where d = 1 and f > 1 and g > 1 order by c, e",
result: "PRIMARY_KEY,c_d_e,g,f_g",
},
}
ctx := context.TODO()
for i, tt := range tests {
Expand Down
12 changes: 12 additions & 0 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,12 @@ func (ds *DataSource) deriveCommonHandleTablePathStats(path *util.AccessPath, co
path.EqCondCount = res.EqCondCount
path.EqOrInCondCount = res.EqOrInCount
path.IsDNFCond = res.IsDNFCond
path.ConstantCols = make([]bool, len(path.IdxCols))
if res.ColumnValues != nil {
for i := range path.ConstantCols {
path.ConstantCols[i] = res.ColumnValues[i] != nil
}
}
path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges)
if err != nil {
return false, err
Expand Down Expand Up @@ -854,6 +860,12 @@ func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Ex
path.EqCondCount = res.EqCondCount
path.EqOrInCondCount = res.EqOrInCount
path.IsDNFCond = res.IsDNFCond
path.ConstantCols = make([]bool, len(path.IdxCols))
if res.ColumnValues != nil {
for i := range path.ConstantCols {
path.ConstantCols[i] = res.ColumnValues[i] != nil
}
}
path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges)
if err != nil {
return err
Expand Down
4 changes: 2 additions & 2 deletions planner/core/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ func newDateType() types.FieldType {

// MockSignedTable is only used for plan related tests.
func MockSignedTable() *model.TableInfo {
// column: a, b, c, d, e, c_str, d_str, e_str, f, g
// column: a, b, c, d, e, c_str, d_str, e_str, f, g, h, i_date
// PK: a
// indices: c_d_e, e, f, g, f_g, c_d_e_str, c_d_e_str_prefix
// indices: c_d_e, e, f, g, f_g, c_d_e_str, e_d_c_str_prefix
indices := []*model.IndexInfo{
{
Name: model.NewCIStr("c_d_e"),
Expand Down
10 changes: 10 additions & 0 deletions planner/core/testdata/integration_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -310,5 +310,15 @@
"select sum(1) from s1",
"select count(1) as cnt from s1 union select count(1) as cnt from s2"
]
},
{
"name": "TestIsMatchProp",
"cases": [
"select a, b, c from t1 where a > 3 and b = 4 order by a, c",
"select * from t2 where a = 1 and c = 2 order by b, d",
"select a, b, c from t1 where (a = 1 and b = 1 and c = 1) or (a = 1 and b = 1 and c = 2) order by c",
"select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c",
"select * from t2 where ((a = 1 and b = 1 and d < 3) or (a = 1 and b = 1 and d > 6)) and c = 3 order by d"
]
}
]
43 changes: 43 additions & 0 deletions planner/core/testdata/integration_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -1636,5 +1636,48 @@
]
}
]
},
{
"Name": "TestIsMatchProp",
"Cases": [
{
"SQL": "select a, b, c from t1 where a > 3 and b = 4 order by a, c",
"Plan": [
"IndexReader 3.33 root index:Selection",
"└─Selection 3.33 cop[tikv] eq(test.t1.b, 4)",
" └─IndexRangeScan 3333.33 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:(3,+inf], keep order:true, stats:pseudo"
]
},
{
"SQL": "select * from t2 where a = 1 and c = 2 order by b, d",
"Plan": [
"IndexReader 0.01 root index:Selection",
"└─Selection 0.01 cop[tikv] eq(test.t2.c, 2)",
" └─IndexRangeScan 10.00 cop[tikv] table:t2, index:idx_a_b_c_d(a, b, c, d) range:[1,1], keep order:true, stats:pseudo"
]
},
{
"SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c = 1) or (a = 1 and b = 1 and c = 2) order by c",
"Plan": [
"IndexReader 0.03 root index:IndexRangeScan",
"└─IndexRangeScan 0.03 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 1,1 1 2], keep order:true, stats:pseudo"
]
},
{
"SQL": "select a, b, c from t1 where (a = 1 and b = 1 and c < 3) or (a = 1 and b = 1 and c > 6) order by c",
"Plan": [
"IndexReader 0.67 root index:IndexRangeScan",
"└─IndexRangeScan 0.67 cop[tikv] table:t1, index:idx_a_b_c(a, b, c) range:[1 1 -inf,1 1 3), (1 1 6,1 1 +inf], keep order:true, stats:pseudo"
]
},
{
"SQL": "select * from t2 where ((a = 1 and b = 1 and d < 3) or (a = 1 and b = 1 and d > 6)) and c = 3 order by d",
"Plan": [
"IndexReader 0.00 root index:Selection",
"└─Selection 0.00 cop[tikv] eq(test.t2.c, 3), or(and(eq(test.t2.a, 1), and(eq(test.t2.b, 1), lt(test.t2.d, 3))), and(eq(test.t2.a, 1), and(eq(test.t2.b, 1), gt(test.t2.d, 6))))",
" └─IndexRangeScan 10.00 cop[tikv] table:t2, index:idx_a_b_c_d(a, b, c, d) range:[1,1], keep order:true, stats:pseudo"
]
}
]
}
]
4 changes: 3 additions & 1 deletion planner/util/path.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ type AccessPath struct {
FullIdxColLens []int
IdxCols []*expression.Column
IdxColLens []int
Ranges []*ranger.Range
// ConstantCols indicates whether the column is constant under the given conditions for all index columns.
ConstantCols []bool
Ranges []*ranger.Range
// CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
// For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters.
CountAfterAccess float64
Expand Down
Loading