Skip to content

Commit

Permalink
planner: avoid double scan for index prefix col is (not) null (#38555)
Browse files Browse the repository at this point in the history
ref #21145
  • Loading branch information
xuyifangreeneyes authored Oct 24, 2022
1 parent 7596b03 commit 64051f9
Show file tree
Hide file tree
Showing 16 changed files with 599 additions and 116 deletions.
14 changes: 14 additions & 0 deletions executor/set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,20 @@ func TestSetVar(t *testing.T) {
tk.MustExec("set global tidb_auto_analyze_partition_batch_size = 9999")
tk.MustQuery("select @@global.tidb_auto_analyze_partition_batch_size").Check(testkit.Rows("1024")) // max value is 1024

// test variable 'tidb_opt_prefix_index_single_scan'
// global scope
tk.MustQuery("select @@global.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1")) // default value
tk.MustExec("set global tidb_opt_prefix_index_single_scan = 0")
tk.MustQuery("select @@global.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("0"))
tk.MustExec("set global tidb_opt_prefix_index_single_scan = 1")
tk.MustQuery("select @@global.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1"))
// session scope
tk.MustQuery("select @@session.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1")) // default value
tk.MustExec("set session tidb_opt_prefix_index_single_scan = 0")
tk.MustQuery("select @@session.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("0"))
tk.MustExec("set session tidb_opt_prefix_index_single_scan = 1")
tk.MustQuery("select @@session.tidb_opt_prefix_index_single_scan").Check(testkit.Rows("1"))

// test tidb_opt_range_max_size
tk.MustQuery("select @@tidb_opt_range_max_size").Check(testkit.Rows("67108864"))
tk.MustExec("set global tidb_opt_range_max_size = -1")
Expand Down
2 changes: 1 addition & 1 deletion planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -1148,7 +1148,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
cop.commonHandleCols = ds.commonHandleCols
}
is.initSchema(append(path.FullIdxCols, ds.commonHandleCols...), cop.tablePlan != nil)
indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens)
if maxOneRow {
// Theoretically, this line is unnecessary because row count estimation of join should guarantee rowCount is not larger
// than 1.0; however, there may be rowCount larger than 1.0 in reality, e.g, pseudo statistics cases, which does not reflect
Expand Down
93 changes: 73 additions & 20 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -1118,7 +1118,7 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c

func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, path *util.AccessPath) (indexPlan PhysicalPlan) {
is := ds.getOriginalPhysicalIndexScan(prop, path, false, false)
// TODO: Consider using isCoveringIndex() to avoid another TableRead
// TODO: Consider using isIndexCoveringColumns() to avoid another TableRead
indexConds := path.IndexFilters
if indexConds != nil {
var selectivity float64
Expand Down Expand Up @@ -1281,33 +1281,80 @@ func extractFiltersForIndexMerge(sc *stmtctx.StatementContext, client kv.Client,
return
}

func indexCoveringCol(col *expression.Column, indexCols []*expression.Column, idxColLens []int) bool {
func isIndexColsCoveringCol(col *expression.Column, indexCols []*expression.Column, idxColLens []int, ignoreLen bool) bool {
for i, indexCol := range indexCols {
isFullLen := idxColLens[i] == types.UnspecifiedLength || idxColLens[i] == col.RetType.GetFlen()
if indexCol != nil && col.EqualByExprAndID(nil, indexCol) && isFullLen {
if indexCol == nil || !col.EqualByExprAndID(nil, indexCol) {
continue
}
if ignoreLen || idxColLens[i] == types.UnspecifiedLength || idxColLens[i] == col.RetType.GetFlen() {
return true
}
}
return false
}

func (ds *DataSource) isCoveringIndex(columns, indexColumns []*expression.Column, idxColLens []int, tblInfo *model.TableInfo) bool {
// indexCoveringColumn reports whether column can be read back directly from the
// index described by indexColumns/idxColLens (or from the clustered-index common
// handle) without an extra table lookup. When ignoreLen is true, a prefix index
// column is accepted even though it stores only a prefix of the value.
func (ds *DataSource) indexCoveringColumn(column *expression.Column, indexColumns []*expression.Column, idxColLens []int, ignoreLen bool) bool {
	// An integer primary key used as the row handle is always available.
	if ds.tableInfo.PKIsHandle && mysql.HasPriKeyFlag(column.RetType.GetFlag()) {
		return true
	}
	// The implicit _tidb_rowid handle column is likewise always available.
	if column.ID == model.ExtraHandleID {
		return true
	}
	byIndex := isIndexColsCoveringCol(column, indexColumns, idxColLens, ignoreLen)
	byCommonHandle := isIndexColsCoveringCol(column, ds.commonHandleCols, ds.commonHandleLens, ignoreLen)
	switch {
	case byIndex:
		return true
	case !byCommonHandle:
		return false
	}
	// Covered only by the common (clustered) handle. With new collation enabled,
	// a version-0 common handle is treated as unable to restore a non-binary
	// string column, so the column is not considered covered in that case.
	if collate.NewCollationEnabled() &&
		column.GetType().EvalType() == types.ETString &&
		!mysql.HasBinaryFlag(column.GetType().GetFlag()) &&
		ds.table.Meta().CommonHandleVersion == 0 {
		return false
	}
	return true
}

func (ds *DataSource) isIndexCoveringColumns(columns, indexColumns []*expression.Column, idxColLens []int) bool {
for _, col := range columns {
if tblInfo.PKIsHandle && mysql.HasPriKeyFlag(col.RetType.GetFlag()) {
continue
if !ds.indexCoveringColumn(col, indexColumns, idxColLens, false) {
return false
}
if col.ID == model.ExtraHandleID {
continue
}
return true
}

func (ds *DataSource) isIndexCoveringCondition(condition expression.Expression, indexColumns []*expression.Column, idxColLens []int) bool {
switch v := condition.(type) {
case *expression.Column:
return ds.indexCoveringColumn(v, indexColumns, idxColLens, false)
case *expression.ScalarFunction:
// Even if the index only contains prefix `col`, the index can cover `col is null`.
if v.FuncName.L == ast.IsNull {
if col, ok := v.GetArgs()[0].(*expression.Column); ok {
return ds.indexCoveringColumn(col, indexColumns, idxColLens, true)
}
}
coveredByPlainIndex := indexCoveringCol(col, indexColumns, idxColLens)
coveredByClusteredIndex := indexCoveringCol(col, ds.commonHandleCols, ds.commonHandleLens)
if !coveredByPlainIndex && !coveredByClusteredIndex {
return false
for _, arg := range v.GetArgs() {
if !ds.isIndexCoveringCondition(arg, indexColumns, idxColLens) {
return false
}
}
isClusteredNewCollationIdx := collate.NewCollationEnabled() &&
col.GetType().EvalType() == types.ETString &&
!mysql.HasBinaryFlag(col.GetType().GetFlag())
if !coveredByPlainIndex && coveredByClusteredIndex && isClusteredNewCollationIdx && ds.table.Meta().CommonHandleVersion == 0 {
return true
}
return true
}

func (ds *DataSource) isSingleScan(indexColumns []*expression.Column, idxColLens []int) bool {
if !ds.ctx.GetSessionVars().OptPrefixIndexSingleScan || ds.colsRequiringFullLen == nil {
// ds.colsRequiringFullLen is set at (*DataSource).PruneColumns. In some cases we don't reach (*DataSource).PruneColumns
// and ds.colsRequiringFullLen is nil, so we fall back to ds.isIndexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens).
return ds.isIndexCoveringColumns(ds.schema.Columns, indexColumns, idxColLens)
}
if !ds.isIndexCoveringColumns(ds.colsRequiringFullLen, indexColumns, idxColLens) {
return false
}
for _, cond := range ds.allConds {
if !ds.isIndexCoveringCondition(cond, indexColumns, idxColLens) {
return false
}
}
Expand Down Expand Up @@ -1575,11 +1622,17 @@ func matchIndicesProp(idxCols []*expression.Column, colLens []int, propItems []p
return true
}

func (ds *DataSource) splitIndexFilterConditions(conditions []expression.Expression, indexColumns []*expression.Column, idxColLens []int,
table *model.TableInfo) (indexConds, tableConds []expression.Expression) {
func (ds *DataSource) splitIndexFilterConditions(conditions []expression.Expression, indexColumns []*expression.Column,
idxColLens []int) (indexConds, tableConds []expression.Expression) {
var indexConditions, tableConditions []expression.Expression
for _, cond := range conditions {
if ds.isCoveringIndex(expression.ExtractColumns(cond), indexColumns, idxColLens, table) {
var covered bool
if ds.ctx.GetSessionVars().OptPrefixIndexSingleScan {
covered = ds.isIndexCoveringCondition(cond, indexColumns, idxColLens)
} else {
covered = ds.isIndexCoveringColumns(expression.ExtractColumns(cond), indexColumns, idxColLens)
}
if covered {
indexConditions = append(indexConditions, cond)
} else {
tableConditions = append(tableConditions, cond)
Expand Down
57 changes: 57 additions & 0 deletions planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7528,6 +7528,10 @@ func TestCorColRangeWithRangeMaxSize(t *testing.T) {
tk.MustExec("insert into t3 values (2), (4)")
tk.MustExec("insert into mysql.opt_rule_blacklist value(\"decorrelate\")")
tk.MustExec("admin reload opt_rule_blacklist")
defer func() {
tk.MustExec("delete from mysql.opt_rule_blacklist where name = \"decorrelate\"")
tk.MustExec("admin reload opt_rule_blacklist")
}()

// Correlated column in index range.
tk.MustExec("set @@tidb_opt_range_max_size=1000")
Expand Down Expand Up @@ -7693,3 +7697,56 @@ func TestOuterJoinEliminationForIssue18216(t *testing.T) {
tk.MustExec("select group_concat(c order by (select group_concat(c order by a) from t2 where a=t1.a)) from t1; ")
tk.MustQuery("select group_concat(c order by (select group_concat(c order by c) from t2 where a=t1.a), c desc) from t1;").Check(testkit.Rows("2,1,4,3"))
}

// TestNullConditionForPrefixIndex checks that, with tidb_opt_prefix_index_single_scan
// enabled, IS NULL / IS NOT NULL filters on a prefix-indexed column are answered by a
// single index scan (plans and results are pinned via the integration suite test data),
// and that such a plan is reusable from the prepared-plan cache.
func TestNullConditionForPrefixIndex(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	// idx2 contains a prefix column c2(5); idx1 is a full-length index for contrast.
	tk.MustExec(`CREATE TABLE t1 (
  id char(1) DEFAULT NULL,
  c1 varchar(255) DEFAULT NULL,
  c2 text DEFAULT NULL,
  KEY idx1 (c1),
  KEY idx2 (c1,c2(5))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin`)
	tk.MustExec("create table t2(a int, b varchar(10), index idx(b(5)))")
	tk.MustExec("set tidb_opt_prefix_index_single_scan = 1")
	// Rows cover NOT NULL, empty string, and NULL values for the prefix column.
	tk.MustExec("insert into t1 values ('a', '0xfff', '111111'), ('b', '0xfff', '222222'), ('c', '0xfff', ''), ('d', '0xfff', null)")
	tk.MustExec("insert into t2 values (1, 'aaaaaa'), (2, 'bbb'), (3, ''), (4, null)")

	var input []string
	var output []struct {
		SQL    string
		Plan   []string
		Result []string
	}
	// Cases live in integration_suite_in.json ("TestNullConditionForPrefixIndex");
	// expected plans/results are (re)recorded via testdata.OnRecord.
	integrationSuiteData := core.GetIntegrationSuiteData()
	integrationSuiteData.LoadTestCases(t, &input, &output)
	for i, tt := range input {
		testdata.OnRecord(func() {
			output[i].SQL = tt
			output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery("explain format='brief' " + tt).Rows())
			output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(tt).Sort().Rows())
		})
		tk.MustQuery("explain format='brief' " + tt).Check(testkit.Rows(output[i].Plan...))
		tk.MustQuery(tt).Sort().Check(testkit.Rows(output[i].Result...))
	}

	// test plan cache
	tk.MustExec(`set tidb_enable_prepared_plan_cache=1`)
	tk.MustExec("set @@tidb_enable_collect_execution_info=0")
	tk.MustExec("prepare stmt from 'select count(1) from t1 where c1 = ? and c2 is not null'")
	tk.MustExec("set @a = '0xfff'")
	tk.MustQuery("execute stmt using @a").Check(testkit.Rows("3"))
	// Second execution should hit the plan cache.
	tk.MustQuery("execute stmt using @a").Check(testkit.Rows("3"))
	tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("1"))
	tk.MustQuery("execute stmt using @a").Check(testkit.Rows("3"))
	// Verify the cached plan is index-only (IndexReader over idx2, no table lookup).
	tkProcess := tk.Session().ShowProcess()
	ps := []*util.ProcessInfo{tkProcess}
	tk.Session().SetSessionManager(&testkit.MockSessionManager{PS: ps})
	tk.MustQuery(fmt.Sprintf("explain for connection %d", tkProcess.ID)).Check(testkit.Rows(
		"StreamAgg_18 1.00 root  funcs:count(Column#7)->Column#5",
		"└─IndexReader_19 1.00 root  index:StreamAgg_9",
		"  └─StreamAgg_9 1.00 cop[tikv]  funcs:count(1)->Column#7",
		"    └─IndexRangeScan_17 99.90 cop[tikv] table:t1, index:idx2(c1, c2) range:[\"0xfff\" -inf,\"0xfff\" +inf], keep order:false, stats:pseudo"))
}
8 changes: 6 additions & 2 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -1220,6 +1220,10 @@ type DataSource struct {
// contain unique index and the first field is tidb_shard(),
// such as (tidb_shard(a), a ...), the fields are more than 2
containExprPrefixUk bool

// colsRequiringFullLen is the columns that must be fetched with full length.
// It is used to decide whether single scan is enough when reading from an index.
colsRequiringFullLen []*expression.Column
}

// ExtractCorrelatedCols implements LogicalPlan interface.
Expand Down Expand Up @@ -1343,7 +1347,7 @@ func (ds *DataSource) Convert2Gathers() (gathers []LogicalPlan) {
path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.Index)
path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.schema.Columns, path.Index)
// If index columns can cover all of the needed columns, we can use a IndexGather + IndexScan.
if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) {
if ds.isSingleScan(path.FullIdxCols, path.FullIdxColLens) {
gathers = append(gathers, ds.buildIndexGather(path))
}
// TODO: If index columns can not cover the schema, use IndexLookUpGather.
Expand Down Expand Up @@ -1548,7 +1552,7 @@ func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, _ []expression
}
}
var indexFilters []expression.Expression
indexFilters, path.TableFilters = ds.splitIndexFilterConditions(path.TableFilters, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
indexFilters, path.TableFilters = ds.splitIndexFilterConditions(path.TableFilters, path.FullIdxCols, path.FullIdxColLens)
path.IndexFilters = append(path.IndexFilters, indexFilters...)
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
Expand Down
8 changes: 8 additions & 0 deletions planner/core/rule_column_pruning.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,14 @@ func (ds *DataSource) PruneColumns(parentUsedCols []*expression.Column, opt *log

originSchemaColumns := ds.schema.Columns
originColumns := ds.Columns

ds.colsRequiringFullLen = make([]*expression.Column, 0, len(used))
for i, col := range ds.schema.Columns {
if used[i] || (ds.containExprPrefixUk && expression.GcColumnExprIsTidbShard(col.VirtualExpr)) {
ds.colsRequiringFullLen = append(ds.colsRequiringFullLen, col)
}
}

for i := len(used) - 1; i >= 0; i-- {
if !used[i] && !exprUsed[i] {
// If ds has a shard index, and the column is generated column by `tidb_shard()`
Expand Down
2 changes: 1 addition & 1 deletion planner/core/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func (ds *DataSource) derivePathStatsAndTryHeuristics() error {
path.IsSingleScan = true
} else {
ds.deriveIndexPathStats(path, ds.pushedDownConds, false)
path.IsSingleScan = ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
path.IsSingleScan = ds.isSingleScan(path.FullIdxCols, path.FullIdxColLens)
}
// Try some heuristic rules to select access path.
if len(path.Ranges) == 0 {
Expand Down
17 changes: 17 additions & 0 deletions planner/core/testdata/integration_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -1011,5 +1011,22 @@
"set @@tidb_opt_range_max_size = 300",
"explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.e where t1.b > t2.f and t1.b < t2.f + 10"
]
},
{
"name": "TestNullConditionForPrefixIndex",
"cases": [
"select count(1) from t1 where c1 = '0xfff' and c2 is not null",
"select count(1) from t1 where c1 = '0xfff' and c2 is null",
"select count(1) from t1 where c1 >= '0xfff' and c2 is not null",
"select count(1) from t1 where c1 >= '0xfff' and c2 is null",
"select count(1) from t1 where c1 = '0xfff' and (c2 + 1) is not null",
"select count(1) from t1 where c1 = '0xfff' and (c2 + 1) is null",
"select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is not null",
"select c2 from t1 use index(idx2) where c1 = '0xfff' and c2 is null",
"select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is not null",
"select c2 from t1 use index(idx2) where c1 >= '0xfff' and c2 is null",
"select b from t2 use index(idx) where b is not null",
"select b from t2 use index(idx) where b is null"
]
}
]
Loading

0 comments on commit 64051f9

Please sign in to comment.