Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: update the scan-row-size calculation formula in model2 #38968

Merged
merged 5 commits into from
Nov 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions planner/core/plan_cost_ver1.go
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint
sessVars := p.ctx.GetSessionVars()
oomUseTmpStorage := variable.EnableTmpStorageOnOOM.Load()
memQuota := sessVars.MemTracker.GetBytesLimit() // sessVars.MemQuotaQuery && hint
rowSize := getAvgRowSize(build.statsInfo(), build.Schema())
rowSize := getAvgRowSize(build.statsInfo(), build.Schema().Columns)
spill := oomUseTmpStorage && memQuota > 0 && rowSize*buildCnt > float64(memQuota) && p.storeTp != kv.TiFlash
// Cost of building hash table.
cpuFactor := sessVars.GetCPUFactor()
Expand Down Expand Up @@ -1049,7 +1049,7 @@ func (p *PhysicalSort) GetCost(count float64, schema *expression.Schema) float64

oomUseTmpStorage := variable.EnableTmpStorageOnOOM.Load()
memQuota := sessVars.MemTracker.GetBytesLimit() // sessVars.MemQuotaQuery && hint
rowSize := getAvgRowSize(p.statsInfo(), schema)
rowSize := getAvgRowSize(p.statsInfo(), schema.Columns)
spill := oomUseTmpStorage && memQuota > 0 && rowSize*count > float64(memQuota)
diskCost := count * sessVars.GetDiskFactor() * rowSize
if !spill {
Expand Down
32 changes: 19 additions & 13 deletions planner/core/plan_cost_ver2.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func (p *PhysicalIndexScan) getPlanCostVer2(taskType property.TaskType, option *
}

rows := getCardinality(p, option.CostFlag)
rowSize := math.Max(p.getScanRowSize(), 2.0)
rowSize := math.Max(getAvgRowSize(p.stats, p.schema.Columns), 2.0) // consider all index columns
scanFactor := getTaskScanFactorVer2(p, taskType)

p.planCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
Expand All @@ -132,7 +132,13 @@ func (p *PhysicalTableScan) getPlanCostVer2(taskType property.TaskType, option *
}

rows := getCardinality(p, option.CostFlag)
rowSize := math.Max(p.getScanRowSize(), 2.0)
var rowSize float64
if p.StoreType == kv.TiKV {
rowSize = getAvgRowSize(p.stats, p.tblCols) // consider all columns if TiKV
} else { // TiFlash
rowSize = getAvgRowSize(p.stats, p.schema.Columns)
}
rowSize = math.Max(rowSize, 2.0)
scanFactor := getTaskScanFactorVer2(p, taskType)

p.planCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
Expand All @@ -155,7 +161,7 @@ func (p *PhysicalIndexReader) getPlanCostVer2(taskType property.TaskType, option
}

rows := getCardinality(p.indexPlan, option.CostFlag)
rowSize := getAvgRowSize(p.indexPlan.Stats(), p.indexPlan.Schema())
rowSize := getAvgRowSize(p.indexPlan.Stats(), p.indexPlan.Schema().Columns)
netFactor := getTaskNetFactorVer2(p, taskType)
concurrency := float64(p.ctx.GetSessionVars().DistSQLScanConcurrency())

Expand All @@ -180,7 +186,7 @@ func (p *PhysicalTableReader) getPlanCostVer2(taskType property.TaskType, option
}

rows := getCardinality(p.tablePlan, option.CostFlag)
rowSize := getAvgRowSize(p.tablePlan.Stats(), p.tablePlan.Schema())
rowSize := getAvgRowSize(p.tablePlan.Stats(), p.tablePlan.Schema().Columns)
netFactor := getTaskNetFactorVer2(p, taskType)
concurrency := float64(p.ctx.GetSessionVars().DistSQLScanConcurrency())
childType := property.CopSingleReadTaskType
Expand Down Expand Up @@ -284,7 +290,7 @@ func (p *PhysicalIndexMergeReader) getPlanCostVer2(taskType property.TaskType, o
var tableSideCost costVer2
if tablePath := p.tablePlan; tablePath != nil {
rows := getCardinality(tablePath, option.CostFlag)
rowSize := getAvgRowSize(tablePath.Stats(), tablePath.Schema())
rowSize := getAvgRowSize(tablePath.Stats(), tablePath.Schema().Columns)

tableNetCost := netCostVer2(option, rows, rowSize, netFactor)
tableChildCost, err := tablePath.getPlanCostVer2(taskType, option)
Expand All @@ -297,7 +303,7 @@ func (p *PhysicalIndexMergeReader) getPlanCostVer2(taskType property.TaskType, o
indexSideCost := make([]costVer2, 0, len(p.partialPlans))
for _, indexPath := range p.partialPlans {
rows := getCardinality(indexPath, option.CostFlag)
rowSize := getAvgRowSize(indexPath.Stats(), indexPath.Schema())
rowSize := getAvgRowSize(indexPath.Stats(), indexPath.Schema().Columns)

indexNetCost := netCostVer2(option, rows, rowSize, netFactor)
indexChildCost, err := indexPath.getPlanCostVer2(taskType, option)
Expand Down Expand Up @@ -329,7 +335,7 @@ func (p *PhysicalSort) getPlanCostVer2(taskType property.TaskType, option *PlanC
}

rows := math.Max(getCardinality(p.children[0], option.CostFlag), 1)
rowSize := getAvgRowSize(p.statsInfo(), p.Schema())
rowSize := getAvgRowSize(p.statsInfo(), p.Schema().Columns)
cpuFactor := getTaskCPUFactorVer2(p, taskType)
memFactor := getTaskMemFactorVer2(p, taskType)
diskFactor := defaultVer2Factors.TiDBDisk
Expand Down Expand Up @@ -378,7 +384,7 @@ func (p *PhysicalTopN) getPlanCostVer2(taskType property.TaskType, option *PlanC

rows := getCardinality(p.children[0], option.CostFlag)
N := math.Max(1, float64(p.Count+p.Offset))
rowSize := getAvgRowSize(p.statsInfo(), p.Schema())
rowSize := getAvgRowSize(p.statsInfo(), p.Schema().Columns)
cpuFactor := getTaskCPUFactorVer2(p, taskType)
memFactor := getTaskMemFactorVer2(p, taskType)

Expand Down Expand Up @@ -429,7 +435,7 @@ func (p *PhysicalHashAgg) getPlanCostVer2(taskType property.TaskType, option *Pl

inputRows := getCardinality(p.children[0], option.CostFlag)
outputRows := getCardinality(p, option.CostFlag)
outputRowSize := getAvgRowSize(p.Stats(), p.Schema())
outputRowSize := getAvgRowSize(p.Stats(), p.Schema().Columns)
cpuFactor := getTaskCPUFactorVer2(p, taskType)
memFactor := getTaskMemFactorVer2(p, taskType)
concurrency := float64(p.ctx.GetSessionVars().HashAggFinalConcurrency())
Expand Down Expand Up @@ -501,7 +507,7 @@ func (p *PhysicalHashJoin) getPlanCostVer2(taskType property.TaskType, option *P
}
buildRows := getCardinality(build, option.CostFlag)
probeRows := getCardinality(probe, option.CostFlag)
buildRowSize := getAvgRowSize(build.Stats(), build.Schema())
buildRowSize := getAvgRowSize(build.Stats(), build.Schema().Columns)
tidbConcurrency := float64(p.Concurrency)
mppConcurrency := float64(3) // TODO: remove this empirical value
cpuFactor := getTaskCPUFactorVer2(p, taskType)
Expand Down Expand Up @@ -645,7 +651,7 @@ func (p *PhysicalExchangeReceiver) getPlanCostVer2(taskType property.TaskType, o
}

rows := getCardinality(p, option.CostFlag)
rowSize := getAvgRowSize(p.stats, p.Schema())
rowSize := getAvgRowSize(p.stats, p.Schema().Columns)
netFactor := getTaskNetFactorVer2(p, taskType)
isBCast := false
if sender, ok := p.children[0].(*PhysicalExchangeSender); ok {
Expand Down Expand Up @@ -678,7 +684,7 @@ func (p *PointGetPlan) getPlanCostVer2(taskType property.TaskType, option *PlanC
p.planCostInit = true
return zeroCostVer2, nil
}
rowSize := getAvgRowSize(p.stats, p.schema)
rowSize := getAvgRowSize(p.stats, p.schema.Columns)
netFactor := getTaskNetFactorVer2(p, taskType)

p.planCostVer2 = netCostVer2(option, 1, rowSize, netFactor)
Expand All @@ -698,7 +704,7 @@ func (p *BatchPointGetPlan) getPlanCostVer2(taskType property.TaskType, option *
return zeroCostVer2, nil
}
rows := getCardinality(p, option.CostFlag)
rowSize := getAvgRowSize(p.stats, p.schema)
rowSize := getAvgRowSize(p.stats, p.schema.Columns)
netFactor := getTaskNetFactorVer2(p, taskType)

p.planCostVer2 = netCostVer2(option, rows, rowSize, netFactor)
Expand Down
44 changes: 42 additions & 2 deletions planner/core/plan_cost_ver2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,15 +142,55 @@ func TestCostModelShowFormula(t *testing.T) {
actual := make([][]interface{}, 0, len(plan))
for _, row := range plan {
actual = append(actual, []interface{}{row[0], row[3]}) // id,costFormula
fmt.Println(actual)
}
require.Equal(t, actual, [][]interface{}{
{"TableReader_7", "((Selection_6) + (net(2*rowsize(16)*tidb_kv_net_factor(3.96))))/15"},
{"└─Selection_6", "(cpu(3*filters(1)*tikv_cpu_factor(49.9))) + (TableFullScan_5)"},
{" └─TableFullScan_5", "scan(3*logrowsize(29)*tikv_scan_factor(40.7))"},
{" └─TableFullScan_5", "scan(3*logrowsize(32)*tikv_scan_factor(40.7))"},
})
}

// TestCostModelVer2ScanRowSize verifies the row size that appears in the scan
// cost formula under cost model version 2.
//
// For every case it runs `explain analyze format=true_card_cost` on the query,
// takes the last row of the result and compares that row's fourth column with
// the expected scan formula. It then checks, via plain `explain`, which index
// is picked when no index hint is given.
func TestCostModelVer2ScanRowSize(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec(`create table t (pk int, a int, b int, c int, d int, primary key(pk), index ab(a, b), index abc(a, b, c))`)
	tk.MustExec("insert into t values (1, 1, 1, 1, 1)")
	tk.MustExec(`set @@tidb_cost_model_version=2`)

	cases := []struct {
		query       string
		scanFormula string
	}{
		// A scan on index ab reports the same row size no matter which of its columns are selected.
		{"select a from t use index(ab) where a=1", "scan(1*logrowsize(32)*tikv_scan_factor(40.7))"},
		{"select a, b from t use index(ab) where a=1", "scan(1*logrowsize(32)*tikv_scan_factor(40.7))"},
		{"select b from t use index(ab) where a=1 and b=1", "scan(1*logrowsize(32)*tikv_scan_factor(40.7))"},
		// A scan on index abc reports the same row size no matter which of its columns are selected.
		{"select a from t use index(abc) where a=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
		{"select a from t use index(abc) where a=1 and b=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
		{"select a, b from t use index(abc) where a=1 and b=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
		{"select a, b, c from t use index(abc) where a=1 and b=1 and c=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
		// A table scan reports the same row size no matter which columns are selected.
		{"select a from t use index(primary) where a=1", "scan(1*logrowsize(80)*tikv_scan_factor(40.7))"},
		{"select a, d from t use index(primary) where a=1", "scan(1*logrowsize(80)*tikv_scan_factor(40.7))"},
		{"select * from t use index(primary) where a=1", "scan(1*logrowsize(80)*tikv_scan_factor(40.7))"},
	}
	for _, c := range cases {
		rs := tk.MustQuery("explain analyze format=true_card_cost " + c.query).Rows()
		// Fail with a readable message instead of an index-out-of-range panic
		// if the statement unexpectedly returns no rows.
		require.NotEmpty(t, rs, "query: %s", c.query)
		scan := rs[len(rs)-1]
		formula := scan[3]
		// testify expects (expected, actual); keeping that order makes the
		// failure diff label the two values correctly.
		require.Equal(t, c.scanFormula, formula, "query: %s", c.query)
	}

	tk.MustQuery("explain select a from t where a=1").Check(testkit.Rows(
		`IndexReader_6 10.00 root index:IndexRangeScan_5`, // index ab is chosen without a hint; per the cases above it has the smallest scan row size.
		`└─IndexRangeScan_5 10.00 cop[tikv] table:t, index:ab(a, b) range:[1,1], keep order:false, stats:pseudo`))
	tk.MustQuery("explain select a, b, c from t where a=1").Check(testkit.Rows(
		`IndexReader_6 10.00 root index:IndexRangeScan_5`, // index abc is chosen without a hint.
		`└─IndexRangeScan_5 10.00 cop[tikv] table:t, index:abc(a, b, c) range:[1,1], keep order:false, stats:pseudo`))
}

func TestCostModelTraceVer2(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
Expand Down
5 changes: 2 additions & 3 deletions planner/core/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,12 +297,11 @@ func (p *PhysicalIndexJoin) attach2Task(tasks ...task) task {
return t
}

func getAvgRowSize(stats *property.StatsInfo, schema *expression.Schema) (size float64) {
func getAvgRowSize(stats *property.StatsInfo, cols []*expression.Column) (size float64) {
if stats.HistColl != nil {
size = stats.HistColl.GetAvgRowSizeListInDisk(schema.Columns)
size = stats.HistColl.GetAvgRowSizeListInDisk(cols)
} else {
// Estimate using just the type info.
cols := schema.Columns
for _, col := range cols {
size += float64(chunk.EstimateTypeWidth(col.GetType()))
}
Expand Down
Loading