Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner, statistics: fix the inconsistent est when table has no stats (#52427) #52614

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/planner/cardinality/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ go_test(
data = glob(["testdata/**"]),
embed = [":cardinality"],
flaky = True,
shard_count = 27,
shard_count = 28,
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
36 changes: 36 additions & 0 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1310,3 +1310,39 @@ func TestSubsetIdxCardinality(t *testing.T) {
testKit.MustQuery(input[i]).Check(testkit.Rows(output[i].Result...))
}
}

func TestBuiltinInEstWithoutStats(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
tk := testkit.NewTestKit(t, store)
h := dom.StatsHandle()

tk.MustExec("use test")
tk.MustExec("create table t(a int)")
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
tk.MustExec("insert into t values(1), (2), (3), (4), (5), (6), (7), (8), (9), (10)")
require.NoError(t, h.DumpStatsDeltaToKV(true))
is := dom.InfoSchema()
require.NoError(t, h.Update(is))

tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows(
"TableReader 0.08 root data:Selection",
"└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)",
" └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo",
))

h.Clear()
require.NoError(t, h.InitStatsLite(is))
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows(
"TableReader 0.08 root data:Selection",
"└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)",
" └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo",
))

h.Clear()
require.NoError(t, h.InitStats(is))
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows(
"TableReader 0.08 root data:Selection",
"└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)",
" └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo",
))
}
10 changes: 10 additions & 0 deletions pkg/planner/core/collect_column_stats_usage.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,21 @@ func (c *columnStatsUsageCollector) addHistNeededColumns(ds *DataSource) {
colIDSet := intset.NewFastIntSet()

for _, col := range columns {
// If the column is plan-generated one, Skip it.
// TODO: we may need to consider the ExtraHandle.
if col.ID < 0 {
continue
}
tblColID := model.TableItemID{TableID: ds.physicalTableID, ID: col.ID, IsIndex: false}
colIDSet.Insert(int(col.ID))
c.histNeededCols[tblColID] = true
}
for _, col := range ds.Columns {
// If the column is plan-generated one, Skip it.
// TODO: we may need to consider the ExtraHandle.
if col.ID < 0 {
continue
}
if !colIDSet.Has(int(col.ID)) && !col.Hidden {
tblColID := model.TableItemID{TableID: ds.physicalTableID, ID: col.ID, IsIndex: false}
if _, ok := c.histNeededCols[tblColID]; ok {
Expand Down
8 changes: 5 additions & 3 deletions pkg/statistics/handle/storage/read.go
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,9 @@ func CleanFakeItemsForShowHistInFlights(statsCache statstypes.StatsCache) int {
if item.IsIndex {
_, loadNeeded = tbl.IndexIsLoadNeeded(item.ID)
} else {
_, loadNeeded = tbl.ColumnIsLoadNeeded(item.ID, true)
var analyzed bool
_, loadNeeded, analyzed = tbl.ColumnIsLoadNeeded(item.ID, true)
loadNeeded = loadNeeded && analyzed
}
if !loadNeeded {
statistics.HistogramNeededItems.Delete(item)
Expand All @@ -589,8 +591,8 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S
return nil
}
var colInfo *model.ColumnInfo
_, loadNeeded := tbl.ColumnIsLoadNeeded(col.ID, true)
if !loadNeeded {
_, loadNeeded, analyzed := tbl.ColumnIsLoadNeeded(col.ID, true)
if !loadNeeded || !analyzed {
statistics.HistogramNeededItems.Delete(col)
return nil
}
Expand Down
18 changes: 16 additions & 2 deletions pkg/statistics/handle/syncload/stats_syncload.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ func (s *statsSyncLoad) removeHistLoadedColumns(neededItems []model.StatsLoadIte
}
continue
}
_, loadNeeded := tbl.ColumnIsLoadNeeded(item.ID, item.FullLoad)
_, loadNeeded, _ := tbl.ColumnIsLoadNeeded(item.ID, item.FullLoad)
if loadNeeded {
remainedItems = append(remainedItems, item)
}
Expand Down Expand Up @@ -270,7 +270,7 @@ func (s *statsSyncLoad) handleOneItemTask(sctx sessionctx.Context, task *statsty
wrapper.idxInfo = tbl.ColAndIdxExistenceMap.GetIndex(item.ID)
}
} else {
col, loadNeeded := tbl.ColumnIsLoadNeeded(item.ID, task.Item.FullLoad)
col, loadNeeded, analyzed := tbl.ColumnIsLoadNeeded(item.ID, task.Item.FullLoad)
if !loadNeeded {
return result, nil
}
Expand All @@ -279,6 +279,20 @@ func (s *statsSyncLoad) handleOneItemTask(sctx sessionctx.Context, task *statsty
} else {
wrapper.colInfo = tbl.ColAndIdxExistenceMap.GetCol(item.ID)
}
// If this column is not analyzed yet and we don't have it in memory.
// We create a fake one for the pseudo estimation.
if loadNeeded && !analyzed {
wrapper.col = &statistics.Column{
PhysicalID: item.TableID,
Info: wrapper.colInfo,
Histogram: *statistics.NewHistogram(item.ID, 0, 0, 0, &wrapper.colInfo.FieldType, 0, 0),
IsHandle: tbl.IsPkIsHandle && mysql.HasPriKeyFlag(wrapper.colInfo.GetFlag()),
}
if s.updateCachedItem(item, wrapper.col, wrapper.idx, task.Item.FullLoad) {
return result, nil
}
return nil, nil
}
}
t := time.Now()
needUpdate := false
Expand Down
22 changes: 17 additions & 5 deletions pkg/statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -626,25 +626,37 @@ func (t *Table) GetStatsHealthy() (int64, bool) {
// The Column should be visible in the table and really has analyzed statistics in the stroage.
// Also, if the stats has been loaded into the memory, we also don't need to load it.
// We return the Column together with the checking result, to avoid accessing the map multiple times.
func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool) {
// The first bool is whether we have it in memory. The second bool is whether this column has stats in the system table or not.
func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool) {
if t.Pseudo {
return nil, false, false
}
col, ok := t.Columns[id]
hasAnalyzed := t.ColAndIdxExistenceMap.HasAnalyzed(id, false)

// If it's not analyzed yet. Don't need to load it.
// If it's not analyzed yet.
if !hasAnalyzed {
return nil, false
// If we don't have it in memory, we create a fake hist for pseudo estimation (see handleOneItemTask()).
if !ok {
// If we don't have this column. We skip it.
// It's something ridiculous. But it's possible that the stats don't have some ColumnInfo.
// We need to find a way to maintain it more correctly.
return nil, t.ColAndIdxExistenceMap.Has(id, false), false
}
// Otherwise we don't need to load it.
return nil, false, false
}

// Restore the condition from the simplified form:
// 1. !ok && hasAnalyzed => need load
// 2. ok && hasAnalyzed && fullLoad && !col.IsFullLoad => need load
// 3. ok && hasAnalyzed && !fullLoad && !col.statsInitialized => need load
if !ok || (fullLoad && !col.IsFullLoad()) || (!fullLoad && !col.statsInitialized) {
return col, true
return col, true, true
}

// Otherwise don't need load it.
return col, false
return col, false, true
}

// IndexIsLoadNeeded checks whether the index needs trigger the async/sync load.
Expand Down