Skip to content

Commit

Permalink
statistics: fix sync load fails after disabling lite init stats (#54531)
Browse files Browse the repository at this point in the history
close #54532
  • Loading branch information
hawkingrei authored Aug 27, 2024
1 parent 94666a2 commit 87244ed
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 38 deletions.
13 changes: 13 additions & 0 deletions pkg/parser/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -834,6 +834,19 @@ func (t *TableInfo) Cols() []*ColumnInfo {
return publicColumns[0 : maxOffset+1]
}

// GetColumnByID returns the column in t.Columns whose ID equals id.
// Only columns whose State is StatePublic are considered; columns in any
// other state are skipped even if their ID matches. It returns nil when no
// public column carries the requested ID.
func (t *TableInfo) GetColumnByID(id int64) *ColumnInfo {
	for i := range t.Columns {
		if c := t.Columns[i]; c.State == StatePublic && c.ID == id {
			return c
		}
	}
	return nil
}

// FindIndexByName finds index by name.
func (t *TableInfo) FindIndexByName(idxName string) *IndexInfo {
for _, idx := range t.Indices {
Expand Down
42 changes: 27 additions & 15 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1315,31 +1315,43 @@ func TestBuiltinInEstWithoutStats(t *testing.T) {
h := dom.StatsHandle()

tk.MustExec("use test")
tk.MustExec("create table t(a int)")
tk.MustExec("create table t(a int, b int)")
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
tk.MustExec("insert into t values(1), (2), (3), (4), (5), (6), (7), (8), (9), (10)")
tk.MustExec("insert into t values(1,1), (2,2), (3,3), (4,4), (5,5), (6,6), (7,7), (8,8), (9,9), (10,10)")
require.NoError(t, h.DumpStatsDeltaToKV(true))
is := dom.InfoSchema()
require.NoError(t, h.Update(context.Background(), is))

tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows(
expectedA := testkit.Rows(
"TableReader 0.08 root data:Selection",
"└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)",
" └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo",
))
)
expectedB := testkit.Rows(
"TableReader 0.08 root data:Selection",
"└─Selection 0.08 cop[tikv] in(test.t.b, 1, 2, 3, 4, 5, 6, 7, 8)",
" └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo",
)
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedA)
// try again with other column
tk.MustQuery("explain format='brief' select * from t where b in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedB)

h.Clear()
require.NoError(t, h.InitStatsLite(context.Background(), is))
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows(
"TableReader 0.08 root data:Selection",
"└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)",
" └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo",
))
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedA)
tk.MustQuery("explain format='brief' select * from t where b in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedB)

h.Clear()
require.NoError(t, h.InitStats(context.Background(), is))
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows(
"TableReader 8.00 root data:Selection",
"└─Selection 8.00 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)",
" └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo",
))
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedA)
tk.MustQuery("explain format='brief' select * from t where b in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedB)
require.NoError(t, h.Update(context.Background(), is))
tbl, err := is.TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t"))
require.NoError(t, err)
statsTbl, found := h.Get(tbl.Meta().ID)
require.True(t, found)
require.False(t, statsTbl.ColAndIdxExistenceMap.IsEmpty())
for _, col := range tbl.Cols() {
require.True(t, statsTbl.ColAndIdxExistenceMap.Has(col.ID, false))
require.False(t, statsTbl.ColAndIdxExistenceMap.HasAnalyzed(col.ID, false))
}
}
32 changes: 25 additions & 7 deletions pkg/statistics/handle/storage/read.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/config"
"github.com/pingcap/tidb/pkg/infoschema"
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/model"
Expand Down Expand Up @@ -559,14 +560,14 @@ func LoadHistogram(sctx sessionctx.Context, tableID int64, isIndex int, histID i
}

// LoadNeededHistograms will load histograms for those needed columns/indices.
func LoadNeededHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, loadFMSketch bool) (err error) {
func LoadNeededHistograms(sctx sessionctx.Context, statsHandle statstypes.StatsHandle, loadFMSketch bool) (err error) {
items := asyncload.AsyncLoadHistogramNeededItems.AllItems()
for _, item := range items {
if !item.IsIndex {
err = loadNeededColumnHistograms(sctx, statsCache, item.TableItemID, loadFMSketch, item.FullLoad)
err = loadNeededColumnHistograms(sctx, statsHandle, item.TableItemID, loadFMSketch, item.FullLoad)
} else {
// Index is always full load.
err = loadNeededIndexHistograms(sctx, statsCache, item.TableItemID, loadFMSketch)
err = loadNeededIndexHistograms(sctx, statsHandle, item.TableItemID, loadFMSketch)
}
if err != nil {
return err
Expand Down Expand Up @@ -602,8 +603,8 @@ func CleanFakeItemsForShowHistInFlights(statsCache statstypes.StatsCache) int {
return reallyNeeded
}

func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) {
tbl, ok := statsCache.Get(col.TableID)
func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes.StatsHandle, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) {
tbl, ok := statsHandle.Get(col.TableID)
if !ok {
return nil
}
Expand All @@ -613,7 +614,19 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
return nil
}
isUpdateColAndIdxExistenceMap := false
colInfo = tbl.ColAndIdxExistenceMap.GetCol(col.ID)
if colInfo == nil {
// Currently, the column info in the ColAndIdxExistenceMap cannot be initialized when lite-init-stats is disabled,
// so we have to get the column info from the domain.
is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
tblInfo, ok := statsHandle.TableInfoByID(is, col.TableID)
if !ok {
return nil
}
colInfo = tblInfo.Meta().GetColumnByID(col.ID)
isUpdateColAndIdxExistenceMap = true
}
hg, _, statsVer, _, err := HistMetaFromStorageWithHighPriority(sctx, &col, colInfo)
if hg == nil || err != nil {
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
Expand Down Expand Up @@ -652,7 +665,7 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S
}
// Reload the latest stats cache, otherwise the `updateStatsCache` may fail with high probability, because functions
// like `GetPartitionStats` called in `fmSketchFromStorage` would have modified the stats cache already.
tbl, ok = statsCache.Get(col.TableID)
tbl, ok = statsHandle.Get(col.TableID)
if !ok {
return nil
}
Expand All @@ -667,9 +680,14 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S
if statsVer != statistics.Version0 {
tbl.StatsVer = int(statsVer)
}
if isUpdateColAndIdxExistenceMap {
tbl.ColAndIdxExistenceMap.InsertCol(col.ID, colInfo, true)
}
} else if isUpdateColAndIdxExistenceMap {
tbl.ColAndIdxExistenceMap.InsertCol(col.ID, colInfo, false)
}
tbl.SetCol(col.ID, colHist)
statsCache.UpdateStatsCache([]*statistics.Table{tbl}, nil)
statsHandle.UpdateStatsCache([]*statistics.Table{tbl}, nil)
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
if col.IsSyncLoadFailed {
logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.",
Expand Down
2 changes: 2 additions & 0 deletions pkg/statistics/handle/syncload/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/config",
"//pkg/infoschema",
"//pkg/kv",
"//pkg/metrics",
"//pkg/parser/model",
Expand All @@ -16,6 +17,7 @@ go_library(
"//pkg/statistics",
"//pkg/statistics/handle/storage",
"//pkg/statistics/handle/types",
"//pkg/table",
"//pkg/types",
"//pkg/util",
"//pkg/util/intest",
Expand Down
21 changes: 16 additions & 5 deletions pkg/statistics/handle/syncload/stats_syncload.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/config"
"github.com/pingcap/tidb/pkg/infoschema"
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/parser/model"
Expand All @@ -32,6 +33,7 @@ import (
"github.com/pingcap/tidb/pkg/statistics"
"github.com/pingcap/tidb/pkg/statistics/handle/storage"
statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types"
"github.com/pingcap/tidb/pkg/table"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util"
"github.com/pingcap/tidb/pkg/util/intest"
Expand Down Expand Up @@ -303,6 +305,7 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err
if !ok {
return nil
}
var tblInfo table.Table
wrapper := &statsWrapper{}
if item.IsIndex {
index, loadNeeded := tbl.IndexIsLoadNeeded(item.ID)
Expand All @@ -321,8 +324,17 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err
}
if col != nil {
wrapper.colInfo = col.Info
} else if colInfo := tbl.ColAndIdxExistenceMap.GetCol(item.ID); colInfo != nil {
wrapper.colInfo = colInfo
} else {
wrapper.colInfo = tbl.ColAndIdxExistenceMap.GetCol(item.ID)
// Currently, the column info in the ColAndIdxExistenceMap cannot be initialized when lite-init-stats is disabled,
// so we have to get the column info from the domain.
is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
tblInfo, ok = s.statsHandle.TableInfoByID(is, item.TableID)
if !ok {
return nil
}
wrapper.colInfo = tblInfo.Meta().GetColumnByID(item.ID)
}
// If this column is not analyzed yet and we don't have it in memory.
// We create a fake one for the pseudo estimation.
Expand Down Expand Up @@ -545,14 +557,13 @@ func (s *statsSyncLoad) updateCachedItem(item model.TableItemID, colHist *statis
}
tbl = tbl.Copy()
tbl.SetCol(item.ID, colHist)
// If the column is analyzed we refresh the map for the possible change.
if colHist.StatsAvailable() {
tbl.ColAndIdxExistenceMap.InsertCol(item.ID, colHist.Info, true)
}

// All the objects shares the same stats version. Update it here.
if colHist.StatsVer != statistics.Version0 {
tbl.StatsVer = statistics.Version0
}
// we have to refresh the map for the possible change to ensure that the map information is not missing.
tbl.ColAndIdxExistenceMap.InsertCol(item.ID, colHist.Info, colHist.StatsAvailable())
} else if item.IsIndex && idxHist != nil {
index := tbl.GetIdx(item.ID)
// - If the stats is fully loaded,
Expand Down
17 changes: 10 additions & 7 deletions pkg/statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -809,20 +809,23 @@ func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool
if t.Pseudo {
return nil, false, false
}
// When non-lite init stats is used, the stats for common columns cannot be initialized,
// so we need to force loading the stats.
col, ok := t.columns[id]
if !ok {
return nil, true, true
}
hasAnalyzed := t.ColAndIdxExistenceMap.HasAnalyzed(id, false)

// If it's not analyzed yet.
if !hasAnalyzed {
// If we don't have it in memory, we create a fake hist for pseudo estimation (see handleOneItemTask()).
if !ok {
// If we don't have this column. We skip it.
// It's something ridiculous. But it's possible that the stats don't have some ColumnInfo.
// We need to find a way to maintain it more correctly.
return nil, t.ColAndIdxExistenceMap.Has(id, false), false
}
// It's something ridiculous. But it's possible that the stats don't have some ColumnInfo.
// We need to find a way to maintain it more correctly.
// Otherwise we don't need to load it.
return nil, false, false
result := t.ColAndIdxExistenceMap.Has(id, false)
// If the column is not in the ColAndIdxExistenceMap, we need to load it.
return nil, !result, !result
}

// Restore the condition from the simplified form:
Expand Down
3 changes: 0 additions & 3 deletions tests/integrationtest/r/executor/show.result
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,6 @@ Table Create Table
t CREATE TABLE `t` (
`created_at` datetime DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin /*T![ttl] TTL=`created_at` + INTERVAL 100 YEAR */ /*T![ttl] TTL_ENABLE='ON' */ /*T![ttl] TTL_JOB_INTERVAL='1d' */
show histograms_in_flight;
HistogramsInFlight
0
show open tables;
Database Table In_use Name_locked
show open tables in executor__show;
Expand Down
2 changes: 1 addition & 1 deletion tests/integrationtest/t/executor/show.test
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ create table t (created_at datetime) TTL = created_at + INTERVAL 100 YEAR TTL_JO
show create table t;

# TestShowHistogramsInFlight
show histograms_in_flight;
# show histograms_in_flight; // it is unstable.

# TestShowOpenTables
show open tables;
Expand Down

0 comments on commit 87244ed

Please sign in to comment.