Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: fix sync load fails after disabling lite init stats #54531

Merged
merged 7 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions pkg/parser/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -834,6 +834,19 @@ func (t *TableInfo) Cols() []*ColumnInfo {
return publicColumns[0 : maxOffset+1]
}

// GetColumnByID returns the first column in t.Columns that is in StatePublic
// and whose ID equals id. Columns in any other state are skipped, even when
// their ID matches. It returns nil when no public column has the given ID.
func (t *TableInfo) GetColumnByID(id int64) *ColumnInfo {
	for i := range t.Columns {
		candidate := t.Columns[i]
		if candidate.State == StatePublic && candidate.ID == id {
			return candidate
		}
	}
	return nil
}

// FindIndexByName finds index by name.
func (t *TableInfo) FindIndexByName(idxName string) *IndexInfo {
for _, idx := range t.Indices {
Expand Down
42 changes: 27 additions & 15 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1315,31 +1315,43 @@ func TestBuiltinInEstWithoutStats(t *testing.T) {
h := dom.StatsHandle()

tk.MustExec("use test")
tk.MustExec("create table t(a int)")
tk.MustExec("create table t(a int, b int)")
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
tk.MustExec("insert into t values(1), (2), (3), (4), (5), (6), (7), (8), (9), (10)")
tk.MustExec("insert into t values(1,1), (2,2), (3,3), (4,4), (5,5), (6,6), (7,7), (8,8), (9,9), (10,10)")
require.NoError(t, h.DumpStatsDeltaToKV(true))
is := dom.InfoSchema()
require.NoError(t, h.Update(context.Background(), is))

tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows(
expectedA := testkit.Rows(
"TableReader 0.08 root data:Selection",
"└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)",
" └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo",
))
)
expectedB := testkit.Rows(
"TableReader 0.08 root data:Selection",
"└─Selection 0.08 cop[tikv] in(test.t.b, 1, 2, 3, 4, 5, 6, 7, 8)",
" └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo",
)
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedA)
// try again with other column
tk.MustQuery("explain format='brief' select * from t where b in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedB)

h.Clear()
require.NoError(t, h.InitStatsLite(context.Background(), is))
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows(
"TableReader 0.08 root data:Selection",
"└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)",
" └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo",
))
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedA)
tk.MustQuery("explain format='brief' select * from t where b in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedB)

h.Clear()
require.NoError(t, h.InitStats(context.Background(), is))
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows(
"TableReader 8.00 root data:Selection",
"└─Selection 8.00 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)",
" └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo",
))
tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedA)
tk.MustQuery("explain format='brief' select * from t where b in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedB)
require.NoError(t, h.Update(context.Background(), is))
tbl, err := is.TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t"))
require.NoError(t, err)
statsTbl, found := h.Get(tbl.Meta().ID)
require.True(t, found)
require.False(t, statsTbl.ColAndIdxExistenceMap.IsEmpty())
for _, col := range tbl.Cols() {
require.True(t, statsTbl.ColAndIdxExistenceMap.Has(col.ID, false))
require.False(t, statsTbl.ColAndIdxExistenceMap.HasAnalyzed(col.ID, false))
}
}
32 changes: 25 additions & 7 deletions pkg/statistics/handle/storage/read.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/config"
"github.com/pingcap/tidb/pkg/infoschema"
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/model"
Expand Down Expand Up @@ -559,14 +560,14 @@ func LoadHistogram(sctx sessionctx.Context, tableID int64, isIndex int, histID i
}

// LoadNeededHistograms will load histograms for those needed columns/indices.
func LoadNeededHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, loadFMSketch bool) (err error) {
func LoadNeededHistograms(sctx sessionctx.Context, statsHandle statstypes.StatsHandle, loadFMSketch bool) (err error) {
items := asyncload.AsyncLoadHistogramNeededItems.AllItems()
for _, item := range items {
if !item.IsIndex {
err = loadNeededColumnHistograms(sctx, statsCache, item.TableItemID, loadFMSketch, item.FullLoad)
err = loadNeededColumnHistograms(sctx, statsHandle, item.TableItemID, loadFMSketch, item.FullLoad)
} else {
// Index is always full load.
err = loadNeededIndexHistograms(sctx, statsCache, item.TableItemID, loadFMSketch)
err = loadNeededIndexHistograms(sctx, statsHandle, item.TableItemID, loadFMSketch)
}
if err != nil {
return err
Expand Down Expand Up @@ -602,8 +603,8 @@ func CleanFakeItemsForShowHistInFlights(statsCache statstypes.StatsCache) int {
return reallyNeeded
}

func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) {
tbl, ok := statsCache.Get(col.TableID)
func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes.StatsHandle, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) {
tbl, ok := statsHandle.Get(col.TableID)
if !ok {
return nil
}
Expand All @@ -613,7 +614,19 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
return nil
}
isUpdateColAndIdxExistenceMap := false
colInfo = tbl.ColAndIdxExistenceMap.GetCol(col.ID)
if colInfo == nil {
// When lite-init-stats is disabled, the column info is currently not initialized in the ColAndIdxExistenceMap,
// so we have to get the column info from the domain.
is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
tblInfo, ok := statsHandle.TableInfoByID(is, col.TableID)
if !ok {
return nil
}
colInfo = tblInfo.Meta().GetColumnByID(col.ID)
isUpdateColAndIdxExistenceMap = true
}
hg, _, statsVer, _, err := HistMetaFromStorageWithHighPriority(sctx, &col, colInfo)
if hg == nil || err != nil {
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
Expand Down Expand Up @@ -652,7 +665,7 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S
}
// Reload the latest stats cache, otherwise the `updateStatsCache` may fail with high probability, because functions
// like `GetPartitionStats` called in `fmSketchFromStorage` would have modified the stats cache already.
tbl, ok = statsCache.Get(col.TableID)
tbl, ok = statsHandle.Get(col.TableID)
if !ok {
return nil
}
Expand All @@ -667,9 +680,14 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S
if statsVer != statistics.Version0 {
tbl.StatsVer = int(statsVer)
}
if isUpdateColAndIdxExistenceMap {
tbl.ColAndIdxExistenceMap.InsertCol(col.ID, colInfo, true)
}
} else if isUpdateColAndIdxExistenceMap {
tbl.ColAndIdxExistenceMap.InsertCol(col.ID, colInfo, false)
}
tbl.SetCol(col.ID, colHist)
statsCache.UpdateStatsCache([]*statistics.Table{tbl}, nil)
statsHandle.UpdateStatsCache([]*statistics.Table{tbl}, nil)
asyncload.AsyncLoadHistogramNeededItems.Delete(col)
if col.IsSyncLoadFailed {
logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.",
Expand Down
2 changes: 2 additions & 0 deletions pkg/statistics/handle/syncload/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/config",
"//pkg/infoschema",
"//pkg/kv",
"//pkg/metrics",
"//pkg/parser/model",
Expand All @@ -16,6 +17,7 @@ go_library(
"//pkg/statistics",
"//pkg/statistics/handle/storage",
"//pkg/statistics/handle/types",
"//pkg/table",
"//pkg/types",
"//pkg/util",
"//pkg/util/intest",
Expand Down
21 changes: 16 additions & 5 deletions pkg/statistics/handle/syncload/stats_syncload.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/config"
"github.com/pingcap/tidb/pkg/infoschema"
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/parser/model"
Expand All @@ -32,6 +33,7 @@ import (
"github.com/pingcap/tidb/pkg/statistics"
"github.com/pingcap/tidb/pkg/statistics/handle/storage"
statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types"
"github.com/pingcap/tidb/pkg/table"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util"
"github.com/pingcap/tidb/pkg/util/intest"
Expand Down Expand Up @@ -303,6 +305,7 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err
if !ok {
return nil
}
var tblInfo table.Table
wrapper := &statsWrapper{}
if item.IsIndex {
index, loadNeeded := tbl.IndexIsLoadNeeded(item.ID)
Expand All @@ -321,8 +324,17 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err
}
if col != nil {
wrapper.colInfo = col.Info
} else if colInfo := tbl.ColAndIdxExistenceMap.GetCol(item.ID); colInfo != nil {
wrapper.colInfo = colInfo
} else {
wrapper.colInfo = tbl.ColAndIdxExistenceMap.GetCol(item.ID)
// When lite-init-stats is disabled, the column info is currently not initialized in the ColAndIdxExistenceMap,
// so we have to get the column info from the domain.
is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
tblInfo, ok = s.statsHandle.TableInfoByID(is, item.TableID)
if !ok {
return nil
}
wrapper.colInfo = tblInfo.Meta().GetColumnByID(item.ID)
}
// If this column is not analyzed yet and we don't have it in memory.
// We create a fake one for the pseudo estimation.
Expand Down Expand Up @@ -545,14 +557,13 @@ func (s *statsSyncLoad) updateCachedItem(item model.TableItemID, colHist *statis
}
tbl = tbl.Copy()
tbl.SetCol(item.ID, colHist)
// If the column is analyzed we refresh the map for the possible change.
if colHist.StatsAvailable() {
tbl.ColAndIdxExistenceMap.InsertCol(item.ID, colHist.Info, true)
}

// All the objects shares the same stats version. Update it here.
if colHist.StatsVer != statistics.Version0 {
tbl.StatsVer = statistics.Version0
}
// we have to refresh the map for the possible change to ensure that the map information is not missing.
tbl.ColAndIdxExistenceMap.InsertCol(item.ID, colHist.Info, colHist.StatsAvailable())
} else if item.IsIndex && idxHist != nil {
index := tbl.GetIdx(item.ID)
// - If the stats is fully loaded,
Expand Down
17 changes: 10 additions & 7 deletions pkg/statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -809,20 +809,23 @@ func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool
if t.Pseudo {
return nil, false, false
}
// When we use non-lite init stats, it cannot init the stats for common columns,
// so we need to force loading the stats.
col, ok := t.columns[id]
if !ok {
return nil, true, true
}
hasAnalyzed := t.ColAndIdxExistenceMap.HasAnalyzed(id, false)

// If it's not analyzed yet.
if !hasAnalyzed {
// If we don't have it in memory, we create a fake hist for pseudo estimation (see handleOneItemTask()).
if !ok {
// If we don't have this column. We skip it.
// It's something ridiculous. But it's possible that the stats don't have some ColumnInfo.
// We need to find a way to maintain it more correctly.
return nil, t.ColAndIdxExistenceMap.Has(id, false), false
}
// It's something ridiculous. But it's possible that the stats don't have some ColumnInfo.
// We need to find a way to maintain it more correctly.
// Otherwise we don't need to load it.
return nil, false, false
result := t.ColAndIdxExistenceMap.Has(id, false)
// If the column is not in the ColAndIdxExistenceMap, we need to load it.
return nil, !result, !result
}

// Restore the condition from the simplified form:
Expand Down
3 changes: 0 additions & 3 deletions tests/integrationtest/r/executor/show.result
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,6 @@ Table Create Table
t CREATE TABLE `t` (
`created_at` datetime DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin /*T![ttl] TTL=`created_at` + INTERVAL 100 YEAR */ /*T![ttl] TTL_ENABLE='ON' */ /*T![ttl] TTL_JOB_INTERVAL='1d' */
show histograms_in_flight;
HistogramsInFlight
0
show open tables;
Database Table In_use Name_locked
show open tables in executor__show;
Expand Down
2 changes: 1 addition & 1 deletion tests/integrationtest/t/executor/show.test
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ create table t (created_at datetime) TTL = created_at + INTERVAL 100 YEAR TTL_JO
show create table t;

# TestShowHistogramsInFlight
show histograms_in_flight;
# show histograms_in_flight; // it is unstable.

# TestShowOpenTables
show open tables;
Expand Down