diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go
index 8504e18f3e53a..e78c2b41a14dd 100644
--- a/pkg/statistics/handle/bootstrap.go
+++ b/pkg/statistics/handle/bootstrap.go
@@ -244,15 +244,20 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache stats
 				Flag:       row.GetInt64(10),
 				StatsVer:   statsVer,
 			}
-			// primary key column has no stats info, because primary key's is_index is false. so it cannot load the topn
-			col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
 			lastAnalyzePos.Copy(&col.LastAnalyzePos)
 			table.SetCol(hist.ID, col)
 			table.ColAndIdxExistenceMap.InsertCol(colInfo.ID, statsVer != statistics.Version0 || ndv > 0 || nullCount > 0)
 			if statsVer != statistics.Version0 {
 				// The LastAnalyzeVersion is added by ALTER table so its value might be 0.
 				table.LastAnalyzeVersion = max(table.LastAnalyzeVersion, version)
+				// We also set the int primary key's loaded status to all-evicted.
+				col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
+			} else if col.NDV > 0 || col.NullCount > 0 {
+				// If NDV > 0 or NullCount > 0, we also treat the column as having statistics. See the comments of StatsAvailable in column.go.
+				// So we mark its status as all-evicted too.
+				col.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus()
 			}
+			// Otherwise the column's stats remain uninitialized.
 		}
 	}
 	if table != nil {
@@ -261,8 +266,19 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache stats
 	}
 }
 
+// initStatsHistogramsSQLGen generates the SQL to load all stats_histograms records.
+// We need to read all the records because we need to initialize table.ColAndIdxExistenceMap.
+func initStatsHistogramsSQLGen(isPaging bool) string {
+	selectPrefix := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl) */ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms"
+	orderSuffix := " order by table_id"
+	if !isPaging {
+		return selectPrefix + orderSuffix
+	}
+	return selectPrefix + " where table_id >= %? and table_id < %?" + orderSuffix
+}
+
 func (h *Handle) initStatsHistogramsLite(ctx context.Context, cache statstypes.StatsCache) error {
-	sql := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms order by table_id"
+	sql := initStatsHistogramsSQLGen(false)
 	rc, err := util.Exec(h.initStatsCtx, sql)
 	if err != nil {
 		return errors.Trace(err)
 	}
@@ -285,7 +301,7 @@ func (h *Handle) initStatsHistogramsLite(ctx context.Context, cache statstypes.S
 }
 
 func (h *Handle) initStatsHistograms(is infoschema.InfoSchema, cache statstypes.StatsCache) error {
-	sql := "select /*+ ORDER_INDEX(mysql.stats_histograms,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms order by table_id"
+	sql := initStatsHistogramsSQLGen(false)
 	rc, err := util.Exec(h.initStatsCtx, sql)
 	if err != nil {
 		return errors.Trace(err)
 	}
@@ -319,10 +335,7 @@
 	}()
 	sctx := se.(sessionctx.Context)
-	// Why do we need to add `is_index=1` in the SQL?
-	// because it is aligned to the `initStatsTopN` function, which only loads the topn of the index too.
-	// the other will be loaded by sync load.
-	sql := "select HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms where table_id >= %? and table_id < %? and is_index=1"
+	sql := initStatsHistogramsSQLGen(true)
 	rc, err := util.Exec(sctx, sql, task.StartTid, task.EndTid)
 	if err != nil {
 		return errors.Trace(err)
 	}
@@ -401,8 +414,20 @@ func (*Handle) initStatsTopN4Chunk(cache statstypes.StatsCache, iter *chunk.Iter
 	}
 }
 
+// initStatsTopNSQLGen generates the SQL to load all stats_top_n records.
+// We only need to load the indexes' topn, since we only record the existence of columns in ColAndIdxExistenceMap.
+// Column stats are not loaded during the bootstrap process.
+func initStatsTopNSQLGen(isPaging bool) string {
+	selectPrefix := "select /*+ ORDER_INDEX(mysql.stats_top_n,tbl) */ HIGH_PRIORITY table_id, hist_id, value, count from mysql.stats_top_n where is_index = 1"
+	orderSuffix := " order by table_id"
+	if !isPaging {
+		return selectPrefix + orderSuffix
+	}
+	return selectPrefix + " and table_id >= %? and table_id < %?" + orderSuffix
+}
+
 func (h *Handle) initStatsTopN(cache statstypes.StatsCache, totalMemory uint64) error {
-	sql := "select /*+ ORDER_INDEX(mysql.stats_top_n,tbl)*/ HIGH_PRIORITY table_id, hist_id, value, count from mysql.stats_top_n where is_index = 1 order by table_id"
+	sql := initStatsTopNSQLGen(false)
 	rc, err := util.Exec(h.initStatsCtx, sql)
 	if err != nil {
 		return errors.Trace(err)
 	}
@@ -435,7 +460,7 @@ func (h *Handle) initStatsTopNByPaging(cache statstypes.StatsCache, task initsta
 		}
 	}()
 	sctx := se.(sessionctx.Context)
-	sql := "select HIGH_PRIORITY table_id, hist_id, value, count from mysql.stats_top_n where is_index = 1 and table_id >= %? and table_id < %? order by table_id"
+	sql := initStatsTopNSQLGen(true)
 	rc, err := util.Exec(sctx, sql, task.StartTid, task.EndTid)
 	if err != nil {
 		return errors.Trace(err)
 	}
@@ -619,6 +644,18 @@ func (*Handle) initStatsBuckets4Chunk(cache statstypes.StatsCache, iter *chunk.I
 	}
 }
 
+// initStatsBucketsSQLGen generates the SQL to load all stats_buckets records.
+// We only need to load the indexes' buckets, since we only record the existence of columns in ColAndIdxExistenceMap.
+// Column stats are not loaded during the bootstrap process.
+func initStatsBucketsSQLGen(isPaging bool) string {
+	selectPrefix := "select /*+ ORDER_INDEX(mysql.stats_buckets,tbl) */ HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where is_index=1"
+	orderSuffix := " order by table_id"
+	if !isPaging {
+		return selectPrefix + orderSuffix
+	}
+	return selectPrefix + " and table_id >= %? and table_id < %?" + orderSuffix
+}
+
 func (h *Handle) initStatsBuckets(cache statstypes.StatsCache, totalMemory uint64) error {
 	if isFullCache(cache, totalMemory) {
 		return nil
 	}
@@ -629,7 +666,7 @@ func (h *Handle) initStatsBuckets(cache statstypes.StatsCache, totalMemory uint6
 			return errors.Trace(err)
 		}
 	} else {
-		sql := "select /*+ ORDER_INDEX(mysql.stats_buckets,tbl)*/ HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets order by table_id, is_index, hist_id, bucket_id"
+		sql := initStatsBucketsSQLGen(false)
 		rc, err := util.Exec(h.initStatsCtx, sql)
 		if err != nil {
 			return errors.Trace(err)
 		}
@@ -668,7 +705,7 @@ func (h *Handle) initStatsBucketsByPaging(cache statstypes.StatsCache, task init
 		}
 	}()
 	sctx := se.(sessionctx.Context)
-	sql := "select HIGH_PRIORITY table_id, is_index, hist_id, count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where table_id >= %? and table_id < %? order by table_id, is_index, hist_id, bucket_id"
+	sql := initStatsBucketsSQLGen(true)
 	rc, err := util.Exec(sctx, sql, task.StartTid, task.EndTid)
 	if err != nil {
 		return errors.Trace(err)
 	}
@@ -719,8 +756,10 @@ func (h *Handle) initStatsBucketsConcurrency(cache statstypes.StatsCache, totalM
 
 // InitStatsLite initiates the stats cache. The function is liter and faster than InitStats.
 // 1. Basic stats meta data is loaded.(count, modify count, etc.)
-// 2. Column/index stats are loaded. (only histogram)
+// 2. Column/index stats are marked as existing or not by initializing table.ColAndIdxExistenceMap, based on data from mysql.stats_histograms.
 // 3. TopN, Bucket, FMSketch are not loaded.
+// To work with auto-analyze's needs, we also read all the tables' stats meta into memory.
+// The sync/async stats loading and other processes don't fully initialize table.ColAndIdxExistenceMap, so we need to do it here.
 func (h *Handle) InitStatsLite(ctx context.Context) (err error) {
 	defer func() {
 		_, err1 := util.Exec(h.initStatsCtx, "commit")
@@ -750,7 +789,10 @@
 
 // InitStats initiates the stats cache.
 // 1. Basic stats meta data is loaded.(count, modify count, etc.)
-// 2. Column/index stats are loaded. (histogram, topn, buckets, FMSketch)
+// 2. Index stats are fully loaded. (histogram, topn, buckets)
+// 3. Column stats are marked as existing or not by initializing table.ColAndIdxExistenceMap, based on data from mysql.stats_histograms.
+// To work with auto-analyze's needs, we read all stats meta info into memory.
+// The sync/async stats loading and other processes don't fully initialize table.ColAndIdxExistenceMap, so we need to do it here.
 func (h *Handle) InitStats(ctx context.Context, is infoschema.InfoSchema) (err error) {
 	totalMemory, err := memory.MemTotal()
 	if err != nil {
diff --git a/pkg/statistics/handle/handletest/statstest/stats_test.go b/pkg/statistics/handle/handletest/statstest/stats_test.go
index a755d28410fee..728f89a69e07e 100644
--- a/pkg/statistics/handle/handletest/statstest/stats_test.go
+++ b/pkg/statistics/handle/handletest/statstest/stats_test.go
@@ -305,6 +305,9 @@ func TestInitStats(t *testing.T) {
 	require.NoError(t, h.Update(context.Background(), is))
 	// Index and pk are loaded.
 	needed := fmt.Sprintf(`Table:%v RealtimeCount:6
+column:1 ndv:6 totColSize:0
+column:2 ndv:6 totColSize:6
+column:3 ndv:6 totColSize:6
 index:1 ndv:6
 num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0
 num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0
@@ -363,7 +366,7 @@ func TestInitStatsVer2(t *testing.T) {
 	}()
 	config.GetGlobalConfig().Performance.LiteInitStats = false
 	config.GetGlobalConfig().Performance.ConcurrentlyInitStats = false
-	initStatsVer2(t, false)
+	initStatsVer2(t)
 }
 
 func TestInitStatsVer2Concurrency(t *testing.T) {
@@ -375,18 +378,21 @@
 	}()
 	config.GetGlobalConfig().Performance.LiteInitStats = false
 	config.GetGlobalConfig().Performance.ConcurrentlyInitStats = true
-	initStatsVer2(t, true)
+	initStatsVer2(t)
 }
 
-func initStatsVer2(t *testing.T, isConcurrency bool) {
+func initStatsVer2(t *testing.T) {
 	store, dom := testkit.CreateMockStoreAndDomain(t)
 	tk := testkit.NewTestKit(t, store)
 	tk.MustExec("use test")
 	tk.MustExec("set @@session.tidb_analyze_version=2")
-	tk.MustExec("create table t(a int, b int, c int, index idx(a), index idxab(a, b))")
+	tk.MustExec("create table t(a int, b int, c int, d int, index idx(a), index idxab(a, b))")
+	dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh())
 	analyzehelper.TriggerPredicateColumnsCollection(t, tk, store, "t", "c")
-	tk.MustExec("insert into t values(1, 1, 1), (2, 2, 2), (3, 3, 3), (4, 4, 4), (4, 4, 4), (4, 4, 4)")
+	tk.MustExec("insert into t values(1, 1, 1, 1), (2, 2, 2, 2), (3, 3, 3, 3), (4, 4, 4, 4), (4, 4, 4, 4), (4, 4, 4, 4)")
 	tk.MustExec("analyze table t with 2 topn, 3 buckets")
+	tk.MustExec("alter table t add column e int default 1")
+	dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh())
 	h := dom.StatsHandle()
 	is := dom.InfoSchema()
 	tbl, err := is.TableByName(context.Background(), model.NewCIStr("test"), model.NewCIStr("t"))
@@ -398,16 +404,15 @@
 	h.Clear()
 	require.NoError(t, h.InitStats(context.Background(), is))
 	table0 := h.GetTableStats(tbl.Meta())
-	if isConcurrency {
-		require.Equal(t, uint8(0x3), table0.GetIdx(1).LastAnalyzePos.GetBytes()[0])
-		require.Equal(t, uint8(0x3), table0.GetIdx(2).LastAnalyzePos.GetBytes()[0])
-	} else {
-		require.Equal(t, uint8(0x33), table0.GetCol(1).LastAnalyzePos.GetBytes()[0])
-		require.Equal(t, uint8(0x33), table0.GetCol(2).LastAnalyzePos.GetBytes()[0])
-		require.Equal(t, uint8(0x33), table0.GetCol(3).LastAnalyzePos.GetBytes()[0])
-		require.Equal(t, uint8(0x3), table0.GetIdx(1).LastAnalyzePos.GetBytes()[0])
-		require.Equal(t, uint8(0x3), table0.GetIdx(2).LastAnalyzePos.GetBytes()[0])
-	}
+	require.Equal(t, 5, table0.ColNum())
+	require.True(t, table0.GetCol(1).IsAllEvicted())
+	require.True(t, table0.GetCol(2).IsAllEvicted())
+	require.True(t, table0.GetCol(3).IsAllEvicted())
+	require.True(t, !table0.GetCol(4).IsStatsInitialized())
+	require.True(t, table0.GetCol(5).IsStatsInitialized())
+	require.Equal(t, 2, table0.IdxNum())
+	require.Equal(t, uint8(0x3), table0.GetIdx(1).LastAnalyzePos.GetBytes()[0])
+	require.Equal(t, uint8(0x3), table0.GetIdx(2).LastAnalyzePos.GetBytes()[0])
 	h.Clear()
 	require.NoError(t, h.InitStats(context.Background(), is))
 	table1 := h.GetTableStats(tbl.Meta())
diff --git a/pkg/statistics/handle/syncload/BUILD.bazel b/pkg/statistics/handle/syncload/BUILD.bazel
index 77e595a5885d5..c24971d473c17 100644
--- a/pkg/statistics/handle/syncload/BUILD.bazel
+++ b/pkg/statistics/handle/syncload/BUILD.bazel
@@ -17,7 +17,6 @@ go_library(
"//pkg/statistics", "//pkg/statistics/handle/storage", "//pkg/statistics/handle/types", - "//pkg/table", "//pkg/types", "//pkg/util", "//pkg/util/intest", @@ -35,7 +34,7 @@ go_test( srcs = ["stats_syncload_test.go"], flaky = True, race = "on", - shard_count = 6, + shard_count = 7, deps = [ ":syncload", "//pkg/config", diff --git a/pkg/statistics/handle/syncload/stats_syncload.go b/pkg/statistics/handle/syncload/stats_syncload.go index 1101d1962acb1..d23e9ddd47fe8 100644 --- a/pkg/statistics/handle/syncload/stats_syncload.go +++ b/pkg/statistics/handle/syncload/stats_syncload.go @@ -33,7 +33,6 @@ import ( "github.com/pingcap/tidb/pkg/statistics" "github.com/pingcap/tidb/pkg/statistics/handle/storage" statstypes "github.com/pingcap/tidb/pkg/statistics/handle/types" - "github.com/pingcap/tidb/pkg/table" "github.com/pingcap/tidb/pkg/types" "github.com/pingcap/tidb/pkg/util" "github.com/pingcap/tidb/pkg/util/intest" @@ -305,30 +304,31 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err } }() item := task.Item.TableItemID - tbl, ok := s.statsHandle.Get(item.TableID) + statsTbl, ok := s.statsHandle.Get(item.TableID) if !ok { return nil } is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) - tblInfo, ok := s.statsHandle.TableInfoByID(is, item.TableID) + tbl, ok := s.statsHandle.TableInfoByID(is, item.TableID) if !ok { return nil } - isPkIsHandle := tblInfo.Meta().PKIsHandle + tblInfo := tbl.Meta() + isPkIsHandle := tblInfo.PKIsHandle wrapper := &statsWrapper{} if item.IsIndex { - index, loadNeeded := tbl.IndexIsLoadNeeded(item.ID) + index, loadNeeded := statsTbl.IndexIsLoadNeeded(item.ID) if !loadNeeded { return nil } if index != nil { wrapper.idxInfo = index.Info } else { - wrapper.idxInfo = tblInfo.Meta().FindIndexByID(item.ID) + wrapper.idxInfo = tblInfo.FindIndexByID(item.ID) } } else { - col, loadNeeded, analyzed := tbl.ColumnIsLoadNeeded(item.ID, task.Item.FullLoad) + col, loadNeeded, analyzed := statsTbl.ColumnIsLoadNeeded(item.ID, task.Item.FullLoad) if !loadNeeded { return nil } @@ -337,7 +337,7 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err } else { // Now, we cannot init the column info in the ColAndIdxExistenceMap when to disable lite-init-stats. // so we have to get the column info from the domain. - wrapper.colInfo = tblInfo.Meta().GetColumnByID(item.ID) + wrapper.colInfo = tblInfo.GetColumnByID(item.ID) } // If this column is not analyzed yet and we don't have it in memory. // We create a fake one for the pseudo estimation. @@ -393,7 +393,8 @@ func (*statsSyncLoad) readStatsForOneItem(sctx sessionctx.Context, item model.Ta } if hg == nil { logutil.BgLogger().Warn("fail to get hist meta for this histogram, possibly a deleted one", zap.Int64("table_id", item.TableID), - zap.Int64("hist_id", item.ID), zap.Bool("is_index", item.IsIndex)) + zap.Int64("hist_id", item.ID), zap.Bool("is_index", item.IsIndex), + ) return nil, errGetHistMeta } if item.IsIndex { @@ -543,7 +544,7 @@ func (*statsSyncLoad) writeToResultChan(resultCh chan stmtctx.StatsLoadResult, r } // updateCachedItem updates the column/index hist to global statsCache. 
-func (s *statsSyncLoad) updateCachedItem(tblInfo table.Table, item model.TableItemID, colHist *statistics.Column, idxHist *statistics.Index, fullLoaded bool) (updated bool) {
+func (s *statsSyncLoad) updateCachedItem(tblInfo *model.TableInfo, item model.TableItemID, colHist *statistics.Column, idxHist *statistics.Index, fullLoaded bool) (updated bool) {
 	s.StatsLoad.Lock()
 	defer s.StatsLoad.Unlock()
 	// Reload the latest stats cache, otherwise the `updateStatsCache` may fail with high probability, because functions
@@ -555,13 +556,13 @@ func (s *statsSyncLoad) updateCachedItem(tblInfo table.Table, item model.TableIt
 	if !tbl.ColAndIdxExistenceMap.Checked() {
 		tbl = tbl.Copy()
 		for _, col := range tbl.HistColl.GetColSlice() {
-			if tblInfo.Meta().FindColumnByID(col.ID) == nil {
+			if tblInfo.FindColumnByID(col.ID) == nil {
 				tbl.HistColl.DelCol(col.ID)
 				tbl.ColAndIdxExistenceMap.DeleteColAnalyzed(col.ID)
 			}
 		}
 		for _, idx := range tbl.HistColl.GetIdxSlice() {
-			if tblInfo.Meta().FindIndexByID(idx.ID) == nil {
+			if tblInfo.FindIndexByID(idx.ID) == nil {
 				tbl.HistColl.DelIdx(idx.ID)
 				tbl.ColAndIdxExistenceMap.DeleteIdxAnalyzed(idx.ID)
 			}
diff --git a/pkg/statistics/handle/syncload/stats_syncload_test.go b/pkg/statistics/handle/syncload/stats_syncload_test.go
index c34edcdb8962b..6be6e7dfbc2b8 100644
--- a/pkg/statistics/handle/syncload/stats_syncload_test.go
+++ b/pkg/statistics/handle/syncload/stats_syncload_test.go
@@ -393,3 +393,69 @@ func TestSendLoadRequestsWaitTooLong(t *testing.T) {
 		require.Error(t, rs1.Err)
 	}
 }
+
+func TestSyncLoadOnObjectWhichCanNotFoundInStorage(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("create table t(a int, b int, c int, primary key(a))")
+	h := dom.StatsHandle()
+	// Skip the create-table event.
+	<-h.DDLEventCh()
+	tk.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)")
+	tk.MustExec("analyze table t columns a, b")
+	tbl, err := dom.InfoSchema().TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
+	require.NoError(t, h.InitStatsLite(context.TODO()))
+	require.NoError(t, err)
+	require.NotNil(t, tbl)
+	tblInfo := tbl.Meta()
+	statsTbl, ok := h.Get(tblInfo.ID)
+	require.True(t, ok)
+	// Only a and b exist in the map.
+	require.Equal(t, 2, statsTbl.ColAndIdxExistenceMap.ColNum())
+	require.True(t, statsTbl.ColAndIdxExistenceMap.HasAnalyzed(tblInfo.Columns[0].ID, false))
+	require.True(t, statsTbl.ColAndIdxExistenceMap.HasAnalyzed(tblInfo.Columns[1].ID, false))
+	require.False(t, statsTbl.ColAndIdxExistenceMap.Has(tblInfo.Columns[2].ID, false))
+
+	// Do some DDL; one is successfully handled by handleDDLEvent, the other is not.
+	tk.MustExec("alter table t add column d int default 2")
+	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
+	require.NoError(t, h.Update(context.Background(), dom.InfoSchema()))
+	tbl, err = dom.InfoSchema().TableByName(context.Background(), pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
+	require.NoError(t, err)
+	require.NotNil(t, tbl)
+	tblInfo = tbl.Meta()
+	statsTbl, ok = h.Get(tblInfo.ID)
+	require.True(t, ok)
+	require.True(t, statsTbl.ColAndIdxExistenceMap.Has(tblInfo.Columns[3].ID, false))
+	require.True(t, statsTbl.ColAndIdxExistenceMap.HasAnalyzed(tblInfo.Columns[3].ID, false))
+
+	// Try sync load.
+ tk.MustExec("select * from t where a >= 1 and b = 2 and c = 3 and d = 4") + statsTbl, ok = h.Get(tblInfo.ID) + require.True(t, ok) + require.True(t, statsTbl.GetCol(tblInfo.Columns[0].ID).IsFullLoad()) + require.True(t, statsTbl.GetCol(tblInfo.Columns[1].ID).IsFullLoad()) + require.True(t, statsTbl.GetCol(tblInfo.Columns[3].ID).IsFullLoad()) + require.Nil(t, statsTbl.GetCol(tblInfo.Columns[2].ID)) + _, loadNeeded, analyzed := statsTbl.ColumnIsLoadNeeded(tblInfo.Columns[2].ID, false) + // After the sync load. The column without any thing in storage should not be marked as loadNeeded any more. + require.False(t, loadNeeded) + require.False(t, analyzed) + + // Analyze c then test sync load again + tk.MustExec("analyze table t columns a, b, c") + require.NoError(t, h.InitStatsLite(context.TODO())) + tk.MustExec("select * from t where a >= 1 and b = 2 and c = 3 and d = 4") + statsTbl, ok = h.Get(tblInfo.ID) + require.True(t, ok) + // a, b, d's status is not changed. + require.True(t, statsTbl.GetCol(tblInfo.Columns[0].ID).IsFullLoad()) + require.True(t, statsTbl.GetCol(tblInfo.Columns[1].ID).IsFullLoad()) + require.True(t, statsTbl.GetCol(tblInfo.Columns[3].ID).IsFullLoad()) + // c's stats is loaded. + _, loadNeeded, analyzed = statsTbl.ColumnIsLoadNeeded(tblInfo.Columns[2].ID, false) + require.False(t, loadNeeded) + require.True(t, analyzed) + require.True(t, statsTbl.GetCol(tblInfo.Columns[2].ID).IsFullLoad()) +} diff --git a/pkg/statistics/handle/types/interfaces.go b/pkg/statistics/handle/types/interfaces.go index 6bcefbddd71fc..f954dfedd635a 100644 --- a/pkg/statistics/handle/types/interfaces.go +++ b/pkg/statistics/handle/types/interfaces.go @@ -146,6 +146,8 @@ type IndicatorsJSON struct { } // StatsAnalyze is used to handle auto-analyze and manage analyze jobs. +// We need to read all the tables's last_analyze_time, modified_count, and row_count into memory. +// Because the current auto analyze' scheduling needs the whole information. type StatsAnalyze interface { owner.Listener @@ -220,6 +222,7 @@ type StatsCache interface { Clear() // Update reads stats meta from store and updates the stats map. + // To work with auto-analyze's needs, we'll update all table's stats meta into memory. Update(ctx context.Context, is infoschema.InfoSchema, tableAndPartitionIDs ...int64) error // MemConsumed returns its memory usage. diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index 804e115056c7b..91734dfed013c 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -84,6 +84,7 @@ type Table struct { // ColAndIdxExistenceMap is the meta map for statistics.Table. // It can tell whether a column/index really has its statistics. So we won't send useless kv request when we do online stats loading. +// We use this map to decide the stats status of a column/index. So it should be fully initialized before we check whether a column/index is analyzed or not. type ColAndIdxExistenceMap struct { checked bool colAnalyzed map[int64]bool @@ -127,6 +128,16 @@ func (m *ColAndIdxExistenceMap) HasAnalyzed(id int64, isIndex bool) bool { return ok && analyzed } +// Has checks whether a column/index stats exists. +func (m *ColAndIdxExistenceMap) Has(id int64, isIndex bool) bool { + if isIndex { + _, ok := m.idxAnalyzed[id] + return ok + } + _, ok := m.colAnalyzed[id] + return ok +} + // InsertCol inserts a column with its meta into the map. 
 func (m *ColAndIdxExistenceMap) InsertCol(id int64, analyzed bool) {
 	m.colAnalyzed[id] = analyzed
@@ -819,16 +830,18 @@ func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool
 	if t.Pseudo {
 		return nil, false, false
 	}
-	// when we use non-lite init stats, it cannot init the stats for common columns.
-	// so we need to force to load the stats.
+	hasAnalyzed := t.ColAndIdxExistenceMap.HasAnalyzed(id, false)
 	col, ok := t.columns[id]
 	if !ok {
-		return nil, true, true
+		// If the column has no stats object in memory, we need to check the existence map.
+		// If the existence map says it has no record in storage at all, we don't need to do anything. => Has=false, HasAnalyzed=false
+		// If the existence map says it has analyzed stats, we need to load them from storage. => Has=true, HasAnalyzed=true
+		// If the existence map says it has no analyzed stats but an uninitialized record in storage, we also need to create a fake object. => Has=true, HasAnalyzed=false
+		return nil, t.ColAndIdxExistenceMap.Has(id, false), hasAnalyzed
 	}
-	hasAnalyzed := t.ColAndIdxExistenceMap.HasAnalyzed(id, false)
 
 	// If it's not analyzed yet.
-	// The real check condition: !ok && !hashAnalyzed.
+	// The real check condition: !ok && !hasAnalyzed. (Has must be true, since we have the in-memory object, so we should have the storage record.)
 	// After this check, we will always have ok && hasAnalyzed.
 	if !hasAnalyzed {
 		return nil, false, false
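A note on the refactor above: the three new init*SQLGen helpers share one shape — a fixed select prefix, an optional paging predicate whose %? placeholders are bound by the caller, and a trailing order-by suffix. Below is a minimal, runnable Go sketch of that shape; the mysql.stats_example table, its column list, and the statsSQLGen name are illustrative only and not real TiDB code.

package main

import "fmt"

// statsSQLGen mirrors the prefix/paging-predicate/order-suffix pattern of the new helpers.
func statsSQLGen(isPaging bool) string {
	selectPrefix := "select HIGH_PRIORITY table_id, hist_id from mysql.stats_example where is_index = 1"
	orderSuffix := " order by table_id"
	if !isPaging {
		return selectPrefix + orderSuffix
	}
	// The caller binds the range later, e.g. util.Exec(sctx, sql, task.StartTid, task.EndTid).
	return selectPrefix + " and table_id >= %? and table_id < %?" + orderSuffix
}

func main() {
	fmt.Println(statsSQLGen(false)) // full scan, ordered by table_id
	fmt.Println(statsSQLGen(true))  // paged scan over [startTid, endTid)
}

Keeping both variants in one generator means the paged and non-paged bootstrap paths can no longer drift apart, which is exactly the bug class the removed hand-written SQL strings allowed.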
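The heart of the ColumnIsLoadNeeded change is the small decision table over Has/HasAnalyzed that applies when a column has no in-memory stats object. A self-contained sketch of that table, assuming a plain map in place of ColAndIdxExistenceMap (loadDecision is a hypothetical helper, not TiDB code):

package main

import "fmt"

// loadDecision: presence of id in the map means a storage record exists (Has);
// a true value means the column was analyzed (HasAnalyzed).
func loadDecision(colAnalyzed map[int64]bool, id int64) (loadNeeded, analyzed bool) {
	analyzedVal, has := colAnalyzed[id]
	// Has=false, HasAnalyzed=false: no storage record at all, nothing to load.
	// Has=true,  HasAnalyzed=true:  analyzed stats exist, load them from storage.
	// Has=true,  HasAnalyzed=false: only an uninitialized record exists, so the
	// caller creates a fake object for pseudo estimation.
	return has, has && analyzedVal
}

func main() {
	m := map[int64]bool{1: true, 2: false} // col 1 analyzed, col 2 only recorded
	for _, id := range []int64{1, 2, 3} {
		needed, analyzed := loadDecision(m, id)
		fmt.Printf("col %d: loadNeeded=%v analyzed=%v\n", id, needed, analyzed)
	}
}

Run against the map above, this prints loadNeeded=true/analyzed=true for col 1, loadNeeded=true/analyzed=false for col 2, and loadNeeded=false/analyzed=false for col 3 — the same three outcomes the new test exercises for columns a/b, d, and c respectively.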