From 35a2237149e99238bcc96e0bf8b62434fed70d71 Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Tue, 9 Jul 2024 23:31:21 +0800 Subject: [PATCH] update --- pkg/planner/cardinality/selectivity_test.go | 32 ++++++++++--------- pkg/statistics/handle/bootstrap.go | 6 ---- pkg/statistics/handle/storage/read.go | 32 +++++++++++++++---- .../handle/syncload/stats_syncload.go | 7 ++-- pkg/statistics/table.go | 17 +++++----- tests/integrationtest/r/executor/show.result | 2 +- 6 files changed, 56 insertions(+), 40 deletions(-) diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go index d4863252aba641..f917f0a0b8c11b 100644 --- a/pkg/planner/cardinality/selectivity_test.go +++ b/pkg/planner/cardinality/selectivity_test.go @@ -1313,31 +1313,33 @@ func TestBuiltinInEstWithoutStats(t *testing.T) { h := dom.StatsHandle() tk.MustExec("use test") - tk.MustExec("create table t(a int)") + tk.MustExec("create table t(a int, b int)") require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) - tk.MustExec("insert into t values(1), (2), (3), (4), (5), (6), (7), (8), (9), (10)") + tk.MustExec("insert into t values(1,1), (2,2), (3,3), (4,4), (5,5), (6,6), (7,7), (8,8), (9,9), (10,10)") require.NoError(t, h.DumpStatsDeltaToKV(true)) is := dom.InfoSchema() require.NoError(t, h.Update(is)) - - tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows( + expectedA := testkit.Rows( "TableReader 0.08 root data:Selection", "└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)", " └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo", - )) + ) + expectedB := testkit.Rows( + "TableReader 0.08 root data:Selection", + "└─Selection 0.08 cop[tikv] in(test.t.b, 1, 2, 3, 4, 5, 6, 7, 8)", + " └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo", + ) + tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedA) + // 
try again with other column + tk.MustQuery("explain format='brief' select * from t where b in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedB) h.Clear() require.NoError(t, h.InitStatsLite(is)) - tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows( - "TableReader 0.08 root data:Selection", - "└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)", - " └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo", - )) + tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedA) + tk.MustQuery("explain format='brief' select * from t where b in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedB) + h.Clear() require.NoError(t, h.InitStats(is)) - tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows( - "TableReader 0.08 root data:Selection", - "└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)", - " └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo", - )) + tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedA) + tk.MustQuery("explain format='brief' select * from t where b in (1, 2, 3, 4, 5, 6, 7, 8)").Check(expectedB) } diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go index 24e0c4b0d998af..951e8b5bc8773f 100644 --- a/pkg/statistics/handle/bootstrap.go +++ b/pkg/statistics/handle/bootstrap.go @@ -194,12 +194,6 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, cache stats id, ndv, nullCount, version, totColSize := row.GetInt64(2), row.GetInt64(3), row.GetInt64(5), row.GetUint64(4), row.GetInt64(7) lastAnalyzePos := row.GetDatum(11, types.NewFieldType(mysql.TypeBlob)) tbl, _ := h.TableInfoByID(is, table.PhysicalID) - for _, col := range tbl.Meta().Columns { - table.ColAndIdxExistenceMap.InsertCol(col.ID, col, statsVer != statistics.Version0) - } - for _, col := 
range tbl.Meta().Indices { - table.ColAndIdxExistenceMap.InsertIndex(col.ID, col, statsVer != statistics.Version0) - } if row.GetInt64(1) > 0 { var idxInfo *model.IndexInfo for _, idx := range tbl.Meta().Indices { diff --git a/pkg/statistics/handle/storage/read.go b/pkg/statistics/handle/storage/read.go index 410f66b53c63a6..69dca70db9d509 100644 --- a/pkg/statistics/handle/storage/read.go +++ b/pkg/statistics/handle/storage/read.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/tidb/pkg/config" + "github.com/pingcap/tidb/pkg/infoschema" "github.com/pingcap/tidb/pkg/kv" "github.com/pingcap/tidb/pkg/parser/ast" "github.com/pingcap/tidb/pkg/parser/model" @@ -559,14 +560,14 @@ func LoadHistogram(sctx sessionctx.Context, tableID int64, isIndex int, histID i } // LoadNeededHistograms will load histograms for those needed columns/indices. -func LoadNeededHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, loadFMSketch bool) (err error) { +func LoadNeededHistograms(sctx sessionctx.Context, statsHandle statstypes.StatsHandle, loadFMSketch bool) (err error) { items := asyncload.AsyncLoadHistogramNeededItems.AllItems() for _, item := range items { if !item.IsIndex { - err = loadNeededColumnHistograms(sctx, statsCache, item.TableItemID, loadFMSketch, item.FullLoad) + err = loadNeededColumnHistograms(sctx, statsHandle, item.TableItemID, loadFMSketch, item.FullLoad) } else { // Index is always full load. 
- err = loadNeededIndexHistograms(sctx, statsCache, item.TableItemID, loadFMSketch) + err = loadNeededIndexHistograms(sctx, statsHandle, item.TableItemID, loadFMSketch) } if err != nil { return err @@ -602,8 +603,8 @@ func CleanFakeItemsForShowHistInFlights(statsCache statstypes.StatsCache) int { return reallyNeeded } -func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.StatsCache, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) { - tbl, ok := statsCache.Get(col.TableID) +func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes.StatsHandle, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) { + tbl, ok := statsHandle.Get(col.TableID) if !ok { return nil } @@ -613,7 +614,19 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S asyncload.AsyncLoadHistogramNeededItems.Delete(col) return nil } + isUpdateColAndIdxExistenceMap := false colInfo = tbl.ColAndIdxExistenceMap.GetCol(col.ID) + if colInfo == nil { + // Now, we cannot init the column info in the ColAndIdxExistenceMap when lite-init-stats is disabled, + // so we have to get the column info from the domain. + is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) + tblInfo, ok := statsHandle.TableInfoByID(is, col.TableID) + if !ok { + return nil + } + colInfo = tblInfo.Meta().GetColumnByID(col.ID) + isUpdateColAndIdxExistenceMap = true + } hg, _, statsVer, _, err := HistMetaFromStorageWithHighPriority(sctx, &col, colInfo) if hg == nil || err != nil { asyncload.AsyncLoadHistogramNeededItems.Delete(col) @@ -652,7 +665,7 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S } // Reload the latest stats cache, otherwise the `updateStatsCache` may fail with high probability, because functions // like `GetPartitionStats` called in `fmSketchFromStorage` would have modified the stats cache already. 
- tbl, ok = statsCache.Get(col.TableID) + tbl, ok = statsHandle.Get(col.TableID) if !ok { return nil } @@ -667,9 +680,14 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S if statsVer != statistics.Version0 { tbl.StatsVer = int(statsVer) } + if isUpdateColAndIdxExistenceMap { + tbl.ColAndIdxExistenceMap.InsertCol(col.ID, colInfo, true) + } + } else if isUpdateColAndIdxExistenceMap { + tbl.ColAndIdxExistenceMap.InsertCol(col.ID, colInfo, false) } tbl.SetCol(col.ID, colHist) - statsCache.UpdateStatsCache([]*statistics.Table{tbl}, nil) + statsHandle.UpdateStatsCache([]*statistics.Table{tbl}, nil) asyncload.AsyncLoadHistogramNeededItems.Delete(col) if col.IsSyncLoadFailed { logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.", diff --git a/pkg/statistics/handle/syncload/stats_syncload.go b/pkg/statistics/handle/syncload/stats_syncload.go index 2f9a769dcae1f2..0c52699027f75f 100644 --- a/pkg/statistics/handle/syncload/stats_syncload.go +++ b/pkg/statistics/handle/syncload/stats_syncload.go @@ -323,12 +323,13 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err if col != nil { wrapper.colInfo = col.Info } else if colInfo := tbl.ColAndIdxExistenceMap.GetCol(item.ID); colInfo != nil { - wrapper.colInfo = tbl.ColAndIdxExistenceMap.GetCol(item.ID) + wrapper.colInfo = colInfo } else { + // Now, we cannot init the column info in the ColAndIdxExistenceMap when lite-init-stats is disabled, + // so we have to get the column info from the domain. is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) tblInfo, ok := s.statsHandle.TableInfoByID(is, item.TableID) if !ok { - fmt.Println("fuck") return nil } wrapper.colInfo = tblInfo.Meta().GetColumnByID(item.ID) @@ -557,6 +558,8 @@ func (s *statsSyncLoad) updateCachedItem(item model.TableItemID, colHist *statis // If the column is analyzed we refresh the map for the possible change. 
if colHist.StatsAvailable() { tbl.ColAndIdxExistenceMap.InsertCol(item.ID, colHist.Info, true) + } else { + tbl.ColAndIdxExistenceMap.InsertCol(item.ID, colHist.Info, false) } // All the objects shares the same stats version. Update it here. if colHist.StatsVer != statistics.Version0 { diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index 84aecc592dbf97..1f52dbfba56a8f 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -787,23 +787,22 @@ func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool if t.Pseudo { return nil, false, false } - if len(t.columns) == 0 { + // when we use non-lite init stats, it cannot init the stats for common columns. + // so we need to force to load the stats. + col, ok := t.columns[id] + if !ok { return nil, true, true } - col, ok := t.columns[id] hasAnalyzed := t.ColAndIdxExistenceMap.HasAnalyzed(id, false) // If it's not analyzed yet. if !hasAnalyzed { // If we don't have it in memory, we create a fake hist for pseudo estimation (see handleOneItemTask()). - if !ok { - // If we don't have this column. We skip it. - // It's something ridiculous. But it's possible that the stats don't have some ColumnInfo. - // We need to find a way to maintain it more correctly. - return nil, false, true - } + // If we don't have this column. We skip it. + // It's something ridiculous. But it's possible that the stats don't have some ColumnInfo. + // We need to find a way to maintain it more correctly. // Otherwise we don't need to load it. 
- return nil, false, false + return nil, t.ColAndIdxExistenceMap.Has(id, false), false } // Restore the condition from the simplified form: diff --git a/tests/integrationtest/r/executor/show.result b/tests/integrationtest/r/executor/show.result index 22bfede1fb8366..8d5a1674e8f9f9 100644 --- a/tests/integrationtest/r/executor/show.result +++ b/tests/integrationtest/r/executor/show.result @@ -147,7 +147,7 @@ t CREATE TABLE `t` ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin /*T![ttl] TTL=`created_at` + INTERVAL 100 YEAR */ /*T![ttl] TTL_ENABLE='ON' */ /*T![ttl] TTL_JOB_INTERVAL='1d' */ show histograms_in_flight; HistogramsInFlight -0 +2 show open tables; Database Table In_use Name_locked show open tables in executor__show;