diff --git a/pkg/planner/cardinality/BUILD.bazel b/pkg/planner/cardinality/BUILD.bazel index af8f1416a89a4..1ce4d3e41b678 100644 --- a/pkg/planner/cardinality/BUILD.bazel +++ b/pkg/planner/cardinality/BUILD.bazel @@ -59,7 +59,7 @@ go_test( data = glob(["testdata/**"]), embed = [":cardinality"], flaky = True, - shard_count = 27, + shard_count = 28, deps = [ "//pkg/config", "//pkg/domain", diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go index b9503ef045462..2c0ce384f49af 100644 --- a/pkg/planner/cardinality/selectivity_test.go +++ b/pkg/planner/cardinality/selectivity_test.go @@ -1310,3 +1310,39 @@ func TestSubsetIdxCardinality(t *testing.T) { testKit.MustQuery(input[i]).Check(testkit.Rows(output[i].Result...)) } } + +func TestBuiltinInEstWithoutStats(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + h := dom.StatsHandle() + + tk.MustExec("use test") + tk.MustExec("create table t(a int)") + require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) + tk.MustExec("insert into t values(1), (2), (3), (4), (5), (6), (7), (8), (9), (10)") + require.NoError(t, h.DumpStatsDeltaToKV(true)) + is := dom.InfoSchema() + require.NoError(t, h.Update(is)) + + tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows( + "TableReader 0.08 root data:Selection", + "└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)", + " └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo", + )) + + h.Clear() + require.NoError(t, h.InitStatsLite(is)) + tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows( + "TableReader 0.08 root data:Selection", + "└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)", + " └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo", + )) + + h.Clear() + require.NoError(t, h.InitStats(is)) + tk.MustQuery("explain format='brief' select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8)").Check(testkit.Rows( + "TableReader 0.08 root data:Selection", + "└─Selection 0.08 cop[tikv] in(test.t.a, 1, 2, 3, 4, 5, 6, 7, 8)", + " └─TableFullScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo", + )) +} diff --git a/pkg/planner/core/collect_column_stats_usage.go b/pkg/planner/core/collect_column_stats_usage.go index 684ccc86deb4a..0d166d380192b 100644 --- a/pkg/planner/core/collect_column_stats_usage.go +++ b/pkg/planner/core/collect_column_stats_usage.go @@ -195,11 +195,21 @@ func (c *columnStatsUsageCollector) addHistNeededColumns(ds *DataSource) { colIDSet := intset.NewFastIntSet() for _, col := range columns { + // If the column is plan-generated one, Skip it. + // TODO: we may need to consider the ExtraHandle. + if col.ID < 0 { + continue + } tblColID := model.TableItemID{TableID: ds.physicalTableID, ID: col.ID, IsIndex: false} colIDSet.Insert(int(col.ID)) c.histNeededCols[tblColID] = true } for _, col := range ds.Columns { + // If the column is plan-generated one, Skip it. + // TODO: we may need to consider the ExtraHandle. + if col.ID < 0 { + continue + } if !colIDSet.Has(int(col.ID)) && !col.Hidden { tblColID := model.TableItemID{TableID: ds.physicalTableID, ID: col.ID, IsIndex: false} if _, ok := c.histNeededCols[tblColID]; ok { diff --git a/pkg/statistics/handle/storage/read.go b/pkg/statistics/handle/storage/read.go index b49b4e559299d..7586726d3c5f8 100644 --- a/pkg/statistics/handle/storage/read.go +++ b/pkg/statistics/handle/storage/read.go @@ -572,7 +572,9 @@ func CleanFakeItemsForShowHistInFlights(statsCache statstypes.StatsCache) int { if item.IsIndex { _, loadNeeded = tbl.IndexIsLoadNeeded(item.ID) } else { - _, loadNeeded = tbl.ColumnIsLoadNeeded(item.ID, true) + var analyzed bool + _, loadNeeded, analyzed = tbl.ColumnIsLoadNeeded(item.ID, true) + loadNeeded = loadNeeded && analyzed } if !loadNeeded { statistics.HistogramNeededItems.Delete(item) @@ -589,8 +591,8 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache statstypes.S return nil } var colInfo *model.ColumnInfo - _, loadNeeded := tbl.ColumnIsLoadNeeded(col.ID, true) - if !loadNeeded { + _, loadNeeded, analyzed := tbl.ColumnIsLoadNeeded(col.ID, true) + if !loadNeeded || !analyzed { statistics.HistogramNeededItems.Delete(col) return nil } diff --git a/pkg/statistics/handle/syncload/stats_syncload.go b/pkg/statistics/handle/syncload/stats_syncload.go index ec89ed7b52361..3e9db7716182e 100644 --- a/pkg/statistics/handle/syncload/stats_syncload.go +++ b/pkg/statistics/handle/syncload/stats_syncload.go @@ -155,7 +155,7 @@ func (s *statsSyncLoad) removeHistLoadedColumns(neededItems []model.StatsLoadIte } continue } - _, loadNeeded := tbl.ColumnIsLoadNeeded(item.ID, item.FullLoad) + _, loadNeeded, _ := tbl.ColumnIsLoadNeeded(item.ID, item.FullLoad) if loadNeeded { remainedItems = append(remainedItems, item) } @@ -270,7 +270,7 @@ func (s *statsSyncLoad) handleOneItemTask(sctx sessionctx.Context, task *statsty wrapper.idxInfo = tbl.ColAndIdxExistenceMap.GetIndex(item.ID) } } else { - col, loadNeeded := tbl.ColumnIsLoadNeeded(item.ID, task.Item.FullLoad) + col, loadNeeded, analyzed := tbl.ColumnIsLoadNeeded(item.ID, task.Item.FullLoad) if !loadNeeded { return result, nil } @@ -279,6 +279,20 @@ func (s *statsSyncLoad) handleOneItemTask(sctx sessionctx.Context, task *statsty } else { wrapper.colInfo = tbl.ColAndIdxExistenceMap.GetCol(item.ID) } + // If this column is not analyzed yet and we don't have it in memory. + // We create a fake one for the pseudo estimation. + if loadNeeded && !analyzed { + wrapper.col = &statistics.Column{ + PhysicalID: item.TableID, + Info: wrapper.colInfo, + Histogram: *statistics.NewHistogram(item.ID, 0, 0, 0, &wrapper.colInfo.FieldType, 0, 0), + IsHandle: tbl.IsPkIsHandle && mysql.HasPriKeyFlag(wrapper.colInfo.GetFlag()), + } + if s.updateCachedItem(item, wrapper.col, wrapper.idx, task.Item.FullLoad) { + return result, nil + } + return nil, nil + } } t := time.Now() needUpdate := false diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index f954dc98c7057..83b649631cd54 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -626,13 +626,25 @@ func (t *Table) GetStatsHealthy() (int64, bool) { // The Column should be visible in the table and really has analyzed statistics in the stroage. // Also, if the stats has been loaded into the memory, we also don't need to load it. // We return the Column together with the checking result, to avoid accessing the map multiple times. -func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool) { +// The first bool is whether we have it in memory. The second bool is whether this column has stats in the system table or not. +func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool) { + if t.Pseudo { + return nil, false, false + } col, ok := t.Columns[id] hasAnalyzed := t.ColAndIdxExistenceMap.HasAnalyzed(id, false) - // If it's not analyzed yet. Don't need to load it. + // If it's not analyzed yet. if !hasAnalyzed { - return nil, false + // If we don't have it in memory, we create a fake hist for pseudo estimation (see handleOneItemTask()). + if !ok { + // If we don't have this column. We skip it. + // It's something ridiculous. But it's possible that the stats don't have some ColumnInfo. + // We need to find a way to maintain it more correctly. + return nil, t.ColAndIdxExistenceMap.Has(id, false), false + } + // Otherwise we don't need to load it. + return nil, false, false } // Restore the condition from the simplified form: @@ -640,11 +652,11 @@ func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool) { // 2. ok && hasAnalyzed && fullLoad && !col.IsFullLoad => need load // 3. ok && hasAnalyzed && !fullLoad && !col.statsInitialized => need load if !ok || (fullLoad && !col.IsFullLoad()) || (!fullLoad && !col.statsInitialized) { - return col, true + return col, true, true } // Otherwise don't need load it. - return col, false + return col, false, true } // IndexIsLoadNeeded checks whether the index needs trigger the async/sync load.