Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: do not analyze non-existent tables anymore #57244

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ func NewAnalysisJobFactory(sctx sessionctx.Context, autoAnalyzeRatio float64, cu

// CreateNonPartitionedTableAnalysisJob creates a job for non-partitioned tables.
func (f *AnalysisJobFactory) CreateNonPartitionedTableAnalysisJob(
tableSchema string,
tblInfo *model.TableInfo,
tblStats *statistics.Table,
) AnalysisJob {
Expand All @@ -78,8 +77,6 @@ func (f *AnalysisJobFactory) CreateNonPartitionedTableAnalysisJob(
}

return NewNonPartitionedTableAnalysisJob(
tableSchema,
tblInfo.Name.O,
tblInfo.ID,
indexes,
tableStatsVer,
Expand All @@ -91,10 +88,8 @@ func (f *AnalysisJobFactory) CreateNonPartitionedTableAnalysisJob(

// CreateStaticPartitionAnalysisJob creates a job for static partitions.
func (f *AnalysisJobFactory) CreateStaticPartitionAnalysisJob(
tableSchema string,
globalTblInfo *model.TableInfo,
partitionID int64,
partitionName string,
partitionStats *statistics.Table,
) AnalysisJob {
if !partitionStats.IsEligibleForAnalysis() {
Expand All @@ -117,10 +112,7 @@ func (f *AnalysisJobFactory) CreateStaticPartitionAnalysisJob(
}

return NewStaticPartitionTableAnalysisJob(
tableSchema,
globalTblInfo.Name.O,
globalTblInfo.ID,
partitionName,
partitionID,
indexes,
tableStatsVer,
Expand All @@ -132,7 +124,6 @@ func (f *AnalysisJobFactory) CreateStaticPartitionAnalysisJob(

// CreateDynamicPartitionedTableAnalysisJob creates a job for dynamic partitioned tables.
func (f *AnalysisJobFactory) CreateDynamicPartitionedTableAnalysisJob(
tableSchema string,
globalTblInfo *model.TableInfo,
globalTblStats *statistics.Table,
partitionStats map[PartitionIDAndName]*statistics.Table,
Expand All @@ -145,21 +136,19 @@ func (f *AnalysisJobFactory) CreateDynamicPartitionedTableAnalysisJob(
tableStatsVer := f.sctx.GetSessionVars().AnalyzeVersion
statistics.CheckAnalyzeVerOnTable(globalTblStats, &tableStatsVer)

avgChange, avgSize, minLastAnalyzeDuration, partitionNames := f.CalculateIndicatorsForPartitions(globalTblStats, partitionStats)
avgChange, avgSize, minLastAnalyzeDuration, partitionIDs := f.CalculateIndicatorsForPartitions(globalTblStats, partitionStats)
partitionIndexes := f.CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(globalTblInfo, partitionStats)

// No need to analyze.
// We perform a separate check because users may set the auto analyze ratio to 0,
// yet still wish to analyze newly added indexes and tables that have not been analyzed.
if len(partitionNames) == 0 && len(partitionIndexes) == 0 {
if len(partitionIDs) == 0 && len(partitionIndexes) == 0 {
return nil
}

return NewDynamicPartitionedTableAnalysisJob(
tableSchema,
globalTblInfo.Name.O,
globalTblInfo.ID,
partitionNames,
partitionIDs,
partitionIndexes,
tableStatsVer,
avgChange,
Expand Down Expand Up @@ -225,26 +214,26 @@ func (f *AnalysisJobFactory) FindLastAnalyzeTime(tblStats *statistics.Table) tim
}

// CheckIndexesNeedAnalyze checks if the indexes need to be analyzed.
func (*AnalysisJobFactory) CheckIndexesNeedAnalyze(tblInfo *model.TableInfo, tblStats *statistics.Table) []string {
func (*AnalysisJobFactory) CheckIndexesNeedAnalyze(tblInfo *model.TableInfo, tblStats *statistics.Table) map[int64]struct{} {
// If the table has not been analyzed, the whole table needs to be analyzed,
// so there is no need to check individual indexes.
if !tblStats.IsAnalyzed() {
return nil
}

indexes := make([]string, 0, len(tblInfo.Indices))
indexIDs := make(map[int64]struct{}, len(tblInfo.Indices))
// Check for indexes whose stats are missing.
for _, idx := range tblInfo.Indices {
if idxStats := tblStats.GetIdx(idx.ID); idxStats == nil && !tblStats.ColAndIdxExistenceMap.HasAnalyzed(idx.ID, true) && idx.State == model.StatePublic {
// Vector index doesn't have stats currently.
if idx.VectorInfo != nil {
continue
}
indexes = append(indexes, idx.Name.O)
indexIDs[idx.ID] = struct{}{}
}
}

return indexes
return indexIDs
}

// CalculateIndicatorsForPartitions calculates the average change percentage,
Expand All @@ -259,12 +248,12 @@ func (f *AnalysisJobFactory) CalculateIndicatorsForPartitions(
avgChange float64,
avgSize float64,
avgLastAnalyzeDuration time.Duration,
partitionNames []string,
partitionIDs map[int64]struct{},
) {
totalChangePercent := 0.0
totalSize := 0.0
count := 0.0
partitionNames = make([]string, 0, len(partitionStats))
partitionIDs = make(map[int64]struct{}, len(partitionStats))
cols := float64(globalStats.ColAndIdxExistenceMap.ColNum())
intest.Assert(cols != 0, "Column count should not be 0")
totalLastAnalyzeDuration := time.Duration(0)
Expand All @@ -282,18 +271,18 @@ func (f *AnalysisJobFactory) CalculateIndicatorsForPartitions(
totalSize += float64(tblStats.RealtimeCount) * cols
lastAnalyzeDuration := f.GetTableLastAnalyzeDuration(tblStats)
totalLastAnalyzeDuration += lastAnalyzeDuration
partitionNames = append(partitionNames, pIDAndName.Name)
partitionIDs[pIDAndName.ID] = struct{}{}
count++
}
if len(partitionNames) == 0 {
return 0, 0, 0, partitionNames
if len(partitionIDs) == 0 {
return 0, 0, 0, partitionIDs
}

avgChange = totalChangePercent / count
avgSize = totalSize / count
avgLastAnalyzeDuration = totalLastAnalyzeDuration / time.Duration(count)

return avgChange, avgSize, avgLastAnalyzeDuration, partitionNames
return avgChange, avgSize, avgLastAnalyzeDuration, partitionIDs
}

// CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable checks if the indexes of the partitioned table need to be analyzed.
Expand All @@ -302,8 +291,8 @@ func (f *AnalysisJobFactory) CalculateIndicatorsForPartitions(
func (*AnalysisJobFactory) CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(
tblInfo *model.TableInfo,
partitionStats map[PartitionIDAndName]*statistics.Table,
) map[string][]string {
partitionIndexes := make(map[string][]string, len(tblInfo.Indices))
) map[int64][]int64 {
partitionIndexes := make(map[int64][]int64, len(tblInfo.Indices))

for _, idx := range tblInfo.Indices {
// No need to analyze the index if it's not public.
Expand All @@ -317,15 +306,15 @@ func (*AnalysisJobFactory) CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(
}

// Find all the partitions in which this index needs to be analyzed.
names := make([]string, 0, len(partitionStats))
ids := make([]int64, 0, len(partitionStats))
for pIDAndName, tblStats := range partitionStats {
if idxStats := tblStats.GetIdx(idx.ID); idxStats == nil && !tblStats.ColAndIdxExistenceMap.HasAnalyzed(idx.ID, true) {
names = append(names, pIDAndName.Name)
ids = append(ids, pIDAndName.ID)
}
}

if len(names) > 0 {
partitionIndexes[idx.Name.O] = names
if len(ids) > 0 {
partitionIndexes[idx.ID] = ids
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func TestCheckIndexesNeedAnalyze(t *testing.T) {
name string
tblInfo *model.TableInfo
tblStats *statistics.Table
want []string
want map[int64]struct{}
}{
{
name: "Test Table not analyzed",
Expand Down Expand Up @@ -168,7 +168,7 @@ func TestCheckIndexesNeedAnalyze(t *testing.T) {
ColAndIdxExistenceMap: analyzedMap,
LastAnalyzeVersion: 1,
},
want: []string{"index1"},
want: map[int64]struct{}{1: {}},
},
}

Expand Down Expand Up @@ -203,7 +203,7 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) {
wantAvgChangePercentage float64
wantAvgSize float64
wantAvgLastAnalyzeDuration time.Duration
wantPartitions []string
wantPartitions map[int64]struct{}
}{
{
name: "Test Table not analyzed",
Expand Down Expand Up @@ -241,7 +241,7 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) {
wantAvgChangePercentage: 1,
wantAvgSize: 2002,
wantAvgLastAnalyzeDuration: 1800 * time.Second,
wantPartitions: []string{"p0", "p1"},
wantPartitions: map[int64]struct{}{1: {}, 2: {}},
},
{
name: "Test Table analyzed and only one partition meets the threshold",
Expand Down Expand Up @@ -303,7 +303,7 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) {
wantAvgChangePercentage: 2,
wantAvgSize: 2002,
wantAvgLastAnalyzeDuration: 24 * time.Hour,
wantPartitions: []string{"p0"},
wantPartitions: map[int64]struct{}{1: {}},
},
{
name: "No partition meets the threshold",
Expand Down Expand Up @@ -365,7 +365,7 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) {
wantAvgChangePercentage: 0,
wantAvgSize: 0,
wantAvgLastAnalyzeDuration: 0,
wantPartitions: []string{},
wantPartitions: map[int64]struct{}{},
},
}
for _, tt := range tests {
Expand All @@ -382,9 +382,6 @@ func TestCalculateIndicatorsForPartitions(t *testing.T) {
require.Equal(t, tt.wantAvgChangePercentage, gotAvgChangePercentage)
require.Equal(t, tt.wantAvgSize, gotAvgSize)
require.Equal(t, tt.wantAvgLastAnalyzeDuration, gotAvgLastAnalyzeDuration)
// Sort the partitions.
sort.Strings(tt.wantPartitions)
sort.Strings(gotPartitions)
require.Equal(t, tt.wantPartitions, gotPartitions)
})
}
Expand Down Expand Up @@ -436,16 +433,20 @@ func TestCheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(t *testing.T) {

factory := priorityqueue.NewAnalysisJobFactory(nil, 0, 0)
partitionIndexes := factory.CheckNewlyAddedIndexesNeedAnalyzeForPartitionedTable(&tblInfo, partitionStats)
expected := map[string][]string{"index1": {"p0", "p1"}, "index2": {"p0"}}
expected := map[int64][]int64{1: {1, 2}, 2: {1}}
require.Equal(t, len(expected), len(partitionIndexes))

for k, v := range expected {
sort.Strings(v)
if val, ok := partitionIndexes[k]; ok {
sort.Strings(val)
sort.Slice(val, func(i, j int) bool {
return val[i] > val[j]
})
sort.Slice(v, func(i, j int) bool {
return v[i] > v[j]
})
require.Equal(t, v, val)
} else {
require.Fail(t, "key not found in partitionIndexes: "+k)
require.Failf(t, "key not found in partitionIndexes", "key: %d", k)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,19 +128,21 @@ func TestGetSpecialEvent(t *testing.T) {
pc := priorityqueue.NewPriorityCalculator()

jobWithIndex1 := &priorityqueue.DynamicPartitionedTableAnalysisJob{
PartitionIndexes: map[string][]string{
"index1": {"p1", "p2"},
PartitionIndexIDs: map[int64][]int64{
1: {1, 2},
},
}
require.Equal(t, priorityqueue.EventNewIndex, pc.GetSpecialEvent(jobWithIndex1))

jobWithIndex2 := &priorityqueue.NonPartitionedTableAnalysisJob{
Indexes: []string{"index1"},
IndexIDs: map[int64]struct{}{
1: {},
},
}
require.Equal(t, priorityqueue.EventNewIndex, pc.GetSpecialEvent(jobWithIndex2))

jobWithoutIndex := &priorityqueue.DynamicPartitionedTableAnalysisJob{
PartitionIndexes: map[string][]string{},
PartitionIndexIDs: map[int64][]int64{},
}
require.Equal(t, priorityqueue.EventNone, pc.GetSpecialEvent(jobWithoutIndex))
}
Original file line number Diff line number Diff line change
Expand Up @@ -240,12 +240,12 @@ func (j *TestJob) Analyze(statsHandle types.StatsHandle, sysProcTracker sysproct
}

// RegisterSuccessHook implements AnalysisJob.
func (j *TestJob) RegisterSuccessHook(hook priorityqueue.JobHook) {
func (j *TestJob) RegisterSuccessHook(hook priorityqueue.SuccessJobHook) {
panic("unimplemented")
}

// RegisterFailureHook implements AnalysisJob.
func (j *TestJob) RegisterFailureHook(hook priorityqueue.JobHook) {
func (j *TestJob) RegisterFailureHook(hook priorityqueue.FailureJobHook) {
panic("unimplemented")
}

Expand All @@ -254,8 +254,8 @@ func (j *TestJob) GetWeight() float64 {
panic("unimplemented")
}

// IsValidToAnalyze implements AnalysisJob.
func (j *TestJob) IsValidToAnalyze(sctx sessionctx.Context) (bool, string) {
// ValidateAndPrepare implements AnalysisJob.
func (j *TestJob) ValidateAndPrepare(sctx sessionctx.Context) (bool, string) {
panic("unimplemented")
}

Expand Down
Loading