From 4279cd6f230c6825a677b45a8decc79d36bb118f Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Fri, 24 Nov 2023 21:56:43 +0800 Subject: [PATCH] *: avoid copy in the SortSampleItems (#48683) ref pingcap/tidb#47275 --- pkg/executor/analyze_col.go | 2 +- pkg/executor/analyze_col_v2.go | 2 +- pkg/statistics/builder.go | 14 +++++++++++--- pkg/statistics/builder_ext_stats.go | 6 ++++-- pkg/statistics/builder_test.go | 2 +- pkg/statistics/main_test.go | 5 ++--- pkg/statistics/sample.go | 9 +++------ pkg/statistics/sample_test.go | 2 +- pkg/statistics/statistics_test.go | 2 +- 9 files changed, 25 insertions(+), 19 deletions(-) diff --git a/pkg/executor/analyze_col.go b/pkg/executor/analyze_col.go index 3d592e1506bb8..5727d4ef2e7c2 100644 --- a/pkg/executor/analyze_col.go +++ b/pkg/executor/analyze_col.go @@ -266,7 +266,7 @@ func (e *AnalyzeColumnsExec) buildStats(ranges []*ranger.Range, needExtStats boo if e.StatsVersion < 2 { hg, err = statistics.BuildColumn(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), col.ID, collectors[i], &col.FieldType) } else { - hg, topn, err = statistics.BuildHistAndTopN(e.ctx, int(e.opts[ast.AnalyzeOptNumBuckets]), int(e.opts[ast.AnalyzeOptNumTopN]), col.ID, collectors[i], &col.FieldType, true, nil) + hg, topn, err = statistics.BuildHistAndTopN(e.ctx, int(e.opts[ast.AnalyzeOptNumBuckets]), int(e.opts[ast.AnalyzeOptNumTopN]), col.ID, collectors[i], &col.FieldType, true, nil, true) topNs = append(topNs, topn) } if err != nil { diff --git a/pkg/executor/analyze_col_v2.go b/pkg/executor/analyze_col_v2.go index f2a66118628a9..279b2ea997e91 100644 --- a/pkg/executor/analyze_col_v2.go +++ b/pkg/executor/analyze_col_v2.go @@ -850,7 +850,7 @@ workLoop: e.memTracker.Release(collector.MemSize) } } - hist, topn, err := statistics.BuildHistAndTopN(e.ctx, int(e.opts[ast.AnalyzeOptNumBuckets]), int(e.opts[ast.AnalyzeOptNumTopN]), task.id, collector, task.tp, task.isColumn, e.memTracker) + hist, topn, err := statistics.BuildHistAndTopN(e.ctx, int(e.opts[ast.AnalyzeOptNumBuckets]), int(e.opts[ast.AnalyzeOptNumTopN]), task.id, collector, task.tp, task.isColumn, e.memTracker, e.ctx.GetSessionVars().EnableExtendedStats) if err != nil { resultCh <- err releaseCollectorMemory() diff --git a/pkg/statistics/builder.go b/pkg/statistics/builder.go index 914760f0980c5..8d04945c9cb36 100644 --- a/pkg/statistics/builder.go +++ b/pkg/statistics/builder.go @@ -127,7 +127,7 @@ func BuildColumnHist(ctx sessionctx.Context, numBuckets, id int64, collector *Sa } sc := ctx.GetSessionVars().StmtCtx samples := collector.Samples - samples, err := SortSampleItems(sc, samples) + err := sortSampleItems(sc, samples) if err != nil { return nil, err } @@ -241,6 +241,7 @@ func BuildHistAndTopN( tp *types.FieldType, isColumn bool, memTracker *memory.Tracker, + needExtStats bool, ) (*Histogram, *TopN, error) { bufferedMemSize := int64(0) bufferedReleaseSize := int64(0) @@ -278,8 +279,15 @@ func BuildHistAndTopN( return NewHistogram(id, ndv, nullCount, 0, tp, 0, collector.TotalSize), nil, nil } sc := ctx.GetSessionVars().StmtCtx - samples := collector.Samples - samples, err := SortSampleItems(sc, samples) + var samples []*SampleItem + // if we need to build extended stats, we need to copy the samples to avoid modifying the original samples. + if needExtStats { + samples = make([]*SampleItem, len(collector.Samples)) + copy(samples, collector.Samples) + } else { + samples = collector.Samples + } + err := sortSampleItems(sc, samples) if err != nil { return nil, nil, err } diff --git a/pkg/statistics/builder_ext_stats.go b/pkg/statistics/builder_ext_stats.go index 4be47ffbfc2cd..098e6b35203e3 100644 --- a/pkg/statistics/builder_ext_stats.go +++ b/pkg/statistics/builder_ext_stats.go @@ -105,7 +105,7 @@ func fillExtStatsCorrVals(sctx sessionctx.Context, item *ExtendedStatsItem, cols sc := sctx.GetSessionVars().StmtCtx var err error - samplesX, err = SortSampleItems(sc, samplesX) + err = sortSampleItems(sc, samplesX) if err != nil { return nil } @@ -118,7 +118,9 @@ func fillExtStatsCorrVals(sctx sessionctx.Context, item *ExtendedStatsItem, cols itemY.Ordinal = i samplesYInXOrder = append(samplesYInXOrder, itemY) } - samplesYInYOrder, err := SortSampleItems(sc, samplesYInXOrder) + samplesYInYOrder := make([]*SampleItem, len(samplesYInXOrder)) + copy(samplesYInYOrder, samplesYInXOrder) + err = sortSampleItems(sc, samplesYInYOrder) if err != nil { return nil } diff --git a/pkg/statistics/builder_test.go b/pkg/statistics/builder_test.go index 54f2875c399cd..732b490004c1b 100644 --- a/pkg/statistics/builder_test.go +++ b/pkg/statistics/builder_test.go @@ -72,6 +72,6 @@ func BenchmarkBuildHistAndTopN(b *testing.B) { memoryTracker := memory.NewTracker(10, 1024*1024*1024) b.ResetTimer() for i := 0; i < b.N; i++ { - _, _, _ = BuildHistAndTopN(ctx, 256, 500, 0, collector, filedType, true, memoryTracker) + _, _, _ = BuildHistAndTopN(ctx, 256, 500, 0, collector, filedType, true, memoryTracker, false) } } diff --git a/pkg/statistics/main_test.go b/pkg/statistics/main_test.go index 0a460ffa6139c..6e50369e37b56 100644 --- a/pkg/statistics/main_test.go +++ b/pkg/statistics/main_test.go @@ -105,10 +105,9 @@ func createTestStatisticsSamples(t *testing.T) *testStatisticsSamples { } sc := stmtctx.NewStmtCtx() - var err error - s.samples, err = SortSampleItems(sc, samples) + err := sortSampleItems(sc, samples) require.NoError(t, err) - + s.samples = samples rc := &recordSet{ data: make([]types.Datum, s.count), count: s.count, diff --git a/pkg/statistics/sample.go b/pkg/statistics/sample.go index 5d056abd115d9..77d444e2163af 100644 --- a/pkg/statistics/sample.go +++ b/pkg/statistics/sample.go @@ -61,12 +61,9 @@ func CopySampleItems(items []*SampleItem) []*SampleItem { return n } -// SortSampleItems shallow copies and sorts a slice of SampleItem. -func SortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) ([]*SampleItem, error) { - sortedItems := make([]*SampleItem, len(items)) - copy(sortedItems, items) +func sortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) error { var err error - slices.SortStableFunc(sortedItems, func(i, j *SampleItem) int { + slices.SortStableFunc(items, func(i, j *SampleItem) int { var cmp int cmp, err = i.Value.Compare(sc.TypeCtx(), &j.Value, collate.GetBinaryCollator()) if err != nil { @@ -74,7 +71,7 @@ func SortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) ([]*Samp } return cmp }) - return sortedItems, err + return err } // SampleCollector will collect Samples and calculate the count and ndv of an attribute. diff --git a/pkg/statistics/sample_test.go b/pkg/statistics/sample_test.go index ea7bc8f47eb12..5153b3fa89fe2 100644 --- a/pkg/statistics/sample_test.go +++ b/pkg/statistics/sample_test.go @@ -180,7 +180,7 @@ func TestBuildStatsOnRowSample(t *testing.T) { TotalSize: int64(len(data)) * 8, } tp := types.NewFieldType(mysql.TypeLonglong) - hist, topN, err := BuildHistAndTopN(ctx, 5, 4, 1, collector, tp, true, nil) + hist, topN, err := BuildHistAndTopN(ctx, 5, 4, 1, collector, tp, true, nil, false) require.Nilf(t, err, "%+v", err) topNStr, err := topN.DecodedString(ctx, []byte{tp.GetType()}) require.NoError(t, err) diff --git a/pkg/statistics/statistics_test.go b/pkg/statistics/statistics_test.go index 2ff46f2104e0e..7655bf38131a6 100644 --- a/pkg/statistics/statistics_test.go +++ b/pkg/statistics/statistics_test.go @@ -537,7 +537,7 @@ func SubTestBuild() func(*testing.T) { count = col.LessRowCount(nil, types.NewIntDatum(1)) require.Equal(t, 5, int(count)) - colv2, topnv2, err := BuildHistAndTopN(ctx, int(bucketCount), topNCount, 2, collector, types.NewFieldType(mysql.TypeLonglong), true, nil) + colv2, topnv2, err := BuildHistAndTopN(ctx, int(bucketCount), topNCount, 2, collector, types.NewFieldType(mysql.TypeLonglong), true, nil, false) require.NoError(t, err) require.NotNil(t, topnv2.TopN) // The most common one's occurrence is 9990, the second most common one's occurrence is 30.