Skip to content

Commit

Permalink
*: avoid copy in the SortSampleItems (#48683)
Browse files (browse the repository at this point in the history)
ref #47275
  • Loading branch information
hawkingrei authored Nov 24, 2023
1 parent 8583ab5 commit 4279cd6
Show file tree
Hide file tree
Showing 9 changed files with 25 additions and 19 deletions.
2 changes: 1 addition & 1 deletion pkg/executor/analyze_col.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ func (e *AnalyzeColumnsExec) buildStats(ranges []*ranger.Range, needExtStats boo
if e.StatsVersion < 2 {
hg, err = statistics.BuildColumn(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), col.ID, collectors[i], &col.FieldType)
} else {
hg, topn, err = statistics.BuildHistAndTopN(e.ctx, int(e.opts[ast.AnalyzeOptNumBuckets]), int(e.opts[ast.AnalyzeOptNumTopN]), col.ID, collectors[i], &col.FieldType, true, nil)
hg, topn, err = statistics.BuildHistAndTopN(e.ctx, int(e.opts[ast.AnalyzeOptNumBuckets]), int(e.opts[ast.AnalyzeOptNumTopN]), col.ID, collectors[i], &col.FieldType, true, nil, true)
topNs = append(topNs, topn)
}
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/executor/analyze_col_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -850,7 +850,7 @@ workLoop:
e.memTracker.Release(collector.MemSize)
}
}
hist, topn, err := statistics.BuildHistAndTopN(e.ctx, int(e.opts[ast.AnalyzeOptNumBuckets]), int(e.opts[ast.AnalyzeOptNumTopN]), task.id, collector, task.tp, task.isColumn, e.memTracker)
hist, topn, err := statistics.BuildHistAndTopN(e.ctx, int(e.opts[ast.AnalyzeOptNumBuckets]), int(e.opts[ast.AnalyzeOptNumTopN]), task.id, collector, task.tp, task.isColumn, e.memTracker, e.ctx.GetSessionVars().EnableExtendedStats)
if err != nil {
resultCh <- err
releaseCollectorMemory()
Expand Down
14 changes: 11 additions & 3 deletions pkg/statistics/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func BuildColumnHist(ctx sessionctx.Context, numBuckets, id int64, collector *Sa
}
sc := ctx.GetSessionVars().StmtCtx
samples := collector.Samples
samples, err := SortSampleItems(sc, samples)
err := sortSampleItems(sc, samples)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -241,6 +241,7 @@ func BuildHistAndTopN(
tp *types.FieldType,
isColumn bool,
memTracker *memory.Tracker,
needExtStats bool,
) (*Histogram, *TopN, error) {
bufferedMemSize := int64(0)
bufferedReleaseSize := int64(0)
Expand Down Expand Up @@ -278,8 +279,15 @@ func BuildHistAndTopN(
return NewHistogram(id, ndv, nullCount, 0, tp, 0, collector.TotalSize), nil, nil
}
sc := ctx.GetSessionVars().StmtCtx
samples := collector.Samples
samples, err := SortSampleItems(sc, samples)
var samples []*SampleItem
// if we need to build extended stats, we need to copy the samples to avoid modifying the original samples.
if needExtStats {
samples = make([]*SampleItem, len(collector.Samples))
copy(samples, collector.Samples)
} else {
samples = collector.Samples
}
err := sortSampleItems(sc, samples)
if err != nil {
return nil, nil, err
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/statistics/builder_ext_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func fillExtStatsCorrVals(sctx sessionctx.Context, item *ExtendedStatsItem, cols
sc := sctx.GetSessionVars().StmtCtx

var err error
samplesX, err = SortSampleItems(sc, samplesX)
err = sortSampleItems(sc, samplesX)
if err != nil {
return nil
}
Expand All @@ -118,7 +118,9 @@ func fillExtStatsCorrVals(sctx sessionctx.Context, item *ExtendedStatsItem, cols
itemY.Ordinal = i
samplesYInXOrder = append(samplesYInXOrder, itemY)
}
samplesYInYOrder, err := SortSampleItems(sc, samplesYInXOrder)
samplesYInYOrder := make([]*SampleItem, len(samplesYInXOrder))
copy(samplesYInYOrder, samplesYInXOrder)
err = sortSampleItems(sc, samplesYInYOrder)
if err != nil {
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/statistics/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,6 @@ func BenchmarkBuildHistAndTopN(b *testing.B) {
memoryTracker := memory.NewTracker(10, 1024*1024*1024)
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, _, _ = BuildHistAndTopN(ctx, 256, 500, 0, collector, filedType, true, memoryTracker)
_, _, _ = BuildHistAndTopN(ctx, 256, 500, 0, collector, filedType, true, memoryTracker, false)
}
}
5 changes: 2 additions & 3 deletions pkg/statistics/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,9 @@ func createTestStatisticsSamples(t *testing.T) *testStatisticsSamples {
}
sc := stmtctx.NewStmtCtx()

var err error
s.samples, err = SortSampleItems(sc, samples)
err := sortSampleItems(sc, samples)
require.NoError(t, err)

s.samples = samples
rc := &recordSet{
data: make([]types.Datum, s.count),
count: s.count,
Expand Down
9 changes: 3 additions & 6 deletions pkg/statistics/sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,20 +61,17 @@ func CopySampleItems(items []*SampleItem) []*SampleItem {
return n
}

// SortSampleItems shallow copies and sorts a slice of SampleItem.
func SortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) ([]*SampleItem, error) {
sortedItems := make([]*SampleItem, len(items))
copy(sortedItems, items)
func sortSampleItems(sc *stmtctx.StatementContext, items []*SampleItem) error {
var err error
slices.SortStableFunc(sortedItems, func(i, j *SampleItem) int {
slices.SortStableFunc(items, func(i, j *SampleItem) int {
var cmp int
cmp, err = i.Value.Compare(sc.TypeCtx(), &j.Value, collate.GetBinaryCollator())
if err != nil {
return -1
}
return cmp
})
return sortedItems, err
return err
}

// SampleCollector will collect Samples and calculate the count and ndv of an attribute.
Expand Down
2 changes: 1 addition & 1 deletion pkg/statistics/sample_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ func TestBuildStatsOnRowSample(t *testing.T) {
TotalSize: int64(len(data)) * 8,
}
tp := types.NewFieldType(mysql.TypeLonglong)
hist, topN, err := BuildHistAndTopN(ctx, 5, 4, 1, collector, tp, true, nil)
hist, topN, err := BuildHistAndTopN(ctx, 5, 4, 1, collector, tp, true, nil, false)
require.Nilf(t, err, "%+v", err)
topNStr, err := topN.DecodedString(ctx, []byte{tp.GetType()})
require.NoError(t, err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/statistics/statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ func SubTestBuild() func(*testing.T) {
count = col.LessRowCount(nil, types.NewIntDatum(1))
require.Equal(t, 5, int(count))

colv2, topnv2, err := BuildHistAndTopN(ctx, int(bucketCount), topNCount, 2, collector, types.NewFieldType(mysql.TypeLonglong), true, nil)
colv2, topnv2, err := BuildHistAndTopN(ctx, int(bucketCount), topNCount, 2, collector, types.NewFieldType(mysql.TypeLonglong), true, nil, false)
require.NoError(t, err)
require.NotNil(t, topnv2.TopN)
// The most common one's occurrence is 9990, the second most common one's occurrence is 30.
Expand Down

0 comments on commit 4279cd6

Please sign in to comment.