Skip to content

Commit

Permalink
[processor/tailsampling] Late span age histogram should include sampl…
Browse files Browse the repository at this point in the history
…ed traces (open-telemetry#37180)

Currently, the [late span age
histogram](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/981e9f5cc58149ea37c8909a4a6eb30faee96351/processor/tailsamplingprocessor/internal/metadata/generated_telemetry.go#L106-L110)
only includes non-sampled traces. The histogram should also include
sampled traces. This pull request includes anything with a decision
time.

_Note: The decision cache still wrecks it 😅_

---------

Signed-off-by: Sean Porter <[email protected]>
Co-authored-by: Juraci Paixão Kröhling <[email protected]>
  • Loading branch information
2 people authored and chengchuanpeng committed Jan 26, 2025
1 parent a357b6e commit 019bd36
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 15 deletions.
27 changes: 27 additions & 0 deletions .chloggen/tailsamplingprocessor-late-span-age-histogram.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: tailsamplingprocessor

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Late span age histogram should include sampled traces.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [37180]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
5 changes: 4 additions & 1 deletion processor/tailsamplingprocessor/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -518,11 +518,14 @@ func (tsp *tailSamplingSpanProcessor) processTraces(resourceSpans ptrace.Resourc
tsp.releaseSampledTrace(tsp.ctx, id, traceTd)
case sampling.NotSampled:
tsp.nonSampledIDCache.Put(id, true)
tsp.telemetry.ProcessorTailSamplingSamplingLateSpanAge.Record(tsp.ctx, int64(time.Since(actualData.DecisionTime)/time.Second))
default:
tsp.logger.Warn("Encountered unexpected sampling decision",
zap.Int("decision", int(finalDecision)))
}

if !actualData.DecisionTime.IsZero() {
tsp.telemetry.ProcessorTailSamplingSamplingLateSpanAge.Record(tsp.ctx, int64(time.Since(actualData.DecisionTime)/time.Second))
}
}
}

Expand Down
41 changes: 27 additions & 14 deletions processor/tailsamplingprocessor/processor_telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -455,10 +455,10 @@ func TestProcessorTailSamplingSamplingLateSpanAge(t *testing.T) {
PolicyCfgs: []PolicyCfg{
{
sharedPolicyCfg: sharedPolicyCfg{
Name: "never-sample",
Name: "sample-half",
Type: Probabilistic,
ProbabilisticCfg: ProbabilisticCfg{
SamplingPercentage: 0,
SamplingPercentage: 50,
},
},
},
Expand All @@ -476,22 +476,24 @@ func TestProcessorTailSamplingSamplingLateSpanAge(t *testing.T) {
err = proc.Start(context.Background(), componenttest.NewNopHost())
require.NoError(t, err)

traces := simpleTraces()
traceID := traces.ResourceSpans().At(0).ScopeSpans().AppendEmpty().Spans().AppendEmpty().TraceID()

lateSpan := ptrace.NewTraces()
lateSpan.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty().Spans().AppendEmpty().SetTraceID(traceID)

// test
err = proc.ConsumeTraces(context.Background(), traces)
require.NoError(t, err)
traceIDs, batches := generateIDsAndBatches(10)
for _, batch := range batches {
err = proc.ConsumeTraces(context.Background(), batch)
require.NoError(t, err)
}

tsp := proc.(*tailSamplingSpanProcessor)
tsp.policyTicker.OnTick() // the first tick always gets an empty batch
tsp.policyTicker.OnTick()

err = proc.ConsumeTraces(context.Background(), lateSpan)
require.NoError(t, err)
for _, traceID := range traceIDs {
lateSpan := ptrace.NewTraces()
lateSpan.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty().Spans().AppendEmpty().SetTraceID(traceID)

err = proc.ConsumeTraces(context.Background(), lateSpan)
require.NoError(t, err)
}

// verify
var md metricdata.ResourceMetrics
Expand All @@ -503,11 +505,22 @@ func TestProcessorTailSamplingSamplingLateSpanAge(t *testing.T) {
Unit: "s",
Data: metricdata.Histogram[int64]{
Temporality: metricdata.CumulativeTemporality,
DataPoints: []metricdata.HistogramDataPoint[int64]{{}},
DataPoints: []metricdata.HistogramDataPoint[int64]{
{
Count: 10,
Bounds: []float64{0, 5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000},
BucketCounts: []uint64{10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
Min: metricdata.NewExtrema[int64](0),
Max: metricdata.NewExtrema[int64](0),
Sum: 0,
},
},
},
}

got := s.getMetric(m.Name, md)
metricdatatest.AssertEqual(t, m, got, metricdatatest.IgnoreTimestamp(), metricdatatest.IgnoreValue())

metricdatatest.AssertEqual(t, m, got, metricdatatest.IgnoreTimestamp())
}

type testTelemetry struct {
Expand Down

0 comments on commit 019bd36

Please sign in to comment.