Skip to content

Commit

Permalink
improve kept/seen metrics for all trace samplers
Browse files: browse the repository at this point in the history
Signed-off-by: keisku <[email protected]>
  • Loading branch information
keisku committed Feb 5, 2025
1 parent 0ef59f7 commit 910a344
Show file tree
Hide file tree
Showing 15 changed files with 741 additions and 450 deletions.
41 changes: 27 additions & 14 deletions pkg/trace/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ type Agent struct {
RareSampler *sampler.RareSampler
NoPrioritySampler *sampler.NoPrioritySampler
ProbabilisticSampler *sampler.ProbabilisticSampler
SamplerMetrics *sampler.Metrics
EventProcessor *event.Processor
TraceWriter TraceWriter
StatsWriter *writer.DatadogStatsWriter
Expand Down Expand Up @@ -151,11 +152,12 @@ func NewAgent(ctx context.Context, conf *config.AgentConfig, telemetryCollector
ClientStatsAggregator: stats.NewClientStatsAggregator(conf, statsWriter, statsd),
Blacklister: filters.NewBlacklister(conf.Ignore["resource"]),
Replacer: filters.NewReplacer(conf.ReplaceTags),
PrioritySampler: sampler.NewPrioritySampler(conf, dynConf, statsd),
ErrorsSampler: sampler.NewErrorsSampler(conf, statsd),
RareSampler: sampler.NewRareSampler(conf, statsd),
NoPrioritySampler: sampler.NewNoPrioritySampler(conf, statsd),
ProbabilisticSampler: sampler.NewProbabilisticSampler(conf, statsd),
PrioritySampler: sampler.NewPrioritySampler(conf, dynConf),
ErrorsSampler: sampler.NewErrorsSampler(conf),
RareSampler: sampler.NewRareSampler(conf),
NoPrioritySampler: sampler.NewNoPrioritySampler(conf),
ProbabilisticSampler: sampler.NewProbabilisticSampler(conf),
SamplerMetrics: sampler.NewMetrics(statsd, 10*time.Second),
EventProcessor: newEventProcessor(conf, statsd),
StatsWriter: statsWriter,
obfuscatorConf: &oconf,
Expand All @@ -166,6 +168,7 @@ func NewAgent(ctx context.Context, conf *config.AgentConfig, telemetryCollector
Statsd: statsd,
Timing: timing,
}
agnt.SamplerMetrics.Add(agnt.PrioritySampler, agnt.ErrorsSampler, agnt.NoPrioritySampler, agnt.RareSampler)
agnt.Receiver = api.NewHTTPReceiver(conf, dynConf, in, agnt, telemetryCollector, statsd, timing)
agnt.OTLPReceiver = api.NewOTLPReceiver(in, conf, statsd, timing)
agnt.RemoteConfigHandler = remoteconfighandler.New(conf, agnt.PrioritySampler, agnt.RareSampler, agnt.ErrorsSampler)
Expand All @@ -181,10 +184,7 @@ func (a *Agent) Run() {
a.Receiver,
a.Concentrator,
a.ClientStatsAggregator,
a.PrioritySampler,
a.ErrorsSampler,
a.NoPrioritySampler,
a.ProbabilisticSampler,
a.SamplerMetrics,
a.EventProcessor,
a.OTLPReceiver,
a.RemoteConfigHandler,
Expand Down Expand Up @@ -266,11 +266,7 @@ func (a *Agent) loop() {
a.ClientStatsAggregator,
a.TraceWriter,
a.StatsWriter,
a.PrioritySampler,
a.ErrorsSampler,
a.NoPrioritySampler,
a.ProbabilisticSampler,
a.RareSampler,
a.SamplerMetrics,
a.EventProcessor,
a.obfuscator,
a.DebugServer,
Expand Down Expand Up @@ -638,8 +634,14 @@ func (a *Agent) getAnalyzedEvents(pt *traceutil.ProcessedTrace, ts *info.TagStat
// set, the NoPrioritySampler is run. Finally, if the trace has not been sampled by the other
// samplers, the error sampler is run.
func (a *Agent) runSamplers(now time.Time, ts *info.TagStats, pt traceutil.ProcessedTrace) (keep bool, checkAnalyticsEvents bool) {
samplerName := sampler.NameUnknown
samplingPriority := sampler.PriorityNone
defer func() {
a.SamplerMetrics.Record(keep, sampler.NewMetricsKey(pt.Root.Service, pt.TracerEnv, samplerName, samplingPriority))
}()
// ETS (Error Tracking Standalone): chunks that contain neither errors nor spans with exception span events are all dropped.
if a.conf.ErrorTrackingStandalone {
samplerName = sampler.NameError
if traceContainsError(pt.TraceChunk.Spans, true) {
pt.TraceChunk.Tags["_dd.error_tracking_standalone.error"] = "true"
return a.ErrorsSampler.Sample(now, pt.TraceChunk.Spans, pt.Root, pt.TracerEnv), false
Expand All @@ -651,23 +653,32 @@ func (a *Agent) runSamplers(now time.Time, ts *info.TagStats, pt traceutil.Proce
rare := a.RareSampler.Sample(now, pt.TraceChunk, pt.TracerEnv)

if a.conf.ProbabilisticSamplerEnabled {
samplerName = sampler.NameProbabilistic
if rare {
samplerName = sampler.NameRare
return true, true
}
if a.ProbabilisticSampler.Sample(pt.Root) {
pt.TraceChunk.Tags[tagDecisionMaker] = probabilitySampling
return true, true
}
if traceContainsError(pt.TraceChunk.Spans, false) {
samplerName = sampler.NameError
return a.ErrorsSampler.Sample(now, pt.TraceChunk.Spans, pt.Root, pt.TracerEnv), true
}
return false, true
}

priority, hasPriority := sampler.GetSamplingPriority(pt.TraceChunk)
if hasPriority {
samplerName = sampler.NamePriority
samplingPriority = priority
if dm, ok := pt.TraceChunk.Tags[tagDecisionMaker]; ok && dm == probabilitySampling {
samplerName = sampler.NameProbabilistic
}
ts.TracesPerSamplingPriority.CountSamplingPriority(priority)
} else {
samplerName = sampler.NameNoPriority
ts.TracesPriorityNone.Inc()
}
if a.conf.HasFeature("error_rare_sample_tracer_drop") {
Expand All @@ -684,6 +695,7 @@ func (a *Agent) runSamplers(now time.Time, ts *info.TagStats, pt traceutil.Proce
}

if rare {
samplerName = sampler.NameRare
return true, true
}

Expand All @@ -696,6 +708,7 @@ func (a *Agent) runSamplers(now time.Time, ts *info.TagStats, pt traceutil.Proce
}

if traceContainsError(pt.TraceChunk.Spans, false) {
samplerName = sampler.NameError
return a.ErrorsSampler.Sample(now, pt.TraceChunk.Spans, pt.Root, pt.TracerEnv), true
}

Expand Down
Loading

0 comments on commit 910a344

Please sign in to comment.