Skip to content

Commit

Permalink
metrics: fix initialization race
Browse files Browse the repository at this point in the history
When using a counter group or gauge group, the initialization of the
time series happens on first use. However, if two different goroutines
tried to access the same set of labels concurrently, there was a rare race
where one goroutine could try to increment the time series state before
the other goroutine had finished initializing the data structure.

Fix this by waiting for the initialization to complete before returning
the counter on a call to .With(). This wait is cheap and involves only
a single atomic load in the fast path after initialization has completed.

Thanks Dan Upton and Neal Lathia for the report.

Fixes #977
  • Loading branch information
eandre committed Dec 7, 2023
1 parent 3c2ceaa commit c15735c
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 1 deletion.
3 changes: 2 additions & 1 deletion runtimes/go/metrics/bits_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,11 @@ type initGate struct {
}

func (g *initGate) Start() {
g.mu.Lock()
if !atomic.CompareAndSwapUint32(&g.state, 0, 1) {
g.mu.Unlock() // don't leave the mutex in a locked state if we panic
panic("initGate: already started")
}
g.mu.Lock()
}

func (g *initGate) Done() {
Expand Down
7 changes: 7 additions & 0 deletions runtimes/go/metrics/histogram_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ func (c *HistogramGroup[L, V]) get(labels L) *timeseries[*nativehist.Histogram]
ts, setup := getTS[*nativehist.Histogram](c.reg, c.name, labels, c.metricInfo)

if !setup {
// Initialize this histogram timeseries on first use.
ts.init.Start()
defer ts.init.Done()

n := c.reg.numSvcs
if c.svcNum > 0 {
n = 1
Expand All @@ -108,6 +112,9 @@ func (c *HistogramGroup[L, V]) get(labels L) *timeseries[*nativehist.Histogram]
for i := range ts.value {
ts.value[i] = nativehist.New(bucketFactor)
}
} else {
// Wait for the timeseries to be initialized before we continue.
ts.init.Wait()
}

return ts
Expand Down
6 changes: 6 additions & 0 deletions runtimes/go/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ func (c *CounterGroup[L, V]) get(labels L) *timeseries[V] {
ts, setup := c.metricInfo.getTS(labels)
if !setup {
ts.setup(c.labelMapper(labels))
} else {
// Wait for the timeseries to be initialized before we continue.
ts.init.Wait()
}
return ts
}
Expand Down Expand Up @@ -139,6 +142,9 @@ func (g *GaugeGroup[L, V]) get(labels L) *timeseries[V] {
ts, setup := g.metricInfo.getTS(labels)
if !setup {
ts.setup(g.labelMapper(labels))
} else {
// Wait for the timeseries to be initialized before we continue.
ts.init.Wait()
}
return ts
}
Expand Down

0 comments on commit c15735c

Please sign in to comment.