From 7b79fc7b0ab07d4d6a55ae0a4a9816ef91c752f8 Mon Sep 17 00:00:00 2001 From: aymericDD Date: Tue, 7 Dec 2021 14:56:17 +0100 Subject: [PATCH] query: fix deadlock in endpointset (#4795) (#4926) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid RLock()ing twice as described here: https://github.com/thanos-io/thanos/issues/4766#issuecomment-948743455 (due to https://stackoverflow.com/questions/30547916/goroutine-blocks-when-calling-rwmutex-rlock-twice-after-an-rwmutex-unlock/30549188). Fix it by removing HasClients() and simply changing it with `er.clients != nil`. Signed-off-by: Giedrius Statkevičius Signed-off-by: Aymeric Co-authored-by: Giedrius Statkevičius --- pkg/query/endpointset.go | 17 ++++--------- pkg/query/endpointset_test.go | 45 +++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/pkg/query/endpointset.go b/pkg/query/endpointset.go index 1c05ca6975..6fd9a1a577 100644 --- a/pkg/query/endpointset.go +++ b/pkg/query/endpointset.go @@ -688,46 +688,39 @@ func (er *endpointRef) ComponentType() component.Component { return component.FromString(er.metadata.ComponentType) } -func (er *endpointRef) HasClients() bool { - er.mtx.RLock() - defer er.mtx.RUnlock() - - return er.clients != nil -} - func (er *endpointRef) HasStoreAPI() bool { er.mtx.RLock() defer er.mtx.RUnlock() - return er.HasClients() && er.clients.store != nil + return er.clients != nil && er.clients.store != nil } func (er *endpointRef) HasRulesAPI() bool { er.mtx.RLock() defer er.mtx.RUnlock() - return er.HasClients() && er.clients.rule != nil + return er.clients != nil && er.clients.rule != nil } func (er *endpointRef) HasTargetsAPI() bool { er.mtx.RLock() defer er.mtx.RUnlock() - return er.HasClients() && er.clients.target != nil + return er.clients != nil && er.clients.target != nil } func (er *endpointRef) HasMetricMetadataAPI() bool { er.mtx.RLock() defer er.mtx.RUnlock() - return er.HasClients() && er.clients.metricMetadata != nil + return er.clients != nil && er.clients.metricMetadata != nil } func (er *endpointRef) HasExemplarsAPI() bool { er.mtx.RLock() defer er.mtx.RUnlock() - return er.HasClients() && er.clients.exemplar != nil + return er.clients != nil && er.clients.exemplar != nil } func (er *endpointRef) LabelSets() []labels.Labels { diff --git a/pkg/query/endpointset_test.go b/pkg/query/endpointset_test.go index 5dc7eefa45..f6904e8223 100644 --- a/pkg/query/endpointset_test.go +++ b/pkg/query/endpointset_test.go @@ -12,6 +12,7 @@ import ( "testing" "time" + "golang.org/x/sync/errgroup" "google.golang.org/grpc" "github.com/pkg/errors" @@ -1185,3 +1186,47 @@ func assertRegisteredAPIs(t *testing.T, expectedAPIs *APIs, er *endpointRef) { testutil.Equals(t, expectedAPIs.metricMetadata, er.HasMetricMetadataAPI()) testutil.Equals(t, expectedAPIs.exemplars, er.HasExemplarsAPI()) } + +// Regression test for: https://github.com/thanos-io/thanos/issues/4766. +func TestDeadlockLocking(t *testing.T) { + t.Parallel() + + mockEndpointRef := &endpointRef{ + addr: "mockedStore", + metadata: &endpointMetadata{ + &infopb.InfoResponse{}, + }, + clients: &endpointClients{}, + } + + g := &errgroup.Group{} + deadline := time.Now().Add(3 * time.Second) + + g.Go(func() error { + for { + if time.Now().After(deadline) { + break + } + mockEndpointRef.Update(&endpointMetadata{ + InfoResponse: &infopb.InfoResponse{}, + }) + } + return nil + }) + + g.Go(func() error { + for { + if time.Now().After(deadline) { + break + } + mockEndpointRef.HasStoreAPI() + mockEndpointRef.HasExemplarsAPI() + mockEndpointRef.HasMetricMetadataAPI() + mockEndpointRef.HasRulesAPI() + mockEndpointRef.HasTargetsAPI() + } + return nil + }) + + testutil.Ok(t, g.Wait()) +}