Skip to content

Commit

Permalink
Sidecar: mark as unqueryable if prometheus is down (thanos-io#7297)
Browse files Browse the repository at this point in the history
If the Prometheus that belongs to a sidecar is down, we don't need to
query the sidecar. This PR makes it so that we take the sidecar out of
the endpoint set in that case. We do the same for all other store APIs by
returning an error in the info/Info gRPC call if they are marked as not
ready.

Signed-off-by: Michael Hoffmann <[email protected]>
  • Loading branch information
MichaHoffmann authored and jnyi committed Jun 1, 2024
1 parent 1eaffc7 commit 9651013
Show file tree
Hide file tree
Showing 9 changed files with 40 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#7233](https://github.com/thanos-io/thanos/pull/7233): UI: Showing Block Size Stats
- [#7280](https://github.com/thanos-io/thanos/pull/7281): Adding User-Agent to request logs
- [#7219](https://github.com/thanos-io/thanos/pull/7219): Receive: add `--remote-write.client-tls-secure` and `--remote-write.client-tls-skip-verify` flags to stop relying on grpc server config to determine grpc client secure/skipVerify.
- [#7297](https://github.com/thanos-io/thanos/pull/7297): *: mark as not queryable if status is not ready

### Changed

Expand Down
6 changes: 3 additions & 3 deletions cmd/thanos/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ func runQuery(
infoSrv := info.NewInfoServer(
component.Query.String(),
info.WithLabelSetFunc(func() []labelpb.ZLabelSet { return proxy.LabelSet() }),
info.WithStoreInfoFunc(func() *infopb.StoreInfo {
info.WithStoreInfoFunc(func() (*infopb.StoreInfo, error) {
if httpProbe.IsReady() {
mint, maxt := proxy.TimeRange()
return &infopb.StoreInfo{
Expand All @@ -812,9 +812,9 @@ func runQuery(
SupportsSharding: true,
SupportsWithoutReplicaLabels: true,
TsdbInfos: proxy.TSDBInfos(),
}
}, nil
}
return nil
return nil, errors.New("Not ready")
}),
info.WithExemplarsInfoFunc(),
info.WithRulesInfoFunc(),
Expand Down
6 changes: 3 additions & 3 deletions cmd/thanos/receive.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ func runReceive(
infoSrv := info.NewInfoServer(
component.Receive.String(),
info.WithLabelSetFunc(func() []labelpb.ZLabelSet { return proxy.LabelSet() }),
info.WithStoreInfoFunc(func() *infopb.StoreInfo {
info.WithStoreInfoFunc(func() (*infopb.StoreInfo, error) {
if httpProbe.IsReady() {
minTime, maxTime := proxy.TimeRange()
return &infopb.StoreInfo{
Expand All @@ -356,9 +356,9 @@ func runReceive(
SupportsSharding: true,
SupportsWithoutReplicaLabels: true,
TsdbInfos: proxy.TSDBInfos(),
}
}, nil
}
return nil
return nil, errors.New("Not ready")
}),
info.WithExemplarsInfoFunc(),
)
Expand Down
6 changes: 3 additions & 3 deletions cmd/thanos/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,7 @@ func runRule(
info.WithLabelSetFunc(func() []labelpb.ZLabelSet {
return tsdbStore.LabelSet()
}),
info.WithStoreInfoFunc(func() *infopb.StoreInfo {
info.WithStoreInfoFunc(func() (*infopb.StoreInfo, error) {
if httpProbe.IsReady() {
mint, maxt := tsdbStore.TimeRange()
return &infopb.StoreInfo{
Expand All @@ -750,9 +750,9 @@ func runRule(
SupportsSharding: true,
SupportsWithoutReplicaLabels: true,
TsdbInfos: tsdbStore.TSDBInfos(),
}
}, nil
}
return nil
return nil, errors.New("Not ready")
}),
)
storeServer := store.NewLimitedStoreServer(store.NewInstrumentedStoreServer(reg, tsdbStore), reg, conf.storeRateLimits)
Expand Down
6 changes: 3 additions & 3 deletions cmd/thanos/sidecar.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ func runSidecar(
info.WithLabelSetFunc(func() []labelpb.ZLabelSet {
return promStore.LabelSet()
}),
info.WithStoreInfoFunc(func() *infopb.StoreInfo {
info.WithStoreInfoFunc(func() (*infopb.StoreInfo, error) {
if httpProbe.IsReady() {
mint, maxt := promStore.Timestamps()
return &infopb.StoreInfo{
Expand All @@ -289,9 +289,9 @@ func runSidecar(
SupportsSharding: true,
SupportsWithoutReplicaLabels: true,
TsdbInfos: promStore.TSDBInfos(),
}
}, nil
}
return nil
return nil, errors.New("Not ready")
}),
info.WithExemplarsInfoFunc(),
info.WithRulesInfoFunc(),
Expand Down
6 changes: 3 additions & 3 deletions cmd/thanos/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ func runStore(
info.WithLabelSetFunc(func() []labelpb.ZLabelSet {
return bs.LabelSet()
}),
info.WithStoreInfoFunc(func() *infopb.StoreInfo {
info.WithStoreInfoFunc(func() (*infopb.StoreInfo, error) {
if httpProbe.IsReady() {
mint, maxt := bs.TimeRange()
return &infopb.StoreInfo{
Expand All @@ -500,9 +500,9 @@ func runStore(
SupportsSharding: true,
SupportsWithoutReplicaLabels: true,
TsdbInfos: bs.TSDBInfos(),
}
}, nil
}
return nil
return nil, errors.New("Not ready")
}),
)

Expand Down
17 changes: 11 additions & 6 deletions pkg/info/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ package info
import (
"context"

"google.golang.org/grpc"

"github.com/thanos-io/thanos/pkg/info/infopb"
"github.com/thanos-io/thanos/pkg/store/labelpb"
"google.golang.org/grpc"
)

// InfoServer implements the corresponding protobuf interface
Expand All @@ -20,7 +21,7 @@ type InfoServer struct {
component string

getLabelSet func() []labelpb.ZLabelSet
getStoreInfo func() *infopb.StoreInfo
getStoreInfo func() (*infopb.StoreInfo, error)
getExemplarsInfo func() *infopb.ExemplarsInfo
getRulesInfo func() *infopb.RulesInfo
getTargetsInfo func() *infopb.TargetsInfo
Expand All @@ -38,7 +39,7 @@ func NewInfoServer(
component: component,
// By default, do not return info for any API.
getLabelSet: func() []labelpb.ZLabelSet { return nil },
getStoreInfo: func() *infopb.StoreInfo { return nil },
getStoreInfo: func() (*infopb.StoreInfo, error) { return nil, nil },
getExemplarsInfo: func() *infopb.ExemplarsInfo { return nil },
getRulesInfo: func() *infopb.RulesInfo { return nil },
getTargetsInfo: func() *infopb.TargetsInfo { return nil },
Expand Down Expand Up @@ -74,10 +75,10 @@ func WithLabelSetFunc(getLabelSet ...func() []labelpb.ZLabelSet) ServerOptionFun
// WithStoreInfoFunc determines the function that should be executed to obtain
// the store information. If no function is provided, the default empty
// store info is returned. Only the first function from the list is considered.
func WithStoreInfoFunc(getStoreInfo ...func() *infopb.StoreInfo) ServerOptionFunc {
func WithStoreInfoFunc(getStoreInfo ...func() (*infopb.StoreInfo, error)) ServerOptionFunc {
if len(getStoreInfo) == 0 {
return func(s *InfoServer) {
s.getStoreInfo = func() *infopb.StoreInfo { return &infopb.StoreInfo{} }
s.getStoreInfo = func() (*infopb.StoreInfo, error) { return &infopb.StoreInfo{}, nil }
}
}

Expand Down Expand Up @@ -170,10 +171,14 @@ func RegisterInfoServer(infoSrv infopb.InfoServer) func(*grpc.Server) {

// Info returns the information about label set and available APIs exposed by the component.
func (srv *InfoServer) Info(ctx context.Context, req *infopb.InfoRequest) (*infopb.InfoResponse, error) {
storeInfo, err := srv.getStoreInfo()
if err != nil {
return nil, err
}
return &infopb.InfoResponse{
LabelSets: srv.getLabelSet(),
ComponentType: srv.component,
Store: srv.getStoreInfo(),
Store: storeInfo,
Exemplars: srv.getExemplarsInfo(),
Rules: srv.getRulesInfo(),
Targets: srv.getTargetsInfo(),
Expand Down
11 changes: 8 additions & 3 deletions pkg/query/endpointset.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,16 @@ import (
"time"
"unicode/utf8"

"github.com/thanos-io/thanos/pkg/api/query/querypb"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/prometheus/model/labels"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

"github.com/thanos-io/thanos/pkg/api/query/querypb"
"github.com/thanos-io/thanos/pkg/component"
"github.com/thanos-io/thanos/pkg/exemplars/exemplarspb"
"github.com/thanos-io/thanos/pkg/info/infopb"
Expand Down Expand Up @@ -108,7 +109,11 @@ func (es *GRPCEndpointSpec) ReplicaKey() string {
func (es *endpointRef) Metadata(ctx context.Context, infoClient infopb.InfoClient, storeClient storepb.StoreClient) (*endpointMetadata, error) {
if infoClient != nil {
resp, err := infoClient.Info(ctx, &infopb.InfoRequest{}, grpc.WaitForReady(true))
if err == nil {
if err != nil {
if status.Convert(err).Code() != codes.Unimplemented {
return nil, err
}
} else {
return &endpointMetadata{resp}, nil
}
}
Expand Down
11 changes: 5 additions & 6 deletions pkg/query/endpointset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,19 @@ import (
"testing"
"time"

"github.com/efficientgo/core/testutil"
"github.com/pkg/errors"
"github.com/stretchr/testify/require"

"github.com/prometheus/prometheus/model/labels"
"github.com/thanos-io/thanos/pkg/store"

"golang.org/x/sync/errgroup"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"

"github.com/efficientgo/core/testutil"
"github.com/pkg/errors"
promtestutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/prometheus/model/labels"

"github.com/thanos-io/thanos/pkg/component"
"github.com/thanos-io/thanos/pkg/info/infopb"
"github.com/thanos-io/thanos/pkg/store"
"github.com/thanos-io/thanos/pkg/store/labelpb"
"github.com/thanos-io/thanos/pkg/store/storepb"
)
Expand Down

0 comments on commit 9651013

Please sign in to comment.