From 1d2d2a770159ee727e52ed722f117723bf3c4285 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 8 Feb 2024 11:32:04 +0100 Subject: [PATCH] store-gateway: remove cortex_bucket_store_blocks_loaded_by_duration (#7309) * store-gateway: remove cortex_bucket_store_blocks_loaded_by_duration The PR which added this metric 6074, the motivation was to help detect compactors which are falling behind. There is another metric `cortex_bucket_store_series_blocks_queried` added by jhalterman in 7112 which serves better the purpose of detecting uncompacted blocks. The reason is that it has an explicit label for uncompacted blocks, and it also is more sensitive to recent blocks (assuming those are queried much more often than old uncompacted blocks). Signed-off-by: Dimitar Dimitrov * Add CHANGELOG.md entry Signed-off-by: Dimitar Dimitrov * Update CHANGELOG.md Co-authored-by: Nick Pillitteri <56quarters@users.noreply.github.com> * Update code comment Signed-off-by: Dimitar Dimitrov --------- Signed-off-by: Dimitar Dimitrov Co-authored-by: Nick Pillitteri <56quarters@users.noreply.github.com> --- CHANGELOG.md | 1 + pkg/storegateway/bucket_stores.go | 36 +++++++++---------------------- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3c6b4427f5..0214d05dcb0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ * [CHANGE] Distributor: default the optimization `-distributor.write-requests-buffer-pooling-enabled` to `true`. #7165 * [CHANGE] Tracing: Move query information to span attributes instead of span logs. #7046 * [CHANGE] Distributor: the default value of circuit breaker's CLI flag `-ingester.client.circuit-breaker.cooldown-period` has been changed from `1m` to `10s`. #7310 +* [CHANGE] Store-gateway: remove `cortex_bucket_store_blocks_loaded_by_duration`. `cortex_bucket_store_series_blocks_queried` is better suited for detecting when compactors are not able to keep up with the number of blocks to compact. #7309 * [FEATURE] Introduce `-server.log-source-ips-full` option to log all IPs from `Forwarded`, `X-Real-IP`, `X-Forwarded-For` headers. #7250 * [FEATURE] Introduce `-tenant-federation.max-tenants` option to limit the max number of tenants allowed for requests when federation is enabled. #6959 * [FEATURE] Cardinality API: added a new `count_method` parameter which enables counting active label values. #7085 diff --git a/pkg/storegateway/bucket_stores.go b/pkg/storegateway/bucket_stores.go index eb53bf7dee3..d987f58e7e4 100644 --- a/pkg/storegateway/bucket_stores.go +++ b/pkg/storegateway/bucket_stores.go @@ -21,7 +21,6 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/prometheus/common/model" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" "github.com/prometheus/prometheus/tsdb/hashcache" "github.com/thanos-io/objstore" @@ -78,12 +77,11 @@ type BucketStores struct { stores map[string]*BucketStore // Metrics. - syncTimes prometheus.Histogram - syncLastSuccess prometheus.Gauge - tenantsDiscovered prometheus.Gauge - tenantsSynced prometheus.Gauge - blocksLoaded *prometheus.Desc - blocksLoadedByDuration *prometheus.Desc + syncTimes prometheus.Histogram + syncLastSuccess prometheus.Gauge + tenantsDiscovered prometheus.Gauge + tenantsSynced prometheus.Gauge + blocksLoaded *prometheus.Desc } // NewBucketStores makes a new BucketStores. @@ -157,11 +155,6 @@ func NewBucketStores(cfg tsdb.BlocksStorageConfig, shardingStrategy ShardingStra "Number of currently loaded blocks.", nil, nil, ) - u.blocksLoadedByDuration = prometheus.NewDesc( - "cortex_bucket_store_blocks_loaded_by_duration", - "Number of currently loaded blocks, bucketed by block duration.", - []string{"duration"}, nil, - ) // Init the index cache. if u.indexCache, err = tsdb.NewIndexCache(cfg.BucketStore.IndexCache, logger, reg); err != nil { @@ -547,10 +540,8 @@ func (u *BucketStores) closeBucketStoreAndDeleteLocalFilesForExcludedTenants(inc } } -// countBlocksLoaded returns the total number of blocks loaded and the number of blocks -// loaded bucketed by the provided block durations, summed for all users. -func (u *BucketStores) countBlocksLoaded(durations []time.Duration) (int, map[time.Duration]int) { - byDuration := make(map[time.Duration]int) +// countBlocksLoaded returns the total number of blocks loaded, summed for all users. +func (u *BucketStores) countBlocksLoaded(durations []time.Duration) int { total := 0 u.storesMu.RLock() @@ -558,28 +549,21 @@ func (u *BucketStores) countBlocksLoaded(durations []time.Duration) (int, map[ti for _, store := range u.stores { stats := store.Stats(durations) - for d, n := range stats.BlocksLoaded { - byDuration[d] += n + for _, n := range stats.BlocksLoaded { total += n } } - return total, byDuration + return total } func (u *BucketStores) Describe(descs chan<- *prometheus.Desc) { descs <- u.blocksLoaded - descs <- u.blocksLoadedByDuration } func (u *BucketStores) Collect(metrics chan<- prometheus.Metric) { - total, byDuration := u.countBlocksLoaded(defaultBlockDurations) + total := u.countBlocksLoaded(defaultBlockDurations) metrics <- prometheus.MustNewConstMetric(u.blocksLoaded, prometheus.GaugeValue, float64(total)) - for d, n := range byDuration { - // Convert time.Duration to model.Duration here since the string format is nicer - // to read for round numbers than the stdlib version. E.g. "2h" vs "2h0m0s" - metrics <- prometheus.MustNewConstMetric(u.blocksLoadedByDuration, prometheus.GaugeValue, float64(n), model.Duration(d).String()) - } } func getUserIDFromGRPCContext(ctx context.Context) string {