diff --git a/CHANGELOG.md b/CHANGELOG.md index f6718244b9..ce71760392 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,8 +16,9 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#4487](https://github.com/thanos-io/thanos/pull/4487) Query: Add memcached auto discovery support. - [#4444](https://github.com/thanos-io/thanos/pull/4444) UI: Add search block UI. - [#4509](https://github.com/thanos-io/thanos/pull/4509) Logging: Adds duration_ms in int64 to the logs. -- [#4462](https://github.com/thanos-io/thanos/pull/4462) UI: Add find overlap block UI +- [#4462](https://github.com/thanos-io/thanos/pull/4462) UI: Add find overlap block UI. - [#4469](https://github.com/thanos-io/thanos/pull/4469) Compact: Add flag `compact.skip-block-with-out-of-order-chunks` to skip blocks with out-of-order chunks during compaction instead of halting +- [#4552](https://github.com/thanos-io/thanos/pull/4552) Compact: Adds `thanos_compact_downsample_duration_seconds` histogram. ### Fixed diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index 849c72db36..05ca4fdef4 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -38,6 +38,7 @@ import ( type DownsampleMetrics struct { downsamples *prometheus.CounterVec downsampleFailures *prometheus.CounterVec + downsampleDuration *prometheus.HistogramVec } func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics { @@ -51,6 +52,11 @@ func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics { Name: "thanos_compact_downsample_failures_total", Help: "Total number of failed downsampling attempts.", }, []string{"group"}) + m.downsampleDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ + Name: "thanos_compact_downsample_duration_seconds", + Help: "Duration of downsample runs", + Buckets: []float64{60, 300, 900, 1800, 3600, 7200, 14400}, // 1m, 5m, 15m, 30m, 60m, 120m, 240m + }, []string{"group"}) return m } @@ -237,7 +243,7 @@ func downsampleBucket( resolution = downsample.ResLevel2 errMsg = "downsampling to 60 min" } - if err := processDownsampling(ctx, logger, bkt, m, dir, resolution, hashFunc); err != nil { + if err := processDownsampling(ctx, logger, bkt, m, dir, resolution, hashFunc, metrics); err != nil { metrics.downsampleFailures.WithLabelValues(compact.DefaultGroupKey(m.Thanos)).Inc() return errors.Wrap(err, errMsg) } @@ -309,7 +315,16 @@ func downsampleBucket( return nil } -func processDownsampling(ctx context.Context, logger log.Logger, bkt objstore.Bucket, m *metadata.Meta, dir string, resolution int64, hashFunc metadata.HashFunc) error { +func processDownsampling( + ctx context.Context, + logger log.Logger, + bkt objstore.Bucket, + m *metadata.Meta, + dir string, + resolution int64, + hashFunc metadata.HashFunc, + metrics *DownsampleMetrics, +) error { begin := time.Now() bdir := filepath.Join(dir, m.ULID.String()) @@ -344,8 +359,10 @@ func processDownsampling(ctx context.Context, logger log.Logger, bkt objstore.Bu } resdir := filepath.Join(dir, id.String()) + downsampleDuration := time.Since(begin) level.Info(logger).Log("msg", "downsampled block", - "from", m.ULID, "to", id, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) + "from", m.ULID, "to", id, "duration", downsampleDuration, "duration_ms", downsampleDuration.Milliseconds()) + metrics.downsampleDuration.WithLabelValues(compact.DefaultGroupKey(m.Thanos)).Observe(downsampleDuration.Seconds()) if err := block.VerifyIndex(logger, filepath.Join(resdir, block.IndexFilename), m.MinTime, m.MaxTime); err != nil { return errors.Wrap(err, "output block index not valid")