Skip to content

Commit

Permalink
compactor: adds downsample duration histogram (#4552)
Browse files Browse the repository at this point in the history
* Adds downsampleDuration histogram

Signed-off-by: Anugrah Vijay <[email protected]>

* Adds resolution label to downsampleDuration histogram

Signed-off-by: Anugrah Vijay <[email protected]>

* Changes downsampleDuration histogram label from resolution to group

Signed-off-by: Anugrah Vijay <[email protected]>

* Changes downsampleDuration histogram bucket intervals

Signed-off-by: Anugrah Vijay <[email protected]>

* Updates changelog

Signed-off-by: Anugrah Vijay <[email protected]>
  • Loading branch information
vanugrah authored Aug 23, 2021
1 parent 291b930 commit ce1c4fe
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#4487](https://github.com/thanos-io/thanos/pull/4487) Query: Add memcached auto discovery support.
- [#4444](https://github.com/thanos-io/thanos/pull/4444) UI: Add search block UI.
- [#4509](https://github.com/thanos-io/thanos/pull/4509) Logging: Adds duration_ms in int64 to the logs.
- [#4462](https://github.com/thanos-io/thanos/pull/4462) UI: Add find overlap block UI
- [#4462](https://github.com/thanos-io/thanos/pull/4462) UI: Add find overlap block UI.
- [#4469](https://github.com/thanos-io/thanos/pull/4469) Compact: Add flag `compact.skip-block-with-out-of-order-chunks` to skip blocks with out-of-order chunks during compaction instead of halting.
- [#4552](https://github.com/thanos-io/thanos/pull/4552) Compact: Adds `thanos_compact_downsample_duration_seconds` histogram.

### Fixed

Expand Down
23 changes: 20 additions & 3 deletions cmd/thanos/downsample.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
// DownsampleMetrics groups the Prometheus collectors that instrument block
// downsampling. Every collector visible in this file is partitioned by a
// single "group" label, whose value is compact.DefaultGroupKey of the block's
// Thanos metadata.
type DownsampleMetrics struct {
// downsamples is a per-group counter; its registration is not visible in
// this excerpt — presumably it counts downsampling attempts (TODO confirm).
downsamples *prometheus.CounterVec
// downsampleFailures (thanos_compact_downsample_failures_total) is
// incremented when processDownsampling returns an error for a block.
downsampleFailures *prometheus.CounterVec
// downsampleDuration (thanos_compact_downsample_duration_seconds) records,
// in seconds, how long it took to produce a downsampled block. Buckets span
// from 1 minute to 4 hours.
downsampleDuration *prometheus.HistogramVec
}

func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics {
Expand All @@ -51,6 +52,11 @@ func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics {
Name: "thanos_compact_downsample_failures_total",
Help: "Total number of failed downsampling attempts.",
}, []string{"group"})
m.downsampleDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
Name: "thanos_compact_downsample_duration_seconds",
Help: "Duration of downsample runs",
Buckets: []float64{60, 300, 900, 1800, 3600, 7200, 14400}, // 1m, 5m, 15m, 30m, 60m, 120m, 240m
}, []string{"group"})

return m
}
Expand Down Expand Up @@ -237,7 +243,7 @@ func downsampleBucket(
resolution = downsample.ResLevel2
errMsg = "downsampling to 60 min"
}
if err := processDownsampling(ctx, logger, bkt, m, dir, resolution, hashFunc); err != nil {
if err := processDownsampling(ctx, logger, bkt, m, dir, resolution, hashFunc, metrics); err != nil {
metrics.downsampleFailures.WithLabelValues(compact.DefaultGroupKey(m.Thanos)).Inc()
return errors.Wrap(err, errMsg)
}
Expand Down Expand Up @@ -309,7 +315,16 @@ func downsampleBucket(
return nil
}

func processDownsampling(ctx context.Context, logger log.Logger, bkt objstore.Bucket, m *metadata.Meta, dir string, resolution int64, hashFunc metadata.HashFunc) error {
func processDownsampling(
ctx context.Context,
logger log.Logger,
bkt objstore.Bucket,
m *metadata.Meta,
dir string,
resolution int64,
hashFunc metadata.HashFunc,
metrics *DownsampleMetrics,
) error {
begin := time.Now()
bdir := filepath.Join(dir, m.ULID.String())

Expand Down Expand Up @@ -344,8 +359,10 @@ func processDownsampling(ctx context.Context, logger log.Logger, bkt objstore.Bu
}
resdir := filepath.Join(dir, id.String())

downsampleDuration := time.Since(begin)
level.Info(logger).Log("msg", "downsampled block",
"from", m.ULID, "to", id, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds())
"from", m.ULID, "to", id, "duration", downsampleDuration, "duration_ms", downsampleDuration.Milliseconds())
metrics.downsampleDuration.WithLabelValues(compact.DefaultGroupKey(m.Thanos)).Observe(downsampleDuration.Seconds())

if err := block.VerifyIndex(logger, filepath.Join(resdir, block.IndexFilename), m.MinTime, m.MaxTime); err != nil {
return errors.Wrap(err, "output block index not valid")
Expand Down

0 comments on commit ce1c4fe

Please sign in to comment.