From 2ed01b6838e926880e75685137ddd5567dcfcf66 Mon Sep 17 00:00:00 2001 From: Anugrah Vijay Date: Tue, 10 Aug 2021 18:20:09 -0700 Subject: [PATCH 1/5] Adds downsampleDuration histogram Signed-off-by: Anugrah Vijay --- cmd/thanos/downsample.go | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index 849c72db36..89a9e3221a 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -38,6 +38,7 @@ import ( type DownsampleMetrics struct { downsamples *prometheus.CounterVec downsampleFailures *prometheus.CounterVec + downsampleDuration prometheus.Histogram } func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics { @@ -51,6 +52,11 @@ func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics { Name: "thanos_compact_downsample_failures_total", Help: "Total number of failed downsampling attempts.", }, []string{"group"}) + m.downsampleDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_compact_downsample_duration_seconds", + Help: "Duration of downsample runs", + Buckets: []float64{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192}, + }) return m } @@ -237,7 +243,7 @@ func downsampleBucket( resolution = downsample.ResLevel2 errMsg = "downsampling to 60 min" } - if err := processDownsampling(ctx, logger, bkt, m, dir, resolution, hashFunc); err != nil { + if err := processDownsampling(ctx, logger, bkt, m, dir, resolution, hashFunc, metrics); err != nil { metrics.downsampleFailures.WithLabelValues(compact.DefaultGroupKey(m.Thanos)).Inc() return errors.Wrap(err, errMsg) } @@ -309,7 +315,16 @@ func downsampleBucket( return nil } -func processDownsampling(ctx context.Context, logger log.Logger, bkt objstore.Bucket, m *metadata.Meta, dir string, resolution int64, hashFunc metadata.HashFunc) error { +func processDownsampling( + ctx context.Context, + logger log.Logger, + bkt objstore.Bucket, + m *metadata.Meta, + dir string, + resolution int64, + hashFunc metadata.HashFunc, + metrics *DownsampleMetrics, +) error { begin := time.Now() bdir := filepath.Join(dir, m.ULID.String()) @@ -344,8 +359,10 @@ func processDownsampling(ctx context.Context, logger log.Logger, bkt objstore.Bu } resdir := filepath.Join(dir, id.String()) + downsampleDuration := time.Since(begin) level.Info(logger).Log("msg", "downsampled block", - "from", m.ULID, "to", id, "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds()) + "from", m.ULID, "to", id, "duration", downsampleDuration, "duration_ms", downsampleDuration.Milliseconds()) + metrics.downsampleDuration.Observe(downsampleDuration.Seconds()) if err := block.VerifyIndex(logger, filepath.Join(resdir, block.IndexFilename), m.MinTime, m.MaxTime); err != nil { return errors.Wrap(err, "output block index not valid") From 07c8b5c59b3200f67ac12b05890816fbd05fddc8 Mon Sep 17 00:00:00 2001 From: Anugrah Vijay Date: Tue, 10 Aug 2021 22:47:32 -0700 Subject: [PATCH 2/5] Adds resolution label to downsampleDuration histogram Signed-off-by: Anugrah Vijay --- cmd/thanos/downsample.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index 89a9e3221a..66278d8aa1 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "sort" + "strconv" "time" extflag "github.com/efficientgo/tools/extkingpin" @@ -38,7 +39,7 @@ import ( type DownsampleMetrics struct { downsamples *prometheus.CounterVec downsampleFailures *prometheus.CounterVec - downsampleDuration prometheus.Histogram + downsampleDuration *prometheus.HistogramVec } func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics { @@ -52,11 +53,11 @@ func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics { Name: "thanos_compact_downsample_failures_total", Help: "Total number of failed downsampling attempts.", }, []string{"group"}) - m.downsampleDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + m.downsampleDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ Name: "thanos_compact_downsample_duration_seconds", Help: "Duration of downsample runs", Buckets: []float64{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192}, - }) + }, []string{"resolution"}) return m } @@ -362,7 +363,7 @@ func processDownsampling( downsampleDuration := time.Since(begin) level.Info(logger).Log("msg", "downsampled block", "from", m.ULID, "to", id, "duration", downsampleDuration, "duration_ms", downsampleDuration.Milliseconds()) - metrics.downsampleDuration.Observe(downsampleDuration.Seconds()) + metrics.downsampleDuration.WithLabelValues(strconv.FormatInt(resolution, 10)).Observe(downsampleDuration.Seconds()) if err := block.VerifyIndex(logger, filepath.Join(resdir, block.IndexFilename), m.MinTime, m.MaxTime); err != nil { return errors.Wrap(err, "output block index not valid") From 57d59745f0c86839af9b44ef26a69e3a4efec32b Mon Sep 17 00:00:00 2001 From: Anugrah Vijay Date: Wed, 11 Aug 2021 00:56:23 -0700 Subject: [PATCH 3/5] Changes downsampleDuration histogram label from resolution to group Signed-off-by: Anugrah Vijay --- cmd/thanos/downsample.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index 66278d8aa1..15b1b347ff 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -8,7 +8,6 @@ import ( "os" "path/filepath" "sort" - "strconv" "time" extflag "github.com/efficientgo/tools/extkingpin" @@ -56,8 +55,8 @@ func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics { m.downsampleDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ Name: "thanos_compact_downsample_duration_seconds", Help: "Duration of downsample runs", - Buckets: []float64{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192}, - }, []string{"resolution"}) + Buckets: []float64{64, 128, 256, 512, 1024, 2048, 4096, 8192}, // or 1m, 5m, 15m, 30m, 60m, 120m, 240m + }, []string{"group"}) return m } @@ -363,7 +362,7 @@ func processDownsampling( downsampleDuration := time.Since(begin) level.Info(logger).Log("msg", "downsampled block", "from", m.ULID, "to", id, "duration", downsampleDuration, "duration_ms", downsampleDuration.Milliseconds()) - metrics.downsampleDuration.WithLabelValues(strconv.FormatInt(resolution, 10)).Observe(downsampleDuration.Seconds()) + metrics.downsampleDuration.WithLabelValues(compact.DefaultGroupKey(m.Thanos)).Observe(downsampleDuration.Seconds()) if err := block.VerifyIndex(logger, filepath.Join(resdir, block.IndexFilename), m.MinTime, m.MaxTime); err != nil { return errors.Wrap(err, "output block index not valid") From 3b03fbd0f0678f1b420be747214cd9867f0e03e1 Mon Sep 17 00:00:00 2001 From: Anugrah Vijay Date: Wed, 11 Aug 2021 16:58:31 -0700 Subject: [PATCH 4/5] Changes downsampleDuration histogram bucket intervals Signed-off-by: Anugrah Vijay --- cmd/thanos/downsample.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/thanos/downsample.go b/cmd/thanos/downsample.go index 15b1b347ff..05ca4fdef4 100644 --- a/cmd/thanos/downsample.go +++ b/cmd/thanos/downsample.go @@ -55,7 +55,7 @@ func newDownsampleMetrics(reg *prometheus.Registry) *DownsampleMetrics { m.downsampleDuration = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ Name: "thanos_compact_downsample_duration_seconds", Help: "Duration of downsample runs", - Buckets: []float64{64, 128, 256, 512, 1024, 2048, 4096, 8192}, // or 1m, 5m, 15m, 30m, 60m, 120m, 240m + Buckets: []float64{60, 300, 900, 1800, 3600, 7200, 14400}, // 1m, 5m, 15m, 30m, 60m, 120m, 240m }, []string{"group"}) return m From 68ac99d15ae88a43a0deafb4785f31b79902b159 Mon Sep 17 00:00:00 2001 From: Anugrah Vijay Date: Wed, 11 Aug 2021 17:09:50 -0700 Subject: [PATCH 5/5] Updates changelog Signed-off-by: Anugrah Vijay --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24c754e964..73a4f2ad8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#4487](https://github.com/thanos-io/thanos/pull/4487) Query: Add memcached auto discovery support. - [#4444](https://github.com/thanos-io/thanos/pull/4444) UI: Add search block UI. - [#4509](https://github.com/thanos-io/thanos/pull/4509) Logging: Adds duration_ms in int64 to the logs. +- [#4552](https://github.com/thanos-io/thanos/pull/4552) Compactor: Adds `thanos_compact_downsample_duration_seconds` histogram. ### Fixed