From af0292bc603313bb8100355b1949be2f15e35c93 Mon Sep 17 00:00:00 2001 From: metonymic-smokey Date: Mon, 8 Nov 2021 11:47:15 +0530 Subject: [PATCH 1/6] draft: added retention progress calculate Signed-off-by: metonymic-smokey --- cmd/thanos/compact.go | 12 ++++++++++- pkg/compact/compact.go | 49 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 111af4912e..2ca0331938 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -462,6 +462,7 @@ func runCompact( if conf.compactionProgressMetrics { g.Add(func() error { ps := compact.NewCompactionProgressCalculator(reg, tsdbPlanner) + rs := compact.NewRetentionProgressCalculator(reg, retentionByResolution) var ds *compact.DownsampleProgressCalculator if !conf.disableDownsampling { ds = compact.NewDownsampleProgressCalculator(reg) @@ -476,13 +477,22 @@ func runCompact( metas := sy.Metas() groups, err := grouper.Groups(metas) if err != nil { - return errors.Wrapf(err, "could not group metadata") + return errors.Wrapf(err, "could not group metadata for compaction") } if err = ps.ProgressCalculate(ctx, groups); err != nil { return errors.Wrapf(err, "could not calculate compaction progress") } + retGroups, err := grouper.Groups(metas) + if err != nil { + return errors.Wrapf(err, "could not group metadata for retention") + } + + if err = rs.ProgressCalculate(ctx, retGroups); err != nil { + return errors.Wrapf(err, "could not calculate retention progress") + } + if !conf.disableDownsampling { groups, err = grouper.Groups(metas) if err != nil { diff --git a/pkg/compact/compact.go b/pkg/compact/compact.go index 0beb8c2b4e..0fc4206313 100644 --- a/pkg/compact/compact.go +++ b/pkg/compact/compact.go @@ -667,6 +667,55 @@ func (ds *DownsampleProgressCalculator) ProgressCalculate(ctx context.Context, g return nil } +// RetentionProgressMetrics contains Prometheus metrics related to retention progress. 
+type RetentionProgressMetrics struct { + NumberOfBlocksDeleted *prometheus.GaugeVec +} + +// RetentionProgressCalculator contains RetentionProgressMetrics, which are updated during the retention simulation process. +type RetentionProgressCalculator struct { + *RetentionProgressMetrics + retentionByResolution map[ResolutionLevel]time.Duration +} + +// NewRetentionProgressCalculator creates a new RetentionProgressCalculator. +func NewRetentionProgressCalculator(reg prometheus.Registerer, retentionByResolution map[ResolutionLevel]time.Duration) *RetentionProgressCalculator { + return &RetentionProgressCalculator{ + retentionByResolution: retentionByResolution, + RetentionProgressMetrics: &RetentionProgressMetrics{ + NumberOfBlocksDeleted: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "thanos_compact_todo_deleted_blocks", + Help: "number of blocks that have crossed their retention period", + }, []string{"group"}), + }, + } +} + +// ProgressCalculate calculates, per group, the number of blocks that have crossed their retention period and are due for deletion. +func (rs *RetentionProgressCalculator) ProgressCalculate(ctx context.Context, groups []*Group) error { + groupBlocks := make(map[string]int, len(groups)) + + for _, group := range groups { + for _, m := range group.metasByMinTime { + retentionDuration := rs.retentionByResolution[ResolutionLevel(m.Thanos.Downsample.Resolution)] + if retentionDuration.Seconds() == 0 { + continue + } + maxTime := time.Unix(m.MaxTime/1000, 0) + if time.Now().After(maxTime.Add(retentionDuration)) { + groupBlocks[group.key]++ + } + } + } + + rs.RetentionProgressMetrics.NumberOfBlocksDeleted.Reset() + for key, blocks := range groupBlocks { + rs.RetentionProgressMetrics.NumberOfBlocksDeleted.WithLabelValues(key).Add(float64(blocks)) + } + + return nil +} + // Planner returns blocks to compact. type Planner interface { // Plan returns a list of blocks that should be compacted into single one.
From 06d418f9d6cf2db777fb9d929041d140e382820f Mon Sep 17 00:00:00 2001 From: metonymic-smokey Date: Wed, 10 Nov 2021 19:55:09 +0530 Subject: [PATCH 2/6] added unit tests Signed-off-by: metonymic-smokey --- pkg/compact/compact_test.go | 142 ++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/pkg/compact/compact_test.go b/pkg/compact/compact_test.go index 8119a66429..d897e2e1a3 100644 --- a/pkg/compact/compact_test.go +++ b/pkg/compact/compact_test.go @@ -205,6 +205,148 @@ func createBlockMeta(id uint64, minTime, maxTime int64, labels map[string]string return m } +func TestRetentionProgressCalculate(t *testing.T) { + logger := log.NewNopLogger() + reg := prometheus.NewRegistry() + unRegisterer := &receive.UnRegisterer{Registerer: reg} + + var bkt objstore.Bucket + temp := promauto.With(reg).NewCounter(prometheus.CounterOpts{Name: "test_metric_for_group", Help: "this is a test metric for compact progress tests"}) + grouper := NewDefaultGrouper(logger, bkt, false, false, reg, temp, temp, temp, "") + + type groupedResult map[string]float64 + + type retInput struct { + meta []*metadata.Meta + resMap map[ResolutionLevel]time.Duration + } + + keys := make([]string, 3) + m := make([]metadata.Meta, 3) + m[0].Thanos.Labels = map[string]string{"a": "1"} + m[0].Thanos.Downsample.Resolution = downsample.ResLevel0 + m[1].Thanos.Labels = map[string]string{"b": "2"} + m[1].Thanos.Downsample.Resolution = downsample.ResLevel1 + m[2].Thanos.Labels = map[string]string{"a": "1", "b": "2"} + m[2].Thanos.Downsample.Resolution = downsample.ResLevel2 + for ind, meta := range m { + keys[ind] = DefaultGroupKey(meta.Thanos) + } + + for _, tcase := range []struct { + testName string + input retInput + expected groupedResult + }{ + { + // In this test case, blocks belonging to multiple groups are tested. All blocks in the first group and the first block in the second group are beyond their retention period. 
In the second group, the second block still has some time before its retention period and hence, is not marked to be deleted. + testName: "multi_group_test", + input: retInput{ + meta: []*metadata.Meta{ + createBlockMeta(6, 1, int64(time.Now().Add(-6*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{}), + createBlockMeta(9, 1, int64(time.Now().Add(-9*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{}), + createBlockMeta(7, 1, int64(time.Now().Add(-4*30*24*time.Hour).Unix()*1000), map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{}), + createBlockMeta(8, 1, int64(time.Now().Add(-1*30*24*time.Hour).Unix()*1000), map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{}), + createBlockMeta(10, 1, int64(time.Now().Add(-4*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{}), + }, + resMap: map[ResolutionLevel]time.Duration{ + ResolutionLevel(downsample.ResLevel0): 5 * 30 * 24 * time.Hour, // 5 months retention. + ResolutionLevel(downsample.ResLevel1): 3 * 30 * 24 * time.Hour, // 3 months retention. + ResolutionLevel(downsample.ResLevel2): 6 * 30 * 24 * time.Hour, // 6 months retention. + }, + }, + expected: groupedResult{ + keys[0]: 2.0, + keys[1]: 1.0, + keys[2]: 0.0, + }, + }, { + // In this test case, all the blocks are retained since they have not yet crossed their retention period. 
+ testName: "retain_test", + input: retInput{ + meta: []*metadata.Meta{ + createBlockMeta(6, 1, int64(time.Now().Add(-6*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{}), + createBlockMeta(7, 1, int64(time.Now().Add(-4*30*24*time.Hour).Unix()*1000), map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{}), + createBlockMeta(8, 1, int64(time.Now().Add(-7*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{}), + }, + resMap: map[ResolutionLevel]time.Duration{ + ResolutionLevel(downsample.ResLevel0): 10 * 30 * 24 * time.Hour, // 10 months retention. + ResolutionLevel(downsample.ResLevel1): 12 * 30 * 24 * time.Hour, // 12 months retention. + ResolutionLevel(downsample.ResLevel2): 16 * 30 * 24 * time.Hour, // 16 months retention. + }, + }, + expected: groupedResult{ + keys[0]: 0, + keys[1]: 0, + keys[2]: 0, + }, + }, + { + // In this test case, all the blocks are deleted since they are past their retention period. + testName: "delete_test", + input: retInput{ + meta: []*metadata.Meta{ + createBlockMeta(6, 1, int64(time.Now().Add(-6*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{}), + createBlockMeta(7, 1, int64(time.Now().Add(-4*30*24*time.Hour).Unix()*1000), map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{}), + createBlockMeta(8, 1, int64(time.Now().Add(-7*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{}), + }, + resMap: map[ResolutionLevel]time.Duration{ + ResolutionLevel(downsample.ResLevel0): 3 * 30 * 24 * time.Hour, // 3 months retention. + ResolutionLevel(downsample.ResLevel1): 1 * 30 * 24 * time.Hour, // 1 month retention. + ResolutionLevel(downsample.ResLevel2): 6 * 30 * 24 * time.Hour, // 6 months retention.
+ }, + }, + expected: groupedResult{ + keys[0]: 1, + keys[1]: 1, + keys[2]: 1, + }, + }, + { + // In this test case, all the blocks are marked for deletion since the retention period is 0d i.e. indefinitely long retention. + testName: "zero_day_test", + input: retInput{ + meta: []*metadata.Meta{ + createBlockMeta(6, 1, int64(time.Now().Add(-6*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{}), + createBlockMeta(7, 1, int64(time.Now().Add(-4*30*24*time.Hour).Unix()*1000), map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{}), + createBlockMeta(8, 1, int64(time.Now().Add(-7*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{}), + }, + resMap: map[ResolutionLevel]time.Duration{ + ResolutionLevel(downsample.ResLevel0): 0, + ResolutionLevel(downsample.ResLevel1): 0, + ResolutionLevel(downsample.ResLevel2): 0, + }, + }, + expected: groupedResult{ + keys[0]: 0, + keys[1]: 0, + keys[2]: 0, + }, + }, + } { + if ok := t.Run(tcase.testName, func(t *testing.T) { + blocks := make(map[ulid.ULID]*metadata.Meta, len(tcase.input.meta)) + for _, meta := range tcase.input.meta { + blocks[meta.ULID] = meta + } + groups, err := grouper.Groups(blocks) + testutil.Ok(t, err) + ps := NewRetentionProgressCalculator(unRegisterer, tcase.input.resMap) + err = ps.ProgressCalculate(context.Background(), groups) + testutil.Ok(t, err) + metrics := ps.RetentionProgressMetrics + testutil.Ok(t, err) + for key := range tcase.expected { + a, err := metrics.NumberOfBlocksDeleted.GetMetricWithLabelValues(key) + testutil.Ok(t, err) + testutil.Equals(t, tcase.expected[key], promtestutil.ToFloat64(a)) + } + }); !ok { + return + } + } +} + func TestCompactProgressCalculate(t *testing.T) { type planResult struct { compactionBlocks, compactionRuns float64 From 603d824284dd550b40cc29c17670ecd660e4330d Mon Sep 17 00:00:00 2001 From: metonymic-smokey Date: Wed, 10 Nov 2021 20:07:38 +0530 Subject: [PATCH 3/6] 
small fix in comment Signed-off-by: metonymic-smokey --- pkg/compact/compact_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/compact/compact_test.go b/pkg/compact/compact_test.go index d897e2e1a3..b05878da12 100644 --- a/pkg/compact/compact_test.go +++ b/pkg/compact/compact_test.go @@ -303,7 +303,7 @@ func TestRetentionProgressCalculate(t *testing.T) { }, }, { - // In this test case, all the blocks are marked for deletion since the retention period is 0d i.e. indefinitely long retention. + // In this test case, none of the blocks are marked for deletion since the retention period is 0d i.e. indefinitely long retention. testName: "zero_day_test", input: retInput{ meta: []*metadata.Meta{ From f49cc03371c1666be6636894ab8869beef441ba7 Mon Sep 17 00:00:00 2001 From: metonymic-smokey Date: Thu, 11 Nov 2021 14:51:11 +0530 Subject: [PATCH 4/6] changed metric names Signed-off-by: metonymic-smokey --- pkg/compact/compact.go | 10 +++++----- pkg/compact/compact_test.go | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/compact/compact.go b/pkg/compact/compact.go index 0fc4206313..aebe2bf050 100644 --- a/pkg/compact/compact.go +++ b/pkg/compact/compact.go @@ -669,7 +669,7 @@ func (ds *DownsampleProgressCalculator) ProgressCalculate(ctx context.Context, g // RetentionProgressMetrics contains Prometheus metrics related to retention progress. type RetentionProgressMetrics struct { - NumberOfBlocksDeleted *prometheus.GaugeVec + NumberOfBlocksToDelete *prometheus.GaugeVec } // RetentionProgressCalculator contains RetentionProgressMetrics, which are updated during the retention simulation process. 
@@ -683,8 +683,8 @@ func NewRetentionProgressCalculator(reg prometheus.Registerer, retentionByResolu return &RetentionProgressCalculator{ retentionByResolution: retentionByResolution, RetentionProgressMetrics: &RetentionProgressMetrics{ - NumberOfBlocksDeleted: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ - Name: "thanos_compact_todo_deleted_blocks", + NumberOfBlocksToDelete: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "thanos_compact_todo_deletion_blocks", Help: "number of blocks that have crossed their retention period", }, []string{"group"}), }, @@ -708,9 +708,9 @@ func (rs *RetentionProgressCalculator) ProgressCalculate(ctx context.Context, gr } } - rs.RetentionProgressMetrics.NumberOfBlocksDeleted.Reset() + rs.RetentionProgressMetrics.NumberOfBlocksToDelete.Reset() for key, blocks := range groupBlocks { - rs.RetentionProgressMetrics.NumberOfBlocksDeleted.WithLabelValues(key).Add(float64(blocks)) + rs.RetentionProgressMetrics.NumberOfBlocksToDelete.WithLabelValues(key).Add(float64(blocks)) } return nil diff --git a/pkg/compact/compact_test.go b/pkg/compact/compact_test.go index b05878da12..52c1dbecb6 100644 --- a/pkg/compact/compact_test.go +++ b/pkg/compact/compact_test.go @@ -337,7 +337,7 @@ func TestRetentionProgressCalculate(t *testing.T) { metrics := ps.RetentionProgressMetrics testutil.Ok(t, err) for key := range tcase.expected { - a, err := metrics.NumberOfBlocksDeleted.GetMetricWithLabelValues(key) + a, err := metrics.NumberOfBlocksToDelete.GetMetricWithLabelValues(key) testutil.Ok(t, err) testutil.Equals(t, tcase.expected[key], promtestutil.ToFloat64(a)) } From ab65cfe25993f6ce4d9109b1f157919df0c227f5 Mon Sep 17 00:00:00 2001 From: metonymic-smokey Date: Thu, 11 Nov 2021 14:53:38 +0530 Subject: [PATCH 5/6] changelog entry Signed-off-by: metonymic-smokey --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7dacbd316..35869f0266 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 
+24,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#4576](https://github.com/thanos-io/thanos/pull/4576) UI: add filter compaction level to the Block UI. - [#4731](https://github.com/thanos-io/thanos/pull/4731) Rule: add stateless mode to ruler according to https://thanos.io/tip/proposals-accepted/202005-scalable-rule-storage.md/. Continue https://github.com/thanos-io/thanos/pull/4250. - [#4612](https://github.com/thanos-io/thanos/pull/4612) Sidecar: add `--prometheus.http-client` and `--prometheus.http-client-file` flag for sidecar to connect Prometheus with basic auth or TLS. +- [#4848](https://github.com/thanos-io/thanos/pull/4848) Compactor: added Prometheus metric for tracking the progress of retention. ### Fixed From 475e7588a63fcc4c8660aff874749a816e4242ab Mon Sep 17 00:00:00 2001 From: metonymic-smokey Date: Fri, 12 Nov 2021 07:38:26 +0530 Subject: [PATCH 6/6] removed unregisterer Signed-off-by: metonymic-smokey --- pkg/compact/compact_test.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/compact/compact_test.go b/pkg/compact/compact_test.go index 52c1dbecb6..03edf34d7b 100644 --- a/pkg/compact/compact_test.go +++ b/pkg/compact/compact_test.go @@ -26,7 +26,6 @@ import ( "github.com/thanos-io/thanos/pkg/errutil" "github.com/thanos-io/thanos/pkg/extprom" "github.com/thanos-io/thanos/pkg/objstore" - "github.com/thanos-io/thanos/pkg/receive" "github.com/thanos-io/thanos/pkg/testutil" ) @@ -208,7 +207,6 @@ func createBlockMeta(id uint64, minTime, maxTime int64, labels map[string]string func TestRetentionProgressCalculate(t *testing.T) { logger := log.NewNopLogger() reg := prometheus.NewRegistry() - unRegisterer := &receive.UnRegisterer{Registerer: reg} var bkt objstore.Bucket temp := promauto.With(reg).NewCounter(prometheus.CounterOpts{Name: "test_metric_for_group", Help: "this is a test metric for compact progress tests"}) @@ -233,6 +231,8 @@ func TestRetentionProgressCalculate(t 
*testing.T) { keys[ind] = DefaultGroupKey(meta.Thanos) } + ps := NewRetentionProgressCalculator(reg, nil) + for _, tcase := range []struct { testName string input retInput @@ -331,7 +331,7 @@ func TestRetentionProgressCalculate(t *testing.T) { } groups, err := grouper.Groups(blocks) testutil.Ok(t, err) - ps := NewRetentionProgressCalculator(unRegisterer, tcase.input.resMap) + ps.retentionByResolution = tcase.input.resMap err = ps.ProgressCalculate(context.Background(), groups) testutil.Ok(t, err) metrics := ps.RetentionProgressMetrics @@ -355,7 +355,6 @@ func TestCompactProgressCalculate(t *testing.T) { logger := log.NewNopLogger() reg := prometheus.NewRegistry() - unRegisterer := &receive.UnRegisterer{Registerer: reg} planner := NewTSDBBasedPlanner(logger, []int64{ int64(1 * time.Hour / time.Millisecond), int64(2 * time.Hour / time.Millisecond), @@ -373,6 +372,8 @@ func TestCompactProgressCalculate(t *testing.T) { keys[ind] = DefaultGroupKey(meta.Thanos) } + ps := NewCompactionProgressCalculator(reg, planner) + var bkt objstore.Bucket temp := promauto.With(reg).NewCounter(prometheus.CounterOpts{Name: "test_metric_for_group", Help: "this is a test metric for compact progress tests"}) grouper := NewDefaultGrouper(logger, bkt, false, false, reg, temp, temp, temp, "") @@ -458,7 +459,6 @@ func TestCompactProgressCalculate(t *testing.T) { } groups, err := grouper.Groups(blocks) testutil.Ok(t, err) - ps := NewCompactionProgressCalculator(unRegisterer, planner) err = ps.ProgressCalculate(context.Background(), groups) testutil.Ok(t, err) metrics := ps.CompactProgressMetrics @@ -479,7 +479,6 @@ func TestCompactProgressCalculate(t *testing.T) { func TestDownsampleProgressCalculate(t *testing.T) { reg := prometheus.NewRegistry() - unRegisterer := &receive.UnRegisterer{Registerer: reg} logger := log.NewNopLogger() type groupedResult map[string]float64 @@ -495,6 +494,8 @@ func TestDownsampleProgressCalculate(t *testing.T) { keys[ind] = DefaultGroupKey(meta.Thanos) } + ds := 
NewDownsampleProgressCalculator(reg) + var bkt objstore.Bucket temp := promauto.With(reg).NewCounter(prometheus.CounterOpts{Name: "test_metric_for_group", Help: "this is a test metric for downsample progress tests"}) grouper := NewDefaultGrouper(logger, bkt, false, false, reg, temp, temp, temp, "") @@ -580,7 +581,6 @@ func TestDownsampleProgressCalculate(t *testing.T) { groups, err := grouper.Groups(blocks) testutil.Ok(t, err) - ds := NewDownsampleProgressCalculator(unRegisterer) err = ds.ProgressCalculate(context.Background(), groups) testutil.Ok(t, err) metrics := ds.DownsampleProgressMetrics