From 157822cf212f6420fa92f7962b606c1293d4323a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9ssica=20Lins?= Date: Fri, 12 Nov 2021 10:10:37 -0300 Subject: [PATCH] Solve conflicts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jéssica Lins --- CHANGELOG.md | 1 + cmd/thanos/compact.go | 12 ++- pkg/compact/compact.go | 49 ++++++++++++ pkg/compact/compact_test.go | 152 ++++++++++++++++++++++++++++++++++-- 4 files changed, 208 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0556f50785..191d3a8e7eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#4576](https://github.com/thanos-io/thanos/pull/4576) UI: add filter compaction level to the Block UI. - [#4731](https://github.com/thanos-io/thanos/pull/4731) Rule: add stateless mode to ruler according to https://thanos.io/tip/proposals-accepted/202005-scalable-rule-storage.md/. Continue https://github.com/thanos-io/thanos/pull/4250. - [#4612](https://github.com/thanos-io/thanos/pull/4612) Sidecar: add `--prometheus.http-client` and `--prometheus.http-client-file` flag for sidecar to connect Prometheus with basic auth or TLS. +- [#4848](https://github.com/thanos-io/thanos/pull/4848) Compactor: added Prometheus metric for tracking the progress of retention. - [#4856](https://github.com/thanos-io/thanos/pull/4856) Mixin: Add Query Frontend Grafana dashboard. 
### Fixed diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 111af4912e5..2ca03319388 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -462,6 +462,7 @@ func runCompact( if conf.compactionProgressMetrics { g.Add(func() error { ps := compact.NewCompactionProgressCalculator(reg, tsdbPlanner) + rs := compact.NewRetentionProgressCalculator(reg, retentionByResolution) var ds *compact.DownsampleProgressCalculator if !conf.disableDownsampling { ds = compact.NewDownsampleProgressCalculator(reg) @@ -476,13 +477,22 @@ func runCompact( metas := sy.Metas() groups, err := grouper.Groups(metas) if err != nil { - return errors.Wrapf(err, "could not group metadata") + return errors.Wrapf(err, "could not group metadata for compaction") } if err = ps.ProgressCalculate(ctx, groups); err != nil { return errors.Wrapf(err, "could not calculate compaction progress") } + retGroups, err := grouper.Groups(metas) + if err != nil { + return errors.Wrapf(err, "could not group metadata for retention") + } + + if err = rs.ProgressCalculate(ctx, retGroups); err != nil { + return errors.Wrapf(err, "could not calculate retention progress") + } + if !conf.disableDownsampling { groups, err = grouper.Groups(metas) if err != nil { diff --git a/pkg/compact/compact.go b/pkg/compact/compact.go index 0beb8c2b4e8..aebe2bf050a 100644 --- a/pkg/compact/compact.go +++ b/pkg/compact/compact.go @@ -667,6 +667,55 @@ func (ds *DownsampleProgressCalculator) ProgressCalculate(ctx context.Context, g return nil } +// RetentionProgressMetrics contains Prometheus metrics related to retention progress. +type RetentionProgressMetrics struct { + NumberOfBlocksToDelete *prometheus.GaugeVec +} + +// RetentionProgressCalculator contains RetentionProgressMetrics, which are updated during the retention simulation process. 
+type RetentionProgressCalculator struct { + *RetentionProgressMetrics + retentionByResolution map[ResolutionLevel]time.Duration +} + +// NewRetentionProgressCalculator creates a new RetentionProgressCalculator. +func NewRetentionProgressCalculator(reg prometheus.Registerer, retentionByResolution map[ResolutionLevel]time.Duration) *RetentionProgressCalculator { + return &RetentionProgressCalculator{ + retentionByResolution: retentionByResolution, + RetentionProgressMetrics: &RetentionProgressMetrics{ + NumberOfBlocksToDelete: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "thanos_compact_todo_deletion_blocks", + Help: "number of blocks that have crossed their retention period", + }, []string{"group"}), + }, + } +} + +// ProgressCalculate calculates, per group, the number of blocks that have crossed their retention period and are to be deleted. +func (rs *RetentionProgressCalculator) ProgressCalculate(ctx context.Context, groups []*Group) error { + groupBlocks := make(map[string]int, len(groups)) + + for _, group := range groups { + for _, m := range group.metasByMinTime { + retentionDuration := rs.retentionByResolution[ResolutionLevel(m.Thanos.Downsample.Resolution)] + if retentionDuration.Seconds() == 0 { + continue + } + maxTime := time.Unix(m.MaxTime/1000, 0) + if time.Now().After(maxTime.Add(retentionDuration)) { + groupBlocks[group.key]++ + } + } + } + + rs.RetentionProgressMetrics.NumberOfBlocksToDelete.Reset() + for key, blocks := range groupBlocks { + rs.RetentionProgressMetrics.NumberOfBlocksToDelete.WithLabelValues(key).Add(float64(blocks)) + } + + return nil +} + // Planner returns blocks to compact. type Planner interface { // Plan returns a list of blocks that should be compacted into single one. 
diff --git a/pkg/compact/compact_test.go b/pkg/compact/compact_test.go index 8119a664290..03edf34d7b9 100644 --- a/pkg/compact/compact_test.go +++ b/pkg/compact/compact_test.go @@ -26,7 +26,6 @@ import ( "github.com/thanos-io/thanos/pkg/errutil" "github.com/thanos-io/thanos/pkg/extprom" "github.com/thanos-io/thanos/pkg/objstore" - "github.com/thanos-io/thanos/pkg/receive" "github.com/thanos-io/thanos/pkg/testutil" ) @@ -205,6 +204,149 @@ func createBlockMeta(id uint64, minTime, maxTime int64, labels map[string]string return m } +func TestRetentionProgressCalculate(t *testing.T) { + logger := log.NewNopLogger() + reg := prometheus.NewRegistry() + + var bkt objstore.Bucket + temp := promauto.With(reg).NewCounter(prometheus.CounterOpts{Name: "test_metric_for_group", Help: "this is a test metric for compact progress tests"}) + grouper := NewDefaultGrouper(logger, bkt, false, false, reg, temp, temp, temp, "") + + type groupedResult map[string]float64 + + type retInput struct { + meta []*metadata.Meta + resMap map[ResolutionLevel]time.Duration + } + + keys := make([]string, 3) + m := make([]metadata.Meta, 3) + m[0].Thanos.Labels = map[string]string{"a": "1"} + m[0].Thanos.Downsample.Resolution = downsample.ResLevel0 + m[1].Thanos.Labels = map[string]string{"b": "2"} + m[1].Thanos.Downsample.Resolution = downsample.ResLevel1 + m[2].Thanos.Labels = map[string]string{"a": "1", "b": "2"} + m[2].Thanos.Downsample.Resolution = downsample.ResLevel2 + for ind, meta := range m { + keys[ind] = DefaultGroupKey(meta.Thanos) + } + + ps := NewRetentionProgressCalculator(reg, nil) + + for _, tcase := range []struct { + testName string + input retInput + expected groupedResult + }{ + { + // In this test case, blocks belonging to multiple groups are tested. All blocks in the first group and the first block in the second group are beyond their retention period. In the second group, the second block still has some time before its retention period and hence, is not marked to be deleted. 
+ testName: "multi_group_test", + input: retInput{ + meta: []*metadata.Meta{ + createBlockMeta(6, 1, int64(time.Now().Add(-6*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{}), + createBlockMeta(9, 1, int64(time.Now().Add(-9*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{}), + createBlockMeta(7, 1, int64(time.Now().Add(-4*30*24*time.Hour).Unix()*1000), map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{}), + createBlockMeta(8, 1, int64(time.Now().Add(-1*30*24*time.Hour).Unix()*1000), map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{}), + createBlockMeta(10, 1, int64(time.Now().Add(-4*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{}), + }, + resMap: map[ResolutionLevel]time.Duration{ + ResolutionLevel(downsample.ResLevel0): 5 * 30 * 24 * time.Hour, // 5 months retention. + ResolutionLevel(downsample.ResLevel1): 3 * 30 * 24 * time.Hour, // 3 months retention. + ResolutionLevel(downsample.ResLevel2): 6 * 30 * 24 * time.Hour, // 6 months retention. + }, + }, + expected: groupedResult{ + keys[0]: 2.0, + keys[1]: 1.0, + keys[2]: 0.0, + }, + }, { + // In this test case, all the blocks are retained since they have not yet crossed their retention period. + testName: "retain_test", + input: retInput{ + meta: []*metadata.Meta{ + createBlockMeta(6, 1, int64(time.Now().Add(-6*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{}), + createBlockMeta(7, 1, int64(time.Now().Add(-4*30*24*time.Hour).Unix()*1000), map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{}), + createBlockMeta(8, 1, int64(time.Now().Add(-7*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{}), + }, + resMap: map[ResolutionLevel]time.Duration{ + ResolutionLevel(downsample.ResLevel0): 10 * 30 * 24 * time.Hour, // 10 months retention. 
+ ResolutionLevel(downsample.ResLevel1): 12 * 30 * 24 * time.Hour, // 12 months retention. + ResolutionLevel(downsample.ResLevel2): 16 * 30 * 24 * time.Hour, // 16 months retention. + }, + }, + expected: groupedResult{ + keys[0]: 0, + keys[1]: 0, + keys[2]: 0, + }, + }, + { + // In this test case, all the blocks are deleted since they are past their retention period. + testName: "delete_test", + input: retInput{ + meta: []*metadata.Meta{ + createBlockMeta(6, 1, int64(time.Now().Add(-6*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{}), + createBlockMeta(7, 1, int64(time.Now().Add(-4*30*24*time.Hour).Unix()*1000), map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{}), + createBlockMeta(8, 1, int64(time.Now().Add(-7*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{}), + }, + resMap: map[ResolutionLevel]time.Duration{ + ResolutionLevel(downsample.ResLevel0): 3 * 30 * 24 * time.Hour, // 3 months retention. + ResolutionLevel(downsample.ResLevel1): 1 * 30 * 24 * time.Hour, // 1 month retention. + ResolutionLevel(downsample.ResLevel2): 6 * 30 * 24 * time.Hour, // 6 months retention. + }, + }, + expected: groupedResult{ + keys[0]: 1, + keys[1]: 1, + keys[2]: 1, + }, + }, + { + // In this test case, none of the blocks are marked for deletion since the retention period is 0d i.e. indefinitely long retention. 
+ testName: "zero_day_test", + input: retInput{ + meta: []*metadata.Meta{ + createBlockMeta(6, 1, int64(time.Now().Add(-6*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1"}, downsample.ResLevel0, []uint64{}), + createBlockMeta(7, 1, int64(time.Now().Add(-4*30*24*time.Hour).Unix()*1000), map[string]string{"b": "2"}, downsample.ResLevel1, []uint64{}), + createBlockMeta(8, 1, int64(time.Now().Add(-7*30*24*time.Hour).Unix()*1000), map[string]string{"a": "1", "b": "2"}, downsample.ResLevel2, []uint64{}), + }, + resMap: map[ResolutionLevel]time.Duration{ + ResolutionLevel(downsample.ResLevel0): 0, + ResolutionLevel(downsample.ResLevel1): 0, + ResolutionLevel(downsample.ResLevel2): 0, + }, + }, + expected: groupedResult{ + keys[0]: 0, + keys[1]: 0, + keys[2]: 0, + }, + }, + } { + if ok := t.Run(tcase.testName, func(t *testing.T) { + blocks := make(map[ulid.ULID]*metadata.Meta, len(tcase.input.meta)) + for _, meta := range tcase.input.meta { + blocks[meta.ULID] = meta + } + groups, err := grouper.Groups(blocks) + testutil.Ok(t, err) + ps.retentionByResolution = tcase.input.resMap + err = ps.ProgressCalculate(context.Background(), groups) + testutil.Ok(t, err) + metrics := ps.RetentionProgressMetrics + testutil.Ok(t, err) + for key := range tcase.expected { + a, err := metrics.NumberOfBlocksToDelete.GetMetricWithLabelValues(key) + testutil.Ok(t, err) + testutil.Equals(t, tcase.expected[key], promtestutil.ToFloat64(a)) + } + }); !ok { + return + } + } +} + func TestCompactProgressCalculate(t *testing.T) { type planResult struct { compactionBlocks, compactionRuns float64 @@ -213,7 +355,6 @@ func TestCompactProgressCalculate(t *testing.T) { logger := log.NewNopLogger() reg := prometheus.NewRegistry() - unRegisterer := &receive.UnRegisterer{Registerer: reg} planner := NewTSDBBasedPlanner(logger, []int64{ int64(1 * time.Hour / time.Millisecond), int64(2 * time.Hour / time.Millisecond), @@ -231,6 +372,8 @@ func TestCompactProgressCalculate(t *testing.T) { keys[ind] = 
DefaultGroupKey(meta.Thanos) } + ps := NewCompactionProgressCalculator(reg, planner) + var bkt objstore.Bucket temp := promauto.With(reg).NewCounter(prometheus.CounterOpts{Name: "test_metric_for_group", Help: "this is a test metric for compact progress tests"}) grouper := NewDefaultGrouper(logger, bkt, false, false, reg, temp, temp, temp, "") @@ -316,7 +459,6 @@ func TestCompactProgressCalculate(t *testing.T) { } groups, err := grouper.Groups(blocks) testutil.Ok(t, err) - ps := NewCompactionProgressCalculator(unRegisterer, planner) err = ps.ProgressCalculate(context.Background(), groups) testutil.Ok(t, err) metrics := ps.CompactProgressMetrics @@ -337,7 +479,6 @@ func TestCompactProgressCalculate(t *testing.T) { func TestDownsampleProgressCalculate(t *testing.T) { reg := prometheus.NewRegistry() - unRegisterer := &receive.UnRegisterer{Registerer: reg} logger := log.NewNopLogger() type groupedResult map[string]float64 @@ -353,6 +494,8 @@ func TestDownsampleProgressCalculate(t *testing.T) { keys[ind] = DefaultGroupKey(meta.Thanos) } + ds := NewDownsampleProgressCalculator(reg) + var bkt objstore.Bucket temp := promauto.With(reg).NewCounter(prometheus.CounterOpts{Name: "test_metric_for_group", Help: "this is a test metric for downsample progress tests"}) grouper := NewDefaultGrouper(logger, bkt, false, false, reg, temp, temp, temp, "") @@ -438,7 +581,6 @@ func TestDownsampleProgressCalculate(t *testing.T) { groups, err := grouper.Groups(blocks) testutil.Ok(t, err) - ds := NewDownsampleProgressCalculator(unRegisterer) err = ds.ProgressCalculate(context.Background(), groups) testutil.Ok(t, err) metrics := ds.DownsampleProgressMetrics