From 836ca2834fa1e0cea0694465fdc0a6ed65736f48 Mon Sep 17 00:00:00 2001 From: Seena Fallah Date: Sun, 27 Nov 2022 01:32:41 +0100 Subject: [PATCH] compact: retry on cleanPartialMarked errors if possible cleanPartialMarked calls SyncMetas, which can return retriable errors. By checking for retriable errors and letting the next cleanup interval retry instead of returning them, this prevents the compactor from shutting down the HTTP server. Signed-off-by: Seena Fallah --- cmd/thanos/compact.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 33a7618416..6cb4eae3e9 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -557,7 +557,19 @@ func runCompact( // since one iteration potentially could take a long time. if conf.cleanupBlocksInterval > 0 { g.Add(func() error { - return runutil.Repeat(conf.cleanupBlocksInterval, ctx.Done(), cleanPartialMarked) + return runutil.Repeat(conf.cleanupBlocksInterval, ctx.Done(), func() error { + err := cleanPartialMarked() + if err != nil && compact.IsRetryError(err) { + // The RetryError signals that we hit an retriable error (transient error, no connection). + // You should alert on this being triggered too frequently. + level.Error(logger).Log("msg", "retriable error", "err", err) + compactMetrics.retried.Inc() + + return nil + } + + return err + }) }, func(error) { cancel() })