From 415f0b96bcb601ef0755068af767e47502d45dbc Mon Sep 17 00:00:00 2001 From: Philip Gough Date: Mon, 23 Aug 2021 17:04:02 +0100 Subject: [PATCH] reloader: Expose metrics to give info about last operation result/time Signed-off-by: Philip Gough --- CHANGELOG.md | 2 ++ pkg/reloader/reloader.go | 45 +++++++++++++++++++++++++++++++++++----- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6718244b98..3a2215e595d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#4509](https://github.com/thanos-io/thanos/pull/4509) Logging: Adds duration_ms in int64 to the logs. - [#4462](https://github.com/thanos-io/thanos/pull/4462) UI: Add find overlap block UI - [#4469](https://github.com/thanos-io/thanos/pull/4469) Compact: Add flag `compact.skip-block-with-out-of-order-chunks` to skip blocks with out-of-order chunks during compaction instead of halting +- [#4594](https://github.com/thanos-io/thanos/pull/4594) reloader: Expose metrics in config reloader to give info on the last operation. + ### Fixed diff --git a/pkg/reloader/reloader.go b/pkg/reloader/reloader.go index b618fd171b3..c76724bb550 100644 --- a/pkg/reloader/reloader.go +++ b/pkg/reloader/reloader.go @@ -7,7 +7,7 @@ // Reloader type is useful when you want to: // // * Watch on changes against certain file e.g (`cfgFile`). -// * Optionally, specify different different output file for watched `cfgFile` (`cfgOutputFile`). +// * Optionally, specify different output file for watched `cfgFile` (`cfgOutputFile`). // This will also try decompress the `cfgFile` if needed and substitute ALL the envvars using Kubernetes substitution format: (`$(var)`) // * Watch on changes against certain directories (`watchedDirs`). 
// @@ -95,10 +95,14 @@ type Reloader struct { lastCfgHash []byte lastWatchedDirsHash []byte - reloads prometheus.Counter - reloadErrors prometheus.Counter - configApplyErrors prometheus.Counter - configApply prometheus.Counter + reloads prometheus.Counter + reloadErrors prometheus.Counter + lastReloadSuccess prometheus.Gauge + lastReloadSuccessTimestamp prometheus.Gauge + configApplyErrors prometheus.Counter + configApply prometheus.Counter + lastConfigApplySuccess prometheus.Gauge + lastConfigApplySuccessTimestamp prometheus.Gauge } // Options bundles options for the Reloader. @@ -154,6 +158,18 @@ func New(logger log.Logger, reg prometheus.Registerer, o *Options) *Reloader { Help: "Total number of reload requests that failed.", }, ), + lastReloadSuccess: promauto.With(reg).NewGauge( + prometheus.GaugeOpts{ + Name: "reloader_last_reload_successful", + Help: "Whether the last reload attempt was successful", + }, + ), + lastReloadSuccessTimestamp: promauto.With(reg).NewGauge( + prometheus.GaugeOpts{ + Name: "reloader_last_reload_success_timestamp_seconds", + Help: "Timestamp of the last successful reload", + }, + ), configApply: promauto.With(reg).NewCounter( prometheus.CounterOpts{ Name: "reloader_config_apply_operations_total", @@ -166,6 +182,18 @@ func New(logger log.Logger, reg prometheus.Registerer, o *Options) *Reloader { Help: "Total number of config apply operations that failed.", }, ), + lastConfigApplySuccess: promauto.With(reg).NewGauge( + prometheus.GaugeOpts{ + Name: "reloader_last_config_apply_successful", + Help: "Whether the last config apply operation attempt was successful", + }, + ), + lastConfigApplySuccessTimestamp: promauto.With(reg).NewGauge( + prometheus.GaugeOpts{ + Name: "reloader_config_apply_last_success_timestamp_seconds", + Help: "Timestamp of the last successful config apply operation", + }, + ), } return r } @@ -243,7 +271,11 @@ func (r *Reloader) Watch(ctx context.Context) error { if err := r.apply(applyCtx); err != nil { 
r.configApplyErrors.Inc() level.Error(r.logger).Log("msg", "apply error", "err", err) + r.lastConfigApplySuccess.Set(0) + continue } + r.lastConfigApplySuccess.Set(1) + r.lastConfigApplySuccessTimestamp.SetToCurrentTime() } } @@ -357,8 +389,11 @@ func (r *Reloader) apply(ctx context.Context) error { "cfg_in", r.cfgFile, "cfg_out", r.cfgOutputFile, "watched_dirs", strings.Join(r.watchedDirs, ", ")) + r.lastReloadSuccess.Set(1) + r.lastReloadSuccessTimestamp.SetToCurrentTime() return nil }); err != nil { + r.lastReloadSuccess.Set(0) level.Error(r.logger).Log("msg", "Failed to trigger reload. Retrying.", "err", err) }