From eaa92c52547beecdfbf2308f03f03521e52954ec Mon Sep 17 00:00:00 2001 From: Bartlomiej Plotka Date: Fri, 10 Jul 2020 10:39:03 +0100 Subject: [PATCH] alerts: Fixed compactor alert to use correct aggregation function. max() only aggregates across series. Because of rollouts, we need to aggregate across time as well as across series, so the metric is now wrapped in max_over_time(...[24h]) inside max(). Signed-off-by: Bartlomiej Plotka --- examples/alerts/alerts.md | 2 +- examples/alerts/alerts.yaml | 2 +- mixin/alerts/compact.libsonnet | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/alerts/alerts.md b/examples/alerts/alerts.md index 40e8fdad47..96fdd7c0cc 100644 --- a/examples/alerts/alerts.md +++ b/examples/alerts/alerts.md @@ -54,7 +54,7 @@ rules: - alert: ThanosCompactHasNotRun annotations: message: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours. - expr: (time() - max(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"})) + expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) / 60 / 60 > 24 labels: severity: warning diff --git a/examples/alerts/alerts.yaml b/examples/alerts/alerts.yaml index b903239b63..a014e13901 100644 --- a/examples/alerts/alerts.yaml +++ b/examples/alerts/alerts.yaml @@ -47,7 +47,7 @@ groups: - alert: ThanosCompactHasNotRun annotations: message: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours. 
- expr: (time() - max(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"})) + expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) / 60 / 60 > 24 labels: severity: warning diff --git a/mixin/alerts/compact.libsonnet b/mixin/alerts/compact.libsonnet index 87bcfa13a7..3fb8f474a5 100644 --- a/mixin/alerts/compact.libsonnet +++ b/mixin/alerts/compact.libsonnet @@ -73,7 +73,7 @@ annotations: { message: 'Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours.', }, - expr: '(time() - max(thanos_objstore_bucket_last_successful_upload_time{%(selector)s})) / 60 / 60 > 24' % thanos.compact, + expr: '(time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{%(selector)s}[24h]))) / 60 / 60 > 24' % thanos.compact, labels: { severity: 'warning', },