From ee2edf7ba28b34145665855ed935ef59653fd089 Mon Sep 17 00:00:00 2001
From: Ayoub Mrini
Date: Wed, 8 Jan 2025 22:08:21 +0100
Subject: [PATCH] feat: set up alerts for a smooth Prom3 upgrade

---
Review notes (below the "---" marker, so not part of the commit message):

- NarrowLeQuantileSelectors is still a placeholder: its description and
  summary are "XXX." and its expr is identical to the one used by
  TargetInvalidContentType, so both alerts fire on the same condition.
  Replace the expr and the annotations before merging.
- Both alerts use severity "critical" while the jsonnet comment keeps
  "severity: 'warning'" next to it. Presumably "critical" is temporary
  (until the release cut) -- confirm with the author.
- The leading indentation of the hunk lines was rebuilt from the usual
  layout of the two target files. If the context does not match, check it
  against the tree (or try "git apply --ignore-whitespace").

 .../prometheus-rule.yaml                      | 16 ++++++++++++
 jsonnet/rules.libsonnet                       | 26 +++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/assets/cluster-monitoring-operator/prometheus-rule.yaml b/assets/cluster-monitoring-operator/prometheus-rule.yaml
index e02d3f3bfd..54de9ffb81 100644
--- a/assets/cluster-monitoring-operator/prometheus-rule.yaml
+++ b/assets/cluster-monitoring-operator/prometheus-rule.yaml
@@ -30,6 +30,22 @@ spec:
       for: 15m
       labels:
         severity: warning
+    - alert: TargetInvalidContentType
+      annotations:
+        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} could not determine a valid content type for some scrape targets.
+        summary: A valid content type could not be determined for some scrape targets.
+      expr: increase(prometheus_target_scrape_pool_invalid_content_type_total{job=~"prometheus-k8s|prometheus-user-workload"}[5m]) > 0
+      for: 15m
+      labels:
+        severity: critical
+    - alert: NarrowLeQuantileSelectors
+      annotations:
+        description: XXX.
+        summary: XXX.
+      expr: increase(prometheus_target_scrape_pool_invalid_content_type_total{job=~"prometheus-k8s|prometheus-user-workload"}[5m]) > 0
+      for: 15m
+      labels:
+        severity: critical
   - name: openshift-kubernetes.rules
     rules:
     - expr: sum(rate(container_cpu_usage_seconds_total{container="",pod!=""}[5m])) BY (pod, namespace)
diff --git a/jsonnet/rules.libsonnet b/jsonnet/rules.libsonnet
index 431d26eefe..5d919e4bbd 100644
--- a/jsonnet/rules.libsonnet
+++ b/jsonnet/rules.libsonnet
@@ -27,6 +27,32 @@ function(params) {
             severity: 'warning',
           },
         },
+        {
+          expr: 'increase(prometheus_target_scrape_pool_invalid_content_type_total{job=~"prometheus-k8s|prometheus-user-workload"}[5m]) > 0',
+          alert: 'TargetInvalidContentType',
+          'for': '15m',
+          annotations: {
+            description: 'Prometheus {{$labels.namespace}}/{{$labels.pod}} could not determine a valid content type for some scrape targets.',
+            summary: 'A valid content type could not be determined for some scrape targets.',
+          },
+          labels: {
+            # severity: 'warning', (before the cut, to take advantage of critical alerts treatment)
+            severity: 'critical',
+          },
+        },
+        {
+          expr: 'increase(prometheus_target_scrape_pool_invalid_content_type_total{job=~"prometheus-k8s|prometheus-user-workload"}[5m]) > 0',
+          alert: 'NarrowLeQuantileSelectors',
+          'for': '15m',
+          annotations: {
+            description: 'XXX.',
+            summary: 'XXX.',
+          },
+          labels: {
+            # severity: 'warning', (before the cut, to take advantage of critical alerts treatment)
+            severity: 'critical',
+          },
+        },
       ],
     },
     {