Skip to content

Commit

Permalink
Add container selector to APIRulesSyncAvailabilityErrorBudgetBurning alert
Browse files Browse the repository at this point in the history

Signed-off-by: Jéssica Lins <[email protected]>
  • Loading branch information
jessicalins committed Aug 25, 2022
1 parent 2aa16ae commit 1e28507
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 89 deletions.
2 changes: 1 addition & 1 deletion observability/prometheusrules.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ local renderAlerts(name, environment, mixin) = {
alertName: 'APIRulesSyncAvailabilityErrorBudgetBurning',
alertMessage: 'API /reload endpoint is burning too much error budget to guarantee availability SLOs',
metric: 'client_api_requests_total',
selectors: ['client="oauth"', upNamespaceSelector, 'code=~"^(2..|3..|5..)$"'],
selectors: ['client="oauth"', 'container="thanos-rule-syncer"', upNamespaceSelector, 'code=~"^(2..|3..|5..)$"'],
errorSelectors: ['code=~"5.+"'],
target: 0.95,
}),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1047,13 +1047,14 @@ spec:
message: API /reload endpoint is burning too much error budget to guarantee availability SLOs
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#apirulessyncavailabilityerrorbudgetburning
expr: |
sum(client_api_requests_total:burnrate5m{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (14.40 * (1-0.95000))
sum(client_api_requests_total:burnrate5m{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (14.40 * (1-0.95000))
and
sum(client_api_requests_total:burnrate1h{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (14.40 * (1-0.95000))
sum(client_api_requests_total:burnrate1h{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (14.40 * (1-0.95000))
for: 2m
labels:
client: oauth
code: ^(2..|3..|5..)$
container: thanos-rule-syncer
namespace: observatorium-mst-production
service: telemeter
severity: critical
Expand All @@ -1063,13 +1064,14 @@ spec:
message: API /reload endpoint is burning too much error budget to guarantee availability SLOs
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#apirulessyncavailabilityerrorbudgetburning
expr: |
sum(client_api_requests_total:burnrate30m{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (6.00 * (1-0.95000))
sum(client_api_requests_total:burnrate30m{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (6.00 * (1-0.95000))
and
sum(client_api_requests_total:burnrate6h{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (6.00 * (1-0.95000))
sum(client_api_requests_total:burnrate6h{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (6.00 * (1-0.95000))
for: 15m
labels:
client: oauth
code: ^(2..|3..|5..)$
container: thanos-rule-syncer
namespace: observatorium-mst-production
service: telemeter
severity: critical
Expand All @@ -1079,13 +1081,14 @@ spec:
message: API /reload endpoint is burning too much error budget to guarantee availability SLOs
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#apirulessyncavailabilityerrorbudgetburning
expr: |
sum(client_api_requests_total:burnrate2h{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (3.00 * (1-0.95000))
sum(client_api_requests_total:burnrate2h{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (3.00 * (1-0.95000))
and
sum(client_api_requests_total:burnrate1d{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (3.00 * (1-0.95000))
sum(client_api_requests_total:burnrate1d{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (3.00 * (1-0.95000))
for: 1h
labels:
client: oauth
code: ^(2..|3..|5..)$
container: thanos-rule-syncer
namespace: observatorium-mst-production
service: telemeter
severity: medium
Expand All @@ -1095,77 +1098,85 @@ spec:
message: API /reload endpoint is burning too much error budget to guarantee availability SLOs
runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#apirulessyncavailabilityerrorbudgetburning
expr: |
sum(client_api_requests_total:burnrate6h{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (1.00 * (1-0.95000))
sum(client_api_requests_total:burnrate6h{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (1.00 * (1-0.95000))
and
sum(client_api_requests_total:burnrate3d{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (1.00 * (1-0.95000))
sum(client_api_requests_total:burnrate3d{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}) > (1.00 * (1-0.95000))
for: 3h
labels:
client: oauth
code: ^(2..|3..|5..)$
container: thanos-rule-syncer
namespace: observatorium-mst-production
service: telemeter
severity: medium
- expr: |
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[1d]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[1d]))
/
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[1d]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[1d]))
labels:
client: oauth
code: ^(2..|3..|5..)$
container: thanos-rule-syncer
namespace: observatorium-mst-production
record: client_api_requests_total:burnrate1d
- expr: |
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[1h]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[1h]))
/
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[1h]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[1h]))
labels:
client: oauth
code: ^(2..|3..|5..)$
container: thanos-rule-syncer
namespace: observatorium-mst-production
record: client_api_requests_total:burnrate1h
- expr: |
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[2h]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[2h]))
/
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[2h]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[2h]))
labels:
client: oauth
code: ^(2..|3..|5..)$
container: thanos-rule-syncer
namespace: observatorium-mst-production
record: client_api_requests_total:burnrate2h
- expr: |
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[30m]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[30m]))
/
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[30m]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[30m]))
labels:
client: oauth
code: ^(2..|3..|5..)$
container: thanos-rule-syncer
namespace: observatorium-mst-production
record: client_api_requests_total:burnrate30m
- expr: |
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[3d]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[3d]))
/
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[3d]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[3d]))
labels:
client: oauth
code: ^(2..|3..|5..)$
container: thanos-rule-syncer
namespace: observatorium-mst-production
record: client_api_requests_total:burnrate3d
- expr: |
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[5m]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[5m]))
/
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[5m]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[5m]))
labels:
client: oauth
code: ^(2..|3..|5..)$
container: thanos-rule-syncer
namespace: observatorium-mst-production
record: client_api_requests_total:burnrate5m
- expr: |
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[6h]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$",code=~"5.+"}[6h]))
/
sum(rate(client_api_requests_total{client="oauth",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[6h]))
sum(rate(client_api_requests_total{client="oauth",container="thanos-rule-syncer",namespace="observatorium-mst-production",code=~"^(2..|3..|5..)$"}[6h]))
labels:
client: oauth
code: ^(2..|3..|5..)$
container: thanos-rule-syncer
namespace: observatorium-mst-production
record: client_api_requests_total:burnrate6h
- name: rhobs-mst-api-rules-read-availability.slo
Expand Down
Loading

0 comments on commit 1e28507

Please sign in to comment.