Skip to content

Commit

Permalink
Add support for Loki ruler to manage rules on object storage (rhobs#345
Browse files Browse the repository at this point in the history
)
  • Loading branch information
periklis authored and philipgough committed Oct 10, 2022
1 parent 6d080a8 commit 2d0e8ab
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 131 deletions.
4 changes: 2 additions & 2 deletions jsonnetfile.lock.json
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@
"subdir": "configuration"
}
},
"version": "36071db2f584b7cfb7932946dc9336873c363f09",
"sum": "xKdQFAsNSYo1lK+2HVOSVcyG9sjEduHVbWhxIe5eJ6o="
"version": "66634451d91d8872acae19eb15f7377670e6dadc",
"sum": "LyJurWxkxdUWx7TDDGSKfQvMaJMBMDpJmkyt3WfgdfE="
},
{
"source": {
Expand Down
82 changes: 28 additions & 54 deletions resources/services/observatorium-logs-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -685,9 +685,7 @@ objects:
"store": "memberlist"
"rule_path": "/data"
"storage":
"local":
"directory": "/tmp/rules"
"type": "local"
"type": "s3"
"wal":
"dir": "/data/loki/wal"
"max_age": "4h"
Expand Down Expand Up @@ -2091,6 +2089,10 @@ objects:
- -s3.region=$(S3_REGION)
- -s3.access-key-id=$(AWS_ACCESS_KEY_ID)
- -s3.secret-access-key=$(AWS_SECRET_ACCESS_KEY)
- -ruler.storage.s3.buckets=$(RULER_S3_BUCKETS)
- -ruler.storage.s3.region=$(RULER_S3_REGION)
- -ruler.storage.s3.access-key-id=$(RULER_AWS_ACCESS_KEY_ID)
- -ruler.storage.s3.secret-access-key=$(RULER_AWS_SECRET_ACCESS_KEY)
- -distributor.replication-factor=1
- -ruler.external.url="${ALERTMANAGER_EXTERNAL_URL}"
env:
Expand All @@ -2114,6 +2116,26 @@ objects:
secretKeyRef:
key: aws_secret_access_key
name: ${LOKI_S3_SECRET}
- name: RULER_S3_BUCKETS
valueFrom:
secretKeyRef:
key: bucket
name: ${RULES_OBJSTORE_S3_SECRET}
- name: RULER_S3_REGION
valueFrom:
secretKeyRef:
key: aws_region
name: ${RULES_OBJSTORE_S3_SECRET}
- name: RULER_AWS_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
key: aws_access_key_id
name: ${RULES_OBJSTORE_S3_SECRET}
- name: RULER_AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
key: aws_secret_access_key
name: ${RULES_OBJSTORE_S3_SECRET}
image: ${LOKI_IMAGE}:${LOKI_IMAGE_TAG}
imagePullPolicy: IfNotPresent
livenessProbe:
Expand Down Expand Up @@ -2152,9 +2174,6 @@ objects:
- mountPath: /data
name: storage
readOnly: false
- mountPath: /tmp/rules
name: rules
readOnly: false
- args:
- --reporter.grpc.host-port=dns:///jaeger-collector-headless.${JAEGER_COLLECTOR_NAMESPACE}.svc:14250
- --reporter.type=grpc
Expand Down Expand Up @@ -2195,9 +2214,6 @@ objects:
- configMap:
name: observatorium-loki
name: config
- configMap:
name: observatorium-loki-rules
name: rules
volumeClaimTemplates:
- metadata:
labels:
Expand All @@ -2213,51 +2229,7 @@ objects:
storage: ${LOKI_RULER_PVC_REQUEST}
storageClassName: ${STORAGE_CLASS}
- apiVersion: v1
data:
rhobs-logs-ocm-alerts: |-
"groups":
- "interval": "1m"
"name": "uhc-stage-logs-based-alerts"
"rules":
- "alert": "UHC Server Errors"
"annotations":
"summary": "${labels.kubernetes_labels_app} is returning server-side errors"
"expr": "sum(rate({kubernetes_namespace_name=\"uhc-stage\"} |= `returning http 500` | json | line_format \"{{ .message }}\" [1m])) > 0"
"for": "5m"
"labels":
"namespace": "uhc-stage"
"service": "${labels.kubernetes_labels_app}"
"severity": "warn"
- "alert": "UHC Nil Reference Errors - Stage"
"annotations":
"summary": "${labels.kubernetes_labels_app} is throwing nil reference errors"
"expr": "sum(rate({kubernetes_namespace_name=\"uhc-stage\"} |= `nil reference` | json | line_format \"{{ .message }}\" [1m])) > 0"
"for": "5m"
"labels":
"namespace": "uhc-stage"
"service": "${labels.kubernetes_labels_app}"
"severity": "warn"
- "alert": "UHC Panic Errors - Stage"
"annotations":
"summary": "${labels.kubernetes_labels_app} is throwing panic errors"
"expr": "sum(rate({kubernetes_namespace_name=\"uhc-stage\"} |= `panic` | json | line_format \"{{ .message }}\" [1m])) > 0"
"for": "5m"
"labels":
"namespace": "uhc-stage"
"service": "${labels.kubernetes_labels_app}"
"severity": "warn"
- "interval": "1m"
"name": "rhobs-logs-stage-alerts"
"rules":
- "alert": "rhobs-logs-always-firing"
"annotations":
"summary": "rhobs logs alert that always fires"
"expr": "1 > 0"
"for": "1m"
"labels":
"namespace": "observatorium-mst-stage"
"service": "observatorium-loki-ruler"
"severity": "warn"
data: {}
kind: ConfigMap
metadata:
labels:
Expand All @@ -2273,6 +2245,8 @@ parameters:
value: observatorium-mst-stage
- name: ALERTMANAGER_EXTERNAL_URL
value: https://observatorium-alertmanager-mst.api.stage.openshift.com
- name: RULES_OBJSTORE_S3_SECRET
value: rules-objstore-stage-s3
- name: STORAGE_CLASS
value: gp2
- name: LOKI_IMAGE_TAG
Expand Down
1 change: 1 addition & 0 deletions services/observatorium-logs-template.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ local obs = import 'observatorium.libsonnet';
{ name: 'NAMESPACE', value: 'observatorium-logs' },
{ name: 'ALERTMANAGER_NAMESPACE', value: 'observatorium-mst-stage' },
{ name: 'ALERTMANAGER_EXTERNAL_URL', value: 'https://observatorium-alertmanager-mst.api.stage.openshift.com' },
{ name: 'RULES_OBJSTORE_S3_SECRET', value: 'rules-objstore-stage-s3' },
{ name: 'STORAGE_CLASS', value: 'gp2' },
{ name: 'LOKI_IMAGE_TAG', value: '2.6.1' },
{ name: 'LOKI_IMAGE', value: 'docker.io/grafana/loki' },
Expand Down
83 changes: 8 additions & 75 deletions services/observatorium-logs.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -4,76 +4,6 @@ local lokiCaches = (import 'components/loki-caches.libsonnet');
{
local obs = self,

local stageTestAlerts = [
{
interval: '1m',
name: 'rhobs-logs-stage-alerts',
rules: [
{
alert: 'rhobs-logs-always-firing',
annotations: {
summary: 'rhobs logs alert that always fires',
},
expr: '1 > 0',
'for': '1m',
labels: {
severity: 'warn',
namespace: 'observatorium-mst-stage',
service: 'observatorium-loki-ruler',
},
},
],
},
],

local ocmAlerts = [
{
interval: '1m',
name: 'uhc-stage-logs-based-alerts',
rules: [
{
alert: 'UHC Server Errors',
annotations: {
summary: '${labels.kubernetes_labels_app} is returning server-side errors',
},
expr: 'sum(rate({kubernetes_namespace_name="uhc-stage"} |= `returning http 500` | json | line_format "{{ .message }}" [1m])) > 0',
'for': '5m',
labels: {
severity: 'warn',
namespace: 'uhc-stage',
service: '${labels.kubernetes_labels_app}',
},
},
{
alert: 'UHC Nil Reference Errors - Stage',
annotations: {
summary: '${labels.kubernetes_labels_app} is throwing nil reference errors',
},
expr: 'sum(rate({kubernetes_namespace_name="uhc-stage"} |= `nil reference` | json | line_format "{{ .message }}" [1m])) > 0',
'for': '5m',
labels: {
severity: 'warn',
namespace: 'uhc-stage',
service: '${labels.kubernetes_labels_app}',
},
},
{
alert: 'UHC Panic Errors - Stage',
annotations: {
summary: '${labels.kubernetes_labels_app} is throwing panic errors',
},
expr: 'sum(rate({kubernetes_namespace_name="uhc-stage"} |= `panic` | json | line_format "{{ .message }}" [1m])) > 0',
'for': '5m',
labels: {
severity: 'warn',
namespace: 'uhc-stage',
service: '${labels.kubernetes_labels_app}',
},
},
],
},
],

lokiCaches:: lokiCaches({
local cfg = self,
name: obs.config.name,
Expand Down Expand Up @@ -147,6 +77,14 @@ local lokiCaches = (import 'components/loki-caches.libsonnet');
accessKeyIdKey: 'aws_access_key_id',
secretAccessKeyKey: 'aws_secret_access_key',
},
rulesStorageConfig: {
type: 's3',
secretName: '${RULES_OBJSTORE_S3_SECRET}',
bucketsKey: 'bucket',
regionKey: 'aws_region',
accessKeyIdKey: 'aws_access_key_id',
secretAccessKeyKey: 'aws_secret_access_key',
},
memberlist: {
ringName: 'gossip-ring',
},
Expand All @@ -163,11 +101,6 @@ local lokiCaches = (import 'components/loki-caches.libsonnet');
query_frontend: '${{LOKI_QUERY_FRONTEND_REPLICAS}}',
ruler: '${{LOKI_RULER_REPLICAS}}',
},
rules: {
'rhobs-logs-ocm-alerts': {
groups: ocmAlerts + stageTestAlerts,
},
},
resources: {
compactor: {
requests: {
Expand Down

0 comments on commit 2d0e8ab

Please sign in to comment.