-
Notifications
You must be signed in to change notification settings - Fork 523
/
Copy pathprometheusrule.yaml
112 lines (112 loc) · 5.28 KB
/
prometheusrule.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
{{- if .Values.prometheusRule.enabled }}
{{- /*
PrometheusRule for the OpenTelemetry Collector.

Renders user-supplied rule groups from .Values.prometheusRule.groups and,
when .Values.prometheusRule.defaultRules.enabled is true, a built-in
"collectorRules" group that alerts on refused/dropped telemetry at the
receiver, processor, and exporter stages, plus exporter queue health.

Prometheus's own template expressions ({{ $labels.* }}, {{ $value }},
humanize) are wrapped in {{` ... `}} so Helm passes them through verbatim
instead of trying to evaluate them.
*/}}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "opentelemetry-collector.fullname" . }}
  namespace: {{ template "opentelemetry-collector.namespace" . }}
  labels:
    {{- include "opentelemetry-collector.labels" . | nindent 4 }}
    {{- /* extraLabels values are run through tpl so they may reference chart values. */}}
    {{- range $key, $value := .Values.prometheusRule.extraLabels }}
    {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }}
    {{- end }}
spec:
  groups:
  {{- /* User-defined groups are emitted first, at the same list indent as the default group. */}}
  {{- if .Values.prometheusRule.groups }}
  {{- toYaml .Values.prometheusRule.groups | nindent 2 }}
  {{- end }}
  {{- if .Values.prometheusRule.defaultRules.enabled }}
  - name: collectorRules
    rules:
    {{- /* Receive-stage failures: refused data never enters the pipeline. */}}
    - alert: ReceiverDroppedSpans
      expr: rate(otelcol_receiver_refused_spans[5m]) > 0
      for: 2m
      labels:
        severity: critical
      annotations:
        description: '{{`The {{ $labels.receiver }} receiver is dropping spans at a rate of {{ humanize $value }} per second`}}'
        runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#receive-failures'
    - alert: ReceiverDroppedMetrics
      expr: rate(otelcol_receiver_refused_metric_points[5m]) > 0
      for: 2m
      labels:
        severity: critical
      annotations:
        description: '{{`The {{ $labels.receiver }} receiver is dropping metrics at a rate of {{ humanize $value }} per second`}}'
        runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#receive-failures'
    - alert: ReceiverDroppedLogs
      expr: rate(otelcol_receiver_refused_log_records[5m]) > 0
      for: 5m
      labels:
        severity: critical
      annotations:
        description: '{{`The {{ $labels.receiver }} receiver is dropping logs at a rate of {{ humanize $value }} per second`}}'
        runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#receive-failures'
    {{- /* Process-stage failures: data lost inside the pipeline. */}}
    - alert: ProcessorDroppedSpans
      expr: rate(otelcol_processor_dropped_spans[5m]) > 0
      for: 2m
      labels:
        severity: critical
      annotations:
        description: '{{`The {{ $labels.processor }} processor is dropping spans at a rate of {{ humanize $value }} per second`}}'
        runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-loss'
    - alert: ProcessorDroppedMetrics
      expr: rate(otelcol_processor_dropped_metric_points[5m]) > 0
      for: 2m
      labels:
        severity: critical
      annotations:
        description: '{{`The {{ $labels.processor }} processor is dropping metrics at a rate of {{ humanize $value }} per second`}}'
        runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-loss'
    - alert: ProcessorDroppedLogs
      expr: rate(otelcol_processor_dropped_log_records[5m]) > 0
      for: 5m
      labels:
        severity: critical
      annotations:
        description: '{{`The {{ $labels.processor }} processor is dropping logs at a rate of {{ humanize $value }} per second`}}'
        runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-loss'
    {{- /* Export-stage failures: data failed to leave the collector. */}}
    - alert: ExporterDroppedSpans
      expr: rate(otelcol_exporter_send_failed_spans[5m]) > 0
      for: 2m
      labels:
        severity: critical
      annotations:
        description: '{{`The {{ $labels.exporter }} exporter is dropping spans at a rate of {{ humanize $value }} per second`}}'
        runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-egress'
    - alert: ExporterDroppedMetrics
      expr: rate(otelcol_exporter_send_failed_metric_points[5m]) > 0
      for: 2m
      labels:
        severity: critical
      annotations:
        description: '{{`The {{ $labels.exporter }} exporter is dropping metrics at a rate of {{ humanize $value }} per second`}}'
        runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-egress'
    - alert: ExporterDroppedLogs
      expr: rate(otelcol_exporter_send_failed_log_records[5m]) > 0
      for: 5m
      labels:
        severity: critical
      annotations:
        description: '{{`The {{ $labels.exporter }} exporter is dropping logs at a rate of {{ humanize $value }} per second`}}'
        runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#data-egress'
    {{- /* Queue pressure: a sustained large queue precedes enqueue failures. */}}
    - alert: ExporterQueueSize
      expr: otelcol_exporter_queue_size > 5000
      for: 1m
      labels:
        severity: warning
      annotations:
        description: '{{`The {{ $labels.exporter }} queue has reached a size of {{ $value }}`}}'
        runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#queue-length'
    {{- /* One SendQueueFailed<signal> alert per telemetry signal; $signal is
           expanded by Helm (not Prometheus), yielding alert names like
           SendQueueFailedspans, SendQueueFailedmetric_points, SendQueueFailedlog_records. */}}
    {{- $signals := list "spans" "metric_points" "log_records" }}
    {{- range $signal := $signals }}
    - alert: SendQueueFailed{{ $signal }}
      expr: rate(otelcol_exporter_enqueue_failed_{{ $signal }}[5m]) > 0
      for: 1m
      labels:
        severity: warning
      annotations:
        description: '{{`The {{ $labels.exporter }} sending queue failed to accept {{ $value }}`}} {{ $signal }}'
        runbook_url: 'https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/monitoring.md#queue-length'
    {{- end }}
  {{- end }}
{{- end }}