diff --git a/CHANGELOG.md b/CHANGELOG.md index d515f896eaf..bf99fa02755 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,7 @@ * [ENHANCEMENT] Ingester: Add `-ingester.instance-limits.max-inflight-push-requests-bytes`. This limit protects the ingester against requests that together may cause an OOM. #6492 * [ENHANCEMENT] Ingester: add new per-tenant `cortex_ingester_local_limits` metric to expose the calculated local per-tenant limits seen at each ingester. Exports the local per-tenant series limit with label `{limit="max_global_series_per_user"}` #6403 * [ENHANCEMENT] Query-frontend: added "queue_time_seconds" field to "query stats" log. This is total time that query and subqueries spent in the queue, before queriers picked it up. #6537 +* [ENHANCEMENT] All: enabled reporting gRPC codes as `status_code` label in `cortex_request_duration_seconds` and `cortex_ingester_client_request_duration_seconds` metrics. #6561 * [BUGFIX] Ring: Ensure network addresses used for component hash rings are formatted correctly when using IPv6. #6068 * [BUGFIX] Query-scheduler: don't retain connections from queriers that have shut down, leading to gradually increasing enqueue latency over time. #6100 #6145 * [BUGFIX] Ingester: prevent query logic from continuing to execute after queries are canceled. #6085 @@ -74,6 +75,7 @@ ### Mixin +* [CHANGE] Dashboards: enabled reporting gRPC codes as `status_code` label in Mimir dashboards. In case of gRPC calls, the successful `status_code` label on `cortex_request_duration_seconds` and `cortex_ingester_client_request_duration_seconds` metrics has changed from 'success' and '2xx' to 'OK'. #6561 * [ENHANCEMENT] Dashboards: Optionally show rejected requests on Mimir Writes dashboard. Useful when used together with "early request rejection". #6132 * [ENHANCEMENT] Alerts: added a critical alert for `CompactorSkippedBlocksWithOutOfOrderChunks` when multiple blocks are affected. #6410 * [ENHANCEMENT] Dashboards: Added the min-replicas for autoscaling dashboards. #6528 diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index b5264a64f54..97cb7e02f7b 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -405,6 +405,16 @@ "fieldType": "boolean", "fieldCategory": "advanced" }, + { + "kind": "field", + "name": "report_grpc_codes_in_instrumentation_label", + "required": false, + "desc": "If set to true, gRPC statuses will be reported in instrumentation labels with their string representations. Otherwise, they will be reported as \"error\".", + "fieldValue": null, + "fieldDefaultValue": false, + "fieldFlag": "server.report-grpc-codes-in-instrumentation-label", + "fieldType": "boolean" + }, { "kind": "field", "name": "graceful_shutdown_timeout", diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index 0b3f52447a7..c7a82ee9f86 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -2597,6 +2597,8 @@ Usage of ./cmd/mimir/mimir: Base path to serve all API routes from (e.g. /v1/) -server.register-instrumentation Register the intrumentation handlers (/metrics etc). (default true) + -server.report-grpc-codes-in-instrumentation-label + If set to true, gRPC statuses will be reported in instrumentation labels with their string representations. Otherwise, they will be reported as "error". -server.tls-cipher-suites string Comma-separated list of cipher suites to use. If blank, the default Go cipher suites is used. -server.tls-min-version string diff --git a/cmd/mimir/help.txt.tmpl b/cmd/mimir/help.txt.tmpl index ea468bb24f9..6d1e07eb947 100644 --- a/cmd/mimir/help.txt.tmpl +++ b/cmd/mimir/help.txt.tmpl @@ -671,6 +671,8 @@ Usage of ./cmd/mimir/mimir: Optionally log request headers. -server.log-request-headers-exclude-list string Comma separated list of headers to exclude from loggin. Only used if server.log-request-headers is true. + -server.report-grpc-codes-in-instrumentation-label + If set to true, gRPC statuses will be reported in instrumentation labels with their string representations. Otherwise, they will be reported as "error". -server.tls-cipher-suites string Comma-separated list of cipher suites to use. If blank, the default Go cipher suites is used. -server.tls-min-version string diff --git a/docs/sources/mimir/references/configuration-parameters/index.md b/docs/sources/mimir/references/configuration-parameters/index.md index b5f700c794b..db86aaca639 100644 --- a/docs/sources/mimir/references/configuration-parameters/index.md +++ b/docs/sources/mimir/references/configuration-parameters/index.md @@ -586,6 +586,11 @@ grpc_tls_config: # CLI flag: -server.register-instrumentation [register_instrumentation: | default = true] +# If set to true, gRPC statuses will be reported in instrumentation labels with +# their string representations. Otherwise, they will be reported as "error". +# CLI flag: -server.report-grpc-codes-in-instrumentation-label +[report_grpc_codes_in_instrumentation_label: | default = false] + # (advanced) Timeout for graceful shutdowns # CLI flag: -server.graceful-shutdown-timeout [graceful_shutdown_timeout: | default = 30s] diff --git a/go.mod b/go.mod index f14eea0feb7..9a6c1cdbc46 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/golang/snappy v0.0.4 github.com/google/gopacket v1.1.19 github.com/gorilla/mux v1.8.0 - github.com/grafana/dskit v0.0.0-20231031132813-52f4e8d82d59 + github.com/grafana/dskit v0.0.0-20231104112041-b3823cb5cdb4 github.com/grafana/e2e v0.1.1 github.com/hashicorp/golang-lru v1.0.2 // indirect github.com/json-iterator/go v1.1.12 diff --git a/go.sum b/go.sum index 79da5558a59..5a2d5c53fa2 100644 --- a/go.sum +++ b/go.sum @@ -538,8 +538,8 @@ github.com/gosimple/slug v1.1.1 h1:fRu/digW+NMwBIP+RmviTK97Ho/bEj/C9swrCspN3D4= github.com/gosimple/slug v1.1.1/go.mod h1:ER78kgg1Mv0NQGlXiDe57DpCyfbNywXXZ9mIorhxAf0= github.com/grafana-tools/sdk v0.0.0-20220919052116-6562121319fc h1:PXZQA2WCxe85Tnn+WEvr8fDpfwibmEPgfgFEaC87G24= github.com/grafana-tools/sdk v0.0.0-20220919052116-6562121319fc/go.mod h1:AHHlOEv1+GGQ3ktHMlhuTUwo3zljV3QJbC0+8o2kn+4= -github.com/grafana/dskit v0.0.0-20231031132813-52f4e8d82d59 h1:tWbF2UD8HgvpqyxV60zmUDDzJI2gybMJxw4/RY/UNTo= -github.com/grafana/dskit v0.0.0-20231031132813-52f4e8d82d59/go.mod h1:8dsy5tQOkeNQyjXpm5mQsbCu3H5uzeBD35MzRQFznKU= +github.com/grafana/dskit v0.0.0-20231104112041-b3823cb5cdb4 h1:GKbD6ueLnL7FQaZmIg8vc/kwzPOSgEiKdI7MVqRqBr8= +github.com/grafana/dskit v0.0.0-20231104112041-b3823cb5cdb4/go.mod h1:8dsy5tQOkeNQyjXpm5mQsbCu3H5uzeBD35MzRQFznKU= github.com/grafana/e2e v0.1.1 h1:/b6xcv5BtoBnx8cZnCiey9DbjEc8z7gXHO5edoeRYxc= github.com/grafana/e2e v0.1.1/go.mod h1:RpNLgae5VT+BUHvPE+/zSypmOXKwEu4t+tnEMS1ATaE= github.com/grafana/goautoneg v0.0.0-20231010094147-47ce5e72a9ae h1:Yxbw9jKGJVC6qAK5Ubzzb/qZwM6rRMMqaDc/d4Vp3pM= diff --git a/integration/ingester_test.go b/integration/ingester_test.go index 49d54bb82da..eb883674058 100644 --- a/integration/ingester_test.go +++ b/integration/ingester_test.go @@ -569,7 +569,7 @@ func TestIngesterQuerying(t *testing.T) { return counts[0], nil } - successfulQueryRequests, err := queryRequestCount("2xx") + successfulQueryRequests, err := queryRequestCount("OK") require.NoError(t, err) cancelledQueryRequests, err := queryRequestCount("cancel") @@ -662,7 +662,7 @@ func TestIngesterQueryingWithRequestMinimization(t *testing.T) { []string{"cortex_request_duration_seconds"}, e2e.WithLabelMatchers( labels.MustNewMatcher(labels.MatchEqual, "route", "/cortex.Ingester/QueryStream"), - labels.MustNewMatcher(labels.MatchEqual, "status_code", "success"), + labels.MustNewMatcher(labels.MatchEqual, "status_code", "OK"), ), e2e.SkipMissingMetrics, e2e.WithMetricCount, diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml index c24a6e13a8b..45ffe84350b 100644 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml +++ b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml @@ -1149,6 +1149,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1183,7 +1184,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -6537,6 +6538,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -6571,7 +6573,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -11061,6 +11063,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -11095,7 +11098,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -11342,6 +11345,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -11376,7 +11380,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -20875,6 +20879,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -20909,7 +20914,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -21124,6 +21129,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -21158,7 +21164,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -21488,6 +21494,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -21522,7 +21529,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -21726,6 +21733,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -21760,7 +21768,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -21964,6 +21972,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -21998,7 +22007,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -22476,6 +22485,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -22510,7 +22520,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -26297,6 +26307,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -26331,7 +26342,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -26535,6 +26546,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -26569,7 +26581,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -26720,6 +26732,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -26754,7 +26767,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -29417,6 +29430,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -29451,7 +29465,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -29602,6 +29616,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -29636,7 +29651,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -29787,6 +29802,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -29821,7 +29837,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -39532,6 +39548,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -39566,7 +39583,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -39770,6 +39787,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -39805,7 +39823,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -40354,6 +40372,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -40388,7 +40407,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -40539,6 +40558,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -40573,7 +40593,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -40724,6 +40744,7 @@ data: "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -40758,7 +40779,7 @@ data: "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-alertmanager.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-alertmanager.json index 0769ca4a371..b3813637bdc 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-alertmanager.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-alertmanager.json @@ -281,6 +281,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -315,7 +316,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-compactor.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-compactor.json index ab03f6d107c..859beceff8c 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-compactor.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-compactor.json @@ -1994,6 +1994,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -2028,7 +2029,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-overview.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-overview.json index 4f315c7b2d8..7a351125255 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-overview.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-overview.json @@ -180,6 +180,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -214,7 +215,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -461,6 +462,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -495,7 +497,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads.json index cbe3b9e2e91..ef32f777df7 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-reads.json @@ -461,6 +461,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -495,7 +496,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -710,6 +711,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -744,7 +746,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1074,6 +1076,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1108,7 +1111,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1312,6 +1315,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1346,7 +1350,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1550,6 +1554,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1584,7 +1589,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -2062,6 +2067,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -2096,7 +2102,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json index ebf14e2f643..a246dd12b2f 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-remote-ruler-reads.json @@ -153,6 +153,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -187,7 +188,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -391,6 +392,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -425,7 +427,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -576,6 +578,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -610,7 +613,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-ruler.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-ruler.json index bbde06a6712..6a557fd6c9b 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-ruler.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-ruler.json @@ -537,6 +537,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -571,7 +572,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -722,6 +723,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -756,7 +758,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -907,6 +909,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -941,7 +944,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes.json index a6ce61ec671..fafa8c87aba 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes.json @@ -459,6 +459,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -493,7 +494,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -697,6 +698,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -732,7 +734,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1281,6 +1283,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1315,7 +1318,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1466,6 +1469,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1500,7 +1504,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1651,6 +1655,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1685,7 +1690,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-alertmanager.json b/operations/mimir-mixin-compiled/dashboards/mimir-alertmanager.json index 5d0d0f67361..e13695389ad 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-alertmanager.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-alertmanager.json @@ -281,6 +281,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -315,7 +316,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-compactor.json b/operations/mimir-mixin-compiled/dashboards/mimir-compactor.json index 9857d150e54..de221e167e4 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-compactor.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-compactor.json @@ -1994,6 +1994,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -2028,7 +2029,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-overview.json b/operations/mimir-mixin-compiled/dashboards/mimir-overview.json index 4f315c7b2d8..7a351125255 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-overview.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-overview.json @@ -180,6 +180,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -214,7 +215,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -461,6 +462,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -495,7 +497,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-reads.json b/operations/mimir-mixin-compiled/dashboards/mimir-reads.json index 1ddfee333a4..4ebc98517c5 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-reads.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-reads.json @@ -461,6 +461,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -495,7 +496,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -710,6 +711,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -744,7 +746,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1074,6 +1076,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1108,7 +1111,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1312,6 +1315,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1346,7 +1350,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1550,6 +1554,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1584,7 +1589,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -2062,6 +2067,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -2096,7 +2102,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json b/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json index a764fbea0f5..90461163451 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-remote-ruler-reads.json @@ -153,6 +153,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -187,7 +188,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -391,6 +392,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -425,7 +427,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -576,6 +578,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -610,7 +613,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-ruler.json b/operations/mimir-mixin-compiled/dashboards/mimir-ruler.json index bbde06a6712..6a557fd6c9b 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-ruler.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-ruler.json @@ -537,6 +537,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -571,7 +572,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -722,6 +723,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -756,7 +758,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -907,6 +909,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -941,7 +944,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-writes.json b/operations/mimir-mixin-compiled/dashboards/mimir-writes.json index d4e897cea89..72b9d0ad070 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-writes.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-writes.json @@ -459,6 +459,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -493,7 +494,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -697,6 +698,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -732,7 +734,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1281,6 +1283,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1315,7 +1318,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1466,6 +1469,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1500,7 +1504,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", @@ -1651,6 +1655,7 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", + "OK": "#7EB26D", "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" @@ -1685,7 +1690,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{status}}", diff --git a/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet b/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet index 83a89156365..8b99281ddad 100644 --- a/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet +++ b/operations/mimir-mixin/dashboards/dashboard-utils.libsonnet @@ -177,6 +177,26 @@ local utils = import 'mixin-utils/utils.libsonnet'; qpsPanel(selector, statusLabelName='status_code'):: super.qpsPanel(selector, statusLabelName) + + { + aliasColors+: { + OK: '#7EB26D', + }, + targets: [ + { + expr: + ||| + sum by (status) ( + label_replace(label_replace(rate(%s[$__rate_interval]), + "status", "${1}xx", "%s", "([0-9]).."), + "status", "${1}", "%s", "([a-zA-Z]+)")) + ||| % [selector, statusLabelName, statusLabelName], + format: 'time_series', + intervalFactor: 2, + legendFormat: '{{status}}', + refId: 'A', + }, + ], + } + { yaxes: $.yaxes('reqps') }, // hiddenLegendQueryPanel adds on to 'timeseriesPanel', not the deprecated 'panel'. diff --git a/pkg/ingester/client/client.go b/pkg/ingester/client/client.go index 99a63103c2e..628617488eb 100644 --- a/pkg/ingester/client/client.go +++ b/pkg/ingester/client/client.go @@ -11,6 +11,7 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/grpcclient" + "github.com/grafana/dskit/middleware" "github.com/grafana/dskit/ring" "google.golang.org/grpc" "google.golang.org/grpc/health/grpc_health_v1" @@ -34,7 +35,7 @@ type closableHealthAndIngesterClient struct { // MakeIngesterClient makes a new IngesterClient func MakeIngesterClient(inst ring.InstanceDesc, cfg Config, metrics *Metrics, logger log.Logger) (HealthAndIngesterClient, error) { logger = log.With(logger, "component", "ingester-client") - unary, stream := grpcclient.Instrument(metrics.requestDuration) + unary, stream := grpcclient.Instrument(metrics.requestDuration, middleware.ReportGRPCStatusOption) if cfg.CircuitBreaker.Enabled { unary = append([]grpc.UnaryClientInterceptor{NewCircuitBreaker(inst, cfg.CircuitBreaker, metrics, logger)}, unary...) } diff --git a/pkg/mimir/modules.go b/pkg/mimir/modules.go index d382abd75f2..23522ca2afa 100644 --- a/pkg/mimir/modules.go +++ b/pkg/mimir/modules.go @@ -301,7 +301,7 @@ func (t *Mimir) initServer() (services.Service, error) { // Installing this allows us to reject push requests received via gRPC early -- before they are fully read into memory. t.Cfg.Server.GrpcMethodLimiter = newGrpcInflightMethodLimiter(ingFn, distFn) - + t.Cfg.Server.ReportGRPCCodesInInstrumentationLabel = true // Mimir handles signals on its own. DisableSignalHandling(&t.Cfg.Server) serv, err := server.New(t.Cfg.Server) diff --git a/vendor/github.com/grafana/dskit/grpcclient/instrumentation.go b/vendor/github.com/grafana/dskit/grpcclient/instrumentation.go index 4a10ce48d27..280f02180c3 100644 --- a/vendor/github.com/grafana/dskit/grpcclient/instrumentation.go +++ b/vendor/github.com/grafana/dskit/grpcclient/instrumentation.go @@ -9,14 +9,14 @@ import ( "github.com/grafana/dskit/middleware" ) -func Instrument(requestDuration *prometheus.HistogramVec) ([]grpc.UnaryClientInterceptor, []grpc.StreamClientInterceptor) { +func Instrument(requestDuration *prometheus.HistogramVec, instrumentationLabelOptions ...middleware.InstrumentationOption) ([]grpc.UnaryClientInterceptor, []grpc.StreamClientInterceptor) { return []grpc.UnaryClientInterceptor{ otgrpc.OpenTracingClientInterceptor(opentracing.GlobalTracer()), middleware.ClientUserHeaderInterceptor, - middleware.UnaryClientInstrumentInterceptor(requestDuration), + middleware.UnaryClientInstrumentInterceptor(requestDuration, instrumentationLabelOptions...), }, []grpc.StreamClientInterceptor{ otgrpc.OpenTracingStreamClientInterceptor(opentracing.GlobalTracer()), middleware.StreamClientUserHeaderInterceptor, - middleware.StreamClientInstrumentInterceptor(requestDuration), + middleware.StreamClientInstrumentInterceptor(requestDuration, instrumentationLabelOptions...), } } diff --git a/vendor/github.com/grafana/dskit/grpcutil/cancel.go b/vendor/github.com/grafana/dskit/grpcutil/cancel.go deleted file mode 100644 index b1d369d2a3e..00000000000 --- a/vendor/github.com/grafana/dskit/grpcutil/cancel.go +++ /dev/null @@ -1,25 +0,0 @@ -// Provenance-includes-location: https://github.com/weaveworks/common/blob/main/grpc/cancel.go -// Provenance-includes-license: Apache-2.0 -// Provenance-includes-copyright: Weaveworks Ltd. - -package grpcutil - -import ( - "context" - "errors" - - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" -) - -// IsCanceled checks whether an error comes from an operation being canceled -func IsCanceled(err error) bool { - if errors.Is(err, context.Canceled) { - return true - } - s, ok := status.FromError(err) - if ok && s.Code() == codes.Canceled { - return true - } - return false -} diff --git a/vendor/github.com/grafana/dskit/grpcutil/status.go b/vendor/github.com/grafana/dskit/grpcutil/status.go new file mode 100644 index 00000000000..a9e9aab249a --- /dev/null +++ b/vendor/github.com/grafana/dskit/grpcutil/status.go @@ -0,0 +1,70 @@ +package grpcutil + +import ( + "context" + "errors" + + "github.com/gogo/status" + "google.golang.org/grpc/codes" + grpcstatus "google.golang.org/grpc/status" +) + +// ErrorToStatus returns a *github.com/gogo/status.Status representation of err. +// +// - If err implements the method `GRPCStatus() *google.golang.org/grpc/status.Status` and +// `GRPCStatus()` does not return nil, or if err wraps a type satisfying this, Status from +// `GRPCStatus()` is converted to gogo Status, and returned. In that case, ok is true. +// +// - If err is or GRPCStatus() returns nil, a nil Status is returned and ok is false. +// +// - Otherwise, err is an error not compatible with this function. In this +// case, a nil Status is returned and ok is false. +func ErrorToStatus(err error) (*status.Status, bool) { + if err == nil { + return nil, false + } + type grpcStatus interface{ GRPCStatus() *grpcstatus.Status } + var gs grpcStatus + if errors.As(err, &gs) { + st := gs.GRPCStatus() + if st == nil { + return nil, false + } + return status.FromGRPCStatus(st), true + } + return nil, false +} + +// ErrorToStatusCode extracts gRPC status code from error and returns it. +// +// - If err is nil, codes.OK is returned. +// +// - If err implements (or wraps error that implements) the method +// `GRPCStatus() *google.golang.org/grpc/status.Status`, and +// `GRPCStatus()` returns a non-nil status, code from the status +// is returned. +// +// - Otherwise code.Unknown is returned. +func ErrorToStatusCode(err error) codes.Code { + if err == nil { + return codes.OK + } + type grpcStatus interface{ GRPCStatus() *grpcstatus.Status } + var gs grpcStatus + if errors.As(err, &gs) { + st := gs.GRPCStatus() + if st != nil { + return st.Code() + } + } + return codes.Unknown +} + +// IsCanceled checks whether an error comes from an operation being canceled. +func IsCanceled(err error) bool { + if errors.Is(err, context.Canceled) { + return true + } + statusCode := ErrorToStatusCode(err) + return statusCode == codes.Canceled +} diff --git a/vendor/github.com/grafana/dskit/httpgrpc/httpgrpc.go b/vendor/github.com/grafana/dskit/httpgrpc/httpgrpc.go index 83ea023f3b8..213c261b700 100644 --- a/vendor/github.com/grafana/dskit/httpgrpc/httpgrpc.go +++ b/vendor/github.com/grafana/dskit/httpgrpc/httpgrpc.go @@ -6,16 +6,15 @@ package httpgrpc import ( "context" - "errors" "fmt" "github.com/go-kit/log/level" - "google.golang.org/grpc/metadata" - grpcstatus "google.golang.org/grpc/status" - spb "github.com/gogo/googleapis/google/rpc" "github.com/gogo/protobuf/types" "github.com/gogo/status" + "google.golang.org/grpc/metadata" + + "github.com/grafana/dskit/grpcutil" "github.com/grafana/dskit/log" ) @@ -46,7 +45,7 @@ func ErrorFromHTTPResponse(resp *HTTPResponse) error { // HTTPResponseFromError converts a grpc error into an HTTP response func HTTPResponseFromError(err error) (*HTTPResponse, bool) { - s, ok := statusFromError(err) + s, ok := grpcutil.ErrorToStatus(err) if !ok { return nil, false } @@ -65,29 +64,6 @@ func HTTPResponseFromError(err error) (*HTTPResponse, bool) { return &resp, true } -// statusFromError tries to cast the given error into status.Status. -// If the given error, or any error from its tree are a status.Status, -// that status.Status and the outcome true are returned. -// Otherwise, nil and the outcome false are returned. -// This implementation differs from status.FromError() because the -// latter checks only if the given error can be cast to status.Status, -// and doesn't check other errors in the given error's tree. -func statusFromError(err error) (*status.Status, bool) { - if err == nil { - return nil, false - } - type grpcStatus interface{ GRPCStatus() *grpcstatus.Status } - var gs grpcStatus - if errors.As(err, &gs) { - st := gs.GRPCStatus() - if st == nil { - return nil, false - } - return status.FromGRPCStatus(st), true - } - return nil, false -} - const ( MetadataMethod = "httpgrpc-method" MetadataURL = "httpgrpc-url" diff --git a/vendor/github.com/grafana/dskit/middleware/grpc_instrumentation.go b/vendor/github.com/grafana/dskit/middleware/grpc_instrumentation.go index 70069fa36fa..e4052b8ed05 100644 --- a/vendor/github.com/grafana/dskit/middleware/grpc_instrumentation.go +++ b/vendor/github.com/grafana/dskit/middleware/grpc_instrumentation.go @@ -6,6 +6,7 @@ package middleware import ( "context" + "errors" "io" "strconv" "time" @@ -13,72 +14,69 @@ import ( "github.com/prometheus/client_golang/prometheus" "go.uber.org/atomic" "google.golang.org/grpc" + "google.golang.org/grpc/codes" "google.golang.org/grpc/metadata" "github.com/grafana/dskit/grpcutil" - "github.com/grafana/dskit/httpgrpc" "github.com/grafana/dskit/instrument" ) -func observe(ctx context.Context, hist *prometheus.HistogramVec, method string, err error, duration time.Duration) { - respStatus := "success" - if err != nil { - if errResp, ok := httpgrpc.HTTPResponseFromError(err); ok { - respStatus = strconv.Itoa(int(errResp.Code)) - } else if grpcutil.IsCanceled(err) { - respStatus = "cancel" - } else { - respStatus = "error" - } - } - instrument.ObserveWithExemplar(ctx, hist.WithLabelValues(gRPC, method, respStatus, "false"), duration.Seconds()) +func observe(ctx context.Context, hist *prometheus.HistogramVec, method string, err error, duration time.Duration, instrumentLabel instrumentationLabel) { + instrument.ObserveWithExemplar(ctx, hist.WithLabelValues(gRPC, method, instrumentLabel.getInstrumentationLabel(err), "false"), duration.Seconds()) } // UnaryServerInstrumentInterceptor instruments gRPC requests for errors and latency. -func UnaryServerInstrumentInterceptor(hist *prometheus.HistogramVec) grpc.UnaryServerInterceptor { +func UnaryServerInstrumentInterceptor(hist *prometheus.HistogramVec, instrumentationOptions ...InstrumentationOption) grpc.UnaryServerInterceptor { + instrumentationLabel := applyInstrumentationOptions(false, instrumentationOptions...) return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) { begin := time.Now() resp, err := handler(ctx, req) - observe(ctx, hist, info.FullMethod, err, time.Since(begin)) + observe(ctx, hist, info.FullMethod, err, time.Since(begin), instrumentationLabel) return resp, err } } // StreamServerInstrumentInterceptor instruments gRPC requests for errors and latency. -func StreamServerInstrumentInterceptor(hist *prometheus.HistogramVec) grpc.StreamServerInterceptor { +func StreamServerInstrumentInterceptor(hist *prometheus.HistogramVec, instrumentationOptions ...InstrumentationOption) grpc.StreamServerInterceptor { + instrumentationLabel := applyInstrumentationOptions(false, instrumentationOptions...) return func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { begin := time.Now() err := handler(srv, ss) - observe(ss.Context(), hist, info.FullMethod, err, time.Since(begin)) + observe(ss.Context(), hist, info.FullMethod, err, time.Since(begin), instrumentationLabel) return err } } // UnaryClientInstrumentInterceptor records duration of gRPC requests client side. -func UnaryClientInstrumentInterceptor(metric *prometheus.HistogramVec) grpc.UnaryClientInterceptor { +func UnaryClientInstrumentInterceptor(metric *prometheus.HistogramVec, instrumentationOptions ...InstrumentationOption) grpc.UnaryClientInterceptor { + // we enforce masking of HTTP statuses. + instrumentationLabel := applyInstrumentationOptions(true, instrumentationOptions...) return func(ctx context.Context, method string, req, resp interface{}, cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error { start := time.Now() err := invoker(ctx, method, req, resp, cc, opts...) - metric.WithLabelValues(method, errorCode(err)).Observe(time.Since(start).Seconds()) + metric.WithLabelValues(method, instrumentationLabel.getInstrumentationLabel(err)).Observe(time.Since(start).Seconds()) return err } } // StreamClientInstrumentInterceptor records duration of streaming gRPC requests client side. -func StreamClientInstrumentInterceptor(metric *prometheus.HistogramVec) grpc.StreamClientInterceptor { +func StreamClientInstrumentInterceptor(metric *prometheus.HistogramVec, instrumentationOptions ...InstrumentationOption) grpc.StreamClientInterceptor { + // we enforce masking of HTTP statuses. + instrumentationLabel := applyInstrumentationOptions(true, instrumentationOptions...) return func(ctx context.Context, desc *grpc.StreamDesc, cc *grpc.ClientConn, method string, streamer grpc.Streamer, opts ...grpc.CallOption, ) (grpc.ClientStream, error) { start := time.Now() stream, err := streamer(ctx, desc, cc, method, opts...) s := &instrumentedClientStream{ - metric: metric, - start: start, - method: method, - serverStreams: desc.ServerStreams, - finished: atomic.NewBool(false), - finishedChan: make(chan struct{}), - stream: stream, + metric: metric, + start: start, + method: method, + serverStreams: desc.ServerStreams, + finished: atomic.NewBool(false), + finishedChan: make(chan struct{}), + stream: stream, + instrumentationLabel: instrumentationLabel, } s.awaitCompletion(ctx) return s, err @@ -87,13 +85,14 @@ func StreamClientInstrumentInterceptor(metric *prometheus.HistogramVec) grpc.Str // This implementation is heavily inspired by github.com/opentracing-contrib/go-grpc's openTracingClientStream. type instrumentedClientStream struct { - metric *prometheus.HistogramVec - start time.Time - method string - serverStreams bool - finished *atomic.Bool - finishedChan chan struct{} - stream grpc.ClientStream + metric *prometheus.HistogramVec + start time.Time + method string + serverStreams bool + finished *atomic.Bool + finishedChan chan struct{} + stream grpc.ClientStream + instrumentationLabel instrumentationLabel } func (s *instrumentedClientStream) Trailer() metadata.MD { @@ -122,7 +121,7 @@ func (s *instrumentedClientStream) finish(err error) { close(s.finishedChan) - s.metric.WithLabelValues(s.method, errorCode(err)).Observe(time.Since(s.start).Seconds()) + s.metric.WithLabelValues(s.method, s.instrumentationLabel.getInstrumentationLabel(err)).Observe(time.Since(s.start).Seconds()) } func (s *instrumentedClientStream) SendMsg(m interface{}) error { @@ -173,18 +172,75 @@ func (s *instrumentedClientStream) CloseSend() error { return err } -// errorCode converts an error into an error code string. -func errorCode(err error) string { - if err == nil { - return "2xx" +type InstrumentationOption func(*instrumentationLabel) + +var ( + // ReportGRPCStatusOption is an InstrumentationOption that is used for enabling gRPC status codes to be used + // in instrumentation labels. + ReportGRPCStatusOption InstrumentationOption = func(instrumentationLabel *instrumentationLabel) { + instrumentationLabel.reportGRPCStatus = true + } +) + +func applyInstrumentationOptions(maskHTTPStatuses bool, options ...InstrumentationOption) instrumentationLabel { + instrumentationLabel := instrumentationLabel{maskHTTPStatus: maskHTTPStatuses} + for _, opt := range options { + opt(&instrumentationLabel) + } + return instrumentationLabel +} + +type instrumentationLabel struct { + reportGRPCStatus bool + maskHTTPStatus bool +} + +// getInstrumentationLabel converts an error into an error code string by applying the configurations +// contained in this instrumentationLabel object. +func (i *instrumentationLabel) getInstrumentationLabel(err error) string { + statusCode := errorToStatusCode(err) + return i.statusCodeToString(statusCode) +} + +func (i *instrumentationLabel) statusCodeToString(statusCode codes.Code) string { + if isHTTPStatusCode(statusCode) { + statusFamily := int(statusCode / 100) + if i.maskHTTPStatus { + return strconv.Itoa(statusFamily) + "xx" + } + return strconv.Itoa(int(statusCode)) } - if errResp, ok := httpgrpc.HTTPResponseFromError(err); ok { - statusFamily := int(errResp.Code / 100) - return strconv.Itoa(statusFamily) + "xx" - } else if grpcutil.IsCanceled(err) { + if i.reportGRPCStatus { + return statusCode.String() + } + + if statusCode == codes.OK { + if i.maskHTTPStatus { + return "2xx" + } + return "success" + } + + if statusCode == codes.Canceled { return "cancel" - } else { - return "error" } + + return "error" +} + +func errorToStatusCode(err error) codes.Code { + if err == nil { + return codes.OK + } + + if errors.Is(err, context.Canceled) { + return codes.Canceled + } + + return grpcutil.ErrorToStatusCode(err) +} + +func isHTTPStatusCode(statusCode codes.Code) bool { + return int(statusCode) >= 100 && int(statusCode) < 600 } diff --git a/vendor/github.com/grafana/dskit/server/server.go b/vendor/github.com/grafana/dskit/server/server.go index 157642c648e..a33e1a06f02 100644 --- a/vendor/github.com/grafana/dskit/server/server.go +++ b/vendor/github.com/grafana/dskit/server/server.go @@ -92,9 +92,10 @@ type Config struct { HTTPTLSConfig TLSConfig `yaml:"http_tls_config"` GRPCTLSConfig TLSConfig `yaml:"grpc_tls_config"` - RegisterInstrumentation bool `yaml:"register_instrumentation"` - ExcludeRequestInLog bool `yaml:"-"` - DisableRequestSuccessLog bool `yaml:"-"` + RegisterInstrumentation bool `yaml:"register_instrumentation"` + ReportGRPCCodesInInstrumentationLabel bool `yaml:"report_grpc_codes_in_instrumentation_label"` + ExcludeRequestInLog bool `yaml:"-"` + DisableRequestSuccessLog bool `yaml:"-"` ServerGracefulShutdownTimeout time.Duration `yaml:"graceful_shutdown_timeout"` HTTPServerReadTimeout time.Duration `yaml:"http_server_read_timeout"` @@ -168,6 +169,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.IntVar(&cfg.GRPCListenPort, "server.grpc-listen-port", 9095, "gRPC server listen port.") f.IntVar(&cfg.GRPCConnLimit, "server.grpc-conn-limit", 0, "Maximum number of simultaneous grpc connections, <=0 to disable") f.BoolVar(&cfg.RegisterInstrumentation, "server.register-instrumentation", true, "Register the intrumentation handlers (/metrics etc).") + f.BoolVar(&cfg.ReportGRPCCodesInInstrumentationLabel, "server.report-grpc-codes-in-instrumentation-label", false, "If set to true, gRPC statuses will be reported in instrumentation labels with their string representations. Otherwise, they will be reported as \"error\".") f.DurationVar(&cfg.ServerGracefulShutdownTimeout, "server.graceful-shutdown-timeout", 30*time.Second, "Timeout for graceful shutdowns") f.DurationVar(&cfg.HTTPServerReadTimeout, "server.http-read-timeout", 30*time.Second, "Read timeout for entire HTTP request, including headers and body.") f.DurationVar(&cfg.HTTPServerReadHeaderTimeout, "server.http-read-header-timeout", 0, "Read timeout for HTTP request headers. If set to 0, value of -server.http-read-timeout is used.") @@ -348,17 +350,21 @@ func newServer(cfg Config, metrics *Metrics) (*Server, error) { WithRequest: !cfg.ExcludeRequestInLog, DisableRequestSuccessLog: cfg.DisableRequestSuccessLog, } + var reportGRPCStatusesOptions []middleware.InstrumentationOption + if cfg.ReportGRPCCodesInInstrumentationLabel { + reportGRPCStatusesOptions = []middleware.InstrumentationOption{middleware.ReportGRPCStatusOption} + } grpcMiddleware := []grpc.UnaryServerInterceptor{ serverLog.UnaryServerInterceptor, otgrpc.OpenTracingServerInterceptor(opentracing.GlobalTracer()), - middleware.UnaryServerInstrumentInterceptor(metrics.RequestDuration), + middleware.UnaryServerInstrumentInterceptor(metrics.RequestDuration, reportGRPCStatusesOptions...), } grpcMiddleware = append(grpcMiddleware, cfg.GRPCMiddleware...) grpcStreamMiddleware := []grpc.StreamServerInterceptor{ serverLog.StreamServerInterceptor, otgrpc.OpenTracingStreamServerInterceptor(opentracing.GlobalTracer()), - middleware.StreamServerInstrumentInterceptor(metrics.RequestDuration), + middleware.StreamServerInstrumentInterceptor(metrics.RequestDuration, reportGRPCStatusesOptions...), } grpcStreamMiddleware = append(grpcStreamMiddleware, cfg.GRPCStreamMiddleware...) diff --git a/vendor/modules.txt b/vendor/modules.txt index 2a301c886bb..802e2f32161 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -534,7 +534,7 @@ github.com/gosimple/slug # github.com/grafana-tools/sdk v0.0.0-20220919052116-6562121319fc ## explicit; go 1.13 github.com/grafana-tools/sdk -# github.com/grafana/dskit v0.0.0-20231031132813-52f4e8d82d59 +# github.com/grafana/dskit v0.0.0-20231104112041-b3823cb5cdb4 ## explicit; go 1.20 github.com/grafana/dskit/backoff github.com/grafana/dskit/cache