From dc6d3f07dbd76726917531ce16000e83d27c25e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20=C5=A0tibran=C3=BD?= Date: Mon, 6 Nov 2023 11:31:19 +0100 Subject: [PATCH] Show rejected distributor requests in the dashboard. (#6556) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Show rejected distributor requests in the dashboard. Signed-off-by: Peter Štibraný * CHANGELOG.md Signed-off-by: Peter Štibraný * CHANGELOG.md Signed-off-by: Peter Štibraný * make build-helm-tests Signed-off-by: Peter Štibraný --------- Signed-off-by: Peter Štibraný --- CHANGELOG.md | 2 +- .../metamonitoring/grafana-dashboards.yaml | 3 +- .../dashboards/mimir-writes.json | 3 +- .../dashboards/mimir-writes.json | 3 +- operations/mimir-mixin/config.libsonnet | 10 +++---- .../mimir-mixin/dashboards/writes.libsonnet | 28 +++++++++++++++++-- 6 files changed, 38 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 645c37dae9c..749facce715 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,7 +74,7 @@ ### Mixin -* [ENHANCEMENT] Dashboards: Optionally show rejected requests on Mimir Writes dashboard. Useful when used together with "early request rejection". #6132 +* [ENHANCEMENT] Dashboards: Optionally show rejected requests on Mimir Writes dashboard. Useful when used together with "early request rejection" in ingester and distributor. #6132 #6556 * [ENHANCEMENT] Alerts: added a critical alert for `CompactorSkippedBlocksWithOutOfOrderChunks` when multiple blocks are affected. #6410 * [ENHANCEMENT] Dashboards: Added the min-replicas for autoscaling dashboards. #6528 * [BUGFIX] Alerts: fixed issue where `GossipMembersMismatch` warning message referred to per-instance labels that were not produced by the alert query. 
#6146 diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml index c24a6e13a8b..2ffe82213b2 100644 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml +++ b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml @@ -39540,6 +39540,7 @@ data: "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to distributor.\nRejected requests are requests that distributor fails to handle because of distributor instance limits.\nWhen distributor is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen distributor is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", "fill": 10, "id": 7, "legend": { @@ -39778,7 +39779,7 @@ data: "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests and ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", + "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits 
(ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", "fill": 10, "id": 10, "legend": { diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes.json index a6ce61ec671..8b43c8be730 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-writes.json @@ -467,6 +467,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to distributor.\nRejected requests are requests that distributor fails to handle because of distributor instance limits.\nWhen distributor is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen distributor is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", "fill": 10, "id": 7, "legend": { @@ -705,7 +706,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests and ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", + "description": "### Requests / sec\nThe rate of 
successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", "fill": 10, "id": 10, "legend": { diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-writes.json b/operations/mimir-mixin-compiled/dashboards/mimir-writes.json index d4e897cea89..9bf06a34453 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-writes.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-writes.json @@ -467,6 +467,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to distributor.\nRejected requests are requests that distributor fails to handle because of distributor instance limits.\nWhen distributor is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen distributor is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", "fill": 10, "id": 7, "legend": { @@ -705,7 +706,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests and ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, 
then rejected requests are also counted as \"errors\".\n\n", + "description": "### Requests / sec\nThe rate of successful, failed and rejected requests to ingester.\nRejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate).\nWhen ingester is configured to use \"early\" request rejection, then rejected requests are NOT included in other metrics.\nWhen ingester is not configured to use \"early\" request rejection, then rejected requests are also counted as \"errors\".\n\n", "fill": 10, "id": 10, "legend": { diff --git a/operations/mimir-mixin/config.libsonnet b/operations/mimir-mixin/config.libsonnet index c6975cb3a75..74a17c4e08d 100644 --- a/operations/mimir-mixin/config.libsonnet +++ b/operations/mimir-mixin/config.libsonnet @@ -648,12 +648,12 @@ // Used to add additional services to dashboards that support it. extraServiceNames: [], - // When using rejecting inflight requests in ingesters early (using -ingester.limit-inflight-requests-using-grpc-method-limiter option), - // rejected requests will not count towards standard Mimir metrics like cortex_request_duration_seconds_count. - // Enabling this will make them visible on the dashboard again. + // When using early rejection of inflight requests in ingesters and distributors (using -ingester.limit-inflight-requests-using-grpc-method-limiter + // and -distributor.limit-inflight-requests-using-grpc-method-limiter options), rejected requests will not count towards standard Mimir metrics + // like cortex_request_duration_seconds_count. Enabling this will make them visible on the dashboard again. // - // Disabled by default, because when -ingester.limit-inflight-requests-using-grpc-method-limiter is not used (default), then rejected requests - // are already counted as failures. 
+ // Disabled by default, because when -ingester.limit-inflight-requests-using-grpc-method-limiter and -distributor.limit-inflight-requests-using-grpc-method-limiter are + // not used (default), then rejected requests are already counted as failures. show_rejected_requests_on_writes_dashboard: false, }, } diff --git a/operations/mimir-mixin/dashboards/writes.libsonnet b/operations/mimir-mixin/dashboards/writes.libsonnet index 2fb7773b3a4..58041cf4473 100644 --- a/operations/mimir-mixin/dashboards/writes.libsonnet +++ b/operations/mimir-mixin/dashboards/writes.libsonnet @@ -124,7 +124,31 @@ local filename = 'mimir-writes.json'; $.row('Distributor') .addPanel( $.panel('Requests / sec') + - $.qpsPanel($.queries.distributor.writeRequestsPerSecond) + $.panelDescription( + 'Requests / sec', + ||| + The rate of successful, failed and rejected requests to distributor. + Rejected requests are requests that distributor fails to handle because of distributor instance limits. + When distributor is configured to use "early" request rejection, then rejected requests are NOT included in other metrics. + When distributor is not configured to use "early" request rejection, then rejected requests are also counted as "errors". + ||| + ) + + $.qpsPanel($.queries.distributor.writeRequestsPerSecond) + + if $._config.show_rejected_requests_on_writes_dashboard then { + targets: [ + { + legendLink: null, + expr: 'sum (rate(cortex_distributor_instance_rejected_requests_total{%s}[$__rate_interval]))' % [$.jobMatcher($._config.job_names.distributor)], + format: 'time_series', + intervalFactor: 2, + legendFormat: 'rejected', + refId: 'B', + }, + ] + super.targets, + aliasColors+: { + rejected: '#EAB839', + }, + } else {}, ) .addPanel( $.panel('Latency') + @@ -146,7 +170,7 @@ local filename = 'mimir-writes.json'; 'Requests / sec', ||| The rate of successful, failed and rejected requests to ingester. 
- Rejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests and ingester-max-ingestion-rate). + Rejected requests are requests that ingester fails to handle because of ingester instance limits (ingester-max-inflight-push-requests, ingester-max-inflight-push-requests-bytes, ingester-max-ingestion-rate). When ingester is configured to use "early" request rejection, then rejected requests are NOT included in other metrics. When ingester is not configured to use "early" request rejection, then rejected requests are also counted as "errors". |||