From 2b3dd70e3a95514e1acaf436770ddc8291f2b2ba Mon Sep 17 00:00:00 2001 From: Kemal Akkoyun Date: Thu, 6 Aug 2020 09:06:30 +0200 Subject: [PATCH] Add aggregation by operation to Store bucket dashboards (#2990) Signed-off-by: Kemal Akkoyun --- examples/dashboards/store.json | 16 ++++++------ mixin/dashboards/store.libsonnet | 43 ++++++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/examples/dashboards/store.json b/examples/dashboards/store.json index 3ec6edda2d..7c9b04d665 100644 --- a/examples/dashboards/store.json +++ b/examples/dashboards/store.json @@ -651,9 +651,7 @@ ] }, { - "aliasColors": { - "error": "#E24D42" - }, + "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, @@ -685,11 +683,11 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum(rate(thanos_objstore_bucket_operations_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", + "expr": "sum by (job, operation) (rate(thanos_objstore_bucket_operation_failures_total{namespace=\"$namespace\",job=~\"$job\"}[$interval])) / sum by (job, operation) (rate(thanos_objstore_bucket_operations_total{namespace=\"$namespace\",job=~\"$job\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", - "refId": "A", + "legendFormat": "{{job}} {{operation}}", + "legendLink": null, "step": 10 } ], @@ -762,7 +760,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation, le)) * 1", "format": "time_series", "intervalFactor": 2, "legendFormat": "P99 {{job}}", @@ -770,7 +768,7 @@ "step": 10 }, 
{ - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job) * 1 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job)", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation) * 1 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation)", "format": "time_series", "intervalFactor": 2, "legendFormat": "mean {{job}}", @@ -778,7 +776,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, le)) * 1", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{namespace=\"$namespace\",job=~\"$job\"}[$interval])) by (job, operation, le)) * 1", "format": "time_series", "intervalFactor": 2, "legendFormat": "P50 {{job}}", diff --git a/mixin/dashboards/store.libsonnet b/mixin/dashboards/store.libsonnet index eb0019d4ce..952a09feda 100644 --- a/mixin/dashboards/store.libsonnet +++ b/mixin/dashboards/store.libsonnet @@ -52,14 +52,16 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; ) .addPanel( g.panel('Errors', 'Shows ratio of errors compared to the total number of executed operations against the bucket.') + - g.qpsErrTotalPanel( - 'thanos_objstore_bucket_operation_failures_total{namespace="$namespace",job=~"$job"}', - 'thanos_objstore_bucket_operations_total{namespace="$namespace",job=~"$job"}', - ) + g.queryPanel( + 'sum by (job, operation) (rate(thanos_objstore_bucket_operation_failures_total{namespace="$namespace",job=~"$job"}[$interval])) / sum by (job, operation) (rate(thanos_objstore_bucket_operations_total{namespace="$namespace",job=~"$job"}[$interval]))', + 
'{{job}} {{operation}}' + ) + + { yaxes: g.yaxes({ format: 'percentunit' }) } + + g.stack, ) .addPanel( g.panel('Duration', 'Shows how long has it taken to execute operations against the bucket, in quantiles.') + - g.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', 'namespace="$namespace",job=~"$job"') + $.latencyByOperationPanel('thanos_objstore_bucket_operation_duration_seconds', 'namespace="$namespace",job=~"$job"') ) ) .addRow( @@ -242,4 +244,35 @@ local g = import '../lib/thanos-grafana-builder/builder.libsonnet'; ), ], }, + + latencyByOperationPanel(metricName, selector, multiplier='1'):: { + nullPointMode: 'null as zero', + targets: [ + { + expr: 'histogram_quantile(0.99, sum(rate(%s_bucket{%s}[$interval])) by (job, operation, le)) * %s' % [metricName, selector, multiplier], + format: 'time_series', + intervalFactor: 2, + legendFormat: 'P99 {{job}}', + refId: 'A', + step: 10, + }, + { + expr: 'sum(rate(%s_sum{%s}[$interval])) by (job, operation) * %s / sum(rate(%s_count{%s}[$interval])) by (job, operation)' % [metricName, selector, multiplier, metricName, selector], + format: 'time_series', + intervalFactor: 2, + legendFormat: 'mean {{job}}', + refId: 'B', + step: 10, + }, + { + expr: 'histogram_quantile(0.50, sum(rate(%s_bucket{%s}[$interval])) by (job, operation, le)) * %s' % [metricName, selector, multiplier], + format: 'time_series', + intervalFactor: 2, + legendFormat: 'P50 {{job}}', + refId: 'C', + step: 10, + }, + ], + yaxes: g.yaxes('s'), + }, }