Skip to content

Commit

Permalink
Support native histograms in panels
Browse files Browse the repository at this point in the history
Add utilities that generate native and classic queries for generic
histogram metrics defined by a metric name and selectors.
Allow defining panels that can switch between showing either kind.

Native histograms are documented at
https://grafana.com/docs/mimir/latest/send/native-histograms/

Signed-off-by: György Krajcsovits <[email protected]>
  • Loading branch information
krajorama committed Mar 14, 2024
1 parent 0f4cbd9 commit 56fe626
Show file tree
Hide file tree
Showing 2 changed files with 256 additions and 11 deletions.
180 changes: 169 additions & 11 deletions grafana-builder/grafana.libsonnet
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
local utils = import 'mixin-utils/utils.libsonnet';

{
dashboard(title, uid='', datasource='default', datasource_regex=''):: {
// Stuff that isn't materialised.
Expand Down Expand Up @@ -70,6 +72,40 @@
},
},

// Appends a custom dashboard variable named `latency_metrics` to the templating list.
// The variable offers two options, 'native' (value '-1') and 'classic' (value '1'),
// with 'classic' being the currently selected one.
addShowNativeLatencyVariable()::
  local nativeOption = { selected: false, text: 'native', value: '-1' };
  local classicOption = { selected: true, text: 'classic', value: '1' };
  self {
    templating+: {
      list+: [{
        // The current value is the same entry as the classic option.
        current: classicOption,
        description: 'Choose between showing latencies based on low precision classic or high precision native histogram metrics.',
        hide: 0,
        includeAll: false,
        label: 'Latency metrics',
        multi: false,
        name: 'latency_metrics',
        query: 'native : -1,classic : 1',
        options: [nativeOption, classicOption],
        skipUrlSync: false,
        type: 'custom',
        useTags: false,
      }],
    },
  },

dashboardLinkUrl(title, url):: self {
links+: [
{
Expand Down Expand Up @@ -420,18 +456,20 @@
},
],

// Maps a request status (HTTP status class or a textual status) to the colour
// of its series. Successful statuses share one colour, as do failures.
httpStatusColors::
  local successColor = '#7EB26D';
  local failureColor = '#E24D42';
  {
    '1xx': '#EAB839',
    '2xx': successColor,
    '3xx': '#6ED0E0',
    '4xx': '#EF843C',
    '5xx': failureColor,
    OK: successColor,
    success: successColor,
    // `error` is a Jsonnet keyword, so the field name has to be quoted.
    'error': failureColor,
    cancel: '#A9A9A9',
  },

qpsPanel(selector, statusLabelName='status_code'):: {
aliasColors: {
'1xx': '#EAB839',
'2xx': '#7EB26D',
'3xx': '#6ED0E0',
'4xx': '#EF843C',
'5xx': '#E24D42',
OK: '#7EB26D',
success: '#7EB26D',
'error': '#E24D42',
cancel: '#A9A9A9',
},
aliasColors: $.httpStatusColors,
targets: [
{
expr:
Expand All @@ -448,6 +486,74 @@
],
} + $.stack,

// Stacked request-rate panel, split by status, built on top of $.timeseriesPanel.
// Assumes that the metricName is for a histogram (as opposed to qpsPanel above).
// Assumes that there is a dashboard variable named latency_metrics, values are -1 (native) or 1 (classic).
qpsPanelNativeHistogram(title, metricName, selector, statusLabelName='status_code')::
  local countRate = utils.nativeClassicHistogramCountRate(metricName, selector);

  // Sums the rate by a derived `status` label: values of the form digit + two
  // characters become "<digit>xx", purely alphabetic values are copied as is.
  // The final comparison against $latency_metrics * <inf> keeps the series only
  // when that product is +Inf, i.e. it shows or hides the whole query.
  local statusQuery(metricQuery, inf) =
    |||
      sum by (status) (
      label_replace(label_replace(%(metricQuery)s,
      "status", "${1}xx", "%(label)s", "([0-9]).."),
      "status", "${1}", "%(label)s", "([a-zA-Z]+)"))
      < ($latency_metrics * %(inf)s)
    ||| % {
      inf: inf,
      label: statusLabelName,
      metricQuery: metricQuery,
    };

  local target(expr, refId) = {
    expr: expr,
    format: 'time_series',
    legendFormat: '{{status}}',
    refId: refId,
  };

  // Pins the series named after a status to its colour from $.httpStatusColors.
  local colorOverride(status) = {
    matcher: {
      id: 'byName',
      options: status,
    },
    properties: [
      {
        id: 'color',
        value: {
          mode: 'fixed',
          fixedColor: $.httpStatusColors[status],
        },
      },
    ],
  };

  $.timeseriesPanel(title) {
    fieldConfig+: {
      defaults+: {
        custom+: {
          lineWidth: 0,
          fillOpacity: 100,  // Get solid fill.
          stacking: {
            mode: 'normal',
            group: 'A',
          },
        },
        unit: 'reqps',
        min: 0,
      },
      overrides+: std.map(colorOverride, std.objectFieldsAll($.httpStatusColors)),
    },
    targets: [
      // Native query is visible when latency_metrics is -1, classic when it is 1.
      target(statusQuery(countRate.native, '-Inf'), 'A'),
      target(statusQuery(countRate.classic, '+Inf'), 'A_classic'),
    ],
  } + $.stack,

latencyPanel(metricName, selector, multiplier='1e3'):: {
nullPointMode: 'null as zero',
targets: [
Expand All @@ -473,6 +579,58 @@
yaxes: $.yaxes('ms'),
},

// Latency panel showing the 99th percentile, the 50th percentile and the average
// of a histogram metric. Each of the three series has a native and a classic
// target; the classic target uses the same refId with a '_classic' suffix.
// Assumes that there is a dashboard variable named latency_metrics, values are -1 (native) or 1 (classic)
latencyPanelNativeHistogram(title, metricName, selector, multiplier='1e3')::
  local quantile(q) = utils.nativeClassicHistogramQuantile(q, metricName, selector, multiplier=multiplier);
  local average = utils.nativeClassicHistogramAverageRate(metricName, selector, multiplier=multiplier);

  // Produces the native target followed by the classic target for one query pair.
  local targetPair(query, legend, refId) = [
    {
      expr: utils.showNativeHistogramQuery(query),
      format: 'time_series',
      legendFormat: legend,
      refId: refId,
    },
    {
      expr: utils.showClassicHistogramQuery(query),
      format: 'time_series',
      legendFormat: legend,
      refId: refId + '_classic',
    },
  ];

  $.timeseriesPanel(title) {
    nullPointMode: 'null as zero',
    fieldConfig+: {
      defaults+: {
        custom+: {
          fillOpacity: 10,
        },
        unit: 'ms',
      },
    },
    targets:
      targetPair(quantile('0.99'), '99th percentile', 'A')
      + targetPair(quantile('0.50'), '50th percentile', 'B')
      + targetPair(average, 'Average', 'C'),
    yaxes: $.yaxes('ms'),
  },

selector:: {
eq(label, value):: { label: label, op: '=', value: value },
neq(label, value):: { label: label, op: '!=', value: value },
Expand Down
87 changes: 87 additions & 0 deletions mixin-utils/utils.libsonnet
Original file line number Diff line number Diff line change
@@ -1,6 +1,93 @@
local g = import 'grafana-builder/grafana.libsonnet';

{
// nativeClassicHistogramQuantile builds histogram_quantile queries over a histogram metric
// and returns them as { classic: <query>, native: <query> }.
// Metric name should be provided without _bucket suffix.
// The classic query reads <metric>_bucket and always aggregates by `le` in addition to sum_by;
// the native query reads <metric> and aggregates by sum_by only.
// A non-empty multiplier is appended to both queries as ' * <multiplier>'.
nativeClassicHistogramQuantile(percentile, metric, selector, sum_by=[], rate_interval='$__rate_interval', multiplier='')::
  local hasSumBy = std.length(sum_by) > 0;
  local byClause(labels) = ' by (%(lbls)s) ' % { lbls: std.join(',', labels) };
  local template = 'histogram_quantile(%(percentile)s, sum%(sumBy)s(rate(%(metric)s%(suffix)s{%(selector)s}[%(rateInterval)s])))%(multiplierStr)s';
  local shared = {
    metric: metric,
    multiplierStr: if multiplier == '' then '' else ' * %s' % multiplier,
    percentile: percentile,
    rateInterval: rate_interval,
    selector: selector,
  };
  {
    classic: template % (shared {
      suffix: '_bucket',
      sumBy: if hasSumBy then byClause(['le'] + sum_by) else ' by (le) ',
    }),
    native: template % (shared {
      suffix: '',
      sumBy: if hasSumBy then byClause(sum_by) else ' ',
    }),
  },

// nativeClassicHistogramSumRate builds the rate of the histogram sum and returns it as
// { classic: <query>, native: <query> }.
// Metric name should be provided without _sum suffix.
// The classic query takes the rate of <metric>_sum; the native query applies
// histogram_sum to the rate of <metric>.
nativeClassicHistogramSumRate(metric, selector, rate_interval='$__rate_interval')::
  local args = {
    metric: metric,
    rateInterval: rate_interval,
    selector: selector,
  };
  {
    classic: 'rate(%(metric)s_sum{%(selector)s}[%(rateInterval)s])' % args,
    native: 'histogram_sum(rate(%(metric)s{%(selector)s}[%(rateInterval)s]))' % args,
  },


// nativeClassicHistogramCountRate builds the rate of the histogram count and returns it as
// { classic: <query>, native: <query> }.
// Metric name should be provided without _count suffix.
// The classic query takes the rate of <metric>_count; the native query applies
// histogram_count to the rate of <metric>.
nativeClassicHistogramCountRate(metric, selector, rate_interval='$__rate_interval')::
  local args = {
    metric: metric,
    rateInterval: rate_interval,
    selector: selector,
  };
  {
    classic: 'rate(%(metric)s_count{%(selector)s}[%(rateInterval)s])' % args,
    native: 'histogram_count(rate(%(metric)s{%(selector)s}[%(rateInterval)s]))' % args,
  },

// nativeClassicHistogramAverageRate builds the average observed value of a histogram as
// sum(<sum rate>) / sum(<count rate>) and returns it as { classic: <query>, native: <query> }.
// A non-empty multiplier is prepended to both queries as '<multiplier> * '.
// TODO(krajorama) Switch to histogram_avg function for native histograms later.
nativeClassicHistogramAverageRate(metric, selector, rate_interval='$__rate_interval', multiplier='')::
  local prefix = if multiplier == '' then '' else '%s * ' % multiplier;
  local sumRate = $.nativeClassicHistogramSumRate(metric, selector, rate_interval);
  local countRate = $.nativeClassicHistogramCountRate(metric, selector, rate_interval);
  // kind is either 'classic' or 'native' and selects the matching sub-queries.
  local ratio(kind) =
    |||
      %(multiplier)ssum(%(sumMetricQuery)s) /
      sum(%(countMetricQuery)s)
    ||| % {
      countMetricQuery: countRate[kind],
      multiplier: prefix,
      sumMetricQuery: sumRate[kind],
    };
  {
    classic: ratio('classic'),
    native: ratio('native'),
  },

// showClassicHistogramQuery takes a query map of the form {classic: q, native: q} and returns the
// classic query compared against the dashboard variable multiplied by +Inf. The variable should
// be +1 to show the classic query (q < +Inf) or -1 to hide it (q < -Inf).
showClassicHistogramQuery(query, dashboard_variable='latency_metrics')::
  std.format('%s < ($%s * +Inf)', [query.classic, dashboard_variable]),
// showNativeHistogramQuery takes a query map of the form {classic: q, native: q} and returns the
// native query compared against the dashboard variable multiplied by -Inf. The variable should
// be -1 to show the native query (q < +Inf) or +1 to hide it (q < -Inf).
showNativeHistogramQuery(query, dashboard_variable='latency_metrics')::
  std.format('%s < ($%s * -Inf)', [query.native, dashboard_variable]),

histogramRules(metric, labels, interval='1m', record_native=false)::
local vars = {
metric: metric,
Expand Down

0 comments on commit 56fe626

Please sign in to comment.