From e225304b6f594fc4da768e3f1635ee54d092429b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 6 Mar 2024 13:11:25 +0100 Subject: [PATCH 1/4] ruler: fix native histogram recording rule result corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were copying the address of a loop variable for native histograms, causing the result of recording rules being corrupted. Signed-off-by: György Krajcsovits --- CHANGELOG.md | 1 + pkg/mimirpb/query_response_extra_test.go | 32 +++++++++++++++ pkg/ruler/remotequerier_decoder.go | 5 ++- pkg/ruler/remotequerier_decoder_test.go | 51 ++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 pkg/ruler/remotequerier_decoder_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 588e4429ce4..7fcdced4a16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -119,6 +119,7 @@ * [BUGFIX] Ruler: fix regression that caused client errors to be tracked in `cortex_ruler_write_requests_failed_total` metric. #7472 * [BUGFIX] promql: Fix Range selectors with an @ modifier are wrongly scoped in range queries. #7475 * [BUGFIX] Fix metadata API using wrong JSON field names. #7475 +* [BUGFIX] Ruler: fix native histogram recording rule result corruption. #7552 ### Mixin diff --git a/pkg/mimirpb/query_response_extra_test.go b/pkg/mimirpb/query_response_extra_test.go index 5795f969b59..09dea918324 100644 --- a/pkg/mimirpb/query_response_extra_test.go +++ b/pkg/mimirpb/query_response_extra_test.go @@ -79,3 +79,35 @@ func extractPrometheusStrings(t *testing.T, constantType string) []string { func TestFloatHistogramProtobufTypeRemainsInSyncWithPrometheus(t *testing.T) { test.RequireSameShape(t, histogram.FloatHistogram{}, FloatHistogram{}, false) } + +// This example is from an investigation into a bug in the ruler. Keeping it here for future reference. +func TestFloatHistogramProtobufToPrometheus(t *testing.T) { + fh := &FloatHistogram{ + CounterResetHint: 3, + Schema: 3, + ZeroThreshold: 2.938735877055719e-39, + ZeroCount: 0, + Count: 6.844444444444443, + Sum: 0.00872226031111156, + PositiveSpans: []BucketSpan{{-134, 10}, {1, 2}, {2, 4}, {2, 2}, {5, 1}, {43, 1}, {3, 2}, {3, 1}, {21, 1}, {1, 2}, {4, 1}}, + NegativeSpans: nil, + PositiveBuckets: []float64{0.02222222222222222, 0.9999999999999998, 1.2, 1.0666666666666667, 0.9777777777777776, 1.1777777777777776, 0.42222222222222217, 0.2444444444444444, 0.1333333333333333, 0.17777777777777776, 0.04444444444444444, 0.02222222222222222, 0.04444444444444444, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222}, + NegativeBuckets: nil, + } + expectedModelh := &histogram.FloatHistogram{ + CounterResetHint: histogram.GaugeType, + Schema: 3, + ZeroThreshold: 2.938735877055719e-39, + ZeroCount: 0, + Count: 6.844444444444443, + Sum: 0.00872226031111156, + PositiveSpans: []histogram.Span{{-134, 10}, {1, 2}, {2, 4}, {2, 2}, {5, 1}, {43, 1}, {3, 2}, {3, 1}, {21, 1}, {1, 2}, {4, 1}}, + NegativeSpans: nil, + PositiveBuckets: []float64{0.02222222222222222, 0.9999999999999998, 1.2, 1.0666666666666667, 0.9777777777777776, 1.1777777777777776, 0.42222222222222217, 0.2444444444444444, 0.1333333333333333, 0.17777777777777776, 0.04444444444444444, 0.02222222222222222, 0.04444444444444444, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222}, + NegativeBuckets: nil, + } + + modelh := fh.ToPrometheusModel() + + require.Equal(t, expectedModelh, modelh) +} diff --git a/pkg/ruler/remotequerier_decoder.go b/pkg/ruler/remotequerier_decoder.go index 5569bc91570..f78178115d8 100644 --- a/pkg/ruler/remotequerier_decoder.go +++ b/pkg/ruler/remotequerier_decoder.go @@ -156,8 +156,9 @@ func (d protobufDecoder) decodeVector(v *mimirpb.VectorData) (promql.Vector, err samples[floatSampleCount+i] = promql.Sample{ Metric: m, - H: s.Histogram.ToPrometheusModel(), - T: s.TimestampMs, + // We must not use the address of the loop variable as that's reused. + H: (&(v.Histograms[i].Histogram)).ToPrometheusModel(), + T: s.TimestampMs, } } diff --git a/pkg/ruler/remotequerier_decoder_test.go b/pkg/ruler/remotequerier_decoder_test.go new file mode 100644 index 00000000000..a81ef4cf33a --- /dev/null +++ b/pkg/ruler/remotequerier_decoder_test.go @@ -0,0 +1,51 @@ +package ruler + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/grafana/mimir/pkg/mimirpb" +) + +// Regression test for a bug where we were copying the loop variable address. +func TestProtobufDecoderDecodeVectorHistogramMemoryHandling(t *testing.T) { + vd := &mimirpb.VectorData{ + Histograms: []mimirpb.VectorHistogram{ + { + Metric: []string{"__name__", "test"}, + Histogram: mimirpb.FloatHistogram{ + CounterResetHint: 3, + Schema: 3, + ZeroThreshold: 0.1, + ZeroCount: 0, + Count: 3, + Sum: 3, + }, + TimestampMs: 123, + }, + { + Metric: []string{"__name__", "test"}, + Histogram: mimirpb.FloatHistogram{ + CounterResetHint: 3, + Schema: 3, + ZeroThreshold: 0.1, + ZeroCount: 0, + Count: 1337, + Sum: 3, + }, + TimestampMs: 123, + }, + }, + } + decoder := protobufDecoder{} + v, err := decoder.decodeVector(vd) + require.NoError(t, err) + require.Equal(t, 2, len(v)) + require.Equal(t, float64(3), v[0].H.Count) + + // Check that the decoding is zero copy + ph := &(vd.Histograms[0].Histogram) + ph.Count = 42 + require.Equal(t, float64(42), v[0].H.Count) +} From c849b4851894e6991a8453edc414fa2f7c2d1762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 6 Mar 2024 14:12:38 +0100 Subject: [PATCH 2/4] Fix linter issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- pkg/mimirpb/query_response_extra_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/mimirpb/query_response_extra_test.go b/pkg/mimirpb/query_response_extra_test.go index 09dea918324..7b112537146 100644 --- a/pkg/mimirpb/query_response_extra_test.go +++ b/pkg/mimirpb/query_response_extra_test.go @@ -101,7 +101,7 @@ func TestFloatHistogramProtobufToPrometheus(t *testing.T) { ZeroCount: 0, Count: 6.844444444444443, Sum: 0.00872226031111156, - PositiveSpans: []histogram.Span{{-134, 10}, {1, 2}, {2, 4}, {2, 2}, {5, 1}, {43, 1}, {3, 2}, {3, 1}, {21, 1}, {1, 2}, {4, 1}}, + PositiveSpans: []histogram.Span{{Offset: -134, Length: 10}, {Offset: 1, Length: 2}, {Offset: 2, Length: 4}, {Offset: 2, Length: 2}, {Offset: 5, Length: 1}, {Offset: 43, Length: 1}, {Offset: 3, Length: 2}, {Offset: 3, Length: 1}, {Offset: 21, Length: 1}, {Offset: 1, Length: 2}, {Offset: 4, Length: 1}}, NegativeSpans: nil, PositiveBuckets: []float64{0.02222222222222222, 0.9999999999999998, 1.2, 1.0666666666666667, 0.9777777777777776, 1.1777777777777776, 0.42222222222222217, 0.2444444444444444, 0.1333333333333333, 0.17777777777777776, 0.04444444444444444, 0.02222222222222222, 0.04444444444444444, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222}, NegativeBuckets: nil, From 7e9636a9e86b380fe51c5156b7e4a110bb862439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 6 Mar 2024 14:45:31 +0100 Subject: [PATCH 3/4] Add license header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- pkg/ruler/remotequerier_decoder_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/ruler/remotequerier_decoder_test.go b/pkg/ruler/remotequerier_decoder_test.go index a81ef4cf33a..ae34eb39c2c 100644 --- a/pkg/ruler/remotequerier_decoder_test.go +++ b/pkg/ruler/remotequerier_decoder_test.go @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: AGPL-3.0-only + package ruler import ( From 770cc5a9b20db5d6d851bc80b0bbb66caea3eb9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Thu, 7 Mar 2024 07:50:10 +0100 Subject: [PATCH 4/4] Update due to review comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- pkg/ruler/remotequerier_decoder_test.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pkg/ruler/remotequerier_decoder_test.go b/pkg/ruler/remotequerier_decoder_test.go index ae34eb39c2c..1cb7334d5ad 100644 --- a/pkg/ruler/remotequerier_decoder_test.go +++ b/pkg/ruler/remotequerier_decoder_test.go @@ -44,10 +44,13 @@ func TestProtobufDecoderDecodeVectorHistogramMemoryHandling(t *testing.T) { v, err := decoder.decodeVector(vd) require.NoError(t, err) require.Equal(t, 2, len(v)) + // If the encoding takes the address of the loop variable, the value will be 1337. require.Equal(t, float64(3), v[0].H.Count) - // Check that the decoding is zero copy - ph := &(vd.Histograms[0].Histogram) - ph.Count = 42 + // Also check the second histogram. + require.Equal(t, float64(1337), v[1].H.Count) + + // Ensure that the decoding is zero copy for performance reasons. + vd.Histograms[0].Histogram.Count = 42 require.Equal(t, float64(42), v[0].H.Count) }