diff --git a/CHANGELOG.md b/CHANGELOG.md index 588e4429ce4..7fcdced4a16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -119,6 +119,7 @@ * [BUGFIX] Ruler: fix regression that caused client errors to be tracked in `cortex_ruler_write_requests_failed_total` metric. #7472 * [BUGFIX] promql: Fix Range selectors with an @ modifier are wrongly scoped in range queries. #7475 * [BUGFIX] Fix metadata API using wrong JSON field names. #7475 +* [BUGFIX] Ruler: fix native histogram recording rule result corruption. #7552 ### Mixin diff --git a/pkg/mimirpb/query_response_extra_test.go b/pkg/mimirpb/query_response_extra_test.go index 5795f969b59..7b112537146 100644 --- a/pkg/mimirpb/query_response_extra_test.go +++ b/pkg/mimirpb/query_response_extra_test.go @@ -79,3 +79,35 @@ func extractPrometheusStrings(t *testing.T, constantType string) []string { func TestFloatHistogramProtobufTypeRemainsInSyncWithPrometheus(t *testing.T) { test.RequireSameShape(t, histogram.FloatHistogram{}, FloatHistogram{}, false) } + +// This example is from an investigation into a bug in the ruler. Keeping it here for future reference. +func TestFloatHistogramProtobufToPrometheus(t *testing.T) { + fh := &FloatHistogram{ + CounterResetHint: 3, + Schema: 3, + ZeroThreshold: 2.938735877055719e-39, + ZeroCount: 0, + Count: 6.844444444444443, + Sum: 0.00872226031111156, + PositiveSpans: []BucketSpan{{-134, 10}, {1, 2}, {2, 4}, {2, 2}, {5, 1}, {43, 1}, {3, 2}, {3, 1}, {21, 1}, {1, 2}, {4, 1}}, + NegativeSpans: nil, + PositiveBuckets: []float64{0.02222222222222222, 0.9999999999999998, 1.2, 1.0666666666666667, 0.9777777777777776, 1.1777777777777776, 0.42222222222222217, 0.2444444444444444, 0.1333333333333333, 0.17777777777777776, 0.04444444444444444, 0.02222222222222222, 0.04444444444444444, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222}, + NegativeBuckets: nil, + } + expectedModelh := &histogram.FloatHistogram{ + CounterResetHint: histogram.GaugeType, + Schema: 3, + ZeroThreshold: 2.938735877055719e-39, + ZeroCount: 0, + Count: 6.844444444444443, + Sum: 0.00872226031111156, + PositiveSpans: []histogram.Span{{Offset: -134, Length: 10}, {Offset: 1, Length: 2}, {Offset: 2, Length: 4}, {Offset: 2, Length: 2}, {Offset: 5, Length: 1}, {Offset: 43, Length: 1}, {Offset: 3, Length: 2}, {Offset: 3, Length: 1}, {Offset: 21, Length: 1}, {Offset: 1, Length: 2}, {Offset: 4, Length: 1}}, + NegativeSpans: nil, + PositiveBuckets: []float64{0.02222222222222222, 0.9999999999999998, 1.2, 1.0666666666666667, 0.9777777777777776, 1.1777777777777776, 0.42222222222222217, 0.2444444444444444, 0.1333333333333333, 0.17777777777777776, 0.04444444444444444, 0.02222222222222222, 0.04444444444444444, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222, 0.02222222222222222}, + NegativeBuckets: nil, + } + + modelh := fh.ToPrometheusModel() + + require.Equal(t, expectedModelh, modelh) +} diff --git a/pkg/ruler/remotequerier_decoder.go b/pkg/ruler/remotequerier_decoder.go index 5569bc91570..f78178115d8 100644 --- a/pkg/ruler/remotequerier_decoder.go +++ b/pkg/ruler/remotequerier_decoder.go @@ -156,8 +156,9 @@ func (d protobufDecoder) decodeVector(v *mimirpb.VectorData) (promql.Vector, err samples[floatSampleCount+i] = promql.Sample{ Metric: m, - H: s.Histogram.ToPrometheusModel(), - T: s.TimestampMs, + // We must not use the address of the loop variable as that's reused. + H: (&(v.Histograms[i].Histogram)).ToPrometheusModel(), + T: s.TimestampMs, } } diff --git a/pkg/ruler/remotequerier_decoder_test.go b/pkg/ruler/remotequerier_decoder_test.go new file mode 100644 index 00000000000..1cb7334d5ad --- /dev/null +++ b/pkg/ruler/remotequerier_decoder_test.go @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package ruler + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/grafana/mimir/pkg/mimirpb" +) + +// Regression test for a bug where we were copying the loop variable address. +func TestProtobufDecoderDecodeVectorHistogramMemoryHandling(t *testing.T) { + vd := &mimirpb.VectorData{ + Histograms: []mimirpb.VectorHistogram{ + { + Metric: []string{"__name__", "test"}, + Histogram: mimirpb.FloatHistogram{ + CounterResetHint: 3, + Schema: 3, + ZeroThreshold: 0.1, + ZeroCount: 0, + Count: 3, + Sum: 3, + }, + TimestampMs: 123, + }, + { + Metric: []string{"__name__", "test"}, + Histogram: mimirpb.FloatHistogram{ + CounterResetHint: 3, + Schema: 3, + ZeroThreshold: 0.1, + ZeroCount: 0, + Count: 1337, + Sum: 3, + }, + TimestampMs: 123, + }, + }, + } + decoder := protobufDecoder{} + v, err := decoder.decodeVector(vd) + require.NoError(t, err) + require.Equal(t, 2, len(v)) + // If the encoding takes the address of the loop variable, the value will be 1337. + require.Equal(t, float64(3), v[0].H.Count) + + // Also check the second histogram. + require.Equal(t, float64(1337), v[1].H.Count) + + // Ensure that the decoding is zero copy for performance reasons. + vd.Histograms[0].Histogram.Count = 42 + require.Equal(t, float64(42), v[0].H.Count) +}