From 2afddae270947a5713f596559d21e6586eb2b6da Mon Sep 17 00:00:00 2001 From: Cyril Tovena Date: Fri, 29 Jan 2021 10:50:45 +0100 Subject: [PATCH] Fix a bug whith metric queries and label_format. In my previous PR where I introduced hints, I forgot to implement label format label required. This would cause label format label to be skipped during parsing. I've also took the opportunity to improve the code. Signed-off-by: Cyril Tovena --- pkg/logql/log/fmt.go | 9 +++- pkg/logql/log/fmt_test.go | 18 ++++++- pkg/logql/log/labels.go | 12 ++--- pkg/logql/log/metrics_extraction.go | 84 +++++++++++++++++++++++++++-- pkg/logql/log/parser.go | 43 ++++----------- pkg/logql/log/parser_test.go | 4 +- 6 files changed, 122 insertions(+), 48 deletions(-) diff --git a/pkg/logql/log/fmt.go b/pkg/logql/log/fmt.go index e30d92a823346..270df15d68e55 100644 --- a/pkg/logql/log/fmt.go +++ b/pkg/logql/log/fmt.go @@ -245,7 +245,14 @@ func (lf *LabelsFormatter) Process(l []byte, lbs *LabelsBuilder) ([]byte, bool) func (lf *LabelsFormatter) RequiredLabelNames() []string { var names []string - return names + for _, fm := range lf.formats { + if fm.Rename { + names = append(names, fm.Value) + continue + } + names = append(names, listNodeFields(fm.tmpl.Root)...) + } + return uniqueString(names) } func trunc(c int, s string) string { diff --git a/pkg/logql/log/fmt_test.go b/pkg/logql/log/fmt_test.go index 0e9a3e3a8483c..8903973e65c01 100644 --- a/pkg/logql/log/fmt_test.go +++ b/pkg/logql/log/fmt_test.go @@ -264,7 +264,6 @@ func Test_trunc(t *testing.T) { } func Test_substring(t *testing.T) { - tests := []struct { start int end int @@ -309,3 +308,20 @@ func TestLineFormatter_RequiredLabelNames(t *testing.T) { }) } } + +func TestLabelFormatter_RequiredLabelNames(t *testing.T) { + tests := []struct { + name string + fmts []LabelFmt + want []string + }{ + {"rename", []LabelFmt{NewRenameLabelFmt("foo", "bar")}, []string{"bar"}}, + {"rename and fmt", []LabelFmt{NewRenameLabelFmt("fuzz", "bar"), NewTemplateLabelFmt("1", "{{ .foo | ToUpper | .buzz }} and {{.bar}}")}, []string{"bar", "foo", "buzz"}}, + {"fmt", []LabelFmt{NewTemplateLabelFmt("1", "{{.blip}}"), NewTemplateLabelFmt("2", "{{ .foo | ToUpper | .buzz }} and {{.bar}}")}, []string{"blip", "foo", "buzz", "bar"}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + require.Equal(t, tt.want, mustNewLabelsFormatter(tt.fmts).RequiredLabelNames()) + }) + } +} diff --git a/pkg/logql/log/labels.go b/pkg/logql/log/labels.go index 3d6d2bd833d23..fc6e036f826f5 100644 --- a/pkg/logql/log/labels.go +++ b/pkg/logql/log/labels.go @@ -6,9 +6,7 @@ import ( "github.com/prometheus/prometheus/pkg/labels" ) -var ( - emptyLabelsResult = NewLabelsResult(labels.Labels{}, labels.Labels{}.Hash()) -) +var emptyLabelsResult = NewLabelsResult(labels.Labels{}, labels.Labels{}.Hash()) // LabelsResult is a computed labels result that contains the labels set with associated string and hash. // The is mainly used for caching and returning labels computations out of pipelines and stages. @@ -68,7 +66,7 @@ type BaseLabelsBuilder struct { err string groups []string - parserKeyHints []string // label key hints for metric queries that allows to limit parser extractions to only this list of labels. + parserKeyHints ParserHint // label key hints for metric queries that allows to limit parser extractions to only this list of labels. without, noLabels bool resultCache map[uint64]LabelsResult @@ -85,7 +83,7 @@ type LabelsBuilder struct { } // NewBaseLabelsBuilderWithGrouping creates a new base labels builder with grouping to compute results. -func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints []string, without, noLabels bool) *BaseLabelsBuilder { +func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints ParserHint, without, noLabels bool) *BaseLabelsBuilder { return &BaseLabelsBuilder{ del: make([]string, 0, 5), add: make([]labels.Label, 0, 16), @@ -100,7 +98,7 @@ func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints []string, // NewLabelsBuilder creates a new base labels builder. func NewBaseLabelsBuilder() *BaseLabelsBuilder { - return NewBaseLabelsBuilderWithGrouping(nil, nil, false, false) + return NewBaseLabelsBuilderWithGrouping(nil, noParserHints, false, false) } // ForLabels creates a labels builder for a given labels set as base. @@ -133,7 +131,7 @@ func (b *LabelsBuilder) Reset() { // ParserLabelHints returns a limited list of expected labels to extract for metric queries. // Returns nil when it's impossible to hint labels extractions. -func (b *BaseLabelsBuilder) ParserLabelHints() []string { +func (b *BaseLabelsBuilder) ParserLabelHints() ParserHint { return b.parserKeyHints } diff --git a/pkg/logql/log/metrics_extraction.go b/pkg/logql/log/metrics_extraction.go index 1a5b446fdfbcd..3d95ed4cd0fb1 100644 --- a/pkg/logql/log/metrics_extraction.go +++ b/pkg/logql/log/metrics_extraction.go @@ -3,6 +3,7 @@ package log import ( "sort" "strconv" + "strings" "time" "github.com/pkg/errors" @@ -23,6 +24,7 @@ type LineExtractor func([]byte) float64 var ( CountExtractor LineExtractor = func(line []byte) float64 { return 1. } BytesExtractor LineExtractor = func(line []byte) float64 { return float64(len(line)) } + noParserHints = &parserHint{} ) // SampleExtractor creates StreamSampleExtractor that can extract samples for a given log stream. @@ -47,17 +49,18 @@ type lineSampleExtractor struct { // NewLineSampleExtractor creates a SampleExtractor from a LineExtractor. // Multiple log stages are run before converting the log line. -func NewLineSampleExtractor(ex LineExtractor, stages []Stage, groups []string, without bool, noLabels bool) (SampleExtractor, error) { +func NewLineSampleExtractor(ex LineExtractor, stages []Stage, groups []string, without, noLabels bool) (SampleExtractor, error) { s := ReduceStages(stages) var expectedLabels []string if !without { expectedLabels = append(expectedLabels, s.RequiredLabelNames()...) expectedLabels = uniqueString(append(expectedLabels, groups...)) } + hints := newParserHint(expectedLabels, without, noLabels, "") return &lineSampleExtractor{ Stage: s, LineExtractor: ex, - baseBuilder: NewBaseLabelsBuilderWithGrouping(groups, expectedLabels, without, noLabels), + baseBuilder: NewBaseLabelsBuilderWithGrouping(groups, hints, without, noLabels), streamExtractors: make(map[uint64]StreamSampleExtractor), }, nil } @@ -118,7 +121,7 @@ type labelSampleExtractor struct { // to remove sample containing the __error__ label. func LabelExtractorWithStages( labelName, conversion string, - groups []string, without bool, noLabels bool, + groups []string, without, noLabels bool, preStages []Stage, postFilter Stage, ) (SampleExtractor, error) { @@ -147,12 +150,13 @@ func LabelExtractorWithStages( expectedLabels = append(expectedLabels, labelName) expectedLabels = uniqueString(expectedLabels) } + hints := newParserHint(expectedLabels, without, noLabels, labelName) return &labelSampleExtractor{ preStage: preStage, conversionFn: convFn, labelName: labelName, postFilter: postFilter, - baseBuilder: NewBaseLabelsBuilderWithGrouping(groups, expectedLabels, without, noLabels), + baseBuilder: NewBaseLabelsBuilderWithGrouping(groups, hints, without, noLabels), streamExtractors: make(map[uint64]StreamSampleExtractor), }, nil } @@ -226,3 +230,75 @@ func convertBytes(v string) (float64, error) { } return float64(b), nil } + +// ParserHint are hints given to LogQL parsers. +// This is specially useful for parser that extract implicitly all possible label keys. +// This is used only within metric queries since it's rare that you need all label keys. +// For example in the following expression: +// +// sum by (status_code) (rate({app="foo"} | json [5m])) +// +// All we need to extract is the status_code in the json parser. +type ParserHint interface { + // Tells is a label with the given key should be extracted. + ShouldExtract(key string) bool + // Tells if there's any hint that start with the given prefix. + // This allows to speed up key searching in nested structured like json. + ShouldExtractPrefix(prefix string) bool + // Tells if we should not extract any labels. + // For example in : + // sum(rate({app="foo"} | json [5m])) + // We don't need to extract any labels from the log line. + NoLabels() bool +} + +type parserHint struct { + noLabels bool + requiredLabels []string +} + +func (p *parserHint) ShouldExtract(key string) bool { + if len(p.requiredLabels) == 0 { + return true + } + for _, l := range p.requiredLabels { + if l == key { + return true + } + } + return false +} + +func (p *parserHint) ShouldExtractPrefix(prefix string) bool { + if len(p.requiredLabels) == 0 { + return true + } + for _, l := range p.requiredLabels { + if strings.HasPrefix(l, prefix) { + return true + } + } + + return false +} + +func (p *parserHint) NoLabels() bool { + return p.noLabels +} + +// newParserHint creates a new parser hint using the list of labels that are seen and required in a query. +func newParserHint(requiredLabelNames []string, without, noLabels bool, metricLabelName string) *parserHint { + if noLabels { + // if we need to extract metric from a label, then use that as required labels list. + if metricLabelName != "" { + return &parserHint{requiredLabels: []string{metricLabelName}} + } + return &parserHint{noLabels: true} + } + // we don't know what is required when a without clause is used. + // no hint available + if without { + return noParserHints + } + return &parserHint{requiredLabels: requiredLabelNames} +} diff --git a/pkg/logql/log/parser.go b/pkg/logql/log/parser.go index 47bd612c534f7..fe2fe030cb1e6 100644 --- a/pkg/logql/log/parser.go +++ b/pkg/logql/log/parser.go @@ -5,7 +5,6 @@ import ( "fmt" "io" "regexp" - "strings" "github.com/grafana/loki/pkg/logql/log/logfmt" @@ -41,6 +40,9 @@ func NewJSONParser() *JSONParser { } func (j *JSONParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) { + if lbs.ParserLabelHints().NoLabels() { + return line, true + } it := jsoniter.ConfigFastest.BorrowIterator(line) defer jsoniter.ConfigFastest.ReturnIterator(it) @@ -91,7 +93,7 @@ func (j *JSONParser) nextKeyPrefix(prefix, field string) (string, bool) { // first time we add return the field as prefix. if len(prefix) == 0 { field = sanitizeLabelKey(field, true) - if isValidKeyPrefix(field, j.lbs.ParserLabelHints()) { + if j.lbs.ParserLabelHints().ShouldExtractPrefix(field) { return field, true } return "", false @@ -102,31 +104,17 @@ func (j *JSONParser) nextKeyPrefix(prefix, field string) (string, bool) { j.buf = append(j.buf, byte(jsonSpacer)) j.buf = append(j.buf, sanitizeLabelKey(field, false)...) // if matches keep going - if isValidKeyPrefix(string(j.buf), j.lbs.ParserLabelHints()) { + if j.lbs.ParserLabelHints().ShouldExtractPrefix(string(j.buf)) { return string(j.buf), true } return "", false - -} - -// isValidKeyPrefix extract an object if the prefix is valid -func isValidKeyPrefix(objectprefix string, hints []string) bool { - if len(hints) == 0 { - return true - } - for _, k := range hints { - if strings.HasPrefix(k, objectprefix) { - return true - } - } - return false } func (j *JSONParser) parseLabelValue(iter *jsoniter.Iterator, prefix, field string) { // the first time we use the field as label key. if len(prefix) == 0 { field = sanitizeLabelKey(field, true) - if !shouldExtractKey(field, j.lbs.ParserLabelHints()) { + if !j.lbs.ParserLabelHints().ShouldExtract(field) { // we can skip the value iter.Skip() return @@ -147,7 +135,7 @@ func (j *JSONParser) parseLabelValue(iter *jsoniter.Iterator, prefix, field stri if j.lbs.BaseHas(string(j.buf)) { j.buf = append(j.buf, duplicateSuffix...) } - if !shouldExtractKey(string(j.buf), j.lbs.ParserLabelHints()) { + if !j.lbs.ParserLabelHints().ShouldExtract(string(j.buf)) { iter.Skip() return } @@ -173,18 +161,6 @@ func readValue(iter *jsoniter.Iterator) string { } } -func shouldExtractKey(key string, hints []string) bool { - if len(hints) == 0 { - return true - } - for _, k := range hints { - if k == key { - return true - } - } - return false -} - func addLabel(lbs *LabelsBuilder, key, value string) { key = sanitizeLabelKey(key, true) if lbs.BaseHas(key) { @@ -263,9 +239,12 @@ func NewLogfmtParser() *LogfmtParser { } func (l *LogfmtParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) { + if lbs.ParserLabelHints().NoLabels() { + return line, true + } l.dec.Reset(line) for l.dec.ScanKeyval() { - if !shouldExtractKey(string(l.dec.Key()), lbs.ParserLabelHints()) { + if !lbs.ParserLabelHints().ShouldExtract(string(l.dec.Key())) { continue } key := string(l.dec.Key()) diff --git a/pkg/logql/log/parser_test.go b/pkg/logql/log/parser_test.go index d2b44940725d6..4847adf78e6ae 100644 --- a/pkg/logql/log/parser_test.go +++ b/pkg/logql/log/parser_test.go @@ -88,7 +88,6 @@ func Test_jsonParser_Parse(t *testing.T) { } func Benchmark_Parser(b *testing.B) { - lbs := labels.Labels{ {Name: "cluster", Value: "qa-us-central1"}, {Name: "namespace", Value: "qa"}, @@ -127,7 +126,7 @@ func Benchmark_Parser(b *testing.B) { b.Run("labels hints", func(b *testing.B) { builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash()) - builder.parserKeyHints = tt.LabelParseHints + builder.parserKeyHints = newParserHint(tt.LabelParseHints, false, false, "") for n := 0; n < b.N; n++ { builder.Reset() _, _ = tt.s.Process(line, builder) @@ -135,7 +134,6 @@ func Benchmark_Parser(b *testing.B) { }) }) } - } func TestNewRegexpParser(t *testing.T) {