From 2afddae270947a5713f596559d21e6586eb2b6da Mon Sep 17 00:00:00 2001
From: Cyril Tovena <cyril.tovena@gmail.com>
Date: Fri, 29 Jan 2021 10:50:45 +0100
Subject: [PATCH] Fix a bug whith  metric queries and label_format.

In my previous PR where I introduced hints, I forgot to implement label format label required.
This would cause label format label to be skipped during parsing.

I've also took the opportunity to improve the code.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>
---
 pkg/logql/log/fmt.go                |  9 +++-
 pkg/logql/log/fmt_test.go           | 18 ++++++-
 pkg/logql/log/labels.go             | 12 ++---
 pkg/logql/log/metrics_extraction.go | 84 +++++++++++++++++++++++++++--
 pkg/logql/log/parser.go             | 43 ++++-----------
 pkg/logql/log/parser_test.go        |  4 +-
 6 files changed, 122 insertions(+), 48 deletions(-)

diff --git a/pkg/logql/log/fmt.go b/pkg/logql/log/fmt.go
index e30d92a823346..270df15d68e55 100644
--- a/pkg/logql/log/fmt.go
+++ b/pkg/logql/log/fmt.go
@@ -245,7 +245,14 @@ func (lf *LabelsFormatter) Process(l []byte, lbs *LabelsBuilder) ([]byte, bool)
 
 func (lf *LabelsFormatter) RequiredLabelNames() []string {
 	var names []string
-	return names
+	for _, fm := range lf.formats {
+		if fm.Rename {
+			names = append(names, fm.Value)
+			continue
+		}
+		names = append(names, listNodeFields(fm.tmpl.Root)...)
+	}
+	return uniqueString(names)
 }
 
 func trunc(c int, s string) string {
diff --git a/pkg/logql/log/fmt_test.go b/pkg/logql/log/fmt_test.go
index 0e9a3e3a8483c..8903973e65c01 100644
--- a/pkg/logql/log/fmt_test.go
+++ b/pkg/logql/log/fmt_test.go
@@ -264,7 +264,6 @@ func Test_trunc(t *testing.T) {
 }
 
 func Test_substring(t *testing.T) {
-
 	tests := []struct {
 		start int
 		end   int
@@ -309,3 +308,20 @@ func TestLineFormatter_RequiredLabelNames(t *testing.T) {
 		})
 	}
 }
+
+func TestLabelFormatter_RequiredLabelNames(t *testing.T) {
+	tests := []struct {
+		name string
+		fmts []LabelFmt
+		want []string
+	}{
+		{"rename", []LabelFmt{NewRenameLabelFmt("foo", "bar")}, []string{"bar"}},
+		{"rename and fmt", []LabelFmt{NewRenameLabelFmt("fuzz", "bar"), NewTemplateLabelFmt("1", "{{ .foo | ToUpper | .buzz }} and {{.bar}}")}, []string{"bar", "foo", "buzz"}},
+		{"fmt", []LabelFmt{NewTemplateLabelFmt("1", "{{.blip}}"), NewTemplateLabelFmt("2", "{{ .foo | ToUpper | .buzz }} and {{.bar}}")}, []string{"blip", "foo", "buzz", "bar"}},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			require.Equal(t, tt.want, mustNewLabelsFormatter(tt.fmts).RequiredLabelNames())
+		})
+	}
+}
diff --git a/pkg/logql/log/labels.go b/pkg/logql/log/labels.go
index 3d6d2bd833d23..fc6e036f826f5 100644
--- a/pkg/logql/log/labels.go
+++ b/pkg/logql/log/labels.go
@@ -6,9 +6,7 @@ import (
 	"github.com/prometheus/prometheus/pkg/labels"
 )
 
-var (
-	emptyLabelsResult = NewLabelsResult(labels.Labels{}, labels.Labels{}.Hash())
-)
+var emptyLabelsResult = NewLabelsResult(labels.Labels{}, labels.Labels{}.Hash())
 
 // LabelsResult is a computed labels result that contains the labels set with associated string and hash.
 // The is mainly used for caching and returning labels computations out of pipelines and stages.
@@ -68,7 +66,7 @@ type BaseLabelsBuilder struct {
 	err string
 
 	groups            []string
-	parserKeyHints    []string // label key hints for metric queries that allows to limit parser extractions to only this list of labels.
+	parserKeyHints    ParserHint // label key hints for metric queries that allows to limit parser extractions to only this list of labels.
 	without, noLabels bool
 
 	resultCache map[uint64]LabelsResult
@@ -85,7 +83,7 @@ type LabelsBuilder struct {
 }
 
 // NewBaseLabelsBuilderWithGrouping creates a new base labels builder with grouping to compute results.
-func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints []string, without, noLabels bool) *BaseLabelsBuilder {
+func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints ParserHint, without, noLabels bool) *BaseLabelsBuilder {
 	return &BaseLabelsBuilder{
 		del:            make([]string, 0, 5),
 		add:            make([]labels.Label, 0, 16),
@@ -100,7 +98,7 @@ func NewBaseLabelsBuilderWithGrouping(groups []string, parserKeyHints []string,
 
 // NewLabelsBuilder creates a new base labels builder.
 func NewBaseLabelsBuilder() *BaseLabelsBuilder {
-	return NewBaseLabelsBuilderWithGrouping(nil, nil, false, false)
+	return NewBaseLabelsBuilderWithGrouping(nil, noParserHints, false, false)
 }
 
 // ForLabels creates a labels builder for a given labels set as base.
@@ -133,7 +131,7 @@ func (b *LabelsBuilder) Reset() {
 
 // ParserLabelHints returns a limited list of expected labels to extract for metric queries.
 // Returns nil when it's impossible to hint labels extractions.
-func (b *BaseLabelsBuilder) ParserLabelHints() []string {
+func (b *BaseLabelsBuilder) ParserLabelHints() ParserHint {
 	return b.parserKeyHints
 }
 
diff --git a/pkg/logql/log/metrics_extraction.go b/pkg/logql/log/metrics_extraction.go
index 1a5b446fdfbcd..3d95ed4cd0fb1 100644
--- a/pkg/logql/log/metrics_extraction.go
+++ b/pkg/logql/log/metrics_extraction.go
@@ -3,6 +3,7 @@ package log
 import (
 	"sort"
 	"strconv"
+	"strings"
 	"time"
 
 	"github.com/pkg/errors"
@@ -23,6 +24,7 @@ type LineExtractor func([]byte) float64
 var (
 	CountExtractor LineExtractor = func(line []byte) float64 { return 1. }
 	BytesExtractor LineExtractor = func(line []byte) float64 { return float64(len(line)) }
+	noParserHints                = &parserHint{}
 )
 
 // SampleExtractor creates StreamSampleExtractor that can extract samples for a given log stream.
@@ -47,17 +49,18 @@ type lineSampleExtractor struct {
 
 // NewLineSampleExtractor creates a SampleExtractor from a LineExtractor.
 // Multiple log stages are run before converting the log line.
-func NewLineSampleExtractor(ex LineExtractor, stages []Stage, groups []string, without bool, noLabels bool) (SampleExtractor, error) {
+func NewLineSampleExtractor(ex LineExtractor, stages []Stage, groups []string, without, noLabels bool) (SampleExtractor, error) {
 	s := ReduceStages(stages)
 	var expectedLabels []string
 	if !without {
 		expectedLabels = append(expectedLabels, s.RequiredLabelNames()...)
 		expectedLabels = uniqueString(append(expectedLabels, groups...))
 	}
+	hints := newParserHint(expectedLabels, without, noLabels, "")
 	return &lineSampleExtractor{
 		Stage:            s,
 		LineExtractor:    ex,
-		baseBuilder:      NewBaseLabelsBuilderWithGrouping(groups, expectedLabels, without, noLabels),
+		baseBuilder:      NewBaseLabelsBuilderWithGrouping(groups, hints, without, noLabels),
 		streamExtractors: make(map[uint64]StreamSampleExtractor),
 	}, nil
 }
@@ -118,7 +121,7 @@ type labelSampleExtractor struct {
 // to remove sample containing the __error__ label.
 func LabelExtractorWithStages(
 	labelName, conversion string,
-	groups []string, without bool, noLabels bool,
+	groups []string, without, noLabels bool,
 	preStages []Stage,
 	postFilter Stage,
 ) (SampleExtractor, error) {
@@ -147,12 +150,13 @@ func LabelExtractorWithStages(
 		expectedLabels = append(expectedLabels, labelName)
 		expectedLabels = uniqueString(expectedLabels)
 	}
+	hints := newParserHint(expectedLabels, without, noLabels, labelName)
 	return &labelSampleExtractor{
 		preStage:         preStage,
 		conversionFn:     convFn,
 		labelName:        labelName,
 		postFilter:       postFilter,
-		baseBuilder:      NewBaseLabelsBuilderWithGrouping(groups, expectedLabels, without, noLabels),
+		baseBuilder:      NewBaseLabelsBuilderWithGrouping(groups, hints, without, noLabels),
 		streamExtractors: make(map[uint64]StreamSampleExtractor),
 	}, nil
 }
@@ -226,3 +230,75 @@ func convertBytes(v string) (float64, error) {
 	}
 	return float64(b), nil
 }
+
+// ParserHint are hints given to LogQL parsers.
+// This is specially useful for parser that extract implicitly all possible label keys.
+// This is used only within metric queries since it's rare that you need all label keys.
+// For example in the following expression:
+//
+//		sum by (status_code) (rate({app="foo"} | json [5m]))
+//
+// All we need to extract is the status_code in the json parser.
+type ParserHint interface {
+	// Tells is a label with the given key should be extracted.
+	ShouldExtract(key string) bool
+	// Tells if there's any hint that start with the given prefix.
+	// This allows to speed up key searching in nested structured like json.
+	ShouldExtractPrefix(prefix string) bool
+	// Tells if we should not extract any labels.
+	// For example in :
+	//		 sum(rate({app="foo"} | json [5m]))
+	// We don't need to extract any labels from the log line.
+	NoLabels() bool
+}
+
+type parserHint struct {
+	noLabels       bool
+	requiredLabels []string
+}
+
+func (p *parserHint) ShouldExtract(key string) bool {
+	if len(p.requiredLabels) == 0 {
+		return true
+	}
+	for _, l := range p.requiredLabels {
+		if l == key {
+			return true
+		}
+	}
+	return false
+}
+
+func (p *parserHint) ShouldExtractPrefix(prefix string) bool {
+	if len(p.requiredLabels) == 0 {
+		return true
+	}
+	for _, l := range p.requiredLabels {
+		if strings.HasPrefix(l, prefix) {
+			return true
+		}
+	}
+
+	return false
+}
+
+func (p *parserHint) NoLabels() bool {
+	return p.noLabels
+}
+
+// newParserHint creates a new parser hint using the list of labels that are seen and required in a query.
+func newParserHint(requiredLabelNames []string, without, noLabels bool, metricLabelName string) *parserHint {
+	if noLabels {
+		// if we need to extract metric from a label, then use that as required labels list.
+		if metricLabelName != "" {
+			return &parserHint{requiredLabels: []string{metricLabelName}}
+		}
+		return &parserHint{noLabels: true}
+	}
+	// we don't know what is required when a without clause is used.
+	// no hint available
+	if without {
+		return noParserHints
+	}
+	return &parserHint{requiredLabels: requiredLabelNames}
+}
diff --git a/pkg/logql/log/parser.go b/pkg/logql/log/parser.go
index 47bd612c534f7..fe2fe030cb1e6 100644
--- a/pkg/logql/log/parser.go
+++ b/pkg/logql/log/parser.go
@@ -5,7 +5,6 @@ import (
 	"fmt"
 	"io"
 	"regexp"
-	"strings"
 
 	"github.com/grafana/loki/pkg/logql/log/logfmt"
 
@@ -41,6 +40,9 @@ func NewJSONParser() *JSONParser {
 }
 
 func (j *JSONParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) {
+	if lbs.ParserLabelHints().NoLabels() {
+		return line, true
+	}
 	it := jsoniter.ConfigFastest.BorrowIterator(line)
 	defer jsoniter.ConfigFastest.ReturnIterator(it)
 
@@ -91,7 +93,7 @@ func (j *JSONParser) nextKeyPrefix(prefix, field string) (string, bool) {
 	// first time we add return the field as prefix.
 	if len(prefix) == 0 {
 		field = sanitizeLabelKey(field, true)
-		if isValidKeyPrefix(field, j.lbs.ParserLabelHints()) {
+		if j.lbs.ParserLabelHints().ShouldExtractPrefix(field) {
 			return field, true
 		}
 		return "", false
@@ -102,31 +104,17 @@ func (j *JSONParser) nextKeyPrefix(prefix, field string) (string, bool) {
 	j.buf = append(j.buf, byte(jsonSpacer))
 	j.buf = append(j.buf, sanitizeLabelKey(field, false)...)
 	// if matches keep going
-	if isValidKeyPrefix(string(j.buf), j.lbs.ParserLabelHints()) {
+	if j.lbs.ParserLabelHints().ShouldExtractPrefix(string(j.buf)) {
 		return string(j.buf), true
 	}
 	return "", false
-
-}
-
-// isValidKeyPrefix extract an object if the prefix is valid
-func isValidKeyPrefix(objectprefix string, hints []string) bool {
-	if len(hints) == 0 {
-		return true
-	}
-	for _, k := range hints {
-		if strings.HasPrefix(k, objectprefix) {
-			return true
-		}
-	}
-	return false
 }
 
 func (j *JSONParser) parseLabelValue(iter *jsoniter.Iterator, prefix, field string) {
 	// the first time we use the field as label key.
 	if len(prefix) == 0 {
 		field = sanitizeLabelKey(field, true)
-		if !shouldExtractKey(field, j.lbs.ParserLabelHints()) {
+		if !j.lbs.ParserLabelHints().ShouldExtract(field) {
 			// we can skip the value
 			iter.Skip()
 			return
@@ -147,7 +135,7 @@ func (j *JSONParser) parseLabelValue(iter *jsoniter.Iterator, prefix, field stri
 	if j.lbs.BaseHas(string(j.buf)) {
 		j.buf = append(j.buf, duplicateSuffix...)
 	}
-	if !shouldExtractKey(string(j.buf), j.lbs.ParserLabelHints()) {
+	if !j.lbs.ParserLabelHints().ShouldExtract(string(j.buf)) {
 		iter.Skip()
 		return
 	}
@@ -173,18 +161,6 @@ func readValue(iter *jsoniter.Iterator) string {
 	}
 }
 
-func shouldExtractKey(key string, hints []string) bool {
-	if len(hints) == 0 {
-		return true
-	}
-	for _, k := range hints {
-		if k == key {
-			return true
-		}
-	}
-	return false
-}
-
 func addLabel(lbs *LabelsBuilder, key, value string) {
 	key = sanitizeLabelKey(key, true)
 	if lbs.BaseHas(key) {
@@ -263,9 +239,12 @@ func NewLogfmtParser() *LogfmtParser {
 }
 
 func (l *LogfmtParser) Process(line []byte, lbs *LabelsBuilder) ([]byte, bool) {
+	if lbs.ParserLabelHints().NoLabels() {
+		return line, true
+	}
 	l.dec.Reset(line)
 	for l.dec.ScanKeyval() {
-		if !shouldExtractKey(string(l.dec.Key()), lbs.ParserLabelHints()) {
+		if !lbs.ParserLabelHints().ShouldExtract(string(l.dec.Key())) {
 			continue
 		}
 		key := string(l.dec.Key())
diff --git a/pkg/logql/log/parser_test.go b/pkg/logql/log/parser_test.go
index d2b44940725d6..4847adf78e6ae 100644
--- a/pkg/logql/log/parser_test.go
+++ b/pkg/logql/log/parser_test.go
@@ -88,7 +88,6 @@ func Test_jsonParser_Parse(t *testing.T) {
 }
 
 func Benchmark_Parser(b *testing.B) {
-
 	lbs := labels.Labels{
 		{Name: "cluster", Value: "qa-us-central1"},
 		{Name: "namespace", Value: "qa"},
@@ -127,7 +126,7 @@ func Benchmark_Parser(b *testing.B) {
 
 			b.Run("labels hints", func(b *testing.B) {
 				builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash())
-				builder.parserKeyHints = tt.LabelParseHints
+				builder.parserKeyHints = newParserHint(tt.LabelParseHints, false, false, "")
 				for n := 0; n < b.N; n++ {
 					builder.Reset()
 					_, _ = tt.s.Process(line, builder)
@@ -135,7 +134,6 @@ func Benchmark_Parser(b *testing.B) {
 			})
 		})
 	}
-
 }
 
 func TestNewRegexpParser(t *testing.T) {