Skip to content

Commit

Permalink
[TraceQL] Fix subtly incorrect handling of second pass conditions (#3734
Browse files Browse the repository at this point in the history
)

* Fix handling of second pass conditions for non-existent attributes which were unintentionally having an existence check on them

* cleanup commented out portion

* changelog

* cleanup duplicate clause
  • Loading branch information
mdisibio authored May 31, 2024
1 parent f0554c6 commit 4e85a50
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 133 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* [ENHANCEMENT] Tag value lookup use protobuf internally for improved latency [#3731](https://github.com/grafana/tempo/pull/3731) (@mdisibio)
* [ENHANCEMENT] Improve use of OTEL semantic conventions on the service graph [#3711](https://github.com/grafana/tempo/pull/3711) (@zalegrala)
* [ENHANCEMENT] Performance improvement for `rate() by ()` queries [#3719](https://github.com/grafana/tempo/pull/3719) (@mapno)
* [BUGFIX] Fix metrics queries when grouping by attributes that may not exist [#3734](https://github.com/grafana/tempo/pull/3734) (@mdisibio)

## v2.5.0-rc.1

Expand Down
48 changes: 7 additions & 41 deletions tempodb/encoding/vparquet2/block_traceql.go
Original file line number Diff line number Diff line change
Expand Up @@ -1255,31 +1255,22 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c
// matched upstream to a resource.
// TODO - After introducing AllConditions it seems like some of this logic overlaps.
// Determine if it can be generalized or simplified.
var (
// If there are only span conditions, then don't return a span upstream
// unless it matches at least 1 span-level condition.
spanRequireAtLeastOneMatch = len(spanConditions) > 0 && len(resourceConditions) == 0 && len(traceConditions) == 0

// If there are only resource conditions, then don't return a resource upstream
// unless it matches at least 1 resource-level condition.
batchRequireAtLeastOneMatch = len(spanConditions) == 0 && len(resourceConditions) > 0 && len(traceConditions) == 0

// Don't return the final spanset upstream unless it matched at least 1 condition
// anywhere, except in the case of the empty query: {}
batchRequireAtLeastOneMatchOverall = len(conds) > 0 && len(traceConditions) == 0 && len(traceConditions) == 0
)
// Don't return the final spanset upstream unless it matched at least 1 condition
// anywhere, except in the case of the empty query: {}
batchRequireAtLeastOneMatchOverall := len(conds) > 0 && len(traceConditions) == 0

// Optimization for queries like {resource.x... && span.y ...}
// Requires no mingled scopes like .foo=x, which could be satisfied
// one either resource or span.
allConditions = allConditions && !mingledConditions

spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, spanRequireAtLeastOneMatch, allConditions)
spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, allConditions)
if err != nil {
return nil, fmt.Errorf("creating span iterator: %w", err)
}

resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatch, batchRequireAtLeastOneMatchOverall, allConditions)
resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatchOverall, allConditions)
if err != nil {
return nil, fmt.Errorf("creating resource iterator: %w", err)
}
Expand All @@ -1289,7 +1280,7 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c

// createSpanIterator iterates through all span-level columns, groups them into rows representing
// one span each. Spans are returned that match any of the given conditions.
func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, allConditions bool) (parquetquery.Iterator, error) {
func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, allConditions bool) (parquetquery.Iterator, error) {
var (
columnSelectAs = map[string]string{}
columnPredicates = map[string][]parquetquery.Predicate{}
Expand Down Expand Up @@ -1432,9 +1423,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
}

minCount := 0
if requireAtLeastOneMatch {
minCount = 1
}
if allConditions {
// The final number of expected attributes.
distinct := map[string]struct{}{}
Expand All @@ -1454,16 +1442,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
iters = nil
}

// This is an optimization for cases when allConditions is false, and
// only span conditions are present, and we require at least one of them to match.
// Wrap up the individual conditions with a union and move it into the required list.
// This skips over static columns like ID that are omnipresent. This is also only
// possible when there isn't a duration filter because it's computed from start/end.
if requireAtLeastOneMatch && len(iters) > 0 {
required = append(required, unionIfNeeded(DefinitionLevelResourceSpansILSSpan, iters, nil))
iters = nil
}

// if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column
// b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed
// how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here
Expand All @@ -1484,7 +1462,7 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
// createResourceIterator iterates through all resourcespans-level (batch-level) columns, groups them into rows representing
// one batch each. It builds on top of the span iterator, and turns the groups of spans and resource-level values into
// spansets. Spansets are returned that match any of the given conditions.
func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, requireAtLeastOneMatchOverall, allConditions bool) (parquetquery.Iterator, error) {
func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatchOverall, allConditions bool) (parquetquery.Iterator, error) {
var (
columnSelectAs = map[string]string{}
columnPredicates = map[string][]parquetquery.Predicate{}
Expand Down Expand Up @@ -1535,9 +1513,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
}

minCount := 0
if requireAtLeastOneMatch {
minCount = 1
}
if allConditions {
// The final number of expected attributes
distinct := map[string]struct{}{}
Expand All @@ -1557,15 +1532,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
iters = nil
}

// This is an optimization for cases when only resource conditions are
// present and we require at least one of them to match. Wrap
// up the individual conditions with a union and move it into the
// required list.
if requireAtLeastOneMatch && len(iters) > 0 {
required = append(required, unionIfNeeded(DefinitionLevelResourceSpans, iters, nil))
iters = nil
}

// Put span iterator last so it is only read when
// the resource conditions are met.
required = append(required, spanIterator)
Expand Down
48 changes: 7 additions & 41 deletions tempodb/encoding/vparquet3/block_traceql.go
Original file line number Diff line number Diff line change
Expand Up @@ -1406,31 +1406,22 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c
// matched upstream to a resource.
// TODO - After introducing AllConditions it seems like some of this logic overlaps.
// Determine if it can be generalized or simplified.
var (
// If there are only span conditions, then don't return a span upstream
// unless it matches at least 1 span-level condition.
spanRequireAtLeastOneMatch = len(spanConditions) > 0 && len(resourceConditions) == 0 && len(traceConditions) == 0

// If there are only resource conditions, then don't return a resource upstream
// unless it matches at least 1 resource-level condition.
batchRequireAtLeastOneMatch = len(spanConditions) == 0 && len(resourceConditions) > 0 && len(traceConditions) == 0

// Don't return the final spanset upstream unless it matched at least 1 condition
// anywhere, except in the case of the empty query: {}
batchRequireAtLeastOneMatchOverall = len(conds) > 0 && len(traceConditions) == 0 && len(traceConditions) == 0
)
// Don't return the final spanset upstream unless it matched at least 1 condition
// anywhere, except in the case of the empty query: {}
batchRequireAtLeastOneMatchOverall := len(conds) > 0 && len(traceConditions) == 0

// Optimization for queries like {resource.x... && span.y ...}
// Requires no mingled scopes like .foo=x, which could be satisfied
// one either resource or span.
allConditions = allConditions && !mingledConditions

spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, spanRequireAtLeastOneMatch, allConditions, dc)
spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, allConditions, dc)
if err != nil {
return nil, fmt.Errorf("creating span iterator: %w", err)
}

resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatch, batchRequireAtLeastOneMatchOverall, allConditions, dc)
resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatchOverall, allConditions, dc)
if err != nil {
return nil, fmt.Errorf("creating resource iterator: %w", err)
}
Expand All @@ -1440,7 +1431,7 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c

// createSpanIterator iterates through all span-level columns, groups them into rows representing
// one span each. Spans are returned that match any of the given conditions.
func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) {
func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) {
var (
columnSelectAs = map[string]string{}
columnPredicates = map[string][]parquetquery.Predicate{}
Expand Down Expand Up @@ -1649,9 +1640,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
}

minCount := 0
if requireAtLeastOneMatch {
minCount = 1
}
if allConditions {
// The final number of expected attributes.
distinct := map[string]struct{}{}
Expand All @@ -1675,16 +1663,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
iters = nil
}

// This is an optimization for cases when allConditions is false, and
// only span conditions are present, and we require at least one of them to match.
// Wrap up the individual conditions with a union and move it into the required list.
// This skips over static columns like ID that are omnipresent. This is also only
// possible when there isn't a duration filter because it's computed from start/end.
if requireAtLeastOneMatch && len(iters) > 0 {
required = append(required, unionIfNeeded(DefinitionLevelResourceSpansILSSpan, iters, nil))
iters = nil
}

// if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column
// b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed
// how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here
Expand All @@ -1705,7 +1683,7 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
// createResourceIterator iterates through all resourcespans-level (batch-level) columns, groups them into rows representing
// one batch each. It builds on top of the span iterator, and turns the groups of spans and resource-level values into
// spansets. Spansets are returned that match any of the given conditions.
func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, requireAtLeastOneMatchOverall, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) {
func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatchOverall, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) {
var (
columnSelectAs = map[string]string{}
columnPredicates = map[string][]parquetquery.Predicate{}
Expand Down Expand Up @@ -1778,9 +1756,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
}

minCount := 0
if requireAtLeastOneMatch {
minCount = 1
}
if allConditions {
// The final number of expected attributes
distinct := map[string]struct{}{}
Expand All @@ -1800,15 +1775,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
iters = nil
}

// This is an optimization for cases when only resource conditions are
// present and we require at least one of them to match. Wrap
// up the individual conditions with a union and move it into the
// required list.
if requireAtLeastOneMatch && len(iters) > 0 {
required = append(required, unionIfNeeded(DefinitionLevelResourceSpans, iters, nil))
iters = nil
}

// Put span iterator last so it is only read when
// the resource conditions are met.
required = append(required, spanIterator)
Expand Down
21 changes: 11 additions & 10 deletions tempodb/encoding/vparquet3/block_traceql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -597,12 +597,12 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
ctx := context.TODO()
tenantID := "1"
// blockID := uuid.MustParse("00000c2f-8133-4a60-a62a-7748bd146938")
// blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")
blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
// blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")

r, _, _, err := local.New(&local.Config{
Path: path.Join("/Users/joe/testblock/"),
// Path: path.Join("/Users/marty/src/tmp"),
// Path: path.Join("/Users/joe/testblock/"),
Path: path.Join("/Users/marty/src/tmp"),
// Path: path.Join("/Users/mapno/workspace/testblock"),
})
require.NoError(b, err)
Expand Down Expand Up @@ -703,6 +703,8 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
"{} | rate() by (name)",
"{} | rate() by (resource.service.name)",
"{} | rate() by (span.http.url)", // High cardinality attribute
"{} | rate() by (span.foo)", // Nonexistent, all spans will be in the nil series
"{} | rate() by (resource.foo)", // Nonexistent, all spans will be in the nil series
"{resource.service.name=`loki-ingester`} | rate()",
"{status=error} | rate()",
}
Expand All @@ -712,17 +714,16 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
e = traceql.NewEngine()
opts = common.DefaultSearchOptions()
tenantID = "1"
// blockID = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
blockID = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
// blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
// blockID = uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")
// path = "/Users/joe/testblock/"
path = "/Users/mapno/workspace/testblock"
// path = "/Users/mapno/workspace/testblock"
path = "/Users/marty/src/tmp"
)

r, _, _, err := local.New(&local.Config{
Path: path,
// Path: path.Join("/Users/marty/src/tmp"),
// Path: path.Join("/Users/mapno/workspace/testblock"),
})
require.NoError(b, err)

Expand All @@ -741,7 +742,7 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {

for _, tc := range testCases {
b.Run(tc, func(b *testing.B) {
for _, minutes := range []int{5, 7} {
for _, minutes := range []int{3} {
b.Run(strconv.Itoa(minutes), func(b *testing.B) {
st := meta.StartTime
end := st.Add(time.Duration(minutes) * time.Minute)
Expand Down
Loading

0 comments on commit 4e85a50

Please sign in to comment.