diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d7f01b8228..e779298f666 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ * [FEATURE] Flush blocks to storage from the metrics-generator [#3628](https://github.com/grafana/tempo/pull/3628) [#3691](https://github.com/grafana/tempo/pull/3691) (@mapno) * [ENHANCEMENT] Improve use of OTEL semantic conventions on the service graph [#3711](https://github.com/grafana/tempo/pull/3711) (@zalegrala) * [ENHANCEMENT] Performance improvement for `rate() by ()` queries [#3719](https://github.com/grafana/tempo/pull/3719) (@mapno) +* [BUGFIX] Fix metrics queries when grouping by attributes that may not exist [#3734](https://github.com/grafana/tempo/pull/3734) (@mdisibio) ## v2.5.0-rc.1 diff --git a/tempodb/encoding/vparquet2/block_traceql.go b/tempodb/encoding/vparquet2/block_traceql.go index 5f3149aafd3..679f9484de2 100644 --- a/tempodb/encoding/vparquet2/block_traceql.go +++ b/tempodb/encoding/vparquet2/block_traceql.go @@ -1255,31 +1255,22 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c // matched upstream to a resource. // TODO - After introducing AllConditions it seems like some of this logic overlaps. // Determine if it can be generalized or simplified. - var ( - // If there are only span conditions, then don't return a span upstream - // unless it matches at least 1 span-level condition. - spanRequireAtLeastOneMatch = len(spanConditions) > 0 && len(resourceConditions) == 0 && len(traceConditions) == 0 - - // If there are only resource conditions, then don't return a resource upstream - // unless it matches at least 1 resource-level condition. - batchRequireAtLeastOneMatch = len(spanConditions) == 0 && len(resourceConditions) > 0 && len(traceConditions) == 0 - // Don't return the final spanset upstream unless it matched at least 1 condition - // anywhere, except in the case of the empty query: {} - batchRequireAtLeastOneMatchOverall = len(conds) > 0 && len(traceConditions) == 0 && len(traceConditions) == 0 - ) + // Don't return the final spanset upstream unless it matched at least 1 condition + // anywhere, except in the case of the empty query: {} + batchRequireAtLeastOneMatchOverall := len(conds) > 0 && len(traceConditions) == 0 // Optimization for queries like {resource.x... && span.y ...} // Requires no mingled scopes like .foo=x, which could be satisfied // one either resource or span. allConditions = allConditions && !mingledConditions - spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, spanRequireAtLeastOneMatch, allConditions) + spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, allConditions) if err != nil { return nil, fmt.Errorf("creating span iterator: %w", err) } - resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatch, batchRequireAtLeastOneMatchOverall, allConditions) + resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatchOverall, allConditions) if err != nil { return nil, fmt.Errorf("creating resource iterator: %w", err) } @@ -1289,7 +1280,7 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c // createSpanIterator iterates through all span-level columns, groups them into rows representing // one span each. Spans are returned that match any of the given conditions. -func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, allConditions bool) (parquetquery.Iterator, error) { +func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, allConditions bool) (parquetquery.Iterator, error) { var ( columnSelectAs = map[string]string{} columnPredicates = map[string][]parquetquery.Predicate{} @@ -1432,9 +1423,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, } minCount := 0 - if requireAtLeastOneMatch { - minCount = 1 - } if allConditions { // The final number of expected attributes. distinct := map[string]struct{}{} @@ -1454,16 +1442,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, iters = nil } - // This is an optimization for cases when allConditions is false, and - // only span conditions are present, and we require at least one of them to match. - // Wrap up the individual conditions with a union and move it into the required list. - // This skips over static columns like ID that are omnipresent. This is also only - // possible when there isn't a duration filter because it's computed from start/end. - if requireAtLeastOneMatch && len(iters) > 0 { - required = append(required, unionIfNeeded(DefinitionLevelResourceSpansILSSpan, iters, nil)) - iters = nil - } - // if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column // b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed // how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here @@ -1484,7 +1462,7 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, // createResourceIterator iterates through all resourcespans-level (batch-level) columns, groups them into rows representing // one batch each. It builds on top of the span iterator, and turns the groups of spans and resource-level values into // spansets. Spansets are returned that match any of the given conditions. -func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, requireAtLeastOneMatchOverall, allConditions bool) (parquetquery.Iterator, error) { +func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatchOverall, allConditions bool) (parquetquery.Iterator, error) { var ( columnSelectAs = map[string]string{} columnPredicates = map[string][]parquetquery.Predicate{} @@ -1535,9 +1513,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera } minCount := 0 - if requireAtLeastOneMatch { - minCount = 1 - } if allConditions { // The final number of expected attributes distinct := map[string]struct{}{} @@ -1557,15 +1532,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera iters = nil } - // This is an optimization for cases when only resource conditions are - // present and we require at least one of them to match. Wrap - // up the individual conditions with a union and move it into the - // required list. - if requireAtLeastOneMatch && len(iters) > 0 { - required = append(required, unionIfNeeded(DefinitionLevelResourceSpans, iters, nil)) - iters = nil - } - // Put span iterator last so it is only read when // the resource conditions are met. required = append(required, spanIterator) diff --git a/tempodb/encoding/vparquet3/block_traceql.go b/tempodb/encoding/vparquet3/block_traceql.go index 9e6cf1b61fd..6b4020b3372 100644 --- a/tempodb/encoding/vparquet3/block_traceql.go +++ b/tempodb/encoding/vparquet3/block_traceql.go @@ -1406,31 +1406,22 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c // matched upstream to a resource. // TODO - After introducing AllConditions it seems like some of this logic overlaps. // Determine if it can be generalized or simplified. - var ( - // If there are only span conditions, then don't return a span upstream - // unless it matches at least 1 span-level condition. - spanRequireAtLeastOneMatch = len(spanConditions) > 0 && len(resourceConditions) == 0 && len(traceConditions) == 0 - - // If there are only resource conditions, then don't return a resource upstream - // unless it matches at least 1 resource-level condition. - batchRequireAtLeastOneMatch = len(spanConditions) == 0 && len(resourceConditions) > 0 && len(traceConditions) == 0 - // Don't return the final spanset upstream unless it matched at least 1 condition - // anywhere, except in the case of the empty query: {} - batchRequireAtLeastOneMatchOverall = len(conds) > 0 && len(traceConditions) == 0 && len(traceConditions) == 0 - ) + // Don't return the final spanset upstream unless it matched at least 1 condition + // anywhere, except in the case of the empty query: {} + batchRequireAtLeastOneMatchOverall := len(conds) > 0 && len(traceConditions) == 0 // Optimization for queries like {resource.x... && span.y ...} // Requires no mingled scopes like .foo=x, which could be satisfied // one either resource or span. allConditions = allConditions && !mingledConditions - spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, spanRequireAtLeastOneMatch, allConditions, dc) + spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, allConditions, dc) if err != nil { return nil, fmt.Errorf("creating span iterator: %w", err) } - resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatch, batchRequireAtLeastOneMatchOverall, allConditions, dc) + resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatchOverall, allConditions, dc) if err != nil { return nil, fmt.Errorf("creating resource iterator: %w", err) } @@ -1440,7 +1431,7 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c // createSpanIterator iterates through all span-level columns, groups them into rows representing // one span each. Spans are returned that match any of the given conditions. -func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) { +func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) { var ( columnSelectAs = map[string]string{} columnPredicates = map[string][]parquetquery.Predicate{} @@ -1649,9 +1640,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, } minCount := 0 - if requireAtLeastOneMatch { - minCount = 1 - } if allConditions { // The final number of expected attributes. distinct := map[string]struct{}{} @@ -1675,16 +1663,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, iters = nil } - // This is an optimization for cases when allConditions is false, and - // only span conditions are present, and we require at least one of them to match. - // Wrap up the individual conditions with a union and move it into the required list. - // This skips over static columns like ID that are omnipresent. This is also only - // possible when there isn't a duration filter because it's computed from start/end. - if requireAtLeastOneMatch && len(iters) > 0 { - required = append(required, unionIfNeeded(DefinitionLevelResourceSpansILSSpan, iters, nil)) - iters = nil - } - // if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column // b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed // how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here @@ -1705,7 +1683,7 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, // createResourceIterator iterates through all resourcespans-level (batch-level) columns, groups them into rows representing // one batch each. It builds on top of the span iterator, and turns the groups of spans and resource-level values into // spansets. Spansets are returned that match any of the given conditions. -func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, requireAtLeastOneMatchOverall, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) { +func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatchOverall, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) { var ( columnSelectAs = map[string]string{} columnPredicates = map[string][]parquetquery.Predicate{} @@ -1778,9 +1756,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera } minCount := 0 - if requireAtLeastOneMatch { - minCount = 1 - } if allConditions { // The final number of expected attributes distinct := map[string]struct{}{} @@ -1800,15 +1775,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera iters = nil } - // This is an optimization for cases when only resource conditions are - // present and we require at least one of them to match. Wrap - // up the individual conditions with a union and move it into the - // required list. - if requireAtLeastOneMatch && len(iters) > 0 { - required = append(required, unionIfNeeded(DefinitionLevelResourceSpans, iters, nil)) - iters = nil - } - // Put span iterator last so it is only read when // the resource conditions are met. required = append(required, spanIterator) diff --git a/tempodb/encoding/vparquet3/block_traceql_test.go b/tempodb/encoding/vparquet3/block_traceql_test.go index 20b61ced4b9..6c11914bb40 100644 --- a/tempodb/encoding/vparquet3/block_traceql_test.go +++ b/tempodb/encoding/vparquet3/block_traceql_test.go @@ -597,12 +597,12 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) { ctx := context.TODO() tenantID := "1" // blockID := uuid.MustParse("00000c2f-8133-4a60-a62a-7748bd146938") - // blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5") - blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23") + blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5") + // blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23") r, _, _, err := local.New(&local.Config{ - Path: path.Join("/Users/joe/testblock/"), - // Path: path.Join("/Users/marty/src/tmp"), + // Path: path.Join("/Users/joe/testblock/"), + Path: path.Join("/Users/marty/src/tmp"), // Path: path.Join("/Users/mapno/workspace/testblock"), }) require.NoError(b, err) @@ -703,6 +703,8 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) { "{} | rate() by (name)", "{} | rate() by (resource.service.name)", "{} | rate() by (span.http.url)", // High cardinality attribute + "{} | rate() by (span.foo)", // Nonexistent, all spans will be in the nil series + "{} | rate() by (resource.foo)", // Nonexistent, all spans will be in the nil series "{resource.service.name=`loki-ingester`} | rate()", "{status=error} | rate()", } @@ -712,17 +714,16 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) { e = traceql.NewEngine() opts = common.DefaultSearchOptions() tenantID = "1" - // blockID = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5") - blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c") + blockID = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5") + // blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c") // blockID = uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23") // path = "/Users/joe/testblock/" - path = "/Users/mapno/workspace/testblock" + // path = "/Users/mapno/workspace/testblock" + path = "/Users/marty/src/tmp" ) r, _, _, err := local.New(&local.Config{ Path: path, - // Path: path.Join("/Users/marty/src/tmp"), - // Path: path.Join("/Users/mapno/workspace/testblock"), }) require.NoError(b, err) @@ -741,7 +742,7 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) { for _, tc := range testCases { b.Run(tc, func(b *testing.B) { - for _, minutes := range []int{5, 7} { + for _, minutes := range []int{3} { b.Run(strconv.Itoa(minutes), func(b *testing.B) { st := meta.StartTime end := st.Add(time.Duration(minutes) * time.Minute) diff --git a/tempodb/encoding/vparquet4/block_traceql.go b/tempodb/encoding/vparquet4/block_traceql.go index 4068fe0a424..98e5ce19b09 100644 --- a/tempodb/encoding/vparquet4/block_traceql.go +++ b/tempodb/encoding/vparquet4/block_traceql.go @@ -1455,19 +1455,10 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c // matched upstream to a resource. // TODO - After introducing AllConditions it seems like some of this logic overlaps. // Determine if it can be generalized or simplified. - var ( - // If there are only span conditions, then don't return a span upstream - // unless it matches at least 1 span-level condition. - spanRequireAtLeastOneMatch = len(spanConditions) > 0 && len(resourceConditions) == 0 && len(traceConditions) == 0 - - // If there are only resource conditions, then don't return a resource upstream - // unless it matches at least 1 resource-level condition. - batchRequireAtLeastOneMatch = len(spanConditions) == 0 && len(resourceConditions) > 0 && len(traceConditions) == 0 - // Don't return the final spanset upstream unless it matched at least 1 condition - // anywhere, except in the case of the empty query: {} - batchRequireAtLeastOneMatchOverall = len(conds) > 0 && len(traceConditions) == 0 && len(traceConditions) == 0 - ) + // Don't return the final spanset upstream unless it matched at least 1 condition + // anywhere, except in the case of the empty query: {} + batchRequireAtLeastOneMatchOverall := len(conds) > 0 && len(traceConditions) == 0 // Optimization for queries like {resource.x... && span.y ...} // Requires no mingled scopes like .foo=x, which could be satisfied @@ -1479,12 +1470,12 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c return nil, fmt.Errorf("creating event iterator: %w", err) } - spanIter, err := createSpanIterator(makeIter, eventIter, spanConditions, spanRequireAtLeastOneMatch, allConditions, dc) + spanIter, err := createSpanIterator(makeIter, eventIter, spanConditions, allConditions, dc) if err != nil { return nil, fmt.Errorf("creating span iterator: %w", err) } - resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatch, batchRequireAtLeastOneMatchOverall, allConditions, dc) + resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatchOverall, allConditions, dc) if err != nil { return nil, fmt.Errorf("creating resource iterator: %w", err) } @@ -1519,7 +1510,7 @@ func createEventIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, // createSpanIterator iterates through all span-level columns, groups them into rows representing // one span each. Spans are returned that match any of the given conditions. -func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) { +func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) { var ( columnSelectAs = map[string]string{} columnPredicates = map[string][]parquetquery.Predicate{} @@ -1728,9 +1719,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, } minCount := 0 - if requireAtLeastOneMatch { - minCount = 1 - } if allConditions { // The final number of expected attributes. distinct := map[string]struct{}{} @@ -1754,16 +1742,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, iters = nil } - // This is an optimization for cases when allConditions is false, and - // only span conditions are present, and we require at least one of them to match. - // Wrap up the individual conditions with a union and move it into the required list. - // This skips over static columns like ID that are omnipresent. This is also only - // possible when there isn't a duration filter because it's computed from start/end. - if requireAtLeastOneMatch && len(iters) > 0 { - required = append(required, unionIfNeeded(DefinitionLevelResourceSpansILSSpan, iters, nil)) - iters = nil - } - // if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column // b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed // how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here @@ -1784,7 +1762,7 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, // createResourceIterator iterates through all resourcespans-level (batch-level) columns, groups them into rows representing // one batch each. It builds on top of the span iterator, and turns the groups of spans and resource-level values into // spansets. Spansets are returned that match any of the given conditions. -func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, requireAtLeastOneMatchOverall, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) { +func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatchOverall, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) { var ( columnSelectAs = map[string]string{} columnPredicates = map[string][]parquetquery.Predicate{} @@ -1857,9 +1835,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera } minCount := 0 - if requireAtLeastOneMatch { - minCount = 1 - } if allConditions { // The final number of expected attributes distinct := map[string]struct{}{} @@ -1879,15 +1854,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera iters = nil } - // This is an optimization for cases when only resource conditions are - // present and we require at least one of them to match. Wrap - // up the individual conditions with a union and move it into the - // required list. - if requireAtLeastOneMatch && len(iters) > 0 { - required = append(required, unionIfNeeded(DefinitionLevelResourceSpans, iters, nil)) - iters = nil - } - // Put span iterator last so it is only read when // the resource conditions are met. required = append(required, spanIterator)