Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TraceQL] Fix subtly incorrect handling of second pass conditions #3734

Merged
merged 4 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* [FEATURE] Flush blocks to storage from the metrics-generator [#3628](https://github.com/grafana/tempo/pull/3628) [#3691](https://github.com/grafana/tempo/pull/3691) (@mapno)
* [ENHANCEMENT] Improve use of OTEL semantic conventions on the service graph [#3711](https://github.com/grafana/tempo/pull/3711) (@zalegrala)
* [ENHANCEMENT] Performance improvement for `rate() by ()` queries [#3719](https://github.com/grafana/tempo/pull/3719) (@mapno)
* [BUGFIX] Fix metrics queries when grouping by attributes that may not exist [#3734](https://github.com/grafana/tempo/pull/3734) (@mdisibio)

## v2.5.0-rc.1

Expand Down
48 changes: 7 additions & 41 deletions tempodb/encoding/vparquet2/block_traceql.go
Original file line number Diff line number Diff line change
Expand Up @@ -1255,31 +1255,22 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c
// matched upstream to a resource.
// TODO - After introducing AllConditions it seems like some of this logic overlaps.
// Determine if it can be generalized or simplified.
var (
// If there are only span conditions, then don't return a span upstream
// unless it matches at least 1 span-level condition.
spanRequireAtLeastOneMatch = len(spanConditions) > 0 && len(resourceConditions) == 0 && len(traceConditions) == 0

// If there are only resource conditions, then don't return a resource upstream
// unless it matches at least 1 resource-level condition.
batchRequireAtLeastOneMatch = len(spanConditions) == 0 && len(resourceConditions) > 0 && len(traceConditions) == 0

// Don't return the final spanset upstream unless it matched at least 1 condition
// anywhere, except in the case of the empty query: {}
batchRequireAtLeastOneMatchOverall = len(conds) > 0 && len(traceConditions) == 0 && len(traceConditions) == 0
)
// Don't return the final spanset upstream unless it matched at least 1 condition
// anywhere, except in the case of the empty query: {}
batchRequireAtLeastOneMatchOverall := len(conds) > 0 && len(traceConditions) == 0 && len(traceConditions) == 0

// Optimization for queries like {resource.x... && span.y ...}
// Requires no mingled scopes like .foo=x, which could be satisfied
// one either resource or span.
allConditions = allConditions && !mingledConditions

spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, spanRequireAtLeastOneMatch, allConditions)
spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, allConditions)
if err != nil {
return nil, fmt.Errorf("creating span iterator: %w", err)
}

resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatch, batchRequireAtLeastOneMatchOverall, allConditions)
resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatchOverall, allConditions)
if err != nil {
return nil, fmt.Errorf("creating resource iterator: %w", err)
}
Expand All @@ -1289,7 +1280,7 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c

// createSpanIterator iterates through all span-level columns, groups them into rows representing
// one span each. Spans are returned that match any of the given conditions.
func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, allConditions bool) (parquetquery.Iterator, error) {
func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, allConditions bool) (parquetquery.Iterator, error) {
var (
columnSelectAs = map[string]string{}
columnPredicates = map[string][]parquetquery.Predicate{}
Expand Down Expand Up @@ -1432,9 +1423,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
}

minCount := 0
if requireAtLeastOneMatch {
minCount = 1
}
if allConditions {
// The final number of expected attributes.
distinct := map[string]struct{}{}
Expand All @@ -1454,16 +1442,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
iters = nil
}

// This is an optimization for cases when allConditions is false, and
// only span conditions are present, and we require at least one of them to match.
// Wrap up the individual conditions with a union and move it into the required list.
// This skips over static columns like ID that are omnipresent. This is also only
// possible when there isn't a duration filter because it's computed from start/end.
if requireAtLeastOneMatch && len(iters) > 0 {
required = append(required, unionIfNeeded(DefinitionLevelResourceSpansILSSpan, iters, nil))
iters = nil
}

// if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column
// b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed
// how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here
Expand All @@ -1484,7 +1462,7 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
// createResourceIterator iterates through all resourcespans-level (batch-level) columns, groups them into rows representing
// one batch each. It builds on top of the span iterator, and turns the groups of spans and resource-level values into
// spansets. Spansets are returned that match any of the given conditions.
func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, requireAtLeastOneMatchOverall, allConditions bool) (parquetquery.Iterator, error) {
func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatchOverall, allConditions bool) (parquetquery.Iterator, error) {
var (
columnSelectAs = map[string]string{}
columnPredicates = map[string][]parquetquery.Predicate{}
Expand Down Expand Up @@ -1535,9 +1513,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
}

minCount := 0
if requireAtLeastOneMatch {
minCount = 1
}
if allConditions {
// The final number of expected attributes
distinct := map[string]struct{}{}
Expand All @@ -1557,15 +1532,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
iters = nil
}

// This is an optimization for cases when only resource conditions are
// present and we require at least one of them to match. Wrap
// up the individual conditions with a union and move it into the
// required list.
if requireAtLeastOneMatch && len(iters) > 0 {
required = append(required, unionIfNeeded(DefinitionLevelResourceSpans, iters, nil))
iters = nil
}

// Put span iterator last so it is only read when
// the resource conditions are met.
required = append(required, spanIterator)
Expand Down
48 changes: 7 additions & 41 deletions tempodb/encoding/vparquet3/block_traceql.go
Original file line number Diff line number Diff line change
Expand Up @@ -1406,31 +1406,22 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c
// matched upstream to a resource.
// TODO - After introducing AllConditions it seems like some of this logic overlaps.
// Determine if it can be generalized or simplified.
var (
// If there are only span conditions, then don't return a span upstream
// unless it matches at least 1 span-level condition.
spanRequireAtLeastOneMatch = len(spanConditions) > 0 && len(resourceConditions) == 0 && len(traceConditions) == 0

// If there are only resource conditions, then don't return a resource upstream
// unless it matches at least 1 resource-level condition.
batchRequireAtLeastOneMatch = len(spanConditions) == 0 && len(resourceConditions) > 0 && len(traceConditions) == 0

// Don't return the final spanset upstream unless it matched at least 1 condition
// anywhere, except in the case of the empty query: {}
batchRequireAtLeastOneMatchOverall = len(conds) > 0 && len(traceConditions) == 0 && len(traceConditions) == 0
)
// Don't return the final spanset upstream unless it matched at least 1 condition
// anywhere, except in the case of the empty query: {}
batchRequireAtLeastOneMatchOverall := len(conds) > 0 && len(traceConditions) == 0 && len(traceConditions) == 0

// Optimization for queries like {resource.x... && span.y ...}
// Requires no mingled scopes like .foo=x, which could be satisfied
// one either resource or span.
allConditions = allConditions && !mingledConditions

spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, spanRequireAtLeastOneMatch, allConditions, dc)
spanIter, err := createSpanIterator(makeIter, primaryIter, spanConditions, allConditions, dc)
if err != nil {
return nil, fmt.Errorf("creating span iterator: %w", err)
}

resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatch, batchRequireAtLeastOneMatchOverall, allConditions, dc)
resourceIter, err := createResourceIterator(makeIter, spanIter, resourceConditions, batchRequireAtLeastOneMatchOverall, allConditions, dc)
if err != nil {
return nil, fmt.Errorf("creating resource iterator: %w", err)
}
Expand All @@ -1440,7 +1431,7 @@ func createAllIterator(ctx context.Context, primaryIter parquetquery.Iterator, c

// createSpanIterator iterates through all span-level columns, groups them into rows representing
// one span each. Spans are returned that match any of the given conditions.
func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) {
func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator, conditions []traceql.Condition, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) {
var (
columnSelectAs = map[string]string{}
columnPredicates = map[string][]parquetquery.Predicate{}
Expand Down Expand Up @@ -1649,9 +1640,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
}

minCount := 0
if requireAtLeastOneMatch {
minCount = 1
}
if allConditions {
// The final number of expected attributes.
distinct := map[string]struct{}{}
Expand All @@ -1675,16 +1663,6 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
iters = nil
}

// This is an optimization for cases when allConditions is false, and
// only span conditions are present, and we require at least one of them to match.
// Wrap up the individual conditions with a union and move it into the required list.
// This skips over static columns like ID that are omnipresent. This is also only
// possible when there isn't a duration filter because it's computed from start/end.
if requireAtLeastOneMatch && len(iters) > 0 {
required = append(required, unionIfNeeded(DefinitionLevelResourceSpansILSSpan, iters, nil))
iters = nil
}

// if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column
// b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed
// how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here
Expand All @@ -1705,7 +1683,7 @@ func createSpanIterator(makeIter makeIterFn, primaryIter parquetquery.Iterator,
// createResourceIterator iterates through all resourcespans-level (batch-level) columns, groups them into rows representing
// one batch each. It builds on top of the span iterator, and turns the groups of spans and resource-level values into
// spansets. Spansets are returned that match any of the given conditions.
func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatch, requireAtLeastOneMatchOverall, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) {
func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Iterator, conditions []traceql.Condition, requireAtLeastOneMatchOverall, allConditions bool, dedicatedColumns backend.DedicatedColumns) (parquetquery.Iterator, error) {
var (
columnSelectAs = map[string]string{}
columnPredicates = map[string][]parquetquery.Predicate{}
Expand Down Expand Up @@ -1778,9 +1756,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
}

minCount := 0
if requireAtLeastOneMatch {
minCount = 1
}
if allConditions {
// The final number of expected attributes
distinct := map[string]struct{}{}
Expand All @@ -1800,15 +1775,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
iters = nil
}

// This is an optimization for cases when only resource conditions are
// present and we require at least one of them to match. Wrap
// up the individual conditions with a union and move it into the
// required list.
if requireAtLeastOneMatch && len(iters) > 0 {
required = append(required, unionIfNeeded(DefinitionLevelResourceSpans, iters, nil))
iters = nil
}

// Put span iterator last so it is only read when
// the resource conditions are met.
required = append(required, spanIterator)
Expand Down
21 changes: 11 additions & 10 deletions tempodb/encoding/vparquet3/block_traceql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -597,12 +597,12 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
ctx := context.TODO()
tenantID := "1"
// blockID := uuid.MustParse("00000c2f-8133-4a60-a62a-7748bd146938")
// blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")
blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
// blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")

r, _, _, err := local.New(&local.Config{
Path: path.Join("/Users/joe/testblock/"),
// Path: path.Join("/Users/marty/src/tmp"),
// Path: path.Join("/Users/joe/testblock/"),
Path: path.Join("/Users/marty/src/tmp"),
// Path: path.Join("/Users/mapno/workspace/testblock"),
})
require.NoError(b, err)
Expand Down Expand Up @@ -703,6 +703,8 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
"{} | rate() by (name)",
"{} | rate() by (resource.service.name)",
"{} | rate() by (span.http.url)", // High cardinality attribute
"{} | rate() by (span.foo)", // Nonexistent, all spans will be in the nil series
"{} | rate() by (resource.foo)", // Nonexistent, all spans will be in the nil series
"{resource.service.name=`loki-ingester`} | rate()",
"{status=error} | rate()",
}
Expand All @@ -712,17 +714,16 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
e = traceql.NewEngine()
opts = common.DefaultSearchOptions()
tenantID = "1"
// blockID = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
blockID = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
// blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
// blockID = uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")
// path = "/Users/joe/testblock/"
path = "/Users/mapno/workspace/testblock"
// path = "/Users/mapno/workspace/testblock"
path = "/Users/marty/src/tmp"
)

r, _, _, err := local.New(&local.Config{
Path: path,
// Path: path.Join("/Users/marty/src/tmp"),
// Path: path.Join("/Users/mapno/workspace/testblock"),
})
require.NoError(b, err)

Expand All @@ -741,7 +742,7 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {

for _, tc := range testCases {
b.Run(tc, func(b *testing.B) {
for _, minutes := range []int{5, 7} {
for _, minutes := range []int{3} {
b.Run(strconv.Itoa(minutes), func(b *testing.B) {
st := meta.StartTime
end := st.Add(time.Duration(minutes) * time.Minute)
Expand Down
Loading
Loading