From f626e512952972db9d798d988eebcf49debd3e16 Mon Sep 17 00:00:00 2001 From: sumeerbhola Date: Mon, 18 May 2020 17:14:12 -0400 Subject: [PATCH] rowexec: add batchedInvertedExprEvaluator for inverted index execution This is for evaluating invertedexpr.SpanExpressionProto, both for join queries and single table filtering, using an inverted index. More usage context can be found in #48019 which contains an invertedJoiner that uses an earlier version of this evaluator. Release note: None --- pkg/sql/rowexec/inverted_expr_evaluator.go | 447 ++++++++++++++++++ .../rowexec/inverted_expr_evaluator_test.go | 316 +++++++++++++ 2 files changed, 763 insertions(+) create mode 100644 pkg/sql/rowexec/inverted_expr_evaluator.go create mode 100644 pkg/sql/rowexec/inverted_expr_evaluator_test.go diff --git a/pkg/sql/rowexec/inverted_expr_evaluator.go b/pkg/sql/rowexec/inverted_expr_evaluator.go new file mode 100644 index 000000000000..c911debafa8a --- /dev/null +++ b/pkg/sql/rowexec/inverted_expr_evaluator.go @@ -0,0 +1,447 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package rowexec + +import ( + "bytes" + "sort" + + "github.com/cockroachdb/cockroach/pkg/sql/opt/invertedexpr" +) + +// The abstractions in this file help with evaluating (batches of) +// invertedexpr.SpanExpression. The spans in a SpanExpression represent spans +// of an inverted index, which consists of an inverted column followed by the +// primary key of the table. The set expressions involve union and +// intersection over operands. The operands are sets of primary keys contained +// in the corresponding span. Callers should use batchedInvertedExprEvaluator. +// This evaluator does not do the actual scan -- it is fed the set elements as +// the inverted index is scanned, and routes a set element to all the sets to +// which it belongs (since spans can be overlapping). Once the scan is +// complete, the expressions are evaluated. + +// KeyIndex is used as a set element. It is already de-duped. +type KeyIndex int + +// setContainer is a set of key indexes in increasing order. +type setContainer []KeyIndex + +func (s setContainer) Len() int { + return len(s) +} + +func (s setContainer) Less(i, j int) bool { + return s[i] < s[j] +} + +func (s setContainer) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +func unionSetContainers(a, b setContainer) setContainer { + if len(a) == 0 { + return b + } + if len(b) == 0 { + return a + } + var out setContainer + var i, j int + for i < len(a) && j < len(b) { + if a[i] < b[j] { + out = append(out, a[i]) + i++ + } else if a[i] > b[j] { + out = append(out, b[j]) + j++ + } else { + out = append(out, a[i]) + i++ + j++ + } + } + for ; i < len(a); i++ { + out = append(out, a[i]) + } + for ; j < len(b); j++ { + out = append(out, b[j]) + } + return out +} + +func intersectSetContainers(a, b setContainer) setContainer { + var out setContainer + var i, j int + // TODO(sumeer): when one set is much larger than the other + // it is more efficient to iterate over the smaller set + // and seek into the larger set. + for i < len(a) && j < len(b) { + if a[i] < b[j] { + i++ + } else if a[i] > b[j] { + j++ + } else { + out = append(out, a[i]) + i++ + j++ + } + } + return out +} + +// setExpression follows the structure of SpanExpression. +type setExpression struct { + op invertedexpr.SetOperator + // The index in invertedExprEvaluator.sets + unionSetIndex int + left *setExpression + right *setExpression +} + +type invertedSpan = invertedexpr.SpanExpressionProto_Span +type spanExpression = invertedexpr.SpanExpressionProto_Node + +// The spans in a SpanExpression.FactoredUnionSpans and the corresponding index +// in invertedExprEvaluator.sets. Only populated when FactoredUnionsSpans is +// non-empty. +type spansAndSetIndex struct { + spans []invertedSpan + setIndex int +} + +// invertedExprEvaluator evaluates a single expression. It should not be directly +// used -- see batchedInvertedExprEvaluator. +type invertedExprEvaluator struct { + setExpr *setExpression + // These are initially populated by calls to addIndexRow() as + // the inverted index is scanned. + sets []setContainer + + spansIndex []spansAndSetIndex +} + +func newInvertedExprEvaluator(expr *spanExpression) *invertedExprEvaluator { + eval := &invertedExprEvaluator{} + eval.setExpr = eval.initSetExpr(expr) + return eval +} + +func (ev *invertedExprEvaluator) initSetExpr(expr *spanExpression) *setExpression { + // Assign it an index even if FactoredUnionSpans is empty, since we will + // need it when evaluating. + i := len(ev.sets) + ev.sets = append(ev.sets, nil) + sx := &setExpression{op: expr.Operator, unionSetIndex: i} + if len(expr.FactoredUnionSpans) > 0 { + ev.spansIndex = append(ev.spansIndex, + spansAndSetIndex{spans: expr.FactoredUnionSpans, setIndex: i}) + } + if expr.Left != nil { + sx.left = ev.initSetExpr(expr.Left) + } + if expr.Right != nil { + sx.right = ev.initSetExpr(expr.Right) + } + return sx +} + +// getSpansAndSetIndex returns the spans and corresponding set indexes for +// this expression. The spans are not in sorted order and can be overlapping. +func (ev *invertedExprEvaluator) getSpansAndSetIndex() []spansAndSetIndex { + return ev.spansIndex +} + +// Adds a row to the given set. KeyIndexes are not added in increasing order, +// nor do they represent any ordering of the primary key of the table whose +// inverted index is being read. Also, the same KeyIndex could be added +// repeatedly to a set. +func (ev *invertedExprEvaluator) addIndexRow(setIndex int, keyIndex KeyIndex) { + // If duplicates in a set become a memory problem in this build phase, we + // could do periodic de-duplication as we go. For now, we simply append to + // the slice and de-dup at the start of evaluate(). + ev.sets[setIndex] = append(ev.sets[setIndex], keyIndex) +} + +// Evaluates the expression. The return value is in increasing order +// of KeyIndex. +func (ev *invertedExprEvaluator) evaluate() []KeyIndex { + // Sort and de-dup the sets so that we can efficiently do set operations. + for i, c := range ev.sets { + if len(c) == 0 { + continue + } + sort.Sort(c) + // De-duplicate + set := c[:0] + for j := range c { + if len(set) > 0 && c[j] == set[len(set)-1] { + continue + } + set = append(set, c[j]) + } + ev.sets[i] = set + } + return ev.evaluateSetExpr(ev.setExpr) +} + +func (ev *invertedExprEvaluator) evaluateSetExpr(sx *setExpression) setContainer { + var left, right setContainer + if sx.left != nil { + left = ev.evaluateSetExpr(sx.left) + } + if sx.right != nil { + right = ev.evaluateSetExpr(sx.right) + } + var childrenSet setContainer + switch sx.op { + case invertedexpr.SetUnion: + childrenSet = unionSetContainers(left, right) + case invertedexpr.SetIntersection: + childrenSet = intersectSetContainers(left, right) + } + return unionSetContainers(ev.sets[sx.unionSetIndex], childrenSet) +} + +// Supporting struct for invertedSpanRoutingInfo. +type exprAndSetIndex struct { + // An index into batchedInvertedExprEvaluator.exprEvals. + exprIndex int + // An index into batchedInvertedExprEvaluator.exprEvals[exprIndex].sets. + setIndex int +} + +// invertedSpanRoutingInfo contains the list of exprAndSetIndex pairs that +// need rows from the inverted index span. A []invertedSpanRoutingInfo with +// spans that are sorted and non-overlapping is used to route an added row to +// all the expressions and sets that need that row. +type invertedSpanRoutingInfo struct { + span invertedSpan + exprAndSetIndexList []exprAndSetIndex +} + +// batchedInvertedExprEvaluator is for evaluating one or more expressions. The +// batched evaluator can be reused by calling reset(). In the build phase, +// append expressions directly to exprs. A nil expression is permitted, and is +// just a placeholder that will result in a nil []KeyIndex in evaluate(). +// getSpans() must be called before calls to addIndexRow() even if the result +// of getSpans() is not needed -- it builds the fragmentedSpans used for +// routing the added rows. +type batchedInvertedExprEvaluator struct { + exprs []*invertedexpr.SpanExpressionProto + // The evaluators for all the exprs. + exprEvals []*invertedExprEvaluator + // Spans here are in sorted order and non-overlapping. + fragmentedSpans []invertedSpanRoutingInfo + + // Temporary state used for constructing fragmentedSpans. All spans here + // have the same start key. They are not sorted by end key. + pendingSpans []invertedSpanRoutingInfo +} + +// Helper used in building fragmentedSpans using pendingSpans. pendingSpans +// contains spans with the same start key. This fragments and removes all +// spans up to end key fragmentUntil (or all spans if fragmentUntil == nil). +// +// Example 1: +// pendingSpans contains +// c---g +// c-----i +// c--e +// +// And fragmentUntil = i. Since end keys are exclusive we can fragment and +// remove all spans in pendingSpans. These will be: +// c-e-g +// c-e-g-i +// c-e +// +// For the c-e span, all the exprAndSetIndexList slices for these spans are +// appended since any row in that span needs to be routed to all these +// expressions and sets. For the e-g span only the exprAndSetIndexList slices +// for the top two spans are unioned. +// +// Example 2: +// +// Same pendingSpans, and fragmentUntil = f. The fragments that are generated +// for fragmentedSpans and the remaining spans in pendingSpans are: +// +// fragments remaining +// c-e-f f-g +// c-e-f f-i +// c-e +func (b *batchedInvertedExprEvaluator) fragmentPendingSpans( + fragmentUntil invertedexpr.EncInvertedVal, +) { + // The start keys are the same, so this only sorts in increasing + // order of end keys. + sort.Slice(b.pendingSpans, func(i, j int) bool { + return bytes.Compare(b.pendingSpans[i].span.End, b.pendingSpans[j].span.End) < 0 + }) + for len(b.pendingSpans) > 0 { + if fragmentUntil != nil && bytes.Compare(fragmentUntil, b.pendingSpans[0].span.Start) <= 0 { + break + } + // The prefix of pendingSpans that will be completely consumed when + // the next fragment is constructed. + var removeSize int + // The end of the next fragment. + var end invertedexpr.EncInvertedVal + // The start of the fragment after the next fragment. + var nextStart invertedexpr.EncInvertedVal + if fragmentUntil != nil && bytes.Compare(fragmentUntil, b.pendingSpans[0].span.End) < 0 { + // Can't completely remove any spans from pendingSpans, but a prefix + // of these spans will be removed + removeSize = 0 + end = fragmentUntil + nextStart = end + } else { + // We can remove all spans whose end key is the same as span[0]. + // The end of span[0] is also the end key of this fragment. + removeSize = b.pendingLenWithSameEnd() + end = b.pendingSpans[0].span.End + nextStart = end + } + // The next span to be added to fragmentedSpans. + nextSpan := invertedSpanRoutingInfo{ + span: invertedSpan{ + Start: b.pendingSpans[0].span.Start, + End: end, + }, + } + for i := 0; i < len(b.pendingSpans); i++ { + if i >= removeSize { + // This span is not completely removed so adjust its start. + b.pendingSpans[i].span.Start = nextStart + } + // All spans in pendingSpans contribute to exprAndSetIndexList. + nextSpan.exprAndSetIndexList = + append(nextSpan.exprAndSetIndexList, b.pendingSpans[i].exprAndSetIndexList...) + } + b.fragmentedSpans = append(b.fragmentedSpans, nextSpan) + b.pendingSpans = b.pendingSpans[removeSize:] + if removeSize == 0 { + // fragmentUntil was earlier than the smallest End key in the pending + // spans, so cannot fragment any more. + break + } + } +} + +func (b *batchedInvertedExprEvaluator) pendingLenWithSameEnd() int { + length := 1 + for i := 1; i < len(b.pendingSpans); i++ { + if !bytes.Equal(b.pendingSpans[0].span.End, b.pendingSpans[i].span.End) { + break + } + length++ + } + return length +} + +// getSpans fragments the spans for later routing of rows and returns spans +// representing a union of all the spans (for executing the scan). +func (b *batchedInvertedExprEvaluator) getSpans() []invertedSpan { + if cap(b.exprEvals) < len(b.exprs) { + b.exprEvals = make([]*invertedExprEvaluator, len(b.exprs)) + } else { + b.exprEvals = b.exprEvals[:len(b.exprs)] + } + // Initial spans fetched from all expressions. + var routingSpans []invertedSpanRoutingInfo + for i, expr := range b.exprs { + if expr == nil { + b.exprEvals[i] = nil + continue + } + b.exprEvals[i] = newInvertedExprEvaluator(&expr.Node) + exprSpans := b.exprEvals[i].getSpansAndSetIndex() + for _, spans := range exprSpans { + for _, span := range spans.spans { + routingSpans = append(routingSpans, + invertedSpanRoutingInfo{ + span: span, + exprAndSetIndexList: []exprAndSetIndex{{exprIndex: i, setIndex: spans.setIndex}}, + }, + ) + } + } + } + if len(routingSpans) == 0 { + return nil + } + + // Sort the routingSpans in increasing order of start key, and for equal + // start keys in increasing order of end key. + sort.Slice(routingSpans, func(i, j int) bool { + cmp := bytes.Compare(routingSpans[i].span.Start, routingSpans[j].span.Start) + if cmp == 0 { + cmp = bytes.Compare(routingSpans[i].span.End, routingSpans[j].span.End) + } + return cmp < 0 + }) + + // The union of the spans, which is returned from this function. + var coveringSpans []invertedSpan + currentCoveringSpan := routingSpans[0].span + b.pendingSpans = append(b.pendingSpans, routingSpans[0]) + // This loop does both the union of the routingSpans and fragments the + // routingSpans. + for i := 1; i < len(routingSpans); i++ { + span := routingSpans[i] + if bytes.Compare(b.pendingSpans[0].span.Start, span.span.Start) < 0 { + b.fragmentPendingSpans(span.span.Start) + if bytes.Compare(currentCoveringSpan.End, span.span.Start) < 0 { + coveringSpans = append(coveringSpans, currentCoveringSpan) + currentCoveringSpan = span.span + } else if bytes.Compare(currentCoveringSpan.End, span.span.End) < 0 { + currentCoveringSpan.End = span.span.End + } + } else if bytes.Compare(currentCoveringSpan.End, span.span.End) < 0 { + currentCoveringSpan.End = span.span.End + } + // Add this span to the pending list. + b.pendingSpans = append(b.pendingSpans, span) + } + b.fragmentPendingSpans(nil) + coveringSpans = append(coveringSpans, currentCoveringSpan) + return coveringSpans +} + +// TODO(sumeer): if this will be called in non-decreasing order of enc, +// use that to optimize the binary search. +func (b *batchedInvertedExprEvaluator) addIndexRow( + enc invertedexpr.EncInvertedVal, keyIndex KeyIndex, +) { + i := sort.Search(len(b.fragmentedSpans), func(i int) bool { + return bytes.Compare(b.fragmentedSpans[i].span.Start, enc) > 0 + }) + i-- + for _, elem := range b.fragmentedSpans[i].exprAndSetIndexList { + b.exprEvals[elem.exprIndex].addIndexRow(elem.setIndex, keyIndex) + } +} + +func (b *batchedInvertedExprEvaluator) evaluate() [][]KeyIndex { + result := make([][]KeyIndex, len(b.exprs)) + for i := range b.exprEvals { + if b.exprEvals[i] == nil { + continue + } + result[i] = b.exprEvals[i].evaluate() + } + return result +} + +func (b *batchedInvertedExprEvaluator) reset() { + b.exprs = b.exprs[:0] + b.exprEvals = b.exprEvals[:0] + b.fragmentedSpans = b.fragmentedSpans[:0] + b.pendingSpans = b.pendingSpans[:0] +} diff --git a/pkg/sql/rowexec/inverted_expr_evaluator_test.go b/pkg/sql/rowexec/inverted_expr_evaluator_test.go new file mode 100644 index 000000000000..33728c6e1172 --- /dev/null +++ b/pkg/sql/rowexec/inverted_expr_evaluator_test.go @@ -0,0 +1,316 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package rowexec + +import ( + "fmt" + "math/rand" + "strings" + "testing" + + "github.com/cockroachdb/cockroach/pkg/sql/opt/invertedexpr" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/stretchr/testify/require" +) + +func setToString(set []KeyIndex) string { + var b strings.Builder + for i, elem := range set { + sep := "," + if i == len(set)-1 { + sep = "" + } + fmt.Fprintf(&b, "%d%s", elem, sep) + } + return b.String() +} + +func writeSpan(b *strings.Builder, span invertedSpan) { + fmt.Fprintf(b, "[%s, %s) ", span.Start, span.End) +} + +func spansToString(spans []invertedSpan) string { + var b strings.Builder + for _, elem := range spans { + writeSpan(&b, elem) + } + return b.String() +} + +func spansIndexToString(spans []spansAndSetIndex) string { + var b strings.Builder + for _, elem := range spans { + b.WriteString("spans: ") + for _, s := range elem.spans { + writeSpan(&b, s) + } + fmt.Fprintf(&b, " setIndex: %d\n", elem.setIndex) + } + return b.String() +} + +func fragmentedSpansToString(spans []invertedSpanRoutingInfo) string { + var b strings.Builder + for _, elem := range spans { + b.WriteString("span: ") + writeSpan(&b, elem.span) + b.WriteString(" indexes (expr, set): ") + for _, indexes := range elem.exprAndSetIndexList { + fmt.Fprintf(&b, "(%d, %d) ", indexes.exprIndex, indexes.setIndex) + } + b.WriteString("\n") + } + return b.String() +} + +func keyIndexesToString(indexes [][]KeyIndex) string { + var b strings.Builder + for i, elem := range indexes { + fmt.Fprintf(&b, "%d: ", i) + for _, index := range elem { + fmt.Fprintf(&b, "%d ", index) + } + b.WriteString("\n") + } + return b.String() +} + +func TestSetContainerUnion(t *testing.T) { + defer leaktest.AfterTest(t)() + + type testCase struct { + a setContainer + b setContainer + expected setContainer + } + cases := []testCase{ + {a: nil, b: nil, expected: nil}, + {a: []KeyIndex{5}, b: nil, expected: []KeyIndex{5}}, + {a: []KeyIndex{5}, b: []KeyIndex{2, 12}, expected: []KeyIndex{2, 5, 12}}, + {a: []KeyIndex{2, 5}, b: []KeyIndex{12}, expected: []KeyIndex{2, 5, 12}}, + {a: []KeyIndex{2}, b: []KeyIndex{5, 12}, expected: []KeyIndex{2, 5, 12}}, + {a: []KeyIndex{2, 12}, b: []KeyIndex{2, 5}, expected: []KeyIndex{2, 5, 12}}, + {a: []KeyIndex{2, 5}, b: []KeyIndex{5, 12}, expected: []KeyIndex{2, 5, 12}}, + } + for _, c := range cases { + require.Equal(t, setToString(c.expected), setToString(unionSetContainers(c.a, c.b))) + } +} + +func TestSetContainerIntersection(t *testing.T) { + defer leaktest.AfterTest(t)() + + type testCase struct { + a setContainer + b setContainer + expected setContainer + } + cases := []testCase{ + {a: nil, b: nil, expected: nil}, + {a: []KeyIndex{5}, b: nil, expected: nil}, + {a: []KeyIndex{5}, b: []KeyIndex{2, 12}, expected: nil}, + {a: []KeyIndex{2, 12}, b: []KeyIndex{2, 5}, expected: []KeyIndex{2}}, + {a: []KeyIndex{2, 5}, b: []KeyIndex{5, 12}, expected: []KeyIndex{5}}, + {a: []KeyIndex{2, 5, 17, 25, 30}, b: []KeyIndex{2, 12, 13, 17, 23, 30}, + expected: []KeyIndex{2, 17, 30}}, + } + for _, c := range cases { + require.Equal(t, setToString(c.expected), setToString(intersectSetContainers(c.a, c.b))) + } +} + +type keyAndIndex struct { + key string + index int +} + +// Tests both invertedExprEvaluator and batchedInvertedExprEvaluator. +func TestInvertedExpressionEvaluator(t *testing.T) { + defer leaktest.AfterTest(t)() + + leaf1 := &spanExpression{ + FactoredUnionSpans: []invertedSpan{{Start: []byte("a"), End: []byte("d")}}, + Operator: invertedexpr.None, + } + leaf2 := &spanExpression{ + FactoredUnionSpans: []invertedSpan{{Start: []byte("e"), End: []byte("h")}}, + Operator: invertedexpr.None, + } + l1Andl2 := &spanExpression{ + FactoredUnionSpans: []invertedSpan{ + {Start: []byte("i"), End: []byte("j")}, {Start: []byte("k"), End: []byte("n")}}, + Operator: invertedexpr.SetIntersection, + Left: leaf1, + Right: leaf2, + } + leaf3 := &spanExpression{ + FactoredUnionSpans: []invertedSpan{{Start: []byte("d"), End: []byte("f")}}, + Operator: invertedexpr.None, + } + leaf4 := &spanExpression{ + FactoredUnionSpans: []invertedSpan{{Start: []byte("a"), End: []byte("c")}}, + Operator: invertedexpr.None, + } + l3Andl4 := &spanExpression{ + FactoredUnionSpans: []invertedSpan{ + {Start: []byte("g"), End: []byte("m")}}, + Operator: invertedexpr.SetIntersection, + Left: leaf3, + Right: leaf4, + } + // In reality, the FactoredUnionSpans of l1Andl2 and l3Andl4 would be moved + // up to expr, by the factoring code in the invertedexpr package. But the + // evaluator does not care, and keeping them separate exercises more code. + exprUnion := &spanExpression{ + Operator: invertedexpr.SetUnion, + Left: l1Andl2, + Right: l3Andl4, + } + + exprIntersection := &spanExpression{ + Operator: invertedexpr.SetIntersection, + Left: l1Andl2, + Right: l3Andl4, + } + + expectedSpansAndSetIndex := "spans: [i, j) [k, n) setIndex: 1\nspans: [a, d) setIndex: 2\n" + + "spans: [e, h) setIndex: 3\nspans: [g, m) setIndex: 4\nspans: [d, f) setIndex: 5\n" + + "spans: [a, c) setIndex: 6\n" + + // Test the getSpansAndSetIndex() method on the invertedExprEvaluator + // directly. The rest of the methods we will only exercise through + // batchedInvertedExprEvaluator. + evalUnion := newInvertedExprEvaluator(exprUnion) + // Indexes are being assigned using a pre-order traversal. + require.Equal(t, expectedSpansAndSetIndex, + spansIndexToString(evalUnion.getSpansAndSetIndex())) + + evalIntersection := newInvertedExprEvaluator(exprIntersection) + require.Equal(t, expectedSpansAndSetIndex, + spansIndexToString(evalIntersection.getSpansAndSetIndex())) + + // The batchedInvertedExprEvaluators will construct their own + // invertedExprEvaluators. + protoUnion := invertedexpr.SpanExpressionProto{Node: *exprUnion} + batchEvalUnion := &batchedInvertedExprEvaluator{ + exprs: []*invertedexpr.SpanExpressionProto{&protoUnion, nil}, + } + protoIntersection := invertedexpr.SpanExpressionProto{Node: *exprIntersection} + batchEvalIntersection := &batchedInvertedExprEvaluator{ + exprs: []*invertedexpr.SpanExpressionProto{&protoIntersection, nil}, + } + expectedSpans := "[a, n) " + expectedFragmentedSpans := + "span: [a, c) indexes (expr, set): (0, 6) (0, 2) \n" + + "span: [c, d) indexes (expr, set): (0, 2) \n" + + "span: [d, e) indexes (expr, set): (0, 5) \n" + + "span: [e, f) indexes (expr, set): (0, 5) (0, 3) \n" + + "span: [f, g) indexes (expr, set): (0, 3) \n" + + "span: [g, h) indexes (expr, set): (0, 3) (0, 4) \n" + + "span: [h, i) indexes (expr, set): (0, 4) \n" + + "span: [i, j) indexes (expr, set): (0, 1) (0, 4) \n" + + "span: [j, k) indexes (expr, set): (0, 4) \n" + + "span: [k, m) indexes (expr, set): (0, 4) (0, 1) \n" + + "span: [m, n) indexes (expr, set): (0, 1) \n" + + require.Equal(t, expectedSpans, spansToString(batchEvalUnion.getSpans())) + require.Equal(t, expectedFragmentedSpans, + fragmentedSpansToString(batchEvalUnion.fragmentedSpans)) + require.Equal(t, expectedSpans, spansToString(batchEvalIntersection.getSpans())) + require.Equal(t, expectedFragmentedSpans, + fragmentedSpansToString(batchEvalIntersection.fragmentedSpans)) + + // Construct the test input. This is hand-constructed to exercise cases where + // intersections sometimes propagate an index across multiple levels. + // This input is not in order of the inverted key since the evaluator does + // not care. In fact, we randomize the input order to exercise the code. + indexRows := []keyAndIndex{{"ii", 0}, {"a", 1}, {"ee", 2}, + {"aa", 3}, {"ab", 4}, {"g", 4}, {"d", 3}, + {"mn", 5}, {"mp", 6}, {"gh", 6}, {"gi", 7}, + {"aaa", 8}, {"e", 8}, {"aaa", 1}} + // These expected values were hand-constructed by evaluating the expression over + // these inputs. + // (([a, d) ∩ [e, h)) U [i, j) U [k, n)) U + // (([d, f) ∩ [a, c)) U [g, m)) + expectedUnion := "0: 0 3 4 5 6 7 8 \n1: \n" + // (([a, d) ∩ [e, h)) U [i, j) U [k, n)) ∩ + // (([d, f) ∩ [a, c)) U [g, m)) + expectedIntersection := "0: 0 4 6 8 \n1: \n" + rand.Shuffle(len(indexRows), func(i, j int) { + indexRows[i], indexRows[j] = indexRows[j], indexRows[i] + }) + for _, elem := range indexRows { + batchEvalUnion.addIndexRow(invertedexpr.EncInvertedVal(elem.key), KeyIndex(elem.index)) + batchEvalIntersection.addIndexRow(invertedexpr.EncInvertedVal(elem.key), KeyIndex(elem.index)) + } + require.Equal(t, expectedUnion, keyIndexesToString(batchEvalUnion.evaluate())) + require.Equal(t, expectedIntersection, keyIndexesToString(batchEvalIntersection.evaluate())) + + // Now do both exprUnion and exprIntersection in a single batch. + batchBoth := batchEvalUnion + batchBoth.reset() + batchBoth.exprs = append(batchBoth.exprs, &protoUnion, &protoIntersection) + batchBoth.getSpans() + for _, elem := range indexRows { + batchBoth.addIndexRow(invertedexpr.EncInvertedVal(elem.key), KeyIndex(elem.index)) + } + require.Equal(t, "0: 0 3 4 5 6 7 8 \n1: 0 4 6 8 \n", + keyIndexesToString(batchBoth.evaluate())) + + // Reset and evaluate nil expressions. + batchBoth.reset() + batchBoth.exprs = append(batchBoth.exprs, nil, nil) + require.Equal(t, 0, len(batchBoth.getSpans())) + require.Equal(t, "0: \n1: \n", keyIndexesToString(batchBoth.evaluate())) +} + +// Test fragmentation for routing when multiple expressions in the batch have +// overlapping spans. +func TestFragmentedSpans(t *testing.T) { + defer leaktest.AfterTest(t)() + + expr1 := invertedexpr.SpanExpressionProto{ + Node: spanExpression{ + FactoredUnionSpans: []invertedSpan{{Start: []byte("a"), End: []byte("g")}}, + Operator: invertedexpr.None, + }, + } + expr2 := invertedexpr.SpanExpressionProto{ + Node: spanExpression{ + FactoredUnionSpans: []invertedSpan{{Start: []byte("d"), End: []byte("j")}}, + Operator: invertedexpr.None, + }, + } + expr3 := invertedexpr.SpanExpressionProto{ + Node: spanExpression{ + FactoredUnionSpans: []invertedSpan{ + {Start: []byte("e"), End: []byte("f")}, {Start: []byte("i"), End: []byte("l")}, + {Start: []byte("o"), End: []byte("p")}}, + Operator: invertedexpr.None, + }, + } + batchEval := &batchedInvertedExprEvaluator{ + exprs: []*invertedexpr.SpanExpressionProto{&expr1, &expr2, &expr3}, + } + require.Equal(t, "[a, l) [o, p) ", spansToString(batchEval.getSpans())) + require.Equal(t, + "span: [a, d) indexes (expr, set): (0, 0) \n"+ + "span: [d, e) indexes (expr, set): (0, 0) (1, 0) \n"+ + "span: [e, f) indexes (expr, set): (2, 0) (0, 0) (1, 0) \n"+ + "span: [f, g) indexes (expr, set): (0, 0) (1, 0) \n"+ + "span: [g, i) indexes (expr, set): (1, 0) \n"+ + "span: [i, j) indexes (expr, set): (1, 0) (2, 0) \n"+ + "span: [j, l) indexes (expr, set): (2, 0) \n"+ + "span: [o, p) indexes (expr, set): (2, 0) \n", + fragmentedSpansToString(batchEval.fragmentedSpans)) +} + +// TODO(sumeer): randomized inputs for union, intersection and expression evaluation.