From f626e512952972db9d798d988eebcf49debd3e16 Mon Sep 17 00:00:00 2001
From: sumeerbhola <sumeer@cockroachlabs.com>
Date: Mon, 18 May 2020 17:14:12 -0400
Subject: [PATCH] rowexec: add batchedInvertedExprEvaluator for inverted index
 execution

This is for evaluating invertedexpr.SpanExpressionProto, both for join
queries and single table filtering, using an inverted index.

More usage context can be found in #48019 which contains an invertedJoiner
that uses an earlier version of this evaluator.

Release note: None
---
 pkg/sql/rowexec/inverted_expr_evaluator.go    | 447 ++++++++++++++++++
 .../rowexec/inverted_expr_evaluator_test.go   | 316 +++++++++++++
 2 files changed, 763 insertions(+)
 create mode 100644 pkg/sql/rowexec/inverted_expr_evaluator.go
 create mode 100644 pkg/sql/rowexec/inverted_expr_evaluator_test.go

diff --git a/pkg/sql/rowexec/inverted_expr_evaluator.go b/pkg/sql/rowexec/inverted_expr_evaluator.go
new file mode 100644
index 000000000000..c911debafa8a
--- /dev/null
+++ b/pkg/sql/rowexec/inverted_expr_evaluator.go
@@ -0,0 +1,447 @@
+// Copyright 2020 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package rowexec
+
+import (
+	"bytes"
+	"sort"
+
+	"github.com/cockroachdb/cockroach/pkg/sql/opt/invertedexpr"
+)
+
+// The abstractions in this file help with evaluating (batches of)
+// invertedexpr.SpanExpression. The spans in a SpanExpression represent spans
+// of an inverted index, which consists of an inverted column followed by the
+// primary key of the table. The set expressions involve union and
+// intersection over operands. The operands are sets of primary keys contained
+// in the corresponding span. Callers should use batchedInvertedExprEvaluator.
+// This evaluator does not do the actual scan -- it is fed the set elements as
+// the inverted index is scanned, and routes a set element to all the sets to
+// which it belongs (since spans can be overlapping). Once the scan is
+// complete, the expressions are evaluated.
+
+// KeyIndex is used as a set element. It is already de-duped.
+type KeyIndex int
+
+// setContainer is a set of key indexes in increasing order.
+type setContainer []KeyIndex
+
+func (s setContainer) Len() int {
+	return len(s)
+}
+
+func (s setContainer) Less(i, j int) bool {
+	return s[i] < s[j]
+}
+
+func (s setContainer) Swap(i, j int) {
+	s[i], s[j] = s[j], s[i]
+}
+
+func unionSetContainers(a, b setContainer) setContainer {
+	if len(a) == 0 {
+		return b
+	}
+	if len(b) == 0 {
+		return a
+	}
+	var out setContainer
+	var i, j int
+	for i < len(a) && j < len(b) {
+		if a[i] < b[j] {
+			out = append(out, a[i])
+			i++
+		} else if a[i] > b[j] {
+			out = append(out, b[j])
+			j++
+		} else {
+			out = append(out, a[i])
+			i++
+			j++
+		}
+	}
+	for ; i < len(a); i++ {
+		out = append(out, a[i])
+	}
+	for ; j < len(b); j++ {
+		out = append(out, b[j])
+	}
+	return out
+}
+
+func intersectSetContainers(a, b setContainer) setContainer {
+	var out setContainer
+	var i, j int
+	// TODO(sumeer): when one set is much larger than the other
+	// it is more efficient to iterate over the smaller set
+	// and seek into the larger set.
+	for i < len(a) && j < len(b) {
+		if a[i] < b[j] {
+			i++
+		} else if a[i] > b[j] {
+			j++
+		} else {
+			out = append(out, a[i])
+			i++
+			j++
+		}
+	}
+	return out
+}
+
+// setExpression follows the structure of SpanExpression.
+type setExpression struct {
+	op invertedexpr.SetOperator
+	// The index in invertedExprEvaluator.sets
+	unionSetIndex int
+	left          *setExpression
+	right         *setExpression
+}
+
+type invertedSpan = invertedexpr.SpanExpressionProto_Span
+type spanExpression = invertedexpr.SpanExpressionProto_Node
+
+// The spans in a SpanExpression.FactoredUnionSpans and the corresponding index
+// in invertedExprEvaluator.sets. Only populated when FactoredUnionsSpans is
+// non-empty.
+type spansAndSetIndex struct {
+	spans    []invertedSpan
+	setIndex int
+}
+
+// invertedExprEvaluator evaluates a single expression. It should not be directly
+// used -- see batchedInvertedExprEvaluator.
+type invertedExprEvaluator struct {
+	setExpr *setExpression
+	// These are initially populated by calls to addIndexRow() as
+	// the inverted index is scanned.
+	sets []setContainer
+
+	spansIndex []spansAndSetIndex
+}
+
+func newInvertedExprEvaluator(expr *spanExpression) *invertedExprEvaluator {
+	eval := &invertedExprEvaluator{}
+	eval.setExpr = eval.initSetExpr(expr)
+	return eval
+}
+
+func (ev *invertedExprEvaluator) initSetExpr(expr *spanExpression) *setExpression {
+	// Assign it an index even if FactoredUnionSpans is empty, since we will
+	// need it when evaluating.
+	i := len(ev.sets)
+	ev.sets = append(ev.sets, nil)
+	sx := &setExpression{op: expr.Operator, unionSetIndex: i}
+	if len(expr.FactoredUnionSpans) > 0 {
+		ev.spansIndex = append(ev.spansIndex,
+			spansAndSetIndex{spans: expr.FactoredUnionSpans, setIndex: i})
+	}
+	if expr.Left != nil {
+		sx.left = ev.initSetExpr(expr.Left)
+	}
+	if expr.Right != nil {
+		sx.right = ev.initSetExpr(expr.Right)
+	}
+	return sx
+}
+
+// getSpansAndSetIndex returns the spans and corresponding set indexes for
+// this expression. The spans are not in sorted order and can be overlapping.
+func (ev *invertedExprEvaluator) getSpansAndSetIndex() []spansAndSetIndex {
+	return ev.spansIndex
+}
+
+// Adds a row to the given set. KeyIndexes are not added in increasing order,
+// nor do they represent any ordering of the primary key of the table whose
+// inverted index is being read. Also, the same KeyIndex could be added
+// repeatedly to a set.
+func (ev *invertedExprEvaluator) addIndexRow(setIndex int, keyIndex KeyIndex) {
+	// If duplicates in a set become a memory problem in this build phase, we
+	// could do periodic de-duplication as we go. For now, we simply append to
+	// the slice and de-dup at the start of evaluate().
+	ev.sets[setIndex] = append(ev.sets[setIndex], keyIndex)
+}
+
+// Evaluates the expression. The return value is in increasing order
+// of KeyIndex.
+func (ev *invertedExprEvaluator) evaluate() []KeyIndex {
+	// Sort and de-dup the sets so that we can efficiently do set operations.
+	for i, c := range ev.sets {
+		if len(c) == 0 {
+			continue
+		}
+		sort.Sort(c)
+		// De-duplicate
+		set := c[:0]
+		for j := range c {
+			if len(set) > 0 && c[j] == set[len(set)-1] {
+				continue
+			}
+			set = append(set, c[j])
+		}
+		ev.sets[i] = set
+	}
+	return ev.evaluateSetExpr(ev.setExpr)
+}
+
+func (ev *invertedExprEvaluator) evaluateSetExpr(sx *setExpression) setContainer {
+	var left, right setContainer
+	if sx.left != nil {
+		left = ev.evaluateSetExpr(sx.left)
+	}
+	if sx.right != nil {
+		right = ev.evaluateSetExpr(sx.right)
+	}
+	var childrenSet setContainer
+	switch sx.op {
+	case invertedexpr.SetUnion:
+		childrenSet = unionSetContainers(left, right)
+	case invertedexpr.SetIntersection:
+		childrenSet = intersectSetContainers(left, right)
+	}
+	return unionSetContainers(ev.sets[sx.unionSetIndex], childrenSet)
+}
+
+// Supporting struct for invertedSpanRoutingInfo.
+type exprAndSetIndex struct {
+	// An index into batchedInvertedExprEvaluator.exprEvals.
+	exprIndex int
+	// An index into batchedInvertedExprEvaluator.exprEvals[exprIndex].sets.
+	setIndex int
+}
+
+// invertedSpanRoutingInfo contains the list of exprAndSetIndex pairs that
+// need rows from the inverted index span. A []invertedSpanRoutingInfo with
+// spans that are sorted and non-overlapping is used to route an added row to
+// all the expressions and sets that need that row.
+type invertedSpanRoutingInfo struct {
+	span                invertedSpan
+	exprAndSetIndexList []exprAndSetIndex
+}
+
+// batchedInvertedExprEvaluator is for evaluating one or more expressions. The
+// batched evaluator can be reused by calling reset(). In the build phase,
+// append expressions directly to exprs. A nil expression is permitted, and is
+// just a placeholder that will result in a nil []KeyIndex in evaluate().
+// getSpans() must be called before calls to addIndexRow() even if the result
+// of getSpans() is not needed -- it builds the fragmentedSpans used for
+// routing the added rows.
+type batchedInvertedExprEvaluator struct {
+	exprs []*invertedexpr.SpanExpressionProto
+	// The evaluators for all the exprs.
+	exprEvals []*invertedExprEvaluator
+	// Spans here are in sorted order and non-overlapping.
+	fragmentedSpans []invertedSpanRoutingInfo
+
+	// Temporary state used for constructing fragmentedSpans. All spans here
+	// have the same start key. They are not sorted by end key.
+	pendingSpans []invertedSpanRoutingInfo
+}
+
+// Helper used in building fragmentedSpans using pendingSpans. pendingSpans
+// contains spans with the same start key. This fragments and removes all
+// spans up to end key fragmentUntil (or all spans if fragmentUntil == nil).
+//
+// Example 1:
+// pendingSpans contains
+//    c---g
+//    c-----i
+//    c--e
+//
+// And fragmentUntil = i. Since end keys are exclusive we can fragment and
+// remove all spans in pendingSpans. These will be:
+//    c-e-g
+//    c-e-g-i
+//    c-e
+//
+// For the c-e span, all the exprAndSetIndexList slices for these spans are
+// appended since any row in that span needs to be routed to all these
+// expressions and sets. For the e-g span only the exprAndSetIndexList slices
+// for the top two spans are unioned.
+//
+// Example 2:
+//
+// Same pendingSpans, and fragmentUntil = f. The fragments that are generated
+// for fragmentedSpans and the remaining spans in pendingSpans are:
+//
+//    fragments        remaining
+//    c-e-f            f-g
+//    c-e-f            f-i
+//    c-e
+func (b *batchedInvertedExprEvaluator) fragmentPendingSpans(
+	fragmentUntil invertedexpr.EncInvertedVal,
+) {
+	// The start keys are the same, so this only sorts in increasing
+	// order of end keys.
+	sort.Slice(b.pendingSpans, func(i, j int) bool {
+		return bytes.Compare(b.pendingSpans[i].span.End, b.pendingSpans[j].span.End) < 0
+	})
+	for len(b.pendingSpans) > 0 {
+		if fragmentUntil != nil && bytes.Compare(fragmentUntil, b.pendingSpans[0].span.Start) <= 0 {
+			break
+		}
+		// The prefix of pendingSpans that will be completely consumed when
+		// the next fragment is constructed.
+		var removeSize int
+		// The end of the next fragment.
+		var end invertedexpr.EncInvertedVal
+		// The start of the fragment after the next fragment.
+		var nextStart invertedexpr.EncInvertedVal
+		if fragmentUntil != nil && bytes.Compare(fragmentUntil, b.pendingSpans[0].span.End) < 0 {
+			// Can't completely remove any spans from pendingSpans, but a prefix
+			// of these spans will be removed
+			removeSize = 0
+			end = fragmentUntil
+			nextStart = end
+		} else {
+			// We can remove all spans whose end key is the same as span[0].
+			// The end of span[0] is also the end key of this fragment.
+			removeSize = b.pendingLenWithSameEnd()
+			end = b.pendingSpans[0].span.End
+			nextStart = end
+		}
+		// The next span to be added to fragmentedSpans.
+		nextSpan := invertedSpanRoutingInfo{
+			span: invertedSpan{
+				Start: b.pendingSpans[0].span.Start,
+				End:   end,
+			},
+		}
+		for i := 0; i < len(b.pendingSpans); i++ {
+			if i >= removeSize {
+				// This span is not completely removed so adjust its start.
+				b.pendingSpans[i].span.Start = nextStart
+			}
+			// All spans in pendingSpans contribute to exprAndSetIndexList.
+			nextSpan.exprAndSetIndexList =
+				append(nextSpan.exprAndSetIndexList, b.pendingSpans[i].exprAndSetIndexList...)
+		}
+		b.fragmentedSpans = append(b.fragmentedSpans, nextSpan)
+		b.pendingSpans = b.pendingSpans[removeSize:]
+		if removeSize == 0 {
+			// fragmentUntil was earlier than the smallest End key in the pending
+			// spans, so cannot fragment any more.
+			break
+		}
+	}
+}
+
+func (b *batchedInvertedExprEvaluator) pendingLenWithSameEnd() int {
+	length := 1
+	for i := 1; i < len(b.pendingSpans); i++ {
+		if !bytes.Equal(b.pendingSpans[0].span.End, b.pendingSpans[i].span.End) {
+			break
+		}
+		length++
+	}
+	return length
+}
+
+// getSpans fragments the spans for later routing of rows and returns spans
+// representing a union of all the spans (for executing the scan).
+func (b *batchedInvertedExprEvaluator) getSpans() []invertedSpan {
+	if cap(b.exprEvals) < len(b.exprs) {
+		b.exprEvals = make([]*invertedExprEvaluator, len(b.exprs))
+	} else {
+		b.exprEvals = b.exprEvals[:len(b.exprs)]
+	}
+	// Initial spans fetched from all expressions.
+	var routingSpans []invertedSpanRoutingInfo
+	for i, expr := range b.exprs {
+		if expr == nil {
+			b.exprEvals[i] = nil
+			continue
+		}
+		b.exprEvals[i] = newInvertedExprEvaluator(&expr.Node)
+		exprSpans := b.exprEvals[i].getSpansAndSetIndex()
+		for _, spans := range exprSpans {
+			for _, span := range spans.spans {
+				routingSpans = append(routingSpans,
+					invertedSpanRoutingInfo{
+						span:                span,
+						exprAndSetIndexList: []exprAndSetIndex{{exprIndex: i, setIndex: spans.setIndex}},
+					},
+				)
+			}
+		}
+	}
+	if len(routingSpans) == 0 {
+		return nil
+	}
+
+	// Sort the routingSpans in increasing order of start key, and for equal
+	// start keys in increasing order of end key.
+	sort.Slice(routingSpans, func(i, j int) bool {
+		cmp := bytes.Compare(routingSpans[i].span.Start, routingSpans[j].span.Start)
+		if cmp == 0 {
+			cmp = bytes.Compare(routingSpans[i].span.End, routingSpans[j].span.End)
+		}
+		return cmp < 0
+	})
+
+	// The union of the spans, which is returned from this function.
+	var coveringSpans []invertedSpan
+	currentCoveringSpan := routingSpans[0].span
+	b.pendingSpans = append(b.pendingSpans, routingSpans[0])
+	// This loop does both the union of the routingSpans and fragments the
+	// routingSpans.
+	for i := 1; i < len(routingSpans); i++ {
+		span := routingSpans[i]
+		if bytes.Compare(b.pendingSpans[0].span.Start, span.span.Start) < 0 {
+			b.fragmentPendingSpans(span.span.Start)
+			if bytes.Compare(currentCoveringSpan.End, span.span.Start) < 0 {
+				coveringSpans = append(coveringSpans, currentCoveringSpan)
+				currentCoveringSpan = span.span
+			} else if bytes.Compare(currentCoveringSpan.End, span.span.End) < 0 {
+				currentCoveringSpan.End = span.span.End
+			}
+		} else if bytes.Compare(currentCoveringSpan.End, span.span.End) < 0 {
+			currentCoveringSpan.End = span.span.End
+		}
+		// Add this span to the pending list.
+		b.pendingSpans = append(b.pendingSpans, span)
+	}
+	b.fragmentPendingSpans(nil)
+	coveringSpans = append(coveringSpans, currentCoveringSpan)
+	return coveringSpans
+}
+
+// TODO(sumeer): if this will be called in non-decreasing order of enc,
+// use that to optimize the binary search.
+func (b *batchedInvertedExprEvaluator) addIndexRow(
+	enc invertedexpr.EncInvertedVal, keyIndex KeyIndex,
+) {
+	i := sort.Search(len(b.fragmentedSpans), func(i int) bool {
+		return bytes.Compare(b.fragmentedSpans[i].span.Start, enc) > 0
+	})
+	i--
+	for _, elem := range b.fragmentedSpans[i].exprAndSetIndexList {
+		b.exprEvals[elem.exprIndex].addIndexRow(elem.setIndex, keyIndex)
+	}
+}
+
+func (b *batchedInvertedExprEvaluator) evaluate() [][]KeyIndex {
+	result := make([][]KeyIndex, len(b.exprs))
+	for i := range b.exprEvals {
+		if b.exprEvals[i] == nil {
+			continue
+		}
+		result[i] = b.exprEvals[i].evaluate()
+	}
+	return result
+}
+
+func (b *batchedInvertedExprEvaluator) reset() {
+	b.exprs = b.exprs[:0]
+	b.exprEvals = b.exprEvals[:0]
+	b.fragmentedSpans = b.fragmentedSpans[:0]
+	b.pendingSpans = b.pendingSpans[:0]
+}
diff --git a/pkg/sql/rowexec/inverted_expr_evaluator_test.go b/pkg/sql/rowexec/inverted_expr_evaluator_test.go
new file mode 100644
index 000000000000..33728c6e1172
--- /dev/null
+++ b/pkg/sql/rowexec/inverted_expr_evaluator_test.go
@@ -0,0 +1,316 @@
+// Copyright 2020 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package rowexec
+
+import (
+	"fmt"
+	"math/rand"
+	"strings"
+	"testing"
+
+	"github.com/cockroachdb/cockroach/pkg/sql/opt/invertedexpr"
+	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
+	"github.com/stretchr/testify/require"
+)
+
+func setToString(set []KeyIndex) string {
+	var b strings.Builder
+	for i, elem := range set {
+		sep := ","
+		if i == len(set)-1 {
+			sep = ""
+		}
+		fmt.Fprintf(&b, "%d%s", elem, sep)
+	}
+	return b.String()
+}
+
+func writeSpan(b *strings.Builder, span invertedSpan) {
+	fmt.Fprintf(b, "[%s, %s) ", span.Start, span.End)
+}
+
+func spansToString(spans []invertedSpan) string {
+	var b strings.Builder
+	for _, elem := range spans {
+		writeSpan(&b, elem)
+	}
+	return b.String()
+}
+
+func spansIndexToString(spans []spansAndSetIndex) string {
+	var b strings.Builder
+	for _, elem := range spans {
+		b.WriteString("spans: ")
+		for _, s := range elem.spans {
+			writeSpan(&b, s)
+		}
+		fmt.Fprintf(&b, " setIndex: %d\n", elem.setIndex)
+	}
+	return b.String()
+}
+
+func fragmentedSpansToString(spans []invertedSpanRoutingInfo) string {
+	var b strings.Builder
+	for _, elem := range spans {
+		b.WriteString("span: ")
+		writeSpan(&b, elem.span)
+		b.WriteString(" indexes (expr, set): ")
+		for _, indexes := range elem.exprAndSetIndexList {
+			fmt.Fprintf(&b, "(%d, %d) ", indexes.exprIndex, indexes.setIndex)
+		}
+		b.WriteString("\n")
+	}
+	return b.String()
+}
+
+func keyIndexesToString(indexes [][]KeyIndex) string {
+	var b strings.Builder
+	for i, elem := range indexes {
+		fmt.Fprintf(&b, "%d: ", i)
+		for _, index := range elem {
+			fmt.Fprintf(&b, "%d ", index)
+		}
+		b.WriteString("\n")
+	}
+	return b.String()
+}
+
+func TestSetContainerUnion(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+
+	type testCase struct {
+		a        setContainer
+		b        setContainer
+		expected setContainer
+	}
+	cases := []testCase{
+		{a: nil, b: nil, expected: nil},
+		{a: []KeyIndex{5}, b: nil, expected: []KeyIndex{5}},
+		{a: []KeyIndex{5}, b: []KeyIndex{2, 12}, expected: []KeyIndex{2, 5, 12}},
+		{a: []KeyIndex{2, 5}, b: []KeyIndex{12}, expected: []KeyIndex{2, 5, 12}},
+		{a: []KeyIndex{2}, b: []KeyIndex{5, 12}, expected: []KeyIndex{2, 5, 12}},
+		{a: []KeyIndex{2, 12}, b: []KeyIndex{2, 5}, expected: []KeyIndex{2, 5, 12}},
+		{a: []KeyIndex{2, 5}, b: []KeyIndex{5, 12}, expected: []KeyIndex{2, 5, 12}},
+	}
+	for _, c := range cases {
+		require.Equal(t, setToString(c.expected), setToString(unionSetContainers(c.a, c.b)))
+	}
+}
+
+func TestSetContainerIntersection(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+
+	type testCase struct {
+		a        setContainer
+		b        setContainer
+		expected setContainer
+	}
+	cases := []testCase{
+		{a: nil, b: nil, expected: nil},
+		{a: []KeyIndex{5}, b: nil, expected: nil},
+		{a: []KeyIndex{5}, b: []KeyIndex{2, 12}, expected: nil},
+		{a: []KeyIndex{2, 12}, b: []KeyIndex{2, 5}, expected: []KeyIndex{2}},
+		{a: []KeyIndex{2, 5}, b: []KeyIndex{5, 12}, expected: []KeyIndex{5}},
+		{a: []KeyIndex{2, 5, 17, 25, 30}, b: []KeyIndex{2, 12, 13, 17, 23, 30},
+			expected: []KeyIndex{2, 17, 30}},
+	}
+	for _, c := range cases {
+		require.Equal(t, setToString(c.expected), setToString(intersectSetContainers(c.a, c.b)))
+	}
+}
+
+type keyAndIndex struct {
+	key   string
+	index int
+}
+
+// Tests both invertedExprEvaluator and batchedInvertedExprEvaluator.
+func TestInvertedExpressionEvaluator(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+
+	leaf1 := &spanExpression{
+		FactoredUnionSpans: []invertedSpan{{Start: []byte("a"), End: []byte("d")}},
+		Operator:           invertedexpr.None,
+	}
+	leaf2 := &spanExpression{
+		FactoredUnionSpans: []invertedSpan{{Start: []byte("e"), End: []byte("h")}},
+		Operator:           invertedexpr.None,
+	}
+	l1Andl2 := &spanExpression{
+		FactoredUnionSpans: []invertedSpan{
+			{Start: []byte("i"), End: []byte("j")}, {Start: []byte("k"), End: []byte("n")}},
+		Operator: invertedexpr.SetIntersection,
+		Left:     leaf1,
+		Right:    leaf2,
+	}
+	leaf3 := &spanExpression{
+		FactoredUnionSpans: []invertedSpan{{Start: []byte("d"), End: []byte("f")}},
+		Operator:           invertedexpr.None,
+	}
+	leaf4 := &spanExpression{
+		FactoredUnionSpans: []invertedSpan{{Start: []byte("a"), End: []byte("c")}},
+		Operator:           invertedexpr.None,
+	}
+	l3Andl4 := &spanExpression{
+		FactoredUnionSpans: []invertedSpan{
+			{Start: []byte("g"), End: []byte("m")}},
+		Operator: invertedexpr.SetIntersection,
+		Left:     leaf3,
+		Right:    leaf4,
+	}
+	// In reality, the FactoredUnionSpans of l1Andl2 and l3Andl4 would be moved
+	// up to expr, by the factoring code in the invertedexpr package. But the
+	// evaluator does not care, and keeping them separate exercises more code.
+	exprUnion := &spanExpression{
+		Operator: invertedexpr.SetUnion,
+		Left:     l1Andl2,
+		Right:    l3Andl4,
+	}
+
+	exprIntersection := &spanExpression{
+		Operator: invertedexpr.SetIntersection,
+		Left:     l1Andl2,
+		Right:    l3Andl4,
+	}
+
+	expectedSpansAndSetIndex := "spans: [i, j) [k, n)  setIndex: 1\nspans: [a, d)  setIndex: 2\n" +
+		"spans: [e, h)  setIndex: 3\nspans: [g, m)  setIndex: 4\nspans: [d, f)  setIndex: 5\n" +
+		"spans: [a, c)  setIndex: 6\n"
+
+	// Test the getSpansAndSetIndex() method on the invertedExprEvaluator
+	// directly. The rest of the methods we will only exercise through
+	// batchedInvertedExprEvaluator.
+	evalUnion := newInvertedExprEvaluator(exprUnion)
+	// Indexes are being assigned using a pre-order traversal.
+	require.Equal(t, expectedSpansAndSetIndex,
+		spansIndexToString(evalUnion.getSpansAndSetIndex()))
+
+	evalIntersection := newInvertedExprEvaluator(exprIntersection)
+	require.Equal(t, expectedSpansAndSetIndex,
+		spansIndexToString(evalIntersection.getSpansAndSetIndex()))
+
+	// The batchedInvertedExprEvaluators will construct their own
+	// invertedExprEvaluators.
+	protoUnion := invertedexpr.SpanExpressionProto{Node: *exprUnion}
+	batchEvalUnion := &batchedInvertedExprEvaluator{
+		exprs: []*invertedexpr.SpanExpressionProto{&protoUnion, nil},
+	}
+	protoIntersection := invertedexpr.SpanExpressionProto{Node: *exprIntersection}
+	batchEvalIntersection := &batchedInvertedExprEvaluator{
+		exprs: []*invertedexpr.SpanExpressionProto{&protoIntersection, nil},
+	}
+	expectedSpans := "[a, n) "
+	expectedFragmentedSpans :=
+		"span: [a, c)  indexes (expr, set): (0, 6) (0, 2) \n" +
+			"span: [c, d)  indexes (expr, set): (0, 2) \n" +
+			"span: [d, e)  indexes (expr, set): (0, 5) \n" +
+			"span: [e, f)  indexes (expr, set): (0, 5) (0, 3) \n" +
+			"span: [f, g)  indexes (expr, set): (0, 3) \n" +
+			"span: [g, h)  indexes (expr, set): (0, 3) (0, 4) \n" +
+			"span: [h, i)  indexes (expr, set): (0, 4) \n" +
+			"span: [i, j)  indexes (expr, set): (0, 1) (0, 4) \n" +
+			"span: [j, k)  indexes (expr, set): (0, 4) \n" +
+			"span: [k, m)  indexes (expr, set): (0, 4) (0, 1) \n" +
+			"span: [m, n)  indexes (expr, set): (0, 1) \n"
+
+	require.Equal(t, expectedSpans, spansToString(batchEvalUnion.getSpans()))
+	require.Equal(t, expectedFragmentedSpans,
+		fragmentedSpansToString(batchEvalUnion.fragmentedSpans))
+	require.Equal(t, expectedSpans, spansToString(batchEvalIntersection.getSpans()))
+	require.Equal(t, expectedFragmentedSpans,
+		fragmentedSpansToString(batchEvalIntersection.fragmentedSpans))
+
+	// Construct the test input. This is hand-constructed to exercise cases where
+	// intersections sometimes propagate an index across multiple levels.
+	// This input is not in order of the inverted key since the evaluator does
+	// not care. In fact, we randomize the input order to exercise the code.
+	indexRows := []keyAndIndex{{"ii", 0}, {"a", 1}, {"ee", 2},
+		{"aa", 3}, {"ab", 4}, {"g", 4}, {"d", 3},
+		{"mn", 5}, {"mp", 6}, {"gh", 6}, {"gi", 7},
+		{"aaa", 8}, {"e", 8}, {"aaa", 1}}
+	// These expected values were hand-constructed by evaluating the expression over
+	// these inputs.
+	// (([a, d) ∩ [e, h)) U [i, j) U [k, n)) U
+	// (([d, f) ∩ [a, c)) U [g, m))
+	expectedUnion := "0: 0 3 4 5 6 7 8 \n1: \n"
+	// (([a, d) ∩ [e, h)) U [i, j) U [k, n)) ∩
+	// (([d, f) ∩ [a, c)) U [g, m))
+	expectedIntersection := "0: 0 4 6 8 \n1: \n"
+	rand.Shuffle(len(indexRows), func(i, j int) {
+		indexRows[i], indexRows[j] = indexRows[j], indexRows[i]
+	})
+	for _, elem := range indexRows {
+		batchEvalUnion.addIndexRow(invertedexpr.EncInvertedVal(elem.key), KeyIndex(elem.index))
+		batchEvalIntersection.addIndexRow(invertedexpr.EncInvertedVal(elem.key), KeyIndex(elem.index))
+	}
+	require.Equal(t, expectedUnion, keyIndexesToString(batchEvalUnion.evaluate()))
+	require.Equal(t, expectedIntersection, keyIndexesToString(batchEvalIntersection.evaluate()))
+
+	// Now do both exprUnion and exprIntersection in a single batch.
+	batchBoth := batchEvalUnion
+	batchBoth.reset()
+	batchBoth.exprs = append(batchBoth.exprs, &protoUnion, &protoIntersection)
+	batchBoth.getSpans()
+	for _, elem := range indexRows {
+		batchBoth.addIndexRow(invertedexpr.EncInvertedVal(elem.key), KeyIndex(elem.index))
+	}
+	require.Equal(t, "0: 0 3 4 5 6 7 8 \n1: 0 4 6 8 \n",
+		keyIndexesToString(batchBoth.evaluate()))
+
+	// Reset and evaluate nil expressions.
+	batchBoth.reset()
+	batchBoth.exprs = append(batchBoth.exprs, nil, nil)
+	require.Equal(t, 0, len(batchBoth.getSpans()))
+	require.Equal(t, "0: \n1: \n", keyIndexesToString(batchBoth.evaluate()))
+}
+
+// Test fragmentation for routing when multiple expressions in the batch have
+// overlapping spans.
+func TestFragmentedSpans(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+
+	expr1 := invertedexpr.SpanExpressionProto{
+		Node: spanExpression{
+			FactoredUnionSpans: []invertedSpan{{Start: []byte("a"), End: []byte("g")}},
+			Operator:           invertedexpr.None,
+		},
+	}
+	expr2 := invertedexpr.SpanExpressionProto{
+		Node: spanExpression{
+			FactoredUnionSpans: []invertedSpan{{Start: []byte("d"), End: []byte("j")}},
+			Operator:           invertedexpr.None,
+		},
+	}
+	expr3 := invertedexpr.SpanExpressionProto{
+		Node: spanExpression{
+			FactoredUnionSpans: []invertedSpan{
+				{Start: []byte("e"), End: []byte("f")}, {Start: []byte("i"), End: []byte("l")},
+				{Start: []byte("o"), End: []byte("p")}},
+			Operator: invertedexpr.None,
+		},
+	}
+	batchEval := &batchedInvertedExprEvaluator{
+		exprs: []*invertedexpr.SpanExpressionProto{&expr1, &expr2, &expr3},
+	}
+	require.Equal(t, "[a, l) [o, p) ", spansToString(batchEval.getSpans()))
+	require.Equal(t,
+		"span: [a, d)  indexes (expr, set): (0, 0) \n"+
+			"span: [d, e)  indexes (expr, set): (0, 0) (1, 0) \n"+
+			"span: [e, f)  indexes (expr, set): (2, 0) (0, 0) (1, 0) \n"+
+			"span: [f, g)  indexes (expr, set): (0, 0) (1, 0) \n"+
+			"span: [g, i)  indexes (expr, set): (1, 0) \n"+
+			"span: [i, j)  indexes (expr, set): (1, 0) (2, 0) \n"+
+			"span: [j, l)  indexes (expr, set): (2, 0) \n"+
+			"span: [o, p)  indexes (expr, set): (2, 0) \n",
+		fragmentedSpansToString(batchEval.fragmentedSpans))
+}
+
+// TODO(sumeer): randomized inputs for union, intersection and expression evaluation.