diff --git a/pkg/sql/opt/invertedexpr/expression.go b/pkg/sql/opt/invertedexpr/expression.go new file mode 100644 index 000000000000..c26cb3438f1e --- /dev/null +++ b/pkg/sql/opt/invertedexpr/expression.go @@ -0,0 +1,997 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package invertedexpr + +import ( + "bytes" + "fmt" + "strconv" + "strings" + + "github.com/cockroachdb/cockroach/pkg/roachpb" +) + +// EncInvertedVal is the encoded form of a value in the inverted column. +// This library does not care about how the value is encoded. The following +// encoding comment is only relevant for integration purposes, and to justify +// the use of an encoded form. +// +// If the inverted column stores an encoded datum, the encoding is +// DatumEncoding_ASCENDING_KEY, and is performed using +// EncodeTableKey(nil /* prefix */, val tree.Datum, DatumEncoding_ASCENDING_KEY). +// It is used to represent spans of the inverted column. +// +// It would be ideal if the inverted column only contained Datums, since we +// could then work with a Datum here. However, JSON breaks that approach: +// - JSON inverted columns use a custom encoding that uses a special byte +// jsonInvertedIndex, followed by the bytes produced by the various +// implementations of the encodeInvertedIndexKey() method in the JSON +// interface. This could be worked around by using a JSON datum that +// represents a single path as the start key of the span, and representing +// [start, start] spans. We would special case the encoding logic to +// recognize that it is dealing with JSON (we have similar special path code +// for JSON elsewhere). 
// But this is insufficient (next bullet).
// - Expressions like x ? 'b' don't have operands that are JSON, but can be
//   represented using a span on the inverted column.
//
// So we make it the job of the caller of this library to encode the inverted
// column. Note that the second bullet above has some similarities with the
// behavior in makeStringPrefixSpan(), except there we can represent the start
// and end keys using the string type.
//
// The span helpers in this file order values bytewise using bytes.Compare.
type EncInvertedVal []byte

// High-level context:
//
// 1. Semantics of inverted index spans and effect on union and intersection
//
// Unlike spans of a normal index (e.g. the spans in the constraints package),
// the spans of the inverted index cannot be immediately "evaluated" since
// they represent sets of primary keys that we won't know about until we do
// the scan. Using a simple example: [a, d) \intersection [c, f) is not [c, d)
// since the same primary key K could be found under a and f and be part of
// the result. More precisely, the above expression can be simplified to:
//   [c, d) \union ([a, c) \intersection [d, f))
//
// For regular indexes, since each primary key is indexed in one row of the
// index, we can be sure that the same primary key will not appear in both of
// the non-overlapping spans [a, c) and [d, f), so we can immediately throw
// that part away knowing that it is the empty set. This discarding is not
// possible with inverted indexes, though the factoring can be useful for
// speed of execution (it does not limit what we need to scan) and for
// selectivity estimation when making optimizer choices.
//
// One could try to construct a general library that handles both the
// cases handled in the constraints package and here, but the complexity seems
// high. Instead, this package is more general than constraints in a few ways
// but simplifies most other things (so overall much simpler):
// - All the inverted spans are either [start, end) or [start, start].
// - It handles spans only on the inverted column, with a way to plug-in spans
//   generated for the PK columns. For more discussion on multi-column
//   constraints for inverted indexes, see the long comment at the end of the
//   file.
//
// 2. Representing a canonical "inverted expression"
//
// This package represents a canonical form for all inverted expressions -- it
// is more than the description of a scan. The evaluation machinery will
// evaluate this expression over an inverted index. The support to build that
// canonical form expression is independent of how the original expression is
// represented: instead of taking an opt.Expr parameter and traversing it
// itself, this library assumes the caller is doing a traversal. This is
// partly because the representation of the original expression for the single
// table scan case and the invertedJoiner case are not the same: the latter
// starts with an expression with two unspecified rows, and after the left
// side row is bound (partial application), this library needs to be used to
// construct the InvertedExpression.
//
// TODO(sumeer): work out how this will change when we have partitioned
// inverted indexes, where some columns of the primary key will appear before
// the inverted column.

// InvertedSpan is a span of the inverted index.
type InvertedSpan struct {
	// [start, end) iff end != nil, else represents [start, start].
	// That is, start is always inclusive; a non-nil end is exclusive, and a
	// nil end denotes the single-key span containing only start.
	start, end EncInvertedVal
}

// SetOperator is an operator on sets.
type SetOperator int

const (
	// None is used in a SpanExpression with no children.
	None SetOperator = iota
	// SetUnion unions the children.
	SetUnion
	// SetIntersection intersects the children.
	SetIntersection
)

// InvertedExpression is the interface representing an expression or sub-expression
// to be evaluated on the inverted index. Any implementation can be used in the
// builder functions And() and Or(), but in practice there are two useful
// implementations provided here:
// - SpanExpression: this is the normal expression representing unions and
//   intersections over spans of the inverted index. A SpanExpression is the
//   root of an expression tree containing other SpanExpressions (there is one
//   exception when a SpanExpression tree can contain non-SpanExpressions,
//   discussed below for Joins).
// - NonInvertedColExpression: this is a marker expression representing the universal
//   span, due to it being an expression on the non inverted column. This only appears in
//   expression trees with a single node, since Anding with such an expression simply
//   changes the tightness to false and Oring with this expression replaces the
//   other expression with a NonInvertedColExpression.
//
// Optimizer cost estimation
//
// There are two cases:
// - Single table expression: after generating the InvertedExpression, the
//   optimizer will check that it is a *SpanExpression -- if not, it is a
//   NonInvertedColExpression, which implies a full inverted index scan, and
//   it is definitely not worth using the inverted index. There are two costs for
//   using the inverted index:
//   - The scan cost: this should be estimated by using SpanExpression.SpansToRead.
//   - The cardinality of the output set after evaluating the expression: this
//     requires a traversal of the expression to assign cardinality to the
//     spans in each FactoredUnionSpans (this could be done using a mean,
//     or using histograms). The cardinality of a SpanExpression is the
//     cardinality of the union of its FactoredUnionSpans and the intersection
//     of its left and right expressions. If the cardinality of the original
//     table is C (i.e., the number of primary keys), and we have two subsets
//     of cardinality C1 and C2, we can assume that each set itself is a
//     drawing without replacement from the original table. This can be
//     used to derive the expected cardinality of the union of the two sets
//     and the intersection of the two sets.
//
// - Join expression: Assigning a cost is hard since there are two
//   parameters, corresponding to the left and right columns. In some cases,
//   like Geospatial, the expression that could be generated is a black-box to
//   the optimizer since the quad-tree traversal is unknown until partial
//   application (when one of the parameters is known). Minimally, we do need to
//   know whether the user expression is going to cause a full inverted index
//   scan due to parts of the expression referring to non-inverted columns.
//   The optimizer will provide its own placeholder implementation of
//   InvertedExpression into which it can embed whatever information it wants.
//   Let's call this the UnknownExpression -- it will only exist at the
//   leaves of the expression tree. It will use this UnknownExpression
//   whenever there is an expression involving both the inverted columns. If
//   the final expression is a NonInvertedColExpression, it is definitely not
//   worth using the inverted index. If the final expression is an
//   UnknownExpression (the tree must be a single node) or a *SpanExpression,
//   the optimizer could either conjure up some magic cost number or try to
//   compose one using costs assigned to each span (as described in the
//   previous bullet) and to each leaf-level UnknownExpression.
//
// Query evaluation
//
// There are two cases:
// - Single table expression: The optimizer will convert the *SpanExpression
//   into a form that is passed to the evaluation machinery, which can recreate
//   the *SpanExpression and evaluate it. The optimizer will have constructed
//   the spans for the evaluation using SpanExpression.SpansToRead, so the
//   expression evaluating code does not need to concern itself with the spans
//   to be read.
//   e.g. the query was of the form ... WHERE x <@ '{"a":1, "b":2}'::json
//   The optimizer constructs a *SpanExpression, and
//   - uses the serialization of the *SpanExpression as the spec for a processor
//     that will evaluate the expression.
//   - uses the SpanExpression.SpansToRead to specify the inverted index
//     input rows that must be fed to the processor.
// - Join expression: The optimizer had an expression tree with the root as
//   a *SpanExpression or an UnknownExpression. Therefore it knows that after
//   partial application the expression will be a *SpanExpression. It passes the
//   inverted expression with two unknowns, as a string, to the join execution
//   machinery. The optimizer provides a way to do partial application for each
//   input row, and returns a *SpanExpression, which is evaluated on the
//   inverted index.
//   e.g. the join query was of the form
//   ... ON t1.x <@ t2.y OR (t1.x @> t2.y AND t2.y @> '{"a":1, "b":2}'::json)
//   and the optimizer decides to use the inverted index on t2.y. The optimizer
//   passes an expression string with two unknowns in the InvertedJoinerSpec,
//   where @1 represents t1.x and @2 represents t2.y. For each input row of
//   t1 the inverted join processor asks the optimizer to apply the value of @1
//   and return a *SpanExpression, which the join processor will evaluate on
//   the inverted index.
type InvertedExpression interface {
	// IsTight returns whether the inverted expression is tight, i.e., will the
	// original expression not need to be reevaluated on each row output by the
	// query evaluation over the inverted index.
	IsTight() bool
	// SetNotTight sets tight to false.
	SetNotTight()
}

// TODO(sumeer): functions to serialize/deserialize SpanExpressions.
+ +// SpanExpression is an implementation of InvertedExpression. +type SpanExpression struct { + // Tight mirrors the definition of IsTight(). + Tight bool + + // SpansToRead are the spans to read from the inverted index + // to evaluate this SpanExpression. These are non-overlapping + // and sorted. If left or right contains a non-SpanExpression, + // it is not included in the spanning union. + // To illustrate, consider a made up example: + // [2, 10) \intersection [6, 14) + // is factored into: + // [6, 10) \union ([2, 6) \intersection [10, 14)) + // The root expression has a spanning union of [2, 14). + SpansToRead []InvertedSpan + + // FactoredUnionSpans are the spans to be unioned. These are + // non-overlapping and sorted. As mentioned earlier, factoring + // can result in faster evaluation and can be useful for + // optimizer cost estimation. + // + // Using the same example, the FactoredUnionSpans will be + // [6, 10). Now let's extend the above example and say that + // it was just a sub-expression in a bigger expression, and + // the full expression involved an intersection of that + // sub-expression and [5, 8). After factoring, we would get + // [6, 8) \union ([5, 6) \intersection ([8, 10) \union ([2, 6) \intersection [10, 14)))) + // The top-level expression has FactoredUnionSpans [6, 8), and the left and + // right children have factoredUnionSpans [5, 6) and [8, 10) respectively. + // The SpansToRead of this top-level expression is still [2, 14) since the + // intersection with [5, 8) did not add anything to the spans to read. Also + // note that, despite factoring, there are overlapping spans in this + // expression, specifically [2, 6) and [5, 6). + + FactoredUnionSpans []InvertedSpan + + // Operator is the set operation to apply to Left and Right. + // When this is union or intersection, both Left and Right are non-nil, + // else both are nil. 
+ Operator SetOperator + Left InvertedExpression + Right InvertedExpression +} + +var _ InvertedExpression = (*SpanExpression)(nil) + +// IsTight implements the InvertedExpression interface. +func (s *SpanExpression) IsTight() bool { + return s.Tight +} + +// SetNotTight implements the InvertedExpression interface. +func (s *SpanExpression) SetNotTight() { + s.Tight = false +} + +func (s *SpanExpression) String() string { + var b strings.Builder + s.format(&b, nil) + return b.String() +} + +// formatExpression pretty-prints the expr to b. pathSpec specifies the tree +// path representing the position of the root of expr in the larger expression +// -- the left child is represented as false and right child as true. It is +// nil for the root. +func formatExpression(b *strings.Builder, pathSpec []bool, expr InvertedExpression) { + switch e := expr.(type) { + case *SpanExpression: + e.format(b, pathSpec) + default: + indent(b, pathSpec, true /*firstLine*/) + fmt.Fprintf(b, "%v\n", e) + } +} + +// format pretty-prints a SpanExpression. See the comment for +// formatExpression. +func (s *SpanExpression) format(b *strings.Builder, pathSpec []bool) { + indent(b, pathSpec, true /*firstLine*/) + fmt.Fprintf(b, "tight: %t, toRead: ", s.Tight) + formatSpans(b, s.SpansToRead) + b.WriteString(" unionSpans: ") + formatSpans(b, s.FactoredUnionSpans) + b.WriteString("\n") + if s.Operator == None { + return + } + indent(b, pathSpec, false /*firstLine*/) + switch s.Operator { + case SetUnion: + b.WriteString("UNION\n") + case SetIntersection: + b.WriteString("INTERSECTION\n") + } + indent(b, pathSpec, false /*firstLine*/) + b.WriteString("|\n") + formatExpression(b, append(pathSpec, false), s.Left) + formatExpression(b, append(pathSpec, true), s.Right) +} + +// formatSpans pretty-prints the spans. 
+func formatSpans(b *strings.Builder, spans []InvertedSpan) { + if len(spans) == 0 { + b.WriteString("empty") + return + } + for i := 0; i < len(spans); i++ { + formatSpan(b, spans[i]) + if i != len(spans)-1 { + b.WriteByte(' ') + } + } +} + +func formatSpan(b *strings.Builder, span InvertedSpan) { + end := span.end + spanEndOpenOrClosed := ')' + if span.end == nil { + end = span.start + spanEndOpenOrClosed = ']' + } + fmt.Fprintf(b, "[%s, %s%c", strconv.Quote(string(span.start)), + strconv.Quote(string(end)), spanEndOpenOrClosed) +} + +// indent writes the prefix of a line for an operator positioned +// at pathSpec. +func indent(b *strings.Builder, pathSpec []bool, firstLine bool) { + // Indentation of 4. + const leftIndent = "| " + const rightIndent = " " + const firstLinePrefix = "+-- " + // Write the prefix for the ancestors. + for i := 0; i < len(pathSpec)-1; i++ { + if pathSpec[i] { + b.WriteString(rightIndent) + } else { + b.WriteString(leftIndent) + } + } + if len(pathSpec) > 0 { + // Write the prefix relative to its parent. + right := pathSpec[len(pathSpec)-1] + if firstLine { + b.WriteString(firstLinePrefix) + } else if right { + b.WriteString(rightIndent) + } else { + b.WriteString(leftIndent) + } + } +} + +// NonInvertedColExpression is an expression to use for parts of the +// user expression that do not involve the inverted index. +type NonInvertedColExpression struct{} + +var _ InvertedExpression = NonInvertedColExpression{} + +// IsTight implements the InvertedExpression interface. +func (n NonInvertedColExpression) IsTight() bool { + return false +} + +// SetNotTight implements the InvertedExpression interface. +func (n NonInvertedColExpression) SetNotTight() {} + +// ExprForInvertedSpan constructs a leaf-level SpanExpression +// for an inverted expression. Note that these leaf-level +// expressions may also have tight = false. Geospatial functions +// are all non-tight. 
+// +// For JSON, expressions like x <@ '{"a":1, "b":2}'::json will have +// tight = false. Say SpanA, SpanB correspond to "a":1 and "b":2 +// respectively). A tight expression would require the following set +// evaluation: +// Set(SpanA) \union Set(SpanB) - Set(ComplementSpan(SpanA \spanunion SpanB)) +// where ComplementSpan(X) is everything in the inverted index +// except for X. +// Since ComplementSpan(SpanA \spanunion SpanB) is likely to +// be very wide when SpanA and SpanB are narrow, or vice versa, +// this tight expression would be very costly to evaluate. +func ExprForInvertedSpan(span InvertedSpan, tight bool) *SpanExpression { + return &SpanExpression{ + Tight: tight, + SpansToRead: []InvertedSpan{span}, + FactoredUnionSpans: []InvertedSpan{span}, + } +} + +// And of two boolean expressions. +func And(left, right InvertedExpression) InvertedExpression { + switch l := left.(type) { + case *SpanExpression: + switch r := right.(type) { + case *SpanExpression: + return intersectSpanExpressions(l, r) + case NonInvertedColExpression: + left.SetNotTight() + return left + default: + return opSpanExpressionAndDefault(l, right, SetIntersection) + } + case NonInvertedColExpression: + right.SetNotTight() + return right + default: + switch r := right.(type) { + case *SpanExpression: + return opSpanExpressionAndDefault(r, left, SetIntersection) + case NonInvertedColExpression: + left.SetNotTight() + return left + default: + return &SpanExpression{ + Tight: left.IsTight() && right.IsTight(), + Operator: SetIntersection, + Left: left, + Right: right, + } + } + } +} + +// Or of two boolean expressions. 
+func Or(left, right InvertedExpression) InvertedExpression { + switch l := left.(type) { + case *SpanExpression: + switch r := right.(type) { + case *SpanExpression: + return unionSpanExpressions(l, r) + case NonInvertedColExpression: + return r + default: + return opSpanExpressionAndDefault(l, right, SetUnion) + } + case NonInvertedColExpression: + return left + default: + switch r := right.(type) { + case *SpanExpression: + return opSpanExpressionAndDefault(r, left, SetUnion) + case NonInvertedColExpression: + return right + default: + return &SpanExpression{ + Tight: left.IsTight() && right.IsTight(), + Operator: SetUnion, + Left: left, + Right: right, + } + } + } +} + +// Helper that applies op to a left-side that is a *SpanExpression and +// a right-side that is an unknown implementation of InvertedExpression. +func opSpanExpressionAndDefault( + left *SpanExpression, right InvertedExpression, op SetOperator, +) *SpanExpression { + expr := &SpanExpression{ + Tight: left.IsTight() && right.IsTight(), + // The SpansToRead is a lower-bound in this case. Note that + // such an expression is only used for Join costing. + SpansToRead: left.SpansToRead, + Operator: op, + Left: left, + Right: right, + } + if op == SetUnion { + // Promote the left-side union spans. We don't know anything + // about the right-side. + expr.FactoredUnionSpans = left.FactoredUnionSpans + left.FactoredUnionSpans = nil + } + // Else SetIntersection -- we can't factor anything if one side is + // unknown. + return expr +} + +// Intersects two SpanExpressions. 
+func intersectSpanExpressions(left, right *SpanExpression) *SpanExpression { + expr := &SpanExpression{ + Tight: left.Tight && right.Tight, + SpansToRead: unionSpans(left.SpansToRead, right.SpansToRead), + FactoredUnionSpans: intersectSpans(left.FactoredUnionSpans, right.FactoredUnionSpans), + Operator: SetIntersection, + Left: left, + Right: right, + } + if expr.FactoredUnionSpans != nil { + left.FactoredUnionSpans = subtractSpans(left.FactoredUnionSpans, expr.FactoredUnionSpans) + right.FactoredUnionSpans = subtractSpans(right.FactoredUnionSpans, expr.FactoredUnionSpans) + } + tryPruneChildren(expr, SetIntersection) + return expr +} + +// Unions two SpanExpressions. +func unionSpanExpressions(left, right *SpanExpression) *SpanExpression { + expr := &SpanExpression{ + Tight: left.Tight && right.Tight, + SpansToRead: unionSpans(left.SpansToRead, right.SpansToRead), + FactoredUnionSpans: unionSpans(left.FactoredUnionSpans, right.FactoredUnionSpans), + Operator: SetUnion, + Left: left, + Right: right, + } + left.FactoredUnionSpans = nil + right.FactoredUnionSpans = nil + tryPruneChildren(expr, SetUnion) + return expr +} + +// tryPruneChildren takes an expr with two child *SpanExpression and removes the empty +// children. +func tryPruneChildren(expr *SpanExpression, op SetOperator) { + isEmptyExpr := func(e *SpanExpression) bool { + return len(e.FactoredUnionSpans) == 0 && e.Left == nil && e.Right == nil + } + if isEmptyExpr(expr.Left.(*SpanExpression)) { + expr.Left = nil + } + if isEmptyExpr(expr.Right.(*SpanExpression)) { + expr.Right = nil + } + // Promotes the left and right sub-expressions of child to the parent expr, when + // the other child is empty. + promoteChild := func(child *SpanExpression) { + // For SetUnion, the FactoredUnionSpans for the child is already nil + // since it has been unioned into expr. 
For SetIntersection, the + // FactoredUnionSpans for the child may be non-empty, but is being + // intersected with the other child that is empty, so can be discarded. + // Either way, we don't need to update expr.FactoredUnionSpans. + expr.Operator = child.Operator + expr.Left = child.Left + expr.Right = child.Right + } + promoteLeft := expr.Left != nil && expr.Right == nil + promoteRight := expr.Left == nil && expr.Right != nil + if promoteLeft { + promoteChild(expr.Left.(*SpanExpression)) + } + if promoteRight { + promoteChild(expr.Right.(*SpanExpression)) + } + if expr.Left == nil && expr.Right == nil { + expr.Operator = None + } +} + +func unionSpans(left []InvertedSpan, right []InvertedSpan) []InvertedSpan { + if len(left) == 0 { + return right + } + if len(right) == 0 { + return left + } + // Both left and right are non-empty. + + // The output spans. + var spans []InvertedSpan + // Contains the current span being merged into. + var mergeSpan InvertedSpan + // Indexes into left and right. + var i, j int + + swapLeftRight := func() { + i, j = j, i + left, right = right, left + } + + // makeMergeSpan is used to initialize mergeSpan. It uses the span from + // left or right that has an earlier start. Additionally, it swaps left + // and right if the mergeSpan was initialized using right, so tha mergeSpan + // is coming from the left. + // REQUIRES: i < len(left) || j < len(right). + makeMergeSpan := func() { + if i >= len(left) || (j < len(right) && bytes.Compare(left[i].start, right[j].start) > 0) { + swapLeftRight() + } + mergeSpan = left[i] + i++ + } + makeMergeSpan() + // We only need to merge spans into mergeSpan while we have more + // spans from the right. Once the right is exhausted we know that + // the remaining spans from the left (including mergeSpan) can be + // appended to the output unchanged. 
+ for j < len(right) { + cmpEndStart := cmpExcEndWithIncStart(mergeSpan, right[j]) + if cmpEndStart >= 0 { + if extendSpanEnd(&mergeSpan, right[j], cmpEndStart) { + // The right side extended the span, so now it plays the + // role of the left. + j++ + swapLeftRight() + } else { + j++ + } + continue + } + // Cannot extend mergeSpan. + spans = append(spans, mergeSpan) + makeMergeSpan() + } + spans = append(spans, mergeSpan) + spans = append(spans, left[i:]...) + return spans +} + +func intersectSpans(left []InvertedSpan, right []InvertedSpan) []InvertedSpan { + if len(left) == 0 || len(right) == 0 { + return nil + } + + // Both left and right are non-empty + + // The output spans. + var spans []InvertedSpan + // Indexes into left and right. + var i, j int + // Contains the current span being intersected. + var mergeSpan InvertedSpan + var mergeSpanInitialized bool + swapLeftRight := func() { + i, j = j, i + left, right = right, left + } + // Initializes mergeSpan. Additionally, arranges it such that the span has + // come from left. i continues to refer to the index used to initialize + // mergeSpan. + // REQUIRES: i < len(left) && j < len(right) + makeMergeSpan := func() { + if bytes.Compare(left[i].start, right[j].start) > 0 { + swapLeftRight() + } + mergeSpan = left[i] + mergeSpanInitialized = true + } + + for i < len(left) && j < len(right) { + if !mergeSpanInitialized { + makeMergeSpan() + } + cmpEndStart := cmpExcEndWithIncStart(mergeSpan, right[j]) + if cmpEndStart > 0 { + // The intersection of these spans is non-empty. Cases: + // - mergeSpan.end != nil + // - mergeSpan.end == nil and right[j].start == currSpan.start + mergeSpan.start = right[j].start + mergeSpanEnd := mergeSpan.end + cmpEnds := cmpEndsWhenEqualStarts(mergeSpan, right[j]) + if cmpEnds > 0 { + // The right span constrains the end of the intersection. + // Note that this is only possible if mergeSpan.end != nil. + // It is possible that right[j].end == nil. 
+ mergeSpan.end = right[j].end + } + // Else the mergeSpan is not constrained by the right span, + // so it is already ready to be appended to the output. + + // Append to the spans that will be output. + spans = append(spans, mergeSpan) + + // Now decide whether we should continue intersecting with what + // is left of the original mergeSpan. + if cmpEnds < 0 { + // The mergeSpan constrained the end of the intersection. + // So nothing left of the original mergeSpan. The rightSpan + // should become the new mergeSpan since it is guaranteed to + // have a start <= the next span from the left and it has + // something leftover. + // Note that in this case right[j].end != nil + i++ + mergeSpan.start = getExclusiveEnd(mergeSpan) + mergeSpan.end = right[j].end + swapLeftRight() + } else if cmpEnds == 0 { + // Both spans end at the same key, so both are consumed. + i++ + j++ + mergeSpanInitialized = false + } else { + // The right span constrained the end of the intersection. + // So there is something left of the original mergeSpan. + // Note that mergeSpanEnd != nil. + j++ + mergeSpan.start = getExclusiveEnd(mergeSpan) + mergeSpan.end = mergeSpanEnd + } + } else { + // Intersection is empty + i++ + mergeSpanInitialized = false + } + } + return spans +} + +// subtractSpans subtracts right from left, under the assumption that right is a +// subset of left. +func subtractSpans(left []InvertedSpan, right []InvertedSpan) []InvertedSpan { + if len(right) == 0 { + return left + } + // Both left and right are non-empty + + // The output spans. + var out []InvertedSpan + + // Contains the current span being subtracted. + var mergeSpan InvertedSpan + var mergeSpanInitialized bool + // Indexes into left and right. 
+ var i, j int + for j < len(right) { + if !mergeSpanInitialized { + mergeSpan = left[i] + mergeSpanInitialized = true + } + cmpEndStart := cmpExcEndWithIncStart(mergeSpan, right[j]) + if cmpEndStart > 0 { + // mergeSpan will have some part subtracted by the right span. Cases: + // - mergeSpan.end != nil + // - mergeSpan.end == nil and right[j].start == mergeSpan.start and + // right[j].end == mergeSpan.end. + if mergeSpan.end == nil { + i++ + j++ + mergeSpanInitialized = false + continue + } + cmpStart := bytes.Compare(mergeSpan.start, right[j].start) + if cmpStart < 0 { + // There is some part of mergeSpan before the right span starts. Add it + // to the output. + out = append(out, InvertedSpan{start: mergeSpan.start, end: right[j].start}) + mergeSpan.start = right[j].start + } + // Else cmpStart == 0 + + // Invariant: mergeSpan.start == right[j].start + cmpEnd := cmpEndsWhenEqualStarts(mergeSpan, right[j]) + if cmpEnd == 0 { + // Both spans end at the same key, so both are consumed. + i++ + j++ + mergeSpanInitialized = false + continue + } + + // Invariant: cmp > 0 + mergeSpan.start = getExclusiveEnd(right[j]) + j++ + } else { + // Right span starts after mergeSpan ends. + out = append(out, mergeSpan) + i++ + mergeSpanInitialized = false + } + } + if mergeSpanInitialized { + out = append(out, mergeSpan) + i++ + } + out = append(out, left[i:]...) + return out +} + +// Compares the exclusive end key of left with the inclusive start key of +// right. +// Examples: +// [a, b), [b, c) == 0 +// [a, a], [a, c) == +1 +// [a, a\x00), [a, c) == +1 +// [a, c), [d, e) == -1 +func cmpExcEndWithIncStart(left, right InvertedSpan) int { + if left.end == nil { + rightLen := len(right.start) + if rightLen == len(left.start)+1 && right.start[rightLen-1] == '\x00' { + // The exclusive end of left is left.start + '\x00' and right.start ends + // with '\x00'. 
So if we compare the two starts without the last + // character of right.start we get the real comparison between the + // exclusive end of left and right.start. + return bytes.Compare(left.start, right.start[:rightLen-1]) + } + cmp := bytes.Compare(left.start, right.start) + if cmp == 0 { + // left.start and right.start are equal. So the exclusive end of left + // is greater than right.start. + cmp = +1 + } + return cmp + } + return bytes.Compare(left.end, right.start) +} + +// Extends the left span using the right span. Will return true if +// left was extended, i.e., the end of left < end of right, and +// false if end of left > end of right. For equality, will typically +// return false, but can return true (which is harmless). +func extendSpanEnd(left *InvertedSpan, right InvertedSpan, cmpExcEndIncStart int) bool { + if cmpExcEndIncStart == 0 { + // Definitely extends. + if right.end == nil { + left.end = roachpb.BytesNext(right.start) + } else { + left.end = right.end + } + return true + } + // cmpExcEndIncStart > 0, so left covers at least right.start. But may not + // cover right.end. + if right.end == nil { + // right is [right.start, right.start], so no extension. + return false + } + // right.end != nil + if left.end == nil { + // left.start == right.start, and left is [left.start, left.start]. + // It is possible that right is [left.start, BytesNext(left.start)), + // in which case this is not really an extension, but that is harmless. + left.end = right.end + return true + } + // Both ends are non-nil. Simply compare them. + if bytes.Compare(left.end, right.end) < 0 { + left.end = right.end + return true + } + return false +} + +// Returns the exclusive end key of the span. +// Examples: +// [a, c) returns c +// [a, a] returns a\x00 +func getExclusiveEnd(s InvertedSpan) EncInvertedVal { + if s.end == nil { + return roachpb.BytesNext(s.start) + } + return s.end +} + +// Compares the end keys of left and right given that the start +// keys are the same. 
+func cmpEndsWhenEqualStarts(left, right InvertedSpan) int { + if left.end == nil && right.end == nil { + return 0 + } + cmpMultiplier := +1 + if left.end == nil { + cmpMultiplier = -1 + left, right = right, left + } + if right.end == nil { + // left.end = "c\x00", right = [c, c]. We need to return 0 for + // this case. + leftLen := len(left.end) + if leftLen == len(right.start)+1 && left.end[leftLen-1] == '\x00' { + return cmpMultiplier * bytes.Compare(left.end[:leftLen-1], right.start) + } + return cmpMultiplier + } + return cmpMultiplier * bytes.Compare(left.end, right.end) +} + +// Representing multi-column constraints +// +// Building multi-column constraints is complicated even for the regular +// index case (see idxconstraint and constraints packages). Because the +// constraints code is not generating a full expression and it can immediately +// evaluate intersections, it takes an approach of traversing the expression +// at monotonically increasing column offsets (e.g. makeSpansForAnd() and the +// offset+delta logic). This allows it to build up Key constraints in increasing +// order of the index column (say labeled @1, @2, ...), instead of needing to +// handle an arbitrary order, and then combine them using Constraint.Combine(). +// This repeated traversal at different offsets is a simplification and can +// result in spans that are wider than optimal. 
+// +// Example 1: +// index-constraints vars=(int, int, int) index=(@1 not null, @2 not null, @3 not null) +// ((@1 = 1 AND @3 = 5) OR (@1 = 3 AND @3 = 10)) AND (@2 = 76) +// ---- +// [/1/76/5 - /1/76/5] +// [/1/76/10 - /1/76/10] +// [/3/76/5 - /3/76/5] +// [/3/76/10 - /3/76/10] +// Remaining filter: ((@1 = 1) AND (@3 = 5)) OR ((@1 = 3) AND (@3 = 10)) +// +// Note that in example 1 we produce the spans with the single key /1/76/10 +// and /3/76/5 which are not possible -- this is because the application of +// the @3 constraint happened at the higher level after the @2 constraint had +// been applied, and at that higher level the @3 constraint was now the set +// {5, 10}, so it needed to be applied to both the /1/76 and /3/76 span. +// +// In contrast example 2 is able to apply the @2 constraint inside each of the +// sub-expressions and results in a tight span. +// +// Example 2: +// index-constraints vars=(int, int, int) index=(@1 not null, @2 not null, @3 not null) +// ((@1 = 1 AND @2 = 5) OR (@1 = 3 AND @2 = 10)) AND (@3 = 76) +// ---- +// [/1/5/76 - /1/5/76] +// [/3/10/76 - /3/10/76] +// +// We note that: +// - Working with spans of only the inverted column is much easier for factoring. +// - It is not yet clear how important multi-column constraints are for inverted +// index performance. +// - We cannot adopt the approach of traversing at monotonically increasing +// column offsets since we are trying to build an expression. We want to +// traverse once, to build up the expression tree. One possibility would be +// to incrementally build the expression tree with the caller traversing once +// but additionally keep track of the span constraints for each PK column at +// each node in the already built expression tree.
To illustrate, consider +// an example 1' akin to example 1 where @1 is an inverted column: +// ((f(@1, 1) AND @3 = 5) OR (f(@1, 3) AND @3 = 10)) AND (@2 = 76) +// and the functions f(@1, 1) and f(@1, 3) each give a single value for the +// inverted column (this could be something like f @> '{"a":1}'::json). +// Say we already have the expression tree built for: +// ((f(@1, 1) AND @3 = 5) OR (f(@1, 3) AND @3 = 10)) +// When the constraint for (@2 = 76) is anded we traverse this built tree +// and add this constraint to each node. Note that we are delaying building +// something akin to a constraint.Key since we are encountering the constraints +// in arbitrary column order. Then after the full expression tree is built, +// one traverses and builds the inverted spans and primary key spans (latter +// could reuse constraint.Span for each node). +// - The previous bullet is doable but complicated, and especially increases the +// complexity of factoring spans when unioning and intersecting while building +// up sub-expressions. One needs to either factor taking into account the +// current per-column PK constraints or delay it until the end (I gave up +// half-way through writing the code, as it doesn't seem worth the complexity). +// +// In the following we adopt a much simpler approach. The caller generates the +// inverted index expression and the PK spans separately. +// +// - Generating the inverted index expression: The caller does a single +// traversal and calls the methods in this package. For every +// leaf-sub-expression on the non-inverted columns it uses a marker +// NonInvertedColExpression. Anding a NonInvertedColExpression results in a +// non-tight inverted expression and Oring a NonInvertedColExpression +// results in discarding the inverted expression built so far. This package +// does factoring for ands and ors involving inverted expressions +// incrementally, and this factoring is straightforward since it involves a +// single column.
+// - Generating the PK spans (optional): The caller can use something like +// idxconstraint, pretending that the PK columns of the inverted index +// are the index columns. Every leaf inverted sub-expression is replaced +// with true. This is because when not representing the inverted column +// constraint we need the weakest possible constraint on the PK columns. +// Using example 1' again, +// ((f(@1, 1) AND @3 = 5) OR (f(@1, 3) AND @3 = 10)) AND (@2 = 76) +// when generating the PK constraints we would use +// (@3 = 5 OR @3 = 10) AND (@2 = 76) +// So the PK spans will be: +// [/76/5, /76/5], [/76/10, /76/10] +// - The spans in the inverted index expression can be composed with the +// spans of the PK columns to narrow wherever possible. +// Continuing with example 1', the inverted index expression will be +// v11 \union v13, corresponding to f(@1, 1) and f(@1, 3), where each +// of v11 and v13 are single value spans. And this expression is not tight +// (because of the anding with NonInvertedColExpression). +// The PK spans, [/76/5, /76/5], [/76/10, /76/10], are also single key spans. +// This is a favorable example in that we can compose all these singleton +// spans to get single inverted index rows: +// /v11/76/5, /v11/76/10, /v13/76/5, /v13/76/10 +// (this is also a contrived example since with such narrow constraints +// on the PK, we would possibly not use the inverted index). +// +// If one constructs example 2' (derived from example 2 in the same way +// we derived example 1'), we would have +// ((f(@1, 1) AND @2 = 5) OR (f(@1, 3) AND @2 = 10)) AND (@3 = 76) +// and the inverted index expression would be: +// v11 \union v13 +// and the PK spans: +// [/5/76, /5/76], [/10/76, /10/76] +// And so the inverted index rows would be: +// /v11/5/76, /v11/10/76, /v13/5/76, /v13/10/76 +// This is worse than example 2 (and resembles example 1 and 1') since +// we are taking the cross-product. +// +// TODO(sumeer): write this composition code. 
diff --git a/pkg/sql/opt/invertedexpr/expression_test.go b/pkg/sql/opt/invertedexpr/expression_test.go new file mode 100644 index 000000000000..f34cba82d33b --- /dev/null +++ b/pkg/sql/opt/invertedexpr/expression_test.go @@ -0,0 +1,299 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package invertedexpr + +import ( + "fmt" + "strings" + "testing" + + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/datadriven" + "github.com/stretchr/testify/require" +) + +/* +Format for the datadriven test: + +new-span-leaf name= tight= span=[,] +---- + + + Creates a new leaf spanExpression with the given name + +new-unknown-leaf name= tight= +---- + + Creates a new leaf unknownExpression with the given name + +new-non-inverted-leaf name= +---- + + Creates a new NonInvertedColExpression with the given name + +and result= left= right= +---- + + + Ands the left and right expressions and stores the result + +or result= left= right= +---- + + + Ors the left and right expressions and stores the result +*/ + +// getSpan reads the "span" argument of the datadriven command and splits it on +// ",". The first part becomes the span start; a second part, when present, +// becomes the span end (otherwise end is left nil). More than two parts is +// reported via d.Fatalf. +func getSpan(t *testing.T, d *datadriven.TestData) InvertedSpan { + var str string + d.ScanArgs(t, "span", &str) + parts := strings.Split(str, ",") + span := InvertedSpan{start: []byte(parts[0])} + if len(parts) > 2 { + d.Fatalf(t, "incorrect span format: %s", str) + } else if len(parts) == 2 { + span.end = []byte(parts[1]) + } + return span +} + +// UnknownExpression is an expression type defined only in this test file. The +// datadriven test creates it for new-unknown-leaf and stores it in a map of +// InvertedExpression values. +// NOTE(review): presumably it stands in for InvertedExpression implementations +// that this package has no special handling for; confirm. +type UnknownExpression struct { + tight bool +} + +// IsTight returns the value of the tight field. +func (u UnknownExpression) IsTight() bool { return u.tight } +// SetNotTight assigns false to u.tight. +// NOTE(review): the receiver is a value, so the assignment only changes the +// method's local copy and is not visible to the caller; confirm that this +// no-op is intended for this test-only type. +func (u UnknownExpression) SetNotTight() { u.tight = false } +// String returns a description that includes the tight field. +func (u UnknownExpression) String() string { + return fmt.Sprintf("UnknownExpression: tight=%t",
u.tight) +} + +/* getExprCopy looks up name in exprsByName (calling d.Fatalf when there is no such entry) and returns a copy of the expression. For a *SpanExpression the SpansToRead and FactoredUnionSpans slices are copied into new slices, while Left and Right are carried over as is, so children are shared with the stored expression. */ func getExprCopy( + t *testing.T, d *datadriven.TestData, name string, exprsByName map[string]InvertedExpression, +) InvertedExpression { + expr := exprsByName[name] + if expr == nil { + d.Fatalf(t, "unknown expr: %s", name) + } + switch e := expr.(type) { + case *SpanExpression: + return &SpanExpression{ + Tight: e.Tight, + SpansToRead: append([]InvertedSpan(nil), e.SpansToRead...), + FactoredUnionSpans: append([]InvertedSpan(nil), e.FactoredUnionSpans...), + Operator: e.Operator, + Left: e.Left, + Right: e.Right, + } + case NonInvertedColExpression: + return NonInvertedColExpression{} + case UnknownExpression: + return UnknownExpression{tight: e.tight} + default: + d.Fatalf(t, "unknown expr type") + return nil + } +} + +/* toString returns what formatExpression writes for expr into a strings.Builder. */ func toString(expr InvertedExpression) string { + var b strings.Builder + formatExpression(&b, nil, expr) + return b.String() +} + +/* getLeftAndRightExpr reads the "left" and "right" arguments of the command and returns copies (via getExprCopy) of the expressions stored under those names. */ func getLeftAndRightExpr( + t *testing.T, d *datadriven.TestData, exprsByName map[string]InvertedExpression, +) (InvertedExpression, InvertedExpression) { + var leftName, rightName string + d.ScanArgs(t, "left", &leftName) + d.ScanArgs(t, "right", &rightName) + return getExprCopy(t, d, leftName, exprsByName), getExprCopy(t, d, rightName, exprsByName) +} + +/* TestExpression runs the datadriven file testdata/expression. Expressions created by a command are stored by name in exprsByName so that later commands in the file can refer to them; the command format is described in the comment at the top of this file. */ func TestExpression(t *testing.T) { + defer leaktest.AfterTest(t)() + exprsByName := make(map[string]InvertedExpression) + + datadriven.RunTest(t, "testdata/expression", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "new-span-leaf": + var name string + d.ScanArgs(t, "name", &name) + var tight bool + d.ScanArgs(t, "tight", &tight) + span := getSpan(t, d) + expr := ExprForInvertedSpan(span, tight) + exprsByName[name] = expr + return expr.String() + case "new-unknown-leaf": + var name string + d.ScanArgs(t, "name", &name) + var tight bool + d.ScanArgs(t, "tight", &tight) + expr := UnknownExpression{tight: tight} + exprsByName[name] = expr + return fmt.Sprintf("%v", expr) + case "new-non-inverted-leaf": + var name string +
d.ScanArgs(t, "name", &name) + exprsByName[name] = NonInvertedColExpression{} + return "" + case "and": + var name string + d.ScanArgs(t, "result", &name) + left, right := getLeftAndRightExpr(t, d, exprsByName) + expr := And(left, right) + exprsByName[name] = expr + return toString(expr) + case "or": + var name string + d.ScanArgs(t, "result", &name) + left, right := getLeftAndRightExpr(t, d, exprsByName) + expr := Or(left, right) + exprsByName[name] = expr + return toString(expr) + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +/* span builds an InvertedSpan whose start and end are the bytes of the given strings. */ func span(start, end string) InvertedSpan { + return InvertedSpan{start: []byte(start), end: []byte(end)} +} + +/* single builds an InvertedSpan with only start set; end is left nil. */ func single(start string) InvertedSpan { + return InvertedSpan{start: []byte(start)} +} + +/* checkEqual requires that expected and actual have the same length and that the start and end of each pair of spans at the same index are equal. */ func checkEqual(t *testing.T, expected, actual []InvertedSpan) { + require.Equal(t, len(expected), len(actual)) + for i := range expected { + require.Equal(t, expected[i].start, actual[i].start) + require.Equal(t, expected[i].end, actual[i].end) + } +} + +/* TestSetUnion checks the output of unionSpans on hand-written inputs, including single-key spans next to spans that start or end at the key followed by \x00. */ func TestSetUnion(t *testing.T) { + checkEqual(t, + []InvertedSpan{span("b", "b\x00")}, + unionSpans( + []InvertedSpan{single("b")}, + []InvertedSpan{span("b", "b\x00")}, + ), + ) + checkEqual(t, + []InvertedSpan{span("b", "b\x00\x00")}, + unionSpans( + []InvertedSpan{single("b")}, + []InvertedSpan{single("b\x00")}, + ), + ) + checkEqual(t, + []InvertedSpan{span("b", "c")}, + unionSpans( + []InvertedSpan{single("b")}, + []InvertedSpan{span("b\x00", "c")}, + ), + ) + checkEqual(t, + []InvertedSpan{span("b", "c")}, + unionSpans( + []InvertedSpan{span("b", "b\x00")}, + []InvertedSpan{span("b", "c")}, + ), + ) + checkEqual(t, + []InvertedSpan{span("b", "c"), single("d\x00")}, + unionSpans( + []InvertedSpan{span("b", "c")}, + []InvertedSpan{single("d\x00")}, + ), + ) + checkEqual(t, + []InvertedSpan{span("b", "c"), single("d"), single("e")}, + unionSpans( + []InvertedSpan{span("b", "c"), single("e")}, + []InvertedSpan{single("d")}, + ), + ) + checkEqual(t, +
[]InvertedSpan{span("b", "d"), single("e")}, + unionSpans( + []InvertedSpan{span("b", "c"), single("e")}, + []InvertedSpan{span("c", "d")}, + ), + ) + checkEqual(t, + []InvertedSpan{span("b", "f")}, + unionSpans( + []InvertedSpan{span("b", "c"), single("e")}, + []InvertedSpan{span("c", "f")}, + ), + ) +} + +/* TestSetIntersection checks the output of intersectSpans on hand-written inputs; a nil expectation means no spans are returned. */ func TestSetIntersection(t *testing.T) { + checkEqual(t, + []InvertedSpan{single("b")}, + intersectSpans( + []InvertedSpan{single("b")}, + []InvertedSpan{span("b", "b\x00")}, + ), + ) + checkEqual(t, + nil, + intersectSpans( + []InvertedSpan{single("b")}, + []InvertedSpan{span("b\x00", "c")}, + ), + ) + checkEqual(t, + []InvertedSpan{single("b"), span("d", "d\x00"), span("dd", "e"), span("f", "ff")}, + intersectSpans( + []InvertedSpan{single("b"), span("d", "e"), span("f", "g")}, + []InvertedSpan{span("b", "d\x00"), span("dd", "ff")}, + ), + ) +} + +/* TestSetSubtraction checks the output of subtractSpans (first argument minus second) on hand-written inputs; a nil expectation means no spans are returned. */ func TestSetSubtraction(t *testing.T) { + checkEqual(t, + nil, + subtractSpans( + []InvertedSpan{single("b")}, + []InvertedSpan{span("b", "b\x00")}, + ), + ) + checkEqual(t, + []InvertedSpan{span("b\x00", "d")}, + subtractSpans( + []InvertedSpan{span("b", "d")}, + []InvertedSpan{span("b", "b\x00")}, + ), + ) + checkEqual(t, + []InvertedSpan{span("b", "d"), span("e", "ea")}, + subtractSpans( + []InvertedSpan{span("b", "d"), span("e", "f")}, + []InvertedSpan{span("ea", "f")}, + ), + ) + checkEqual(t, + []InvertedSpan{span("d", "da"), span("da\x00", "dc"), + span("dd", "df"), span("fa", "g")}, + subtractSpans( + []InvertedSpan{single("b"), span("d", "e"), span("f", "g")}, + []InvertedSpan{span("b", "b\x00"), single("da"), + span("dc", "dd"), span("df", "e"), span("f", "fa")}, + ), + ) + +} diff --git a/pkg/sql/opt/invertedexpr/testdata/expression b/pkg/sql/opt/invertedexpr/testdata/expression new file mode 100644 index 000000000000..0bb4e251798e --- /dev/null +++ b/pkg/sql/opt/invertedexpr/testdata/expression @@ -0,0 +1,269 @@ +new-span-leaf name=b tight=true span=b +---- +tight: true, toRead: ["b", "b"] unionSpans:
["b", "b"] + +new-unknown-leaf name=u-tight tight=true +---- +UnknownExpression: tight=true + +new-unknown-leaf name=u-not-tight tight=false +---- +UnknownExpression: tight=false + +# ----------------------------------------------------- +# Tests involving UnknownExpression. +# ----------------------------------------------------- + +# Or of tight [b, b] with tight UnknownExpression. They +# become the left and right child and the result is tight. +or result=bt left=b right=u-tight +---- +tight: true, toRead: ["b", "b"] unionSpans: ["b", "b"] +UNION +| ++-- tight: true, toRead: ["b", "b"] unionSpans: empty ++-- UnknownExpression: tight=true + +# Same as previous with left and right reversed in the +# call to Or. +or result=bt left=u-tight right=b +---- +tight: true, toRead: ["b", "b"] unionSpans: ["b", "b"] +UNION +| ++-- tight: true, toRead: ["b", "b"] unionSpans: empty ++-- UnknownExpression: tight=true + +# Or of tight [b, b] with non-tight UnknownExpression. +# Unlike bt, the result here is not tight. +or result=bnt left=b right=u-not-tight +---- +tight: false, toRead: ["b", "b"] unionSpans: ["b", "b"] +UNION +| ++-- tight: true, toRead: ["b", "b"] unionSpans: empty ++-- UnknownExpression: tight=false + +# And of tight [b, b] with tight UnknownExpression. +# No factoring is possible. +and result=b-and-unknown left=b right=u-tight +---- +tight: true, toRead: ["b", "b"] unionSpans: empty +INTERSECTION +| ++-- tight: true, toRead: ["b", "b"] unionSpans: ["b", "b"] ++-- UnknownExpression: tight=true + +# Similar and as previous but with non-tight UnknownExpression. +# Only output difference is that the result is not tight. +and result=b-and-unknown left=b right=u-not-tight +---- +tight: false, toRead: ["b", "b"] unionSpans: empty +INTERSECTION +| ++-- tight: true, toRead: ["b", "b"] unionSpans: ["b", "b"] ++-- UnknownExpression: tight=false + +# And of bt and bnt. Factoring is possible. The result is +# not tight. 
+and result=bt-and-bnt left=bt right=bnt +---- +tight: false, toRead: ["b", "b"] unionSpans: ["b", "b"] +INTERSECTION +| ++-- tight: true, toRead: ["b", "b"] unionSpans: empty +| UNION +| | +| +-- tight: true, toRead: ["b", "b"] unionSpans: empty +| +-- UnknownExpression: tight=true ++-- tight: false, toRead: ["b", "b"] unionSpans: empty + UNION + | + +-- tight: true, toRead: ["b", "b"] unionSpans: empty + +-- UnknownExpression: tight=false + +# Or of bt and bnt. Similar to And in toRead and unionSpans. +or result=bt-or-bnt left=bnt right=bt +---- +tight: false, toRead: ["b", "b"] unionSpans: ["b", "b"] +UNION +| ++-- tight: false, toRead: ["b", "b"] unionSpans: empty +| UNION +| | +| +-- tight: true, toRead: ["b", "b"] unionSpans: empty +| +-- UnknownExpression: tight=false ++-- tight: true, toRead: ["b", "b"] unionSpans: empty + UNION + | + +-- tight: true, toRead: ["b", "b"] unionSpans: empty + +-- UnknownExpression: tight=true + +# ----------------------------------------------------- +# Tests involving NonInvertedColExpression. +# ----------------------------------------------------- + +new-non-inverted-leaf name=niexpr +---- + +# And with a NonInvertedColExpression makes the result +# not tight. +and result=bt-and-niexpr left=bt right=niexpr +---- +tight: false, toRead: ["b", "b"] unionSpans: ["b", "b"] +UNION +| ++-- tight: true, toRead: ["b", "b"] unionSpans: empty ++-- UnknownExpression: tight=true + +# Or with a NonInvertedColExpression results in a +# NonInvertedColExpression. +or result=bt-or-niexpr left=niexpr right=bt +---- +{} + +# ----------------------------------------------------- +# Tests involving only SpanExpressions. +# ----------------------------------------------------- + +# Trivial union with self. +or result=b-or-b left=b right=b +---- +tight: true, toRead: ["b", "b"] unionSpans: ["b", "b"] + +# Trivial intersection with self. 
+and result=b-and-b left=b right=b +---- +tight: true, toRead: ["b", "b"] unionSpans: ["b", "b"] + +new-span-leaf name=b-not-tight tight=false span=b +---- +tight: false, toRead: ["b", "b"] unionSpans: ["b", "b"] + +# Trivial union with tight and non-tight. +or result=_ left=b right=b-not-tight +---- +tight: false, toRead: ["b", "b"] unionSpans: ["b", "b"] + +# Trivial intersection with tight and non-tight. +and result=_ left=b-not-tight right=b +---- +tight: false, toRead: ["b", "b"] unionSpans: ["b", "b"] + +new-span-leaf name=ac tight=true span=a,c +---- +tight: true, toRead: ["a", "c") unionSpans: ["a", "c") + +# [b, b] or [a, c) = [a, c) +or result=_ left=b right=ac +---- +tight: true, toRead: ["a", "c") unionSpans: ["a", "c") + +# [b, b] and [a, c) = [b, b] +and result=_ left=b right=ac +---- +tight: true, toRead: ["a", "c") unionSpans: ["b", "b"] + +new-span-leaf name=bj tight=true span=b,j +---- +tight: true, toRead: ["b", "j") unionSpans: ["b", "j") + +# [b, b] or [b, j) = [b, j) +or result=_ left=bj right=b +---- +tight: true, toRead: ["b", "j") unionSpans: ["b", "j") + +# [b, b] and [b, j) = [b, b] +and result=_ left=b right=bj +---- +tight: true, toRead: ["b", "j") unionSpans: ["b", "b"] + +# [b, j) or [a, c) = [a, j) +or result=aj left=bj right=ac +---- +tight: true, toRead: ["a", "j") unionSpans: ["a", "j") + +# [b, j) and [a, c) +and result=bj-and-ac left=bj right=ac +---- +tight: true, toRead: ["a", "j") unionSpans: ["b", "c") +INTERSECTION +| ++-- tight: true, toRead: ["b", "j") unionSpans: ["c", "j") ++-- tight: true, toRead: ["a", "c") unionSpans: ["a", "b") + +# And of these expressions promotes the factored span [b, c) +and result=foo left=aj right=bj-and-ac +---- +tight: true, toRead: ["a", "j") unionSpans: ["b", "c") +INTERSECTION +| ++-- tight: true, toRead: ["a", "j") unionSpans: ["a", "b") ["c", "j") ++-- tight: true, toRead: ["a", "j") unionSpans: empty + INTERSECTION + | + +-- tight: true, toRead: ["b", "j") unionSpans: ["c", "j") + +--
tight: true, toRead: ["a", "c") unionSpans: ["a", "b") + +# Same parameters reversed +and result=foo left=bj-and-ac right=aj +---- +tight: true, toRead: ["a", "j") unionSpans: ["b", "c") +INTERSECTION +| ++-- tight: true, toRead: ["a", "j") unionSpans: empty +| INTERSECTION +| | +| +-- tight: true, toRead: ["b", "j") unionSpans: ["c", "j") +| +-- tight: true, toRead: ["a", "c") unionSpans: ["a", "b") ++-- tight: true, toRead: ["a", "j") unionSpans: ["a", "b") ["c", "j") + +# Or of these expressions causes the children of bj-and-ac to be +# promoted. +or result=bar left=aj right=bj-and-ac +---- +tight: true, toRead: ["a", "j") unionSpans: ["a", "j") +INTERSECTION +| ++-- tight: true, toRead: ["b", "j") unionSpans: ["c", "j") ++-- tight: true, toRead: ["a", "c") unionSpans: ["a", "b") + +and result=_ left=foo right=bar +---- +tight: true, toRead: ["a", "j") unionSpans: ["b", "c") +INTERSECTION +| ++-- tight: true, toRead: ["a", "j") unionSpans: empty +| INTERSECTION +| | +| +-- tight: true, toRead: ["a", "j") unionSpans: empty +| | INTERSECTION +| | | +| | +-- tight: true, toRead: ["b", "j") unionSpans: ["c", "j") +| | +-- tight: true, toRead: ["a", "c") unionSpans: ["a", "b") +| +-- tight: true, toRead: ["a", "j") unionSpans: ["a", "b") ["c", "j") ++-- tight: true, toRead: ["a", "j") unionSpans: ["a", "b") ["c", "j") + INTERSECTION + | + +-- tight: true, toRead: ["b", "j") unionSpans: ["c", "j") + +-- tight: true, toRead: ["a", "c") unionSpans: ["a", "b") + +or result=_ left=foo right=bar +---- +tight: true, toRead: ["a", "j") unionSpans: ["a", "j") +UNION +| ++-- tight: true, toRead: ["a", "j") unionSpans: empty +| INTERSECTION +| | +| +-- tight: true, toRead: ["a", "j") unionSpans: empty +| | INTERSECTION +| | | +| | +-- tight: true, toRead: ["b", "j") unionSpans: ["c", "j") +| | +-- tight: true, toRead: ["a", "c") unionSpans: ["a", "b") +| +-- tight: true, toRead: ["a", "j") unionSpans: ["a", "b") ["c", "j") ++-- tight: true, toRead: ["a", "j") unionSpans: empty 
+ INTERSECTION + | + +-- tight: true, toRead: ["b", "j") unionSpans: ["c", "j") + +-- tight: true, toRead: ["a", "c") unionSpans: ["a", "b")