From cfd4e375b05ea1aa1c9f634cca3074a70dc5344f Mon Sep 17 00:00:00 2001 From: Marcel van Lohuizen Date: Tue, 20 Feb 2024 17:35:26 +0100 Subject: [PATCH] internal/core/adt: first implementation of disjunctions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an incomplete implementation. The following mechanisms are still TODO: - cross product of disjunctions - inheriting tasks from "forked" nodeContexts (this is especially relevant for list values). - efficient deduping of incomplete disjuncts - efficient memory management - "quick check" fast paths The main algorithm is implemented in disjunct2.go and overlay.go. See a description of the algorithm in disjunct2.go. Tests are enabled in a follow-up CL. Issue #2884 Issue #2851 Signed-off-by: Marcel van Lohuizen Change-Id: Ibaa052678572302abd380530fc891fc9495c75cc Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1190800 Unity-Result: CUE porcuepine TryBot-Result: CUEcueckoo Reviewed-by: Daniel Martí --- cue/testdata/eval/insertion.txtar | 38 +- cue/testdata/eval/v0.7.txtar | 30 +- internal/core/adt/binop.go | 7 + internal/core/adt/composite.go | 2 +- internal/core/adt/conjunct.go | 83 +++- internal/core/adt/debug.go | 25 +- internal/core/adt/disjunct.go | 28 +- internal/core/adt/disjunct2.go | 726 ++++++++++++++++++++++++++++++ internal/core/adt/eval.go | 20 +- internal/core/adt/eval_test.go | 3 +- internal/core/adt/fields.go | 52 ++- internal/core/adt/overlay.go | 460 +++++++++++++++++++ internal/core/adt/sched.go | 18 + internal/core/adt/states.go | 7 + internal/core/adt/tasks.go | 21 +- internal/core/adt/unify.go | 34 +- 16 files changed, 1490 insertions(+), 64 deletions(-) create mode 100644 internal/core/adt/disjunct2.go create mode 100644 internal/core/adt/overlay.go diff --git a/cue/testdata/eval/insertion.txtar b/cue/testdata/eval/insertion.txtar index 71c7d479e8c..7e65c47c674 100644 --- a/cue/testdata/eval/insertion.txtar +++ b/cue/testdata/eval/insertion.txtar @@ -258,16 +258,15 @@ Disjuncts: 73 y: (int){ 5 } } v: (int){ 1 } - w: (int){ 2 } x: (int){ 2 } y: (int){ 5 } + w: (int){ 2 } } } recursive: (struct){ t1: (struct){ e: (struct){ f: (int){ 1 } - g: (int){ 1 } } c: (struct){ d: (struct){ @@ -292,7 +291,6 @@ Disjuncts: 73 } g: (int){ 1 } } - g: (int){ 1 } } } embeddedFunctionalExpr: (string){ @@ -308,19 +306,39 @@ Disjuncts: 73 diff old new --- old +++ new -@@ -47,10 +47,10 @@ +@@ -47,9 +47,9 @@ v: (int){ 1 } y: (int){ 5 } } -- x: (int){ 2 } -- y: (int){ 5 } - v: (int){ 1 } ++ v: (int){ 1 } + x: (int){ 2 } + y: (int){ 5 } +- v: (int){ 1 } w: (int){ 2 } -+ x: (int){ 2 } -+ y: (int){ 5 } } } - recursive: (struct){ +@@ -57,7 +57,6 @@ + t1: (struct){ + e: (struct){ + f: (int){ 1 } +- g: (int){ 1 } + } + c: (struct){ + d: (struct){ +@@ -82,7 +81,6 @@ + } + g: (int){ 1 } + } +- g: (int){ 1 } + } + } + embeddedFunctionalExpr: (string){ +-- diff/todo/p0 -- +Dropped fields: this is a result of registering for notification after +inserting conjuncts, instead of before it. This change was made to avoid +forcing adding a level of conjunction insertion unnecessarily. To fix this +we should uncouple adding notification from insertion. +See scheduleVertexConjuncts in conjuncts.go. -- diff/todo/p3 -- Reordering.
-- out/compile -- diff --git a/cue/testdata/eval/v0.7.txtar b/cue/testdata/eval/v0.7.txtar index 828e1740cff..d19d6557a92 100644 --- a/cue/testdata/eval/v0.7.txtar +++ b/cue/testdata/eval/v0.7.txtar @@ -913,7 +913,6 @@ Result: } e: (struct){ f: (int){ 1 } - g: (int){ 1 } } } } @@ -925,8 +924,8 @@ Result: a: (int){ 1 } b: (int){ 2 } } - b: (int){ 2 } a: (int){ 1 } + b: (int){ 2 } } x: (struct){ b: (struct){ @@ -1025,8 +1024,8 @@ Result: a: (int){ 1 } b: (int){ 2 } } - b: (int){ 2 } a: (int){ 1 } + b: (int){ 2 } } x: (struct){ b: (struct){ @@ -1220,10 +1219,14 @@ diff old new t0: (struct){ ok: (struct){ c: (struct){ -@@ -264,134 +248,130 @@ +@@ -260,138 +244,133 @@ } - } - } + e: (struct){ + f: (int){ 1 } +- g: (int){ 1 } +- } +- } +- } - t4: (_|_){ - // [eval] - ok: (_|_){ @@ -1352,6 +1355,9 @@ diff old new - y: (struct){ - c: (struct){ - a: (int){ 1 } ++ } ++ } ++ } + t4: (struct){ + ok: (struct){ + p0: (struct){ @@ -1360,8 +1366,8 @@ diff old new + a: (int){ 1 } + b: (int){ 2 } + } -+ b: (int){ 2 } + a: (int){ 1 } ++ b: (int){ 2 } + } + x: (struct){ + b: (struct){ @@ -1460,8 +1466,8 @@ diff old new + a: (int){ 1 } + b: (int){ 2 } + } -+ b: (int){ 2 } + a: (int){ 1 } ++ b: (int){ 2 } + } + x: (struct){ + b: (struct){ @@ -1479,7 +1485,7 @@ diff old new } } } -@@ -401,23 +381,23 @@ +@@ -401,23 +380,23 @@ d: (struct){ b: (int){ 2 } } @@ -1522,6 +1528,12 @@ diff old new check: (struct){ -- diff/explanation -- Bug fixes +-- diff/todo/p0 -- +Dropped fields: this is a result of registering for notification after +inserting conjuncts, instead of before it. This change was made to avoid +forcing adding a level of conjunction insertion unnecessarily. To fix this +we should uncouple adding notification from insertion. +See scheduleVertexConjuncts in conjuncts.go. -- diff/todo/p2 -- Reordering / positions. -- out/compile -- diff --git a/internal/core/adt/binop.go b/internal/core/adt/binop.go index 1837e7c99e4..fefcd48838d 100644 --- a/internal/core/adt/binop.go +++ b/internal/core/adt/binop.go @@ -208,6 +208,13 @@ func BinOp(c *OpContext, op Op, left, right Value) Value { n := c.newInlineVertex(nil, nil, MakeConjunct(c.Env(0), list, c.ci)) n.CompleteArcs(c) + // NOTE: if we set isData to true, whoever processes the result will + // avoid having to process the expressions again. This improves + // performance. It also change the a potential cycle error message + // to a more concrete messages as if this result was unified as is. + // TODO: uncomment this and see if we like the result. 
+ // n.isData = true + return n } diff --git a/internal/core/adt/composite.go b/internal/core/adt/composite.go index 76761bf5509..39ea2d3e8da 100644 --- a/internal/core/adt/composite.go +++ b/internal/core/adt/composite.go @@ -249,7 +249,7 @@ type Vertex struct { func (v *Vertex) rootCloseContext() *closeContext { if v.cc == nil { v.cc = &closeContext{ - group: &ConjunctGroup{}, + group: (*ConjunctGroup)(&v.Conjuncts), parent: nil, src: v, parentConjuncts: v, diff --git a/internal/core/adt/conjunct.go b/internal/core/adt/conjunct.go index 816dce93928..ecaedcff77c 100644 --- a/internal/core/adt/conjunct.go +++ b/internal/core/adt/conjunct.go @@ -66,7 +66,7 @@ func (n *nodeContext) scheduleConjunct(c Conjunct, id CloseInfo) { id.cc = n.node.rootCloseContext() } if id.cc == cc { - panic("inconsistent state") + panic("inconsistent state: same closeContext") } var t closeNodeType if c.CloseInfo.FromDef { @@ -84,7 +84,7 @@ func (n *nodeContext) scheduleConjunct(c Conjunct, id CloseInfo) { } if id.cc.src != n.node { - panic("inconsistent state") + panic("inconsistent state: nodes differ") } default: @@ -163,8 +163,22 @@ func (n *nodeContext) scheduleConjunct(c Conjunct, id CloseInfo) { n.scheduleTask(handleListLit, env, x, id) case *DisjunctionExpr: - panic("unimplemented") - // n.addDisjunction(env, x, id) + // TODO(perf): reuse envDisjunct values so that we can also reuse the + // disjunct slice. + d := envDisjunct{ + env: env, + cloneID: id, + src: x, + expr: x, + } + for _, dv := range x.Values { + d.disjuncts = append(d.disjuncts, disjunct{ + expr: dv.Val, + isDefault: dv.Default, + mode: mode(x.HasDefaults, dv.Default), + }) + } + n.scheduleDisjunction(d) case *Comprehension: // always a partial comprehension. @@ -351,21 +365,39 @@ func (n *nodeContext) scheduleVertexConjuncts(c Conjunct, arc *Vertex, closeInfo defer dc.decDependent(n.ctx, DEFER, nil) } - if state := arc.getState(n.ctx); state != nil { - state.addNotify2(n.node, closeInfo) - } - - for i := 0; i < len(arc.Conjuncts); i++ { - c := arc.Conjuncts[i] - - // Note that we are resetting the tree here. We hereby assume that - // closedness conflicts resulting from unifying the referenced arc were - // already caught there and that we can ignore further errors here. - // c.CloseInfo = closeInfo + // TODO: need to split initialization phases to allow only initializing + // notifications without evaluating the node. This will be done later. + // + // if !n.node.nonRooted || n.node.IsDynamic { + // if state := arc.getState(n.ctx); state != nil { + // // n.completeNodeTasks() + // } + // } + + if d, ok := arc.BaseValue.(*Disjunction); ok && false { + n.scheduleConjunct(MakeConjunct(c.Env, d, closeInfo), closeInfo) + } else { + for i := 0; i < len(arc.Conjuncts); i++ { + c := arc.Conjuncts[i] + + // Note that we are resetting the tree here. We hereby assume that + // closedness conflicts resulting from unifying the referenced arc were + // already caught there and that we can ignore further errors here. + // c.CloseInfo = closeInfo + + // We can use the original, but we know it will not be used + + n.scheduleConjunct(c, closeInfo) + } - // We can use the original, but we know it will not be used + } - n.scheduleConjunct(c, closeInfo) + if !n.node.nonRooted || n.node.IsDynamic { + // TODO: Make a distinction between initializing state and just allowing + // notifications to be registered. 
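// Registering for notifications only after the conjuncts above have been
// scheduled avoids forcing an extra level of conjunct insertion, but it is
// also the suspected cause of the dropped fields recorded under
// diff/todo/p0 in the updated testdata.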
+ if state := arc.getState(n.ctx); state != nil { + state.addNotify2(n.node, closeInfo) + } } } @@ -482,7 +514,22 @@ func (n *nodeContext) insertValueConjunct(env *Environment, v Value, id CloseInf switch x := v.(type) { case *Disjunction: - n.addDisjunctionValue(env, x, id) + // TODO(perf): reuse envDisjunct values so that we can also reuse the + // disjunct slice. + d := envDisjunct{ + env: env, + cloneID: id, + src: x, + value: x, + } + for i, dv := range x.Values { + d.disjuncts = append(d.disjuncts, disjunct{ + expr: dv, + isDefault: i < x.NumDefaults, + mode: mode(x.HasDefaults, i < x.NumDefaults), + }) + } + n.scheduleDisjunction(d) case *Conjunction: for _, x := range x.Values { diff --git a/internal/core/adt/debug.go b/internal/core/adt/debug.go index 54ad8ffb37b..94ce223a70c 100644 --- a/internal/core/adt/debug.go +++ b/internal/core/adt/debug.go @@ -142,6 +142,9 @@ const ( // TASK dependencies are used to track the completion of a task. TASK + // DISJUNCT is used to mark an incomplete disjunct. + DISJUNCT + // EVAL tracks that the conjunct associated with a closeContext has been // inserted using scheduleConjunct. A closeContext may not be deleted // as long as the conjunct has not been evaluated yet. @@ -175,6 +178,8 @@ func (k depKind) String() string { return "NOTIFY" case TASK: return "TASK" + case DISJUNCT: + return "DISJUNCT" case EVAL: return "EVAL" case ROOT: @@ -470,7 +475,7 @@ func (m *mermaidContext) task(d *ccDep) string { fmt.Fprintf(vc.tasks, "subgraph %s_tasks[tasks]\n", m.vertexID(v)) } - if v != d.task.node.node { + if d.task != nil && v != d.task.node.node { panic("inconsistent task") } taskID := fmt.Sprintf("%s_%d", m.vertexID(v), d.taskID) @@ -539,14 +544,17 @@ func (m *mermaidContext) cc(cc *closeContext) { return } fallthrough - case ARC, NOTIFY: + case ARC, NOTIFY, DISJUNCT: w = global indentLevel = 1 name = m.pstr(d.dependency) case TASK: w = node - taskID := m.task(d) + taskID := "disjunct" + if d.task != nil { + taskID = m.task(d) + } name = fmt.Sprintf("%s((%d))", taskID, d.taskID) case ROOT, INIT: w = node @@ -614,7 +622,7 @@ func (m *mermaidContext) pstr(cc *closeContext) string { w.WriteString(open) w.WriteString("cc") if cc.conjunctCount > 0 { - fmt.Fprintf(w, " c:%d", cc.conjunctCount) + fmt.Fprintf(w, " c:%d: d:%d", cc.conjunctCount, cc.disjunctCount) } indentOnNewline(w, 3) w.WriteString(ptr) @@ -634,7 +642,14 @@ func (m *mermaidContext) pstr(cc *closeContext) string { w.WriteString(close) - if cc.conjunctCount > 0 { + switch { + case cc.conjunctCount == 0: + case cc.conjunctCount <= cc.disjunctCount: + // TODO: Extra checks for disjunctions? + // E.g.: cc.src is not a disjunction + default: + // If cc.conjunctCount > cc.disjunctCount. + // TODO: count the number of non-decremented DISJUNCT dependencies. fmt.Fprintf(w, ":::err") if cc.src == m.v { m.hasError = true diff --git a/internal/core/adt/disjunct.go b/internal/core/adt/disjunct.go index 5f2ca85efcb..42392d10da7 100644 --- a/internal/core/adt/disjunct.go +++ b/internal/core/adt/disjunct.go @@ -83,8 +83,16 @@ import ( // - So only need to check exact labels for vertices. 
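// scheduleConjunct and insertValueConjunct above expand each disjunction
// into per-disjunct entries whose defaultMode is derived via the mode(...)
// helper. As a rough sketch of the intended mapping (hypothetical code; the
// real helper may differ): a disjunct is marked as a default only when the
// disjunction has defaults and the disjunct itself carries the marker.

func modeSketch(hasDefaults, marked bool) defaultMode {
	switch {
	case !hasDefaults:
		return maybeDefault // no defaults at all: any disjunct may still act as one
	case marked:
		return isDefault // marked disjunct of a defaulted disjunction
	default:
		return notDefault // unmarked disjunct of a defaulted disjunction
	}
}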
type envDisjunct struct { - env *Environment - cloneID CloseInfo + env *Environment + cloneID CloseInfo + + // fields for new evaluator + + src Node + disjuncts []disjunct + + // fields for old evaluator + expr *DisjunctionExpr value *Disjunction hasDefaults bool @@ -108,13 +116,21 @@ func (n *nodeContext) addDisjunction(env *Environment, x *DisjunctionExpr, clone } } - n.disjunctions = append(n.disjunctions, - envDisjunct{env, cloneID, x, nil, numDefaults > 0, false, false}) + n.disjunctions = append(n.disjunctions, envDisjunct{ + env: env, + cloneID: cloneID, + expr: x, + hasDefaults: numDefaults > 0, + }) } func (n *nodeContext) addDisjunctionValue(env *Environment, x *Disjunction, cloneID CloseInfo) { - n.disjunctions = append(n.disjunctions, - envDisjunct{env, cloneID, nil, x, x.HasDefaults, false, false}) + n.disjunctions = append(n.disjunctions, envDisjunct{ + env: env, + cloneID: cloneID, + value: x, + hasDefaults: x.HasDefaults, + }) } diff --git a/internal/core/adt/disjunct2.go b/internal/core/adt/disjunct2.go new file mode 100644 index 00000000000..1ed0d443f05 --- /dev/null +++ b/internal/core/adt/disjunct2.go @@ -0,0 +1,726 @@ +// Copyright 2024 CUE Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package adt + +// # Overview +// +// This files contains the disjunction algorithm of the CUE evaluator. It works +// in unison with the code in overlay.go. +// +// In principle, evaluating disjunctions is a matter of unifying each disjunct +// with the non-disjunct values, eliminate those that fail and see what is left. +// In case of multiple disjunctions it is a simple cross product of disjuncts. +// The key is how to do this efficiently. +// +// # Classification of disjunction performance +// +// The key to an efficient disjunction algorithm is to minimize the impact of +// taking cross product of disjunctions. This is especially pertinent if +// disjunction expressions can be unified with themselves, as can be the case in +// recursive definitions, as this can lead to exponential time complexity. +// +// We identify the following categories of importance for performance +// optimization: +// +// - Eliminate duplicates +// - For completed disjunctions +// - For partially computed disjuncts +// - Fail early / minimize work before failure +// - Filter disjuncts before unification (TODO) +// - Based on discriminator field +// - Based on a non-destructive unification of the disjunct and +// the current value computed so far +// - During the regular destructive unification +// - Traverse arcs where failure may occur +// - Copy on write (TODO) +// +// We discuss these aspects in more detail below. +// +// # Eliminating completed duplicates +// +// Eliminating completed duplicates can be achieved by comparing them for +// equality. A disjunct can only be considered completed if all disjuncts have +// been selected and evaluated, or at any time if processing fails. 
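// The details of what such an equality check must consider follow below.
// As a minimal sketch of how completed duplicates could then be filtered
// (hypothetical helper; the code in this file uses Equal with
// CheckStructural for finalized nodes in appendDisjunct):

func dedupFinalized(ctx *OpContext, results []*Vertex) []*Vertex {
	unique := results[:0]
outer:
	for _, r := range results {
		for _, u := range unique {
			if Equal(ctx, u, r, CheckStructural) {
				continue outer // structurally equal: drop the duplicate
			}
		}
		unique = append(unique, r)
	}
	return unique
}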
+// +// The following values should be recursively considered for equality: +// +// - the value of the node, +// - the value of its arcs, +// - the key and value of the pattern constraints, and +// - the expression of the allowed fields. +// +// In some of these cases it may not be possible to detect if two nodes are +// equal. For instance, two pattern constraints with two different regular +// expressions as patterns, but that define an identical language, should be +// considered equal. In practice, however, this is hard to distinguish. +// +// In the end this is mostly a matter of performance. As we noted, the biggest +// concern is to avoid a combinatorial explosion when disjunctions are unified +// with itself. The hope is that we can at least catch these cases, either +// because they will evaluate to the same values, or because we can identify +// that the underlying expressions are the same, or both. +// +// # Eliminating partially-computed duplicates +// +// We start with some observations and issues regarding partially evaluated +// nodes. +// +// ## Issue: Closedness +// +// Two identical CUE values with identical field, values, and pattern +// constraints, may still need to be consider as different, as they may exhibit +// different closedness behavior. Consider, for instance, this example: +// +// #def: { +// {} | {c: string} // D1 +// {} | {a: string} // D2 +// } +// x: #def +// x: c: "foo" +// +// Now, consider the case of the cross product that unifies the two empty +// structs for `x`. Note that `x` already has a field `c`. After unifying the +// first disjunction with `x`, both intermediate disjuncts will have the value +// `{c: "foo"}`: +// +// {c: "foo"} & ({} | {c: string}) +// => +// {c: "foo"} | {c: "foo"} +// +// One would think that one of these disjuncts can be eliminated. Nonetheless, +// there is a difference. The second disjunct, which resulted from unifying +// `{c: "foo"}` with `{c: string}`, will remain valid. The first disjunct, +// however, will fail after it is unified and completed with the `{}` of the +// second disjunctions (D2): only at this point is it known that x was unified +// with an empty closed struct, and that field `c` needs to be rejected. +// +// One possible solution would be to fully compute the cross product of `#def` +// and use this expanded disjunction for unification, as this would mean that +// full knowledge of closedness information is available. +// +// Although this is possible in some cases and can be a useful performance +// optimization, it is not always possible to use the fully evaluated disjuncts +// in such a precomputed cross product. For instance, if a disjunction relies on +// a comprehension or a default value, it is not possible to fully evaluate the +// disjunction, as merging it with another value may change the inputs for such +// expressions later on. This means that we can only rely on partial evaluation +// in some cases. +// +// ## Issue: Outstanding tasks in partial results +// +// Some tasks may not be completed until all conjuncts are known. For cross +// products of disjunctions this may mean that such tasks cannot be completed +// until all cross products are done. For instance, it is typically not possible +// to evaluate a tasks that relies on taking a default value that may change as +// more disjuncts are added. A similar argument holds for comprehensions on +// values that may still be changed as more disjunctions come in. 
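// To make the closedness example above concrete, the following standalone
// program (public cue API, not part of this change) is expected to accept
// `c`, because the disjuncts in which D1 contributes {c: string} survive,
// and to reject a field that no disjunct allows:

package main

import (
	"fmt"

	"cuelang.org/go/cue/cuecontext"
)

func main() {
	const base = `
		#def: {
			{} | {c: string} // D1
			{} | {a: string} // D2
		}
		x: #def
	`
	ctx := cuecontext.New()

	ok := ctx.CompileString(base + `x: c: "foo"`)
	fmt.Println(ok.Validate()) // <nil>: at least one disjunct accepts c

	bad := ctx.CompileString(base + `x: d: "bar"`)
	fmt.Println(bad.Validate() != nil) // true: no disjunct allows d
}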
+// +// ## Evaluating equality of partially evaluated nodes +// +// Because unevaluated expressions may depend on results that have yet been +// computed, we cannot reliably compare the results of a Vertex to determine +// equality. We need a different strategy. +// +// The strategy we take is based on the observation that at the start of a cross +// product, the base conjunct is the same for all disjuncts. We can factor these +// inputs out and focus on the differences between the disjuncts. In other +// words, we can focus solely on the differences that manifest at the insertion +// points (or "disjunction holes") of the disjuncts. +// +// In short, two disjuncts are equal if: +// +// 1. the disjunction holes that were already processed are equal, and +// 2. they have either no outstanding tasks, or the outstanding tasks are equal +// +// Coincidentally, analyzing the differences as discussed in this section is +// very similar in nature to precomputing a disjunct and using that. The main +// difference is that we potentially have more information to prematurely +// evaluate expressions and thus to prematurely filter values. For instance, the +// mixed in value may have fixed a value that previously was not fixed. This +// means that any expression referencing this value may be evaluated early and +// can cause a disjunct to fail and be eliminated earlier. +// +// A disadvantage of this approach, however, is that it is not fully precise: it +// may not filter some disjuncts that are logically identical. There are +// strategies to further optimize this. For instance, if all remaining holes do +// not contribute to closedness, which can be determined by walking up the +// closedness parent chain, we may be able to safely filter disjuncts with equal +// results. +// +// # Invariants +// +// We use the following assumptions in the below implementation: +// +// - No more conjuncts are added to a disjunct after its processing begins. +// If a disjunction results in a value that causes more fields to be added +// later, this may not influence the result of the disjunction, i.e., those +// changes must be idempotent. +// - TODO: consider if any other assumptions are made. +// +// # Algorithm +// +// The evaluator accumulates all disjuncts of a Vertex in the nodeContext along +// with the closeContext at which each was defined. A single task is scheduled +// to process them all at once upon the first encounter of a disjunction. +// +// The algorithm is as follows: +// - Initialize the current Vertex n with the result evaluated so far as a +// list of "previous disjuncts". +// - Iterate over each disjunction +// - For each previous disjunct x +// - For each disjunct y in the current disjunctions +// - Unify +// - Discard if error, store in the list of current disjunctions if +// it differs from all other disjunctions in this list. +// - Set n to the result of the disjunction. +// +// This algorithm is recursive: if a disjunction is encountered in a disjunct, +// it is processed as part of the evaluation of that disjunct. +// + +// A disjunct is the expanded form of the disjuncts of either an Disjunction or +// a DisjunctionExpr. +// +// TODO(perf): encode ADT structures in the correct form so that we do not have to +// compute these each time. +type disjunct struct { + expr Expr + err *Bottom + + isDefault bool + mode defaultMode +} + +// disjunctHole associates a closeContext copy representing a disjunct hole with +// the underlying closeContext from which it originally was branched. 
+// We could include this information in the closeContext itself, but since this +// is relatively rare, we keep it separate to avoid bloating the closeContext. +type disjunctHole struct { + cc *closeContext + underlying *closeContext +} + +func (n *nodeContext) scheduleDisjunction(d envDisjunct) { + if len(n.disjunctions) == 0 { + // This processes all disjunctions in a single pass. + n.scheduleTask(handleDisjunctions, nil, nil, CloseInfo{}) + } + + // ccHole is the closeContext in which the individual disjuncts are + // scheduled. + ccHole := d.cloneID.cc + + // This counter can be decremented after either a disjunct has been + // scheduled in the clone. Note that it will not be closed in the original + // as the result will either be an error, a single disjunct, in which + // case mergeVertex will override the original value, or multiple disjuncts, + // in which case the original is set to the disjunct itself. + ccHole.incDisjunct(DISJUNCT) + + n.disjunctions = append(n.disjunctions, d) + + n.disjunctCCs = append(n.disjunctCCs, disjunctHole{ + cc: ccHole, // this value is cloned in doDisjunct. + underlying: ccHole, + }) +} + +func (n *nodeContext) processDisjunctions() *Bottom { + defer func() { + // TODO: + // Clear the buffers. + // TODO: we may want to retain history of which disjunctions were + // processed. In that case we can set a disjunction position to end + // of the list and schedule new tasks if this position equals the + // disjunction list length. + }() + + a := n.disjunctions + n.disjunctions = n.disjunctions[:0] + + // TODO(perf): single pass for quick filter on all disjunctions. + + // Initially we compute the cross product of a disjunction with the + // nodeContext as it is processed so far. + cross := []*nodeContext{n} + results := []*nodeContext{} // TODO: use n.disjuncts as buffer. + + // Slow path for processing all disjunctions. Do not use `range` in case + // evaluation adds more disjunctions. + for i := 0; i < len(a); i++ { + d := &a[i] + + mode := attemptOnly + if i == len(a)-1 { + mode = finalize + } + results = n.crossProduct(results, cross, d, mode) + + // TODO: do we unwind only at the end or also intermittently? + switch len(results) { + case 0: + // TODO: now we have disjunct counters, do we plug holes at all? + + // We add a "top" value to disable closedness checking for this + // disjunction to avoid a spurious "field not allowed" error. + // We return the errors below, which will, in turn, be reported as + // the error. + // TODO: probably no longer needed: + for i++; i < len(a); i++ { + c := MakeConjunct(d.env, top, a[i].cloneID) + n.scheduleConjunct(c, d.cloneID) + } + + // Empty intermediate result. Further processing will not result in + // any new result, so we can terminate here. + // TODO(errors): investigate remaining disjunctions for errors. + return n.collectErrors(d) + + case 1: + // TODO: consider injecting the disjuncts into the main nodeContext + // here. This would allow other values that this disjunctions + // depends on to be evaluated. However, we should investigate + // whether this might lead to a situation where the order of + // evaluating disjunctions matters. So to be safe, we do not allow + // this for now. + } + + // switch up buffers. 
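// The results of this disjunction become the input ("cross") set for the
// next disjunction, and the old cross slice is truncated and reused as the
// results buffer to avoid reallocating it on every iteration.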
+ cross, results = results, cross[:0] + } + + switch len(cross) { + case 0: + panic("unreachable: empty disjunction already handled above") + + case 1: + d := cross[0].node + n.node.BaseValue = d + + default: + // append, rather than assign, to allow reusing the memory of + // a pre-existing slice. + n.disjuncts = append(n.disjuncts, cross...) + } + + return nil +} + +// crossProduct computes the cross product of the disjuncts of a disjunction +// with an existing set of results. +func (n *nodeContext) crossProduct(dst, cross []*nodeContext, dn *envDisjunct, mode runMode) []*nodeContext { + + for _, p := range cross { + for j, d := range dn.disjuncts { + c := MakeConjunct(dn.env, d.expr, dn.cloneID) + r, err := p.doDisjunct(c, d.mode, mode) + + if err != nil { + // TODO: store more error context + dn.disjuncts[j].err = err + continue + } + + // Unroll nested disjunctions. + switch len(r.disjuncts) { + case 0: + // r did not have a nested disjunction. + dst = appendDisjunct(n.ctx, dst, r) + + case 1: + panic("unexpected number of disjuncts") + + default: + for _, x := range r.disjuncts { + dst = appendDisjunct(n.ctx, dst, x) + } + } + } + } + return dst +} + +// collectErrors collects errors from a failed disjunctions. +func (n *nodeContext) collectErrors(dn *envDisjunct) (errs *Bottom) { + for _, d := range dn.disjuncts { + if d.err != nil { + errs = CombineErrors(dn.src.Source(), errs, d.err) + } + } + return errs +} + +func (n *nodeContext) doDisjunct(c Conjunct, m defaultMode, mode runMode) (*nodeContext, *Bottom) { + if c.CloseInfo.cc == nil { + panic("nil closeContext during init") + } + n.ctx.stats.Disjuncts++ + + oc := &overlayContext{ctx: n.ctx} + + var ccHole *closeContext + + // TODO(perf): resuse buffer, for instance by keeping a buffer handy in oc + // and then swapping it with disjunctCCs in the new nodeContext. + holes := make([]disjunctHole, 0, len(n.disjunctCCs)) + + // Clone the closeContexts of all open disjunctions and dependencies. + for _, d := range n.disjunctCCs { + // TODO: remove filled holes. + + // Note that the root is already cloned as part of cloneVertex and that + // a closeContext corresponding to a disjunction always has a parent. + // We therefore do not need to check whether x.parent is nil. + o := oc.allocCC(d.cc) + if c.CloseInfo.cc == d.underlying { + ccHole = o + } + holes = append(holes, disjunctHole{o, d.underlying}) + } + + if ccHole == nil { + panic("expected non-nil overlay closeContext") + } + + n.scheduler.blocking = n.scheduler.blocking[:0] + + d := oc.cloneRoot(n) + + v := d.node + + // Clear relevant scheduler states. + // TODO: do something more principled: just ensure that a node that has + // not all holes filled out yet is not finalized. This may require + // a special mode, or evaluating more aggressively if finalize is not given. + v.status = unprocessed + v.state.scheduler.completed &^= subFieldsProcessed + v.state.scheduler.frozen &^= subFieldsProcessed + + d.overlays = n + d.disjunctCCs = append(d.disjunctCCs, holes...) 
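// The clone inherits every hole that has not been filled yet, so that
// disjuncts of any remaining disjunctions are scheduled into the overlay
// rather than into the original node.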
+ d.defaultMode = combineDefault(m, n.defaultMode) + d.disjunct = c + c.CloseInfo.cc = ccHole + d.scheduleConjunct(c, c.CloseInfo) + ccHole.decDisjunct(n.ctx, DISJUNCT) + + oc.unlinkOverlay() + + v.unify(n.ctx, allKnown, mode) + + if err := d.getError(); err != nil && !isCyclePlaceholder(err) { + d.free() + return nil, err + } + + return d, nil +} + +func (n *nodeContext) finalizeDisjunctions() { + if len(n.disjuncts) == 0 { + return + } + + a := make([]Value, len(n.disjuncts)) + p := 0 + hasDefaults := false + for i, x := range n.disjuncts { + switch x.defaultMode { + case isDefault: + a[i] = a[p] + a[p] = x.node + p++ + hasDefaults = true + + case notDefault: + hasDefaults = true + fallthrough + case maybeDefault: + a[i] = x.node + } + } + + d := &Disjunction{ + Values: a, + NumDefaults: p, + HasDefaults: hasDefaults, + } + + v := n.node + v.BaseValue = d + + // The conjuncts will have too much information. Better have no + // information than incorrect information. + v.Arcs = nil + v.ChildErrors = nil +} + +func (n *nodeContext) getError() *Bottom { + if b, ok := n.node.BaseValue.(*Bottom); ok { + return b + } + if n.node.ChildErrors != nil { + return n.node.ChildErrors + } + if errs := n.errs; errs != nil { + return errs + } + if n.ctx.errs != nil { + return n.ctx.errs + } + return nil +} + +// appendDisjunct appends a disjunct x to a, if it is not a duplicate. +func appendDisjunct(ctx *OpContext, a []*nodeContext, x *nodeContext) []*nodeContext { + if x == nil { + return a + } + + nv := x.node.Indirect() + nx := nv.BaseValue + if nx == nil || isCyclePlaceholder(nx) { + nx = x.getValidators(finalized) + } + + // check uniqueness + // TODO: if a node is not finalized, we could check that the parent + // (overlayed) closeContexts are identical. +outer: + for _, xn := range a { + xv := xn.node.Indirect() + if xv.status != finalized || nv.status != finalized { + // Partial node + + // TODO: we could consider supporting an option here to disable + // the filter. This way, if there is a bug, users could disable + // it, trading correctness for performance. + // If enabled, we would simply "continue" here. + + for i, h := range xn.disjunctCCs { // TODO(perf): only iterate over completed + x, y := findIntersections(h.cc, x.disjunctCCs[i].cc) + if !equalPartialNode(xn.ctx, x, y) { + continue outer + } + } + if len(xn.tasks) != len(x.tasks) { + continue + } + for i, t := range xn.tasks { + s := x.tasks[i] + if s.x != t.x || s.id.cc != t.id.cc { + continue outer + } + } + vx, okx := nx.(Value) + ny := xv.BaseValue + if ny == nil || isCyclePlaceholder(ny) { + ny = x.getValidators(finalized) + } + vy, oky := ny.(Value) + if okx && oky && !Equal(ctx, vx, vy, CheckStructural) { + continue outer + + } + } else { + // Complete nodes. + if !Equal(ctx, xn.node.Indirect(), x.node.Indirect(), CheckStructural) { + continue outer + } + } + + // free vertex + if x.defaultMode == isDefault { + xn.defaultMode = isDefault + } + x.free() + return a + } + + return append(a, x) +} + +// isPartialNode reports whether a node must be evaluated as a partial node. +func isPartialNode(d *nodeContext) bool { + if d.node.status == finalized { + return true + } + // TODO: further optimizations + return false +} + +// findIntersections reports the closeContext, relative to the two given +// disjunction holds, that should be used in comparing the arc set. +// x and y MUST both be originating from the same disjunct hole. 
This ensures +// that the depth of the parent chain is the same and that they have the +// same underlying closeContext. +// +// Currently, we just take the parent. We should investigate if that is always +// sufficient. +// +// Tradeoffs: if we do not go up enough, the two nodes may not be equal and we +// miss the opportunity to filter. On the other hand, if we go up too far, we +// end up comparing more arcs than potentially necessary. +func findIntersections(x, y *closeContext) (cx, cy *closeContext) { + cx = x.parent + cy = y.parent + + return cx, cy +} + +func equalPartialNode(ctx *OpContext, x, y *closeContext) bool { + nx := x.src.getState(ctx) + ny := y.src.getState(ctx) + + if nx == nil && ny == nil { + // Both nodes were finalized. We can compare them directly. + return Equal(ctx, x.src, y.src, CheckStructural) + } + + // TODO: process the nodes with allKnown, attemptOnly. + + if nx == nil || ny == nil { + return false + } + + if !isEqualNodeValue(nx, ny) { + return false + } + + if len(x.Patterns) != len(y.Patterns) { + return false + } + // Assume patterns are in the same order. + for i, p := range x.Patterns { + if !Equal(ctx, p, y.Patterns[i], 0) { + return false + } + } + + if !Equal(ctx, x.Expr, y.Expr, 0) { + return false + } + + if len(x.arcs) != len(y.arcs) { + return false + } + + // TODO(perf): use merge sort +outer: + for _, a := range x.arcs { + if a.kind != ARC { + continue outer + } + for _, b := range y.arcs { + if b.kind != ARC { + continue + } + if a.key.src.Label != b.key.src.Label { + continue + } + if !equalPartialNode(ctx, a.cc, b.cc) { + return false + } + continue outer + } + return false + } + return true +} + +// isEqualNodeValue reports whether the two nodes are of the same type and have +// the same value. +// +// TODO: this could be done much more cleanly if we are more deligent in early +// evaluation. +func isEqualNodeValue(x, y *nodeContext) bool { + xk := x.kind + yk := y.kind + + // If a node is mid evaluation, the kind might not be actual if the type is + // a struct, as whether a struct is a struct kind or an embedded type is + // determined later. This is just a limitation of the current + // implementation, we should update the kind more directly so that this code + // is not necessary. + // TODO: verify that this is still necessary and if so fix it so that this + // can be removed. + if x.aStruct != nil { + xk &= StructKind + } + if y.aStruct != nil { + yk &= StructKind + } + + if xk != yk { + return false + } + if x.hasTop != y.hasTop { + return false + } + if !isEqualBaseValue(x.ctx, x.scalar, y.scalar) { + return false + } + + // Do some quick checks first. + if len(x.checks) != len(y.checks) { + return false + } + if len(x.tasks) != len(y.tasks) { + return false + } + + if !isEqualBaseValue(x.ctx, x.lowerBound, y.lowerBound) { + return false + } + if !isEqualBaseValue(x.ctx, x.upperBound, y.upperBound) { + return false + } + + // Assume that checks are added in the same order. + for i, c := range x.checks { + d := y.checks[i] + if !Equal(x.ctx, c, d, CheckStructural) { + return false + } + } + + for i, t := range x.tasks { + s := y.tasks[i] + if s.x != t.x { + return false + } + if s.id.cc != t.id.cc { + // FIXME: we should compare this too. For this to work we need to + // have access to the underlying closeContext, which we do not + // have at the moment. 
+ // return false + } + } + + return true +} + +func isEqualBaseValue(ctx *OpContext, x, y BaseValue) bool { + if x == y { + return true + } + xv, _ := x.(Value) + yv, _ := y.(Value) + if xv == nil || yv == nil { + return false + } + + return Equal(ctx, xv, yv, CheckStructural) +} diff --git a/internal/core/adt/eval.go b/internal/core/adt/eval.go index 7d04633b897..3363b8d0f56 100644 --- a/internal/core/adt/eval.go +++ b/internal/core/adt/eval.go @@ -131,7 +131,7 @@ func (c *OpContext) evaluate(v *Vertex, r Resolver, state combinedFlags) Value { return nil } - if v.status < finalized && v.state != nil { + if v.status < finalized && v.state != nil && !c.isDevVersion() { // TODO: errors are slightly better if we always add addNotify, but // in this case it is less likely to cause a performance penalty. // See https://cuelang.org/issue/661. It may be possible to @@ -971,6 +971,10 @@ type nodeContext struct { // for source-level debuggers. node *Vertex + // overlays is set if this node is the root of a disjunct created in + // doDisjunct. It points to the direct parent nodeContext. + overlays *nodeContext + nodeContextState scheduler @@ -1006,15 +1010,28 @@ type nodeContext struct { // Disjunction handling disjunctions []envDisjunct + // disjunctCCs holds the close context that represent "holes" in which + // pending disjuncts are to be inserted for the clone represented by this + // nodeContext. Holes that are not yet filled will always need to be cloned + // when a disjunction branches in doDisjunct. + // + // Holes may accumulate as nested disjunctions get added and filled holes + // may be removed. So the list of disjunctCCs may differ from the number + // of disjunctions. + disjunctCCs []disjunctHole + // usedDefault indicates the for each of possibly multiple parent // disjunctions whether it is unified with a default disjunct or not. // This is then later used to determine whether a disjunction should // be treated as a marked disjunction. usedDefault []defaultInfo + // disjuncts holds disjuncts that evaluated to a non-bottom value. + // TODO: come up with a better name. disjuncts []*nodeContext buffer []*nodeContext disjunctErrs []*Bottom + disjunct Conjunct // snapshot holds the last value of the vertex before calling postDisjunct. snapshot Vertex @@ -1169,6 +1186,7 @@ func (c *OpContext) newNodeContext(node *Vertex) *nodeContext { vLists: n.vLists[:0], exprs: n.exprs[:0], disjunctions: n.disjunctions[:0], + disjunctCCs: n.disjunctCCs[:0], usedDefault: n.usedDefault[:0], disjunctErrs: n.disjunctErrs[:0], disjuncts: n.disjuncts[:0], diff --git a/internal/core/adt/eval_test.go b/internal/core/adt/eval_test.go index 4c0a214af27..97027c59f7f 100644 --- a/internal/core/adt/eval_test.go +++ b/internal/core/adt/eval_test.go @@ -134,8 +134,7 @@ func skipFiles(a ...*ast.File) (reason string) { func runEvalTest(t *cuetxtar.Test, version internal.EvaluatorVersion) (errorCount int) { a := t.Instance() - // TODO: use version once we implement disjunctions. - r := runtime.NewVersioned(internal.DefaultVersion) + r := runtime.NewVersioned(version) v, err := r.Build(nil, a) if err != nil { diff --git a/internal/core/adt/fields.go b/internal/core/adt/fields.go index 2041ba5a27c..c8930d6e23a 100644 --- a/internal/core/adt/fields.go +++ b/internal/core/adt/fields.go @@ -157,6 +157,10 @@ type closeContext struct { // Used to recursively insert Vertices. parent *closeContext + // overlay is used to temporarily link a closeContext to its "overlay" copy, + // as it is used in a corresponding disjunction. 
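// The pointer is only set temporarily, while an overlay for a disjunct is
// being created; unlinkOverlay resets it once the copies are in place.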
+ overlay *closeContext + dependencies []*ccDep // For testing only. See debug.go // externalDeps lists the closeContexts associated with a root node for @@ -171,6 +175,7 @@ type closeContext struct { // against (&&). If there are more than one, these additional nodes are // linked with next. Only closed nodes with patterns are added. Arc sets are // already merged during processing. + // A child is always done. This means it cannot be modified. child *closeContext // next holds a linked list of nodes to process. @@ -182,6 +187,12 @@ type closeContext struct { // be an additional increment at the start before any processing is done. conjunctCount int + // disjunctCount counts the number of disjunctions that contribute to + // conjunctCount. When a node is unfinished, for instance due to an error, + // we allow disjunctions to not be decremented. This count is then used + // to suppress errors about missing decrements. + disjunctCount int + src *Vertex // isDef indicates whether the closeContext is created as part of a @@ -285,6 +296,7 @@ type conjunctGrouper interface { } func (n *nodeContext) getArc(f Feature, mode ArcType) (arc *Vertex, isNew bool) { + // TODO(disjunct,perf): CopyOnRead v := n.node for _, a := range v.Arcs { if a.Label == f { @@ -485,7 +497,7 @@ func (c *closeContext) addDependency(kind depKind, key, child, root *closeContex // immediately. func (c *closeContext) incDependent(kind depKind, dependant *closeContext) (debug *ccDep) { if c.src == nil { - panic("incDependent: unexpected nil state") + panic("incDependent: unexpected nil src") } debug = c.addDependent(kind, dependant) @@ -551,7 +563,9 @@ func (c *closeContext) decDependent(ctx *OpContext, kind depKind, dependant *clo // Root pattern, set allowed patterns. if pcs := v.PatternConstraints; pcs != nil { if pcs.Allowed != nil { - panic("unexpected allowed set") + // This can happen for lists. + // TODO: unify the values. + // panic("unexpected allowed set") } pcs.Allowed = c.Expr return @@ -594,6 +608,38 @@ func (c *closeContext) decDependent(ctx *OpContext, kind depKind, dependant *clo } } +// incDisjunct increases disjunction-related counters. We require kind to be +// passed explicitly so that we can easily find the points where certain kinds +// are used. +func (c *closeContext) incDisjunct(kind depKind) { + if kind != DISJUNCT { + panic("unexpected kind") + } + c.incDependent(DISJUNCT, nil) + + // TODO: the counters are only used in debug mode and we could skip this + // if debug is disabled. + for ; c != nil; c = c.parent { + c.disjunctCount++ + } +} + +// decDisjunct decreases disjunction-related counters. We require kind to be +// passed explicitly so that we can easily find the points where certain kinds +// are used. +func (c *closeContext) decDisjunct(ctx *OpContext, kind depKind) { + if kind != DISJUNCT { + panic("unexpected kind") + } + c.decDependent(ctx, DISJUNCT, nil) + + // TODO: the counters are only used in debug mode and we could skip this + // if debug is disabled. + for ; c != nil; c = c.parent { + c.disjunctCount++ + } +} + // linkPatterns merges the patterns of child into c, if needed. 
func (c *closeContext) linkPatterns(child *closeContext) { if len(child.Patterns) > 0 { @@ -681,7 +727,7 @@ func (n *nodeContext) insertArc(f Feature, mode ArcType, c Conjunct, id CloseInf v, insertedArc := n.getArc(f, mode) if v.ArcType == ArcNotPresent { - n.node.reportFieldCycleError(n.ctx, c.Source().Pos(), f) + n.node.reportFieldCycleError(n.ctx, pos(c.x), f) return v } diff --git a/internal/core/adt/overlay.go b/internal/core/adt/overlay.go new file mode 100644 index 00000000000..2c7180c8cbf --- /dev/null +++ b/internal/core/adt/overlay.go @@ -0,0 +1,460 @@ +// Copyright 2024 CUE Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package adt + +import "slices" + +// This file implements a Vertex overlay. This is used by the disjunction +// algorithm to fork an existing Vertex value without modifying the original. +// +// At the moment, the forked value is a complete copy of the original. +// The copy points to the original to keep track of pointer equivalence. +// Conversely, while a copy is evaluated, the value of which it is a copy +// references the copy. Dereferencing will then take care that the copy is used +// during evaluation. +// +// nodeContext (main) <- +// - deref \ +// | \ +// | nodeContext (d1) | <- +// \ - overlays -------/ \ +// \ \ +// -> nodeContext (d2) | +// - overlays --------/ +// +// TODO: implement dereferencing +// TODO(perf): implement copy on write: instead of copying the entire tree, we +// could get by with only copying arcs to that are modified in the copy. + +// An overlayContext keeps track of copied vertices, closeContexts, and tasks. +// This allows different passes to know which of each were created, without +// having to walk the entire tree. +type overlayContext struct { + ctx *OpContext + + // closeContexts holds the allocated closeContexts created by allocCC. + // + // In the first pass, closeContexts are copied using allocCC. This also + // walks the parent tree, and allocates copies for ConjunctGroups. + // + // In the second pass, initCloneCC can be finalized by initializing each + // closeContext in this slice. + // + // Note that after the copy is completed, the overlay pointer should be + // deleted. + closeContexts []*closeContext + + // vertices holds the original, non-overlay vertices. The overlay for a + // vertex v can be obtained by looking up v.cc.overlay.src. + vertices []*Vertex +} + +// cloneRoot clones the a Vertex in which disjunctions are defined to allow +// inserting selected disjuncts into a new Vertex. +func (ctx *overlayContext) cloneRoot(root *nodeContext) *nodeContext { + // Clone all vertices that need to be cloned to support the overlay. + v := ctx.cloneVertex(root.node) + + // TODO: patch notifications to any node that is within the disjunct to + // point to the new vertex instead. + + // Initialize closeContexts: at this point, all closeContexts that need to + // be cloned have been allocated and stored in closeContexts and can now be + // initialized. 
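// Splitting allocation from initialization allows an overlay to refer to
// the overlay of its parent or of a dependency even when that copy is only
// allocated later in the traversal.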
+ for _, cc := range ctx.closeContexts { + ctx.initCloneCC(cc) + } + + // Note that this may trigger tasks to run, so it should be called after + // everything is initialized. + v.state.signal(allAncestorsProcessed) + + // TODO: walk overlay vertices and decrement counters of non-disjunction + // running tasks? + // TODO: find a faster way to do this. Walking over vertices would + // probably be faster. + for _, cc := range ctx.closeContexts { + for _, d := range cc.dependencies { + if d.task == nil { + // The test case that makes this necessary: + // #A: ["a" | "b"] | {} + // #A: ["a" | "b"] | {} + // b: #A & ["b"] + // + // TODO: invalidate task instead? + continue + } + if d.kind == TASK && d.task.state == taskRUNNING { + cc.overlay.decDependent(ctx.ctx, TASK, nil) + } + } + } + + return v.state +} + +// unlinkOverlay unlinks helper pointers. This should be done after the +// evaluation of a disjunct is complete. Keeping the linked pointers around +// will allow for dereferencing a vertex to its overlay, which, in turn, +// allows a disjunct to refer to parents vertices of the disjunct that +// recurse into the disjunct. +// +// TODO(perf): consider using generation counters. +func (ctx *overlayContext) unlinkOverlay() { + for _, cc := range ctx.closeContexts { + cc.overlay = nil + } +} + +// cloneVertex copies the contents of x into a new Vertex. +// +// It copies all Arcs, Conjuncts, and Structs, recursively. +// +// TODO(perf): it would probably be faster to copy vertices on demand. But this +// is more complicated and it would be worth measuring how much of a performance +// benefit this gives. More importantly, we should first implement the filter +// to eliminate disjunctions pre-copy based on discriminator fields and what +// have you. This is not unlikely to eliminate +func (ctx *overlayContext) cloneVertex(x *Vertex) *Vertex { + if o := x.cc.overlay; o != nil && o.src != nil { + // This path could happen with structure sharing or user-constructed + // values. + return o.src + } + + v := &Vertex{} + *v = *x + + ctx.vertices = append(ctx.vertices, v) + + v.cc = ctx.allocCC(x.cc) + + v.cc.src = v + v.cc.parentConjuncts = v + v.Conjuncts = *v.cc.group + + if a := x.Arcs; len(a) > 0 { + // TODO(perf): reuse buffer. + v.Arcs = make([]*Vertex, len(a)) + for i, arc := range a { + // TODO(perf): reuse when finalized. + arc := ctx.cloneVertex(arc) + v.Arcs[i] = arc + arc.Parent = v + } + } + + v.Structs = slices.Clone(v.Structs) + + if v.state != nil { + v.state = ctx.cloneNodeContext(x.state) + v.state.node = v + + ctx.cloneScheduler(v.state, x.state) + } + + // TODO: clone the scheduler/tasks as well. + + return v +} + +func (ctx *overlayContext) cloneNodeContext(n *nodeContext) *nodeContext { + d := n.ctx.newNodeContext(n.node) + + d.refCount++ + + d.ctx = n.ctx + d.node = n.node + + d.nodeContextState = n.nodeContextState + + d.arcMap = append(d.arcMap, n.arcMap...) + d.checks = append(d.checks, n.checks...) + + // TODO: do we need to add cyclicConjuncts? Typically, cyclicConjuncts + // gets cleared at the end of a unify call. There are cases, however, where + // this is possible. We should decide whether cyclicConjuncts should be + // forced to be processed in the parent node, or that we allow it to be + // copied to the disjunction. By taking no action here, we assume it is + // processed in the parent node. Investigate whether this always will lead + // to correct results. + // d.cyclicConjuncts = append(d.cyclicConjuncts, n.cyclicConjuncts...) 
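// Disjunctions that were scheduled but not yet processed carry over to the
// clone: the cross product continues there, so the holes in disjunctCCs
// must point to overlay closeContexts, while the envDisjunct entries
// themselves can be shared.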
+ + if len(n.disjunctions) > 0 { + for _, de := range n.disjunctions { + // Do not clone cc, as it is identified by underlying. We only need + // to clone the cc in disjunctCCs. + // de.cloneID.cc = ctx.allocCC(de.cloneID.cc) + d.disjunctions = append(d.disjunctions, de) + } + for _, h := range n.disjunctCCs { + h.cc = ctx.allocCC(h.cc) + d.disjunctCCs = append(d.disjunctCCs, h) + } + } + + return d +} + +// cloneConjunct prepares a tree of conjuncts for copying by first allocating +// a clone for each closeContext. +func (ctx *overlayContext) copyConjunct(c Conjunct) Conjunct { + cc := c.CloseInfo.cc + if cc == nil { + return c + } + overlay := ctx.allocCC(cc) + c.CloseInfo.cc = overlay + return c +} + +// Phase 1: alloc +func (ctx *overlayContext) allocCC(cc *closeContext) *closeContext { + // TODO(perf): if the original is "done", it can no longer be modified and + // we can use the original, even if the values will not be correct. + if cc.overlay != nil { + return cc.overlay + } + + o := &closeContext{} + cc.overlay = o + + if cc.parent != nil { + o.parent = ctx.allocCC(cc.parent) + } + + // Copy the conjunct group if it exists. + if cc.group != nil { + // Copy the group of conjuncts. + g := make([]Conjunct, len(*cc.group)) + o.group = (*ConjunctGroup)(&g) + for i, c := range *cc.group { + g[i] = ctx.copyConjunct(c) + } + + if o.parent != nil { + // validate invariants + ca := *cc.parent.group + if ca[cc.parentIndex].x != cc.group { + panic("group misaligned") + } + + (*o.parent.group)[cc.parentIndex].x = o.group + } + } + + // This must come after allocating the parent so that we can always read + // the src vertex from the parent during initialization. This assumes that + // src is set in the root closeContext when cloning a vertex. + ctx.closeContexts = append(ctx.closeContexts, cc) + + // needsCloseInSchedule is used as a boolean. The pointer to the original + // closeContext is just used for reporting purposes. + if cc.needsCloseInSchedule != nil { + o.needsCloseInSchedule = ctx.allocCC(cc.needsCloseInSchedule) + } + + // We only explicitly tag dependencies of type ARC. Notifications that + // point within the disjunct overlay will be tagged elsewhere. + for _, a := range cc.arcs { + if a.kind == ARC { + ctx.allocCC(a.cc) + } + } + + return o +} + +func (ctx *overlayContext) initCloneCC(x *closeContext) { + o := x.overlay + + if p := x.parent; p != nil { + o.parent = p.overlay + o.src = o.parent.src + } + + o.conjunctCount = x.conjunctCount + o.disjunctCount = x.disjunctCount + o.isDef = x.isDef + o.hasEllipsis = x.hasEllipsis + o.hasTop = x.hasTop + o.hasNonTop = x.hasNonTop + o.isClosedOnce = x.isClosedOnce + o.isEmbed = x.isEmbed + o.isClosed = x.isClosed + o.isTotal = x.isTotal + o.done = x.done + o.isDecremented = x.isDecremented + o.parentIndex = x.parentIndex + o.Expr = x.Expr + o.Patterns = append(o.Patterns, x.Patterns...) + + // child and next always point to completed closeContexts. Moreover, only + // fields that are immutable, such as Expr, are used. It is therefor not + // necessary to use overlays. + o.child = x.child + if x.child != nil && x.child.overlay != nil { + panic("unexpected overlay in child") + } + o.next = x.next + if x.next != nil && x.next.overlay != nil { + panic("unexpected overlay in next") + } + + for _, d := range x.dependencies { + if d.decremented { + continue + } + + if d.dependency.overlay == nil { + // This dependency is irrelevant for the current overlay. We can + // eliminate it as long as we decrement the accompanying counter. 
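// Decrementing the count to zero would mean the context is already
// complete, which cannot be the case while the overlay is still being
// initialized.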
+ if o.conjunctCount < 2 { + // This node can only be relevant if it has at least one other + // dependency. Check that we are not decrementing the counter + // to 0. + panic("unexpected conjunctCount: must be at least 2") + } + o.conjunctCount-- + continue + } + + dep := d.dependency + if dep.overlay != nil { + dep = dep.overlay + } + o.dependencies = append(o.dependencies, &ccDep{ + dependency: dep, + kind: d.kind, + decremented: false, + }) + } + + switch p := x.parentConjuncts.(type) { + case *closeContext: + if p.overlay == nil { + panic("expected overlay") + } + o.parentConjuncts = p.overlay + + case *Vertex: + o.parentConjuncts = o.src + } + + if o.src == nil { + // fall back to original vertex. We have only copied the vertices to + // catch the counts. + // TODO: try eliminating EVAL dependencies of arcs that are the parent + // of the disjunction root. + o.src = x.src + } + + if o.parentConjuncts == nil { + panic("expected parentConjuncts") + } + + for _, a := range x.arcs { + // If an arc does not have an overlay, we should not decrement the + // dependency counter. We simply remove the dependency in that case. + if a.cc.overlay == nil { + continue + } + if a.key.overlay != nil { + a.key = a.key.overlay // TODO: is this necessary? + } + a.cc = a.cc.overlay + o.arcs = append(o.arcs, a) + } + + // NOTE: copying externalDeps is hard and seems unnecessary, as it needs to + // be resolved in the base anyway. +} + +func (ctx *overlayContext) cloneScheduler(dst, src *nodeContext) { + ss := &src.scheduler + ds := &dst.scheduler + + ds.state = ss.state + ds.completed = ss.completed + ds.needs = ss.needs + ds.provided = ss.provided + ds.frozen = ss.frozen + ds.isFrozen = ss.isFrozen + ds.counters = ss.counters + + ss.blocking = ss.blocking[:0] + + for _, t := range ss.tasks { + switch t.state { + case taskWAITING: + // Do not unblock previously blocked tasks, unless they are + // associated with this node. + // TODO: an edge case is when a task is blocked on another node + // within the same disjunction. We could solve this by associating + // each nodeContext with a unique ID (like a generation counter) for + // the disjunction. + if t.node != src || t.blockedOn != ss { + break + } + t := ctx.cloneTask(t, ds, ss) + ds.tasks = append(ds.tasks, t) + ds.blocking = append(ds.blocking, t) + ctx.ctx.blocking = append(ctx.ctx.blocking, t) + + case taskREADY: + t := ctx.cloneTask(t, ds, ss) + ds.tasks = append(ds.tasks, t) + } + } +} + +func (ctx *overlayContext) cloneTask(t *task, dst, src *scheduler) *task { + if t.node != src.node { + panic("misaligned node") + } + + id := t.id + if id.cc != nil { + id.cc = ctx.allocCC(t.id.cc) // TODO: may be nil for disjunctions. + } + + // TODO: alloc from buffer. + d := &task{ + run: t.run, + state: t.state, + completes: t.completes, + unblocked: t.unblocked, + blockCondition: t.blockCondition, + err: t.err, + env: t.env, + x: t.x, + id: id, + + node: dst.node, + + // TODO: need to copy closeContexts? + comp: t.comp, + leaf: t.leaf, + } + + if t.blockedOn != nil { + if t.blockedOn != src { + panic("invalid scheduler") + } + d.blockedOn = dst + } + + return d +} diff --git a/internal/core/adt/sched.go b/internal/core/adt/sched.go index d4028cfad3c..dae878bd22d 100644 --- a/internal/core/adt/sched.go +++ b/internal/core/adt/sched.go @@ -561,6 +561,10 @@ type runner struct { // needes indicates which states of the corresponding node need to be // completed before this task can be run. 
needs condition + + // a lower priority indicates a preference to run a task before tasks + // of a higher priority. + priority int8 } type task struct { @@ -575,6 +579,9 @@ type task struct { // the scheduler, which conditions it is blocking on, and the stack of // tasks executed leading to the block. + // blockedOn cannot be needed in a clone for a disjunct, because as long + // as the disjunct is unresolved, its value cannot contribute to another + // scheduler. blockedOn *scheduler blockCondition condition blockStack []*task // TODO: use; for error reporting. @@ -623,6 +630,17 @@ func (s *scheduler) insertTask(t *task) { } } s.tasks = append(s.tasks, t) + + // Sort by priority. This code is optimized for the case that there are + // very few tasks with higher priority. This loop will almost always + // terminate within 0 or 1 iterations. + for i := len(s.tasks) - 1; i > s.taskPos; i-- { + if s.tasks[i-1].run.priority <= s.tasks[i].run.priority { + break + } + s.tasks[i], s.tasks[i-1] = s.tasks[i-1], s.tasks[i] + } + if s.completed&needs != needs { t.waitFor(s, needs) } diff --git a/internal/core/adt/states.go b/internal/core/adt/states.go index ba7eae94c4f..2ed0ccd658c 100644 --- a/internal/core/adt/states.go +++ b/internal/core/adt/states.go @@ -166,6 +166,10 @@ const ( // subFieldsProcessed + // disjunctionTask indicates that this task is a disjunction. This is + // used to trigger finalization of disjunctions. + disjunctionTask + leftOfMaxCoreCondition finalStateKnown condition = leftOfMaxCoreCondition - 1 @@ -193,6 +197,9 @@ const ( valueKnown | fieldConjunctsKnown + // genericDisjunction is used to record processDisjunction tasks. + genericDisjunction = genericConjunct | disjunctionTask + // a fieldConjunct is on that only adds a new field to the struct. fieldConjunct = allTasksCompleted | fieldConjunctsKnown diff --git a/internal/core/adt/tasks.go b/internal/core/adt/tasks.go index 900e5d3238f..153de34e118 100644 --- a/internal/core/adt/tasks.go +++ b/internal/core/adt/tasks.go @@ -27,7 +27,7 @@ var ( handleComprehension *runner handleListLit *runner handleListVertex *runner - handleDisjunction *runner + handleDisjunctions *runner ) // Use init to avoid a (spurious?) cyclic dependency in Go. @@ -69,6 +69,12 @@ func init() { completes: fieldConjunct, needs: listTypeKnown, } + handleDisjunctions = &runner{ + name: "Disjunctions", + f: processDisjunctions, + completes: genericDisjunction, + priority: 1, + } } // This file contains task runners (func(ctx *OpContext, t *task, mode runMode)). 
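// handleDisjunctions is the only runner in this change with a non-zero
// priority, so insertTask (sched.go above) orders it after the regular
// conjunct tasks pending at the same position. A toy illustration of that
// insertion step, using stand-in types and ignoring the taskPos lower bound
// of the real scheduler:

type toyTask struct {
	name     string
	priority int8
}

// insertByPriority appends t and bubbles it toward the front until the
// slice is again sorted by ascending priority.
func insertByPriority(tasks []toyTask, t toyTask) []toyTask {
	tasks = append(tasks, t)
	for i := len(tasks) - 1; i > 0; i-- {
		if tasks[i-1].priority <= tasks[i].priority {
			break
		}
		tasks[i], tasks[i-1] = tasks[i-1], tasks[i]
	}
	return tasks
}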
@@ -165,6 +171,17 @@ func processComprehension(ctx *OpContext, t *task, mode runMode) { t.comp.vertex.state.addBottom(err) } +func processDisjunctions(c *OpContext, t *task, mode runMode) { + n := t.node + err := n.processDisjunctions() + t.err = CombineErrors(nil, t.err, err) +} + +func processFinalizeDisjunctions(c *OpContext, t *task, mode runMode) { + n := t.node + n.finalizeDisjunctions() +} + func processListLit(c *OpContext, t *task, mode runMode) { n := t.node @@ -298,7 +315,7 @@ func (n *nodeContext) updateListType(list Expr, id CloseInfo, isClosed bool, ell m = &ListMarker{ IsOpen: true, } - n.node.setValue(n.ctx, partial, m) + n.node.setValue(n.ctx, conjuncts, m) } m.IsOpen = m.IsOpen && !isClosed diff --git a/internal/core/adt/unify.go b/internal/core/adt/unify.go index 81e3c6697ea..e31f0a3b971 100644 --- a/internal/core/adt/unify.go +++ b/internal/core/adt/unify.go @@ -111,10 +111,14 @@ func (v *Vertex) unify(c *OpContext, needs condition, mode runMode) bool { // So this condition is very likely to trigger. If for some reason the // parent has not been processed yet, we could attempt to process more // of its tasks to increase the chances of being able to find the - // information we are looking for here. For now we just continue as is, - // though. + // information we are looking for here. For now we just continue as is. + // // For dynamic nodes, the parent only exists to provide a path context. - if v.Label.IsLet() || v.IsDynamic || v.Parent.allChildConjunctsKnown() { + // + // Note that if mode is final, we will guarantee that the conditions for + // this if clause are met down the line. So we assume this is already the + // case and set the signal accordingly if so. + if v.Label.IsLet() || v.IsDynamic || v.Parent.allChildConjunctsKnown() || mode == finalize { n.signal(allAncestorsProcessed) } @@ -140,6 +144,9 @@ func (v *Vertex) unify(c *OpContext, needs condition, mode runMode) bool { case n.meets(nodeOnlyNeeds): // pass through next phase. case mode != finalize: + // TODO: disjunctions may benefit from evaluation as much prematurely + // as possible, as this increases the chances of premature failure. + // We should consider doing a recursive "attemptOnly" evaluation here. return false } @@ -150,7 +157,9 @@ func (v *Vertex) unify(c *OpContext, needs condition, mode runMode) bool { n.updateNodeType(StructKind, n.aStruct, n.aStructID) } - n.validateValue(finalized) + // TODO: rewrite to use mode when we get rid of old evaluator. + state := finalized + n.validateValue(state) if err, ok := n.node.BaseValue.(*Bottom); ok { for _, arc := range n.node.Arcs { @@ -185,13 +194,18 @@ func (v *Vertex) unify(c *OpContext, needs condition, mode runMode) bool { switch { case assertStructuralCycle(n): + // TODO: consider bailing on error if n.errs != nil. case n.completeAllArcs(needs, mode): } - n.signal(subFieldsProcessed) + if mode == finalize { + n.signal(subFieldsProcessed) + } if v.BaseValue == nil { - v.BaseValue = n.getValidators(finalized) + // TODO: this seems to not be possible. Possibly remove. 
+ state := finalized + v.BaseValue = n.getValidators(state) } if v := n.node.Value(); v != nil && IsConcrete(v) { // Ensure that checks are not run again when this value is used @@ -220,11 +234,17 @@ func (v *Vertex) unify(c *OpContext, needs condition, mode runMode) bool { n.node.BaseValue = err } + if mode == attemptOnly { + return n.meets(needs) + } + if mask := n.completed & needs; mask != 0 { // TODO: phase3: validation n.signal(mask) } + n.finalizeDisjunctions() + // validationCompleted if n.completed&(subFieldsProcessed) != 0 { n.node.updateStatus(finalized) @@ -363,7 +383,7 @@ func (n *nodeContext) completeAllArcs(needs condition, mode runMode) bool { a := n.node.Arcs[n.arcPos] n.arcPos++ - if !a.unify(n.ctx, needs, finalize) { + if !a.unify(n.ctx, needs, mode) { success = false }
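// For reference, the observable disjunction behavior that the new algorithm
// must reproduce can be checked against the public API. This is ordinary
// CUE semantics, not code from this change: defaults are selected when a
// concrete value is required, and failing disjuncts are eliminated.

package main

import (
	"fmt"

	"cuelang.org/go/cue"
	"cuelang.org/go/cue/cuecontext"
)

func main() {
	v := cuecontext.New().CompileString(`
		port: *8080 | int
		addr: "localhost:\(port)"     // interpolation forces the default
		kind: ("tcp" | "udp") & "udp" // one disjunct is eliminated
	`)
	addr, _ := v.LookupPath(cue.ParsePath("addr")).String()
	kind, _ := v.LookupPath(cue.ParsePath("kind")).String()
	fmt.Println(addr, kind) // localhost:8080 udp
}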