Skip to content

Commit

Permalink
tree2: Fix chunk bugs (#16652)
Browse files Browse the repository at this point in the history
## Description

Fix 2 bugs in chunked forest, and improve test coverage.

Note that bug in tryShapeForSchema was preventing the existing coverage
for using uniform chunks in the forest by detecting all schema as
polymorphic fixing that was enough to make the coverage expose the other
bug, but coverage of that area was increased anyway.

Note that chunked forest is not currently used by the tree in the public
API, so this has no impact on users of this package.
  • Loading branch information
CraigMacomber authored Aug 2, 2023
1 parent d9ba203 commit a3a4839
Show file tree
Hide file tree
Showing 6 changed files with 364 additions and 98 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
StoredSchemaRepository,
CursorLocationType,
SchemaData,
forbiddenFieldKindIdentifier,
} from "../../core";
import { FullSchemaPolicy, Multiplicity } from "../modular-schema";
import { fail } from "../../util";
Expand All @@ -39,22 +40,34 @@ export interface Disposable {
export function makeTreeChunker(
schema: StoredSchemaRepository,
policy: FullSchemaPolicy,
): ChunkPolicy & Disposable {
): IChunker {
return new Chunker(
schema,
policy,
defaultChunkPolicy.sequenceChunkInlineThreshold,
defaultChunkPolicy.sequenceChunkInlineThreshold,
defaultChunkPolicy.uniformChunkNodeCount,
tryShapeFromSchema,
);
}

/**
* Indicates that there are multiple possible `TreeShapes` trees with a given type can have.
* Extends ChunkPolicy to include stateful details required by ChunkedForest.
*
* This extra complexity is mostly due to the fact that schema can change over time,
* and that chunk policy uses caching which thus needs invalidation.
*/
export interface IChunker extends ChunkPolicy, Disposable {
readonly schema: StoredSchemaRepository;
clone(schema: StoredSchemaRepository): IChunker;
}

/**
* Indicates that there are multiple possible `TreeShape` trees with a given type can have.
*
* @remarks
* For example, a schema transitively containing a sequence field, optional field, or allowing multiple child types will be Polymorphic.
* See `tryShapeForSchema` for how to tell if a type is Polymorphic.
* See `tryShapeFromSchema` for how to tell if a type is Polymorphic.
*
* TODO: cache some of the possible shapes here.
*/
Expand All @@ -71,9 +84,9 @@ export const polymorphic = new Polymorphic();
* Information about the possible shapes a tree could take based on its type.
* Note that this information is for a specific version of the schema.
*/
type ShapeInfo = TreeShape | Polymorphic;
export type ShapeInfo = TreeShape | Polymorphic;

class Chunker implements ChunkPolicy, Disposable {
export class Chunker implements IChunker {
/**
* Cache for information about possible shapes for types.
* Corresponds to the version of the schema in `schema`.
Expand All @@ -92,17 +105,37 @@ class Chunker implements ChunkPolicy, Disposable {
public readonly sequenceChunkSplitThreshold: number,
public readonly sequenceChunkInlineThreshold: number,
public readonly uniformChunkNodeCount: number,
// eslint-disable-next-line @typescript-eslint/no-shadow
private readonly tryShapeFromSchema: (
schema: SchemaData,
policy: FullSchemaPolicy,
type: TreeSchemaIdentifier,
shapes: Map<TreeSchemaIdentifier, ShapeInfo>,
) => ShapeInfo,
) {
this.dependent = new SimpleObservingDependent(() => this.schemaChanged());
}

public schemaToShape(schema: TreeSchemaIdentifier): ShapeInfo {
public clone(schema: StoredSchemaRepository): IChunker {
// This does not preserve the cache.
// This is probably fine, but is a potential way it could be optimized in the future (with care to ensure invalidation work properly).
return new Chunker(
schema,
this.policy,
this.sequenceChunkSplitThreshold,
this.sequenceChunkInlineThreshold,
this.uniformChunkNodeCount,
this.tryShapeFromSchema,
);
}

public shapeFromSchema(schema: TreeSchemaIdentifier): ShapeInfo {
const cached = this.typeShapes.get(schema);
if (cached !== undefined) {
return cached;
}
recordDependency(this.dependent, this.schema);
return tryShapeForSchema(this.schema, this.policy, schema, this.typeShapes);
return this.tryShapeFromSchema(this.schema, this.policy, schema, this.typeShapes);
}

public dispose(): void {
Expand Down Expand Up @@ -168,7 +201,7 @@ export function shapesFromSchema(
): Map<TreeSchemaIdentifier, ShapeInfo> {
const shapes: Map<TreeSchemaIdentifier, ShapeInfo> = new Map();
for (const identifier of schema.treeSchema.keys()) {
tryShapeForSchema(schema, policy, identifier, shapes);
tryShapeFromSchema(schema, policy, identifier, shapes);
}
return shapes;
}
Expand All @@ -178,7 +211,7 @@ export function shapesFromSchema(
*
* Note that this does not tolerate optional or sequence fields, nor does it optimize for patterns of specific values.
*/
function tryShapeForSchema(
export function tryShapeFromSchema(
schema: SchemaData,
policy: FullSchemaPolicy,
type: TreeSchemaIdentifier,
Expand All @@ -189,12 +222,12 @@ function tryShapeForSchema(
return cached;
}
const treeSchema = schema.treeSchema.get(type) ?? fail("missing schema");
if (treeSchema.mapFields !== undefined) {
if (treeSchema.mapFields.kind.identifier !== forbiddenFieldKindIdentifier) {
return polymorphic;
}
const fieldsArray: FieldShape[] = [];
for (const [key, field] of treeSchema.structFields) {
const fieldShape = tryShapeForFieldSchema(schema, policy, field, key, shapes);
const fieldShape = tryShapeFromFieldSchema(schema, policy, field, key, shapes);
if (fieldShape === undefined) {
return polymorphic;
}
Expand All @@ -211,7 +244,7 @@ function tryShapeForSchema(
*
* Note that this does not tolerate optional or sequence fields, nor does it optimize for patterns of specific values.
*/
function tryShapeForFieldSchema(
export function tryShapeFromFieldSchema(
schema: SchemaData,
policy: FullSchemaPolicy,
type: FieldStoredSchema,
Expand All @@ -226,7 +259,7 @@ function tryShapeForFieldSchema(
return undefined;
}
const childType = [...type.types][0];
const childShape = tryShapeForSchema(schema, policy, childType, shapes);
const childShape = tryShapeFromSchema(schema, policy, childType, shapes);
if (childShape instanceof Polymorphic) {
return undefined;
}
Expand All @@ -246,14 +279,14 @@ export const defaultChunkPolicy: ChunkPolicy = {
uniformChunkNodeCount: 400,
// Without knowing what the schema is, all shapes are possible.
// Use `makeTreeChunker` to do better.
schemaToShape: () => polymorphic,
shapeFromSchema: () => polymorphic,
};

export const basicOnlyChunkPolicy: ChunkPolicy = {
sequenceChunkSplitThreshold: Number.POSITIVE_INFINITY,
sequenceChunkInlineThreshold: Number.POSITIVE_INFINITY,
uniformChunkNodeCount: 0,
schemaToShape: () => polymorphic,
shapeFromSchema: () => polymorphic,
};

/**
Expand Down Expand Up @@ -281,7 +314,7 @@ export interface ChunkPolicy {
/**
* Returns information about the shapes trees of type `schema` can take.
*/
schemaToShape(schema: TreeSchemaIdentifier): ShapeInfo;
shapeFromSchema(schema: TreeSchemaIdentifier): ShapeInfo;
}

function newBasicChunkTree(cursor: ITreeCursorSynchronous, policy: ChunkPolicy): BasicChunk {
Expand Down Expand Up @@ -340,7 +373,7 @@ export function chunkRange(
assert(cursor.mode === CursorLocationType.Nodes, 0x580 /* should be in nodes */);
// TODO: if provided, use schema to consider using UniformChunks
const type = cursor.type;
const shape = policy.schemaToShape(type);
const shape = policy.shapeFromSchema(type);
if (shape instanceof TreeShape) {
const nodesPerTopLevelNode = shape.positions.length;
const maxTopLevelLength = Math.ceil(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ import {
ForestEvents,
ITreeSubscriptionCursorState,
rootFieldKey,
mapCursorField,
} from "../../core";
import { brand, fail, getOrAddEmptyToMap } from "../../util";
import { createEmitter } from "../../events";
import { FullSchemaPolicy } from "../modular-schema";
import { BasicChunk, BasicChunkCursor, SiblingsOrKey } from "./basicChunk";
import { basicChunkTree, ChunkPolicy, chunkTree, Disposable, makeTreeChunker } from "./chunkTree";
import { basicChunkTree, chunkTree, IChunker } from "./chunkTree";
import { ChunkedCursor, TreeChunk } from "./chunk";

function makeRoot(): BasicChunk {
Expand All @@ -48,27 +48,24 @@ interface StackNode {
*/
class ChunkedForest extends SimpleDependee implements IEditableForest {
private readonly dependent = new SimpleObservingDependent(() => this.invalidateDependents());
// TODO: dispose of this when forest is disposed.
private readonly chunker: Disposable & ChunkPolicy;

private readonly events = createEmitter<ForestEvents>();

/**
* @param roots - dummy node above the root under which detached fields are stored. All content of the forest is reachable from this.
* @param schema - schema which all content in this forest is assumed to comply with.
* @param policy - provides information needed to interpret the schema, mainly multiplicities for each field kind which are used for chunking policy.
* @param chunker - Chunking policy. TODO: dispose of this when forest is disposed.
* @param anchors - anchorSet used to track location in this forest across changes. Callers of applyDelta must ensure this is updated accordingly.
*/
public constructor(
public roots: BasicChunk,
public readonly schema: StoredSchemaRepository,
public readonly policy: FullSchemaPolicy,
public readonly chunker: IChunker,
public readonly anchors: AnchorSet = new AnchorSet(),
) {
super("object-forest.ChunkedForest");
// Invalidate forest if schema change.
recordDependency(this.dependent, this.schema);
this.chunker = makeTreeChunker(schema, policy);
}

public on<K extends keyof ForestEvents>(eventName: K, listener: ForestEvents[K]): () => void {
Expand All @@ -77,7 +74,7 @@ class ChunkedForest extends SimpleDependee implements IEditableForest {

public clone(schema: StoredSchemaRepository, anchors: AnchorSet): ChunkedForest {
this.roots.referenceAdded();
return new ChunkedForest(this.roots, schema, this.policy, anchors);
return new ChunkedForest(this.roots, schema, this.chunker.clone(schema), anchors);
}

public forgetAnchor(anchor: Anchor): void {
Expand Down Expand Up @@ -177,11 +174,15 @@ class ChunkedForest extends SimpleDependee implements IEditableForest {
// 2. Support traversing sequence chunks.
//
// Maybe build path when visitor navigates then lazily sync to chunk tree when editing?

const newChunk = basicChunkTree(found.cursor(), this.chunker);
chunks[indexOfChunk] = newChunk;
const newChunks = mapCursorField(found.cursor(), (cursor) =>
basicChunkTree(cursor, this.chunker),
);
// TODO: this could fail for really long chunks being split (due to argument count limits).
// Current implementations of chunks shouldn't ever be that long, but it could be an issue if they get bigger.
chunks.splice(indexOfChunk, 1, ...newChunks);
found.referenceRemoved();
found = newChunk;

found = newChunks[indexWithinChunk];
}
assert(found instanceof BasicChunk, 0x536 /* chunk should have been normalized */);
if (found.isShared()) {
Expand Down Expand Up @@ -355,6 +356,7 @@ class Cursor extends BasicChunkCursor implements ITreeSubscriptionCursor {
this.index = 0;
this.indexOfChunk = 0;
this.indexWithinChunk = 0;
this.nestedCursor = undefined;
}

public override fork(): Cursor {
Expand Down Expand Up @@ -400,10 +402,6 @@ class Cursor extends BasicChunkCursor implements ITreeSubscriptionCursor {
/**
* @returns an implementation of {@link IEditableForest} with no data or schema.
*/
export function buildChunkedForest(
schema: StoredSchemaRepository,
policy: FullSchemaPolicy,
anchors?: AnchorSet,
): IEditableForest {
return new ChunkedForest(makeRoot(), schema, policy, anchors);
export function buildChunkedForest(chunker: IChunker, anchors?: AnchorSet): IEditableForest {
return new ChunkedForest(makeRoot(), chunker.schema, chunker, anchors);
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
import {
basicChunkTree,
defaultChunkPolicy,
makeTreeChunker,
// eslint-disable-next-line import/no-internal-modules
} from "../../../feature-libraries/chunked-forest/chunkTree";
import { jsonRoot } from "../../../domains";
Expand Down Expand Up @@ -113,7 +114,9 @@ function bench(
[
"chunked-forest Cursor",
() => {
const forest = buildChunkedForest(schema, defaultSchemaPolicy);
const forest = buildChunkedForest(
makeTreeChunker(schema, defaultSchemaPolicy),
);
initializeForest(forest, [singleTextCursor(encodedTree)]);
const cursor = forest.allocateCursor();
moveToDetachedField(forest, cursor);
Expand Down
Loading

0 comments on commit a3a4839

Please sign in to comment.