Skip to content

Commit

Permalink
[Flight] Optimize Large Strings by Not Escaping Them (facebook#26932)
Browse files Browse the repository at this point in the history
This introduces a Text row (T) which is essentially a string blob and
refactors the parsing to now happen at the binary level.

```
RowID + ":" + "T" + ByteLengthInHex + "," + Text
```

Today, we encode all row data in JSON, which conveniently never has
newline characters and so we use newline as the line terminator. We
can't do that if we pass arbitrary unicode without escaping it. Instead,
we pass the byte length (in hexadecimal) in the leading header for this
row tag followed by a comma.

We could be clever and use fixed or variable-length binary integers for
the row id and length but it's not worth the more difficult
debuggability so we keep these human readable in text.

Before this PR, we used to decode the binary stream into UTF-8 strings
before parsing them. This is inefficient because sometimes the slices
end up having to be copied so it's better to decode it directly into the
format. The follow up to this is also to add support for binary data and
then we can't assume the entire payload is UTF-8 anyway. So this
refactors the parser to parse the rows in binary and then decode the
result into UTF-8. It does add some overhead to decoding on a per row
basis though.

Since we do this, we need to encode the byte length that we want to decode
- not the string length. Therefore, this requires clients to receive
binary data, which is why I had to delete the string option.

It also means that I had to add a way to get the byteLength from a chunk
since they're not always binary. For Web streams it's easy since they're
always typed arrays. For Node streams it's trickier so we use the
byteLength helper which may not be very efficient. Might be worth
eagerly encoding them to UTF8 - perhaps only for this case.
  • Loading branch information
sebmarkbage authored and AndyPengc12 committed Apr 15, 2024
1 parent 2cc4128 commit e026324
Show file tree
Hide file tree
Showing 11 changed files with 267 additions and 42 deletions.
184 changes: 152 additions & 32 deletions packages/react-client/src/ReactFlightClient.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ export type JSONValue =
| {+[key: string]: JSONValue}
| $ReadOnlyArray<JSONValue>;

// States of the incremental row parser in processBinaryChunk. Each value
// names the part of a row that the parser expects to read next.
const ROW_ID = 0; // reading the hexadecimal row ID, terminated by ":"
const ROW_TAG = 1; // reading the optional single-byte tag that follows ":"
const ROW_LENGTH = 2; // reading the hexadecimal byte length, terminated by ","
const ROW_CHUNK_BY_NEWLINE = 3; // reading the payload up to the next "\n"
const ROW_CHUNK_BY_LENGTH = 4; // reading the payload for a known number of bytes

// Union of the numeric ROW_* parser states declared above.
type RowParserState = 0 | 1 | 2 | 3 | 4;

const PENDING = 'pending';
const BLOCKED = 'blocked';
const RESOLVED_MODEL = 'resolved_model';
Expand Down Expand Up @@ -165,9 +173,13 @@ export type Response = {
_bundlerConfig: SSRManifest,
_callServer: CallServerCallback,
_chunks: Map<number, SomeChunk<any>>,
_partialRow: string,
_fromJSON: (key: string, value: JSONValue) => any,
_stringDecoder: StringDecoder,
_rowState: RowParserState,
_rowID: number, // parts of a row ID parsed so far
_rowTag: number, // 0 indicates that we're currently parsing the row ID
_rowLength: number, // remaining bytes in the row. 0 indicates that we're looking for a newline.
_buffer: Array<Uint8Array>, // chunks received so far as part of this row
};

function readChunk<T>(chunk: SomeChunk<T>): T {
Expand Down Expand Up @@ -276,6 +288,14 @@ function createResolvedModuleChunk<T>(
return new Chunk(RESOLVED_MODULE, value, null, response);
}

function createInitializedTextChunk(
response: Response,
value: string,
): InitializedChunk<string> {
// $FlowFixMe[invalid-constructor] Flow doesn't support functions as constructors
return new Chunk(INITIALIZED, value, null, response);
}

function resolveModelChunk<T>(
chunk: SomeChunk<T>,
value: UninitializedModel,
Expand Down Expand Up @@ -665,9 +685,13 @@ export function createResponse(
_bundlerConfig: bundlerConfig,
_callServer: callServer !== undefined ? callServer : missingCall,
_chunks: chunks,
_partialRow: '',
_stringDecoder: createStringDecoder(),
_fromJSON: (null: any),
_rowState: 0,
_rowID: 0,
_rowTag: 0,
_rowLength: 0,
_buffer: [],
};
// Don't inline this call because it causes closure to outline the call above.
response._fromJSON = createFromJSONCallback(response);
Expand All @@ -688,6 +712,13 @@ function resolveModel(
}
}

function resolveText(response: Response, id: number, text: string): void {
const chunks = response._chunks;
// We assume that we always reference large strings after they've been
// emitted.
chunks.set(id, createInitializedTextChunk(response, text));
}

function resolveModule(
response: Response,
id: number,
Expand Down Expand Up @@ -802,33 +833,40 @@ function resolveHint(
code: string,
model: UninitializedModel,
): void {
const hintModel = parseModel<HintModel>(response, model);
const hintModel: HintModel = parseModel(response, model);
dispatchHint(code, hintModel);
}

function processFullRow(response: Response, row: string): void {
if (row === '') {
return;
function processFullRow(
response: Response,
id: number,
tag: number,
buffer: Array<Uint8Array>,
lastChunk: string | Uint8Array,
): void {
let row = '';
const stringDecoder = response._stringDecoder;
for (let i = 0; i < buffer.length; i++) {
const chunk = buffer[i];
row += readPartialStringChunk(stringDecoder, chunk);
}
if (typeof lastChunk === 'string') {
row += lastChunk;
} else {
row += readFinalStringChunk(stringDecoder, lastChunk);
}
const colon = row.indexOf(':', 0);
const id = parseInt(row.slice(0, colon), 16);
const tag = row[colon + 1];
// When tags that are not text are added, check them here before
// parsing the row as text.
// switch (tag) {
// }
switch (tag) {
case 'I': {
resolveModule(response, id, row.slice(colon + 2));
case 73 /* "I" */: {
resolveModule(response, id, row);
return;
}
case 'H': {
const code = row[colon + 2];
resolveHint(response, code, row.slice(colon + 3));
case 72 /* "H" */: {
const code = row[0];
resolveHint(response, code, row.slice(1));
return;
}
case 'E': {
const errorInfo = JSON.parse(row.slice(colon + 2));
case 69 /* "E" */: {
const errorInfo = JSON.parse(row);
if (__DEV__) {
resolveErrorDev(
response,
Expand All @@ -842,9 +880,13 @@ function processFullRow(response: Response, row: string): void {
}
return;
}
case 84 /* "T" */: {
resolveText(response, id, row);
return;
}
default: {
// We assume anything else is JSON.
resolveModel(response, id, row.slice(colon + 1));
resolveModel(response, id, row);
return;
}
}
Expand All @@ -854,18 +896,96 @@ export function processBinaryChunk(
response: Response,
chunk: Uint8Array,
): void {
const stringDecoder = response._stringDecoder;
let linebreak = chunk.indexOf(10); // newline
while (linebreak > -1) {
const fullrow =
response._partialRow +
readFinalStringChunk(stringDecoder, chunk.subarray(0, linebreak));
processFullRow(response, fullrow);
response._partialRow = '';
chunk = chunk.subarray(linebreak + 1);
linebreak = chunk.indexOf(10); // newline
let i = 0;
let rowState = response._rowState;
let rowID = response._rowID;
let rowTag = response._rowTag;
let rowLength = response._rowLength;
const buffer = response._buffer;
const chunkLength = chunk.length;
while (i < chunkLength) {
let lastIdx = -1;
switch (rowState) {
case ROW_ID: {
const byte = chunk[i++];
if (byte === 58 /* ":" */) {
// Finished the rowID, next we'll parse the tag.
rowState = ROW_TAG;
} else {
rowID = (rowID << 4) | (byte > 96 ? byte - 87 : byte - 48);
}
continue;
}
case ROW_TAG: {
const resolvedRowTag = chunk[i];
if (resolvedRowTag === 84 /* "T" */) {
rowTag = resolvedRowTag;
rowState = ROW_LENGTH;
i++;
} else if (resolvedRowTag > 64 && resolvedRowTag < 91 /* "A"-"Z" */) {
rowTag = resolvedRowTag;
rowState = ROW_CHUNK_BY_NEWLINE;
i++;
} else {
rowTag = 0;
rowState = ROW_CHUNK_BY_NEWLINE;
// This was an unknown tag so it was probably part of the data.
}
continue;
}
case ROW_LENGTH: {
const byte = chunk[i++];
if (byte === 44 /* "," */) {
// Finished the rowLength, next we'll buffer up to that length.
rowState = ROW_CHUNK_BY_LENGTH;
} else {
rowLength = (rowLength << 4) | (byte > 96 ? byte - 87 : byte - 48);
}
continue;
}
case ROW_CHUNK_BY_NEWLINE: {
// We're looking for a newline
lastIdx = chunk.indexOf(10 /* "\n" */, i);
break;
}
case ROW_CHUNK_BY_LENGTH: {
// We're looking for the remaining byte length
if (i + rowLength <= chunk.length) {
lastIdx = i + rowLength;
}
break;
}
}
if (lastIdx > -1) {
// We found the last chunk of the row
const offset = chunk.byteOffset + i;
const length = lastIdx - i;
const lastChunk = new Uint8Array(chunk.buffer, offset, length);
processFullRow(response, rowID, rowTag, buffer, lastChunk);
// Reset state machine for a new row
rowState = ROW_ID;
rowTag = 0;
rowID = 0;
rowLength = 0;
buffer.length = 0;
i = lastIdx + 1;
} else {
// The rest of this row is in a future chunk. We stash the rest of the
// current chunk until we can process the full row.
const offset = chunk.byteOffset + i;
const length = chunk.byteLength - i;
const remainingSlice = new Uint8Array(chunk.buffer, offset, length);
buffer.push(remainingSlice);
// Update how many bytes we're still waiting for. If we're looking for
// a newline, this doesn't hurt since we'll just ignore it.
rowLength -= remainingSlice.byteLength;
break;
}
}
response._partialRow += readPartialStringChunk(stringDecoder, chunk);
response._rowState = rowState;
response._rowID = rowID;
response._rowTag = rowTag;
response._rowLength = rowLength;
}

function parseModel<T>(response: Response, json: UninitializedModel): T {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ export function clonePrecomputedChunk(
return chunk;
}

// Byte-length lookup for a chunk. This stream config provides no
// implementation: calling it always throws an Error.
export function byteLengthOfChunk(chunk: Chunk | PrecomputedChunk): number {
throw new Error('Not implemented.');
}

export function closeWithError(destination: Destination, error: mixed): void {
// $FlowFixMe[incompatible-call]: This is an Error object or the destination accepts other types.
destination.destroy(error);
Expand Down
4 changes: 4 additions & 0 deletions packages/react-server-dom-fb/src/ReactServerStreamConfigFB.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ export function clonePrecomputedChunk(
return chunk;
}

// Byte-length lookup for a chunk. This stream config provides no
// implementation: calling it always throws an Error.
export function byteLengthOfChunk(chunk: Chunk | PrecomputedChunk): number {
throw new Error('Not implemented.');
}

export function closeWithError(destination: Destination, error: mixed): void {
destination.done = true;
destination.fatal = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,25 @@ describe('ReactFlightDOMEdge', () => {
const result = await readResult(ssrStream);
expect(result).toEqual('<span>Client Component</span>');
});

// Checks that a long string full of quotes, newlines and tabs is serialized
// without escape sequences and still parses back to the same value.
it('should encode long string in a compact format', async () => {
// 500 repetitions of quote + newline + tab, followed by an emoji.
const testString = '"\n\t'.repeat(500) + '🙃';

const stream = ReactServerDOMServer.renderToReadableStream({
text: testString,
});
// Split the stream so the same output can be both inspected and parsed.
const [stream1, stream2] = stream.tee();

const serializedContent = await readResult(stream1);
// The content should be compact and unescaped
expect(serializedContent.length).toBeLessThan(2000);
expect(serializedContent).not.toContain('\\n');
expect(serializedContent).not.toContain('\\t');
expect(serializedContent).not.toContain('\\"');
expect(serializedContent).toContain('\t');

const result = await ReactServerDOMClient.createFromReadableStream(stream2);
// The parsed value should still equal the original string
expect(result.text).toBe(testString);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,31 @@ describe('ReactFlightDOMNode', () => {
const result = await readResult(ssrStream);
expect(result).toEqual('<span>Client Component</span>');
});

// Checks that a long string full of quotes, newlines and tabs is serialized
// without escape sequences and still parses back to the same value.
it('should encode long string in a compact format', async () => {
// 500 repetitions of quote + newline + tab, followed by an emoji.
const testString = '"\n\t'.repeat(500) + '🙃';

const stream = ReactServerDOMServer.renderToPipeableStream({
text: testString,
});

const readable = new Stream.PassThrough();

// Attach both consumers to the pass-through before piping starts, so the
// same output can be both inspected and parsed.
const stringResult = readResult(readable);
const parsedResult = ReactServerDOMClient.createFromNodeStream(readable);

stream.pipe(readable);

const serializedContent = await stringResult;
// The content should be compact and unescaped
expect(serializedContent.length).toBeLessThan(2000);
expect(serializedContent).not.toContain('\\n');
expect(serializedContent).not.toContain('\\t');
expect(serializedContent).not.toContain('\\"');
expect(serializedContent).toContain('\t');

const result = await parsedResult;
// The parsed value should still equal the original string
expect(result.text).toBe(testString);
});
});
Loading

0 comments on commit e026324

Please sign in to comment.