Skip to content

Commit

Permalink
fix(io): Improve readDelims() performance (#867)
Browse files Browse the repository at this point in the history
  • Loading branch information
keroxp authored Apr 24, 2021
1 parent 7498d9d commit 3586ef0
Show file tree
Hide file tree
Showing 3 changed files with 392 additions and 27 deletions.
158 changes: 158 additions & 0 deletions bytes/bytes_list.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/**
 * An abstraction of multiple Uint8Arrays.
 *
 * Every added array is kept by reference (it is not copied) together with the
 * `[start, end)` window of it that belongs to the list, so the list can be
 * addressed as one logical byte sequence without concatenating eagerly.
 */
export class BytesList {
  /** Total number of bytes currently held across all chunks. */
  private len = 0;
  private chunks: {
    value: Uint8Array;
    start: number; // start offset from head of chunk
    end: number; // end offset from head of chunk
    offset: number; // offset of head in all bytes
  }[] = [];

  /**
   * Total size of bytes
   */
  size(): number {
    return this.len;
  }

  /**
   * Push bytes with given offset infos.
   *
   * Empty arrays and empty ranges (`start === end`) are ignored silently.
   *
   * @param value backing array; stored by reference, not copied
   * @param start first index of `value` that belongs to the list
   * @param end index one past the last byte of `value` that belongs to the list
   * @throws Error("invalid range") if the range does not fit inside `value`
   */
  add(value: Uint8Array, start = 0, end = value.byteLength): void {
    if (value.byteLength === 0 || end - start === 0) {
      return;
    }
    checkRange(start, end, value.byteLength);
    this.chunks.push({
      value,
      end,
      start,
      offset: this.len,
    });
    this.len += end - start;
  }

  /**
   * Drop head `n` bytes.
   *
   * If `n` is greater than or equal to the current size the list is emptied.
   * Non-positive (or NaN) `n` is a no-op; without this guard a negative `n`
   * would move the first chunk's `start` backwards and silently grow the list.
   */
  shift(n: number): void {
    // `!(n > 0)` rather than `n <= 0` so that NaN is rejected as well.
    if (!(n > 0)) {
      return;
    }
    if (this.len <= n) {
      this.chunks = [];
      this.len = 0;
      return;
    }
    // Discard every chunk that lies entirely before position `n` ...
    const idx = this.getChunkIndex(n);
    this.chunks.splice(0, idx);
    // ... and trim the part of the new head chunk that precedes `n`.
    const [head] = this.chunks;
    if (head) {
      head.start += n - head.offset;
    }
    // Logical offsets are relative to the head of the list: recompute them.
    let offset = 0;
    for (const chunk of this.chunks) {
      chunk.offset = offset;
      offset += chunk.end - chunk.start;
    }
    this.len = offset;
  }

  /**
   * Find chunk index in which `pos` locates by binary-search
   * returns -1 if out of range
   */
  getChunkIndex(pos: number): number {
    let lo = 0;
    let hi = this.chunks.length - 1;
    while (lo <= hi) {
      const mid = lo + Math.floor((hi - lo) / 2);
      const { offset, start, end } = this.chunks[mid];
      const len = end - start;
      // Comparison order matters: a NaN `pos` fails both tests and falls into
      // the last branch, so the search still ends with -1.
      if (offset <= pos && pos < offset + len) {
        return mid;
      } else if (offset + len <= pos) {
        lo = mid + 1;
      } else {
        hi = mid - 1;
      }
    }
    return -1;
  }

  /**
   * Get indexed byte from chunks
   *
   * @throws Error("out of range") if `i` is negative or not below `size()`
   */
  get(i: number): number {
    if (i < 0 || this.len <= i) {
      throw new Error("out of range");
    }
    const idx = this.getChunkIndex(i);
    const { value, offset, start } = this.chunks[idx];
    return value[start + i - offset];
  }

  /**
   * Iterator of bytes from given position.
   *
   * Yields nothing when `start` is outside of the list.
   */
  *iterator(start = 0): IterableIterator<number> {
    const startIdx = this.getChunkIndex(start);
    if (startIdx < 0) return;
    const first = this.chunks[startIdx];
    // Only the first visited chunk is entered part-way through.
    let firstOffset = start - first.offset;
    for (let i = startIdx; i < this.chunks.length; i++) {
      const chunk = this.chunks[i];
      for (let j = chunk.start + firstOffset; j < chunk.end; j++) {
        yield chunk.value[j];
      }
      firstOffset = 0;
    }
  }

  /**
   * Returns subset of bytes copied
   *
   * @param start logical index of the first byte to copy
   * @param end logical index one past the last byte to copy (defaults to size)
   * @throws Error("invalid range") if the range does not fit inside the list
   */
  slice(start: number, end: number = this.len): Uint8Array {
    if (end === start) {
      return new Uint8Array();
    }
    checkRange(start, end, this.len);
    const result = new Uint8Array(end - start);
    const startIdx = this.getChunkIndex(start);
    const endIdx = this.getChunkIndex(end - 1);
    let written = 0;
    for (let i = startIdx; i <= endIdx; i++) {
      const chunk = this.chunks[i];
      // The first chunk may be entered part-way through and the last chunk may
      // be left early; every chunk in between is copied in full.
      const from = i === startIdx
        ? chunk.start + (start - chunk.offset)
        : chunk.start;
      const to = i === endIdx ? chunk.start + (end - chunk.offset) : chunk.end;
      result.set(chunk.value.subarray(from, to), written);
      written += to - from;
    }
    return result;
  }

  /**
   * Concatenate chunks into single Uint8Array copied.
   */
  concat(): Uint8Array {
    const result = new Uint8Array(this.len);
    let sum = 0;
    for (const { value, start, end } of this.chunks) {
      result.set(value.subarray(start, end), sum);
      sum += end - start;
    }
    return result;
  }
}

/**
 * Validate that `[start, end)` is a well-formed range inside `[0, len]`.
 *
 * @throws Error("invalid range") if either bound is negative, exceeds `len`,
 * or `end` is smaller than `start`
 */
function checkRange(start: number, end: number, len: number): void {
  if (start < 0 || len < start || end < 0 || len < end || end < start) {
    throw new Error("invalid range");
  }
}
212 changes: 212 additions & 0 deletions bytes/bytes_list_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
import { assertEquals, assertThrows } from "../testing/asserts.ts";
import { BytesList } from "./bytes_list.ts";
import * as bytes from "./mod.ts";
/**
 * Build the shared fixture: a BytesList whose logical content is the bytes
 * 0..9, assembled from whole arrays, ignored empty inputs and sub-ranges.
 */
function setup() {
  const list = new BytesList();
  const single = new Uint8Array([7]);
  // [backing array, start?, end?] in insertion order; omitted bounds fall back
  // to the defaults of `add`.
  const segments: [Uint8Array, number?, number?][] = [
    [new Uint8Array([0, 1, 2])],
    [new Uint8Array([3, 4, 5, 6])],
    [single],
    [new Uint8Array()],
    [single, 0, 0],
    [new Uint8Array([0, 0, 8, 0]), 2, 3],
    [new Uint8Array([0, 9]), 1, 2],
  ];
  for (const [buf, from, to] of segments) {
    list.add(buf, from, to);
  }
  return list;
}

// Name prefix fixed from "[byets]" so that `deno test --filter "[bytes]"`
// selects this test together with the rest of the suite.
Deno.test("[bytes] BytesList.size", () => {
  // A fresh list is empty; the fixture holds the ten bytes 0..9.
  assertEquals(new BytesList().size(), 0);
  assertEquals(setup().size(), 10);
});

Deno.test("[bytes] BytesList.getChunkIndex", () => {
  const list = setup();
  // [logical position, expected chunk index]; -1 marks "out of range".
  const expectations: [number, number][] = [
    [-1, -1],
    [0, 0],
    [1, 0],
    [2, 0],
    [3, 1],
    [4, 1],
    [5, 1],
    [6, 1],
    [7, 2],
    [8, 3],
    [9, 4],
    [10, -1],
  ];
  for (const [pos, chunkIdx] of expectations) {
    assertEquals(list.getChunkIndex(pos), chunkIdx);
  }
});

Deno.test("[bytes] BytesList.get", () => {
  const list = setup();
  // In the fixture every byte's value equals its logical position.
  let pos = 0;
  while (pos < list.size()) {
    assertEquals(list.get(pos), pos);
    pos++;
  }
  // Positions on either side of the list must be rejected.
  for (const outside of [-100, 100]) {
    assertThrows(
      () => {
        list.get(outside);
      },
      Error,
      "out of range",
    );
  }
});

Deno.test("[bytes] BytesList.add should ignore empty buf and range", () => {
  const list = new BytesList();
  // An empty array and an empty window over a non-empty array add nothing.
  list.add(new Uint8Array());
  list.add(new Uint8Array([0]), 0, 0);
  const total = list.size();
  assertEquals(total, 0);
});
Deno.test("[bytes] BytesList.add should throw if invalid range", () => {
  const list = new BytesList();
  const oneByte = new Uint8Array([0]);
  // [start, end] pairs that do not describe a window inside a 1-byte array.
  const badRanges: [number, number][] = [
    [-1, 0],
    [0, -1],
    [4, 0],
    [0, 4],
  ];
  for (const [from, to] of badRanges) {
    assertThrows(
      () => {
        list.add(oneByte, from, to);
      },
      Error,
      "invalid range",
    );
  }
});
Deno.test("[bytes] BytesList.slice", () => {
  const list = setup();
  // Compare a slice against the expected byte values.
  const expectBytes = (actual: Uint8Array, expected: number[]) => {
    assertEquals(bytes.equals(actual, new Uint8Array(expected)), true);
  };
  expectBytes(list.slice(0, 4), [0, 1, 2, 3]);
  expectBytes(list.slice(3, 5), [3, 4]);
  expectBytes(list.slice(0), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);

  // Ranges past the end, before the start, or reversed must be rejected.
  const badRanges: [number, number][] = [
    [9, 11],
    [-1, 1],
    [1, 0],
  ];
  for (const [from, to] of badRanges) {
    assertThrows(
      () => {
        list.slice(from, to);
      },
      Error,
      "invalid range",
    );
  }
});
Deno.test("[bytes] BytesList.concat", () => {
  // Flattening the fixture must yield the bytes 0..9 in order.
  const joined = setup().concat();
  const expected = new Uint8Array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
  assertEquals(bytes.equals(joined, expected), true);
});
Deno.test("[bytes] BytesList.shift", () => {
  const list = setup();
  // [bytes to drop, expected remaining content] applied one after another.
  const steps: [number, number[]][] = [
    [3, [3, 4, 5, 6, 7, 8, 9]],
    [4, [7, 8, 9]],
  ];
  for (const [drop, remaining] of steps) {
    list.shift(drop);
    assertEquals(list.size(), remaining.length);
    assertEquals(
      bytes.equals(list.concat(), new Uint8Array(remaining)),
      true,
    );
  }
});
Deno.test("[bytes] BytesList.shift 2", () => {
  const list = new BytesList();
  // Only the first five bytes of the backing array belong to the list.
  list.add(new Uint8Array([0, 0, 0, 1, 2, 0]), 0, 5);

  // Drop two bytes, then check the size and the flattened content.
  const dropTwoAndExpect = (size: number, content: number[]) => {
    list.shift(2);
    assertEquals(list.size(), size);
    assertEquals(bytes.equals(list.concat(), new Uint8Array(content)), true);
  };

  dropTwoAndExpect(3, [0, 1, 2]);
  dropTwoAndExpect(1, [2]);
});

Deno.test("[bytes] BytesList.shift 3", () => {
  const list = new BytesList();
  const backing = new Uint8Array([0, 0, 0, 1, 2, 0]);
  list.add(backing, 0, 5);
  // Dropping more bytes than the list holds must leave it empty.
  list.shift(100);
  assertEquals(list.size(), 0);
  const flattened = list.concat();
  assertEquals(flattened.byteLength, 0);
});

Deno.test("[bytes] BytesList.iterator()", () => {
  const list = setup();
  // Default start walks the whole list; a start inside the list walks its tail.
  assertEquals([...list.iterator()], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
  assertEquals([...list.iterator(5)], [5, 6, 7, 8, 9]);
  // A start outside the list yields nothing.
  for (const outside of [-1, 100]) {
    assertEquals([...list.iterator(outside)], []);
  }
});

// Previously an empty placeholder that asserted nothing (and misspelled the
// class name as "ByteList"); it now checks iteration from every position.
Deno.test("[bytes] BytesList.iterator() range", () => {
  const arr = setup();
  const size = arr.size();
  // In the fixture each byte's value equals its logical position, so iterating
  // from `from` must yield from, from + 1, ..., size - 1. This covers starts at
  // chunk boundaries, inside a chunk, and inside sub-range chunks.
  for (let from = 0; from < size; from++) {
    const expected = Array.from({ length: size - from }, (_, k) => from + k);
    assertEquals(Array.from(arr.iterator(from)), expected);
  }
  // One past the last byte is out of range and yields nothing.
  assertEquals(Array.from(arr.iterator(size)), []);
});
Loading

0 comments on commit 3586ef0

Please sign in to comment.