Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add LazyBlob helper class to lazy load file from filesystem #104

Merged
merged 16 commits into from
Mar 20, 2023
12 changes: 12 additions & 0 deletions packages/hub/src/lib/commit.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { commit } from "./commit";
import { createRepo } from "./create-repo";
import { deleteRepo } from "./delete-repo";
import { downloadFile } from "./download-file";
import { LazyBlob } from "../utils/LazyBlob";

const lfsContent = "O123456789".repeat(100_000);

Expand All @@ -29,6 +30,8 @@ describe("commit", () => {
const readme1 = await downloadFile({ repo, path: "README.md" });
assert.strictEqual(readme1?.status, 200);

const lazyBlob = await LazyBlob.create("./package.json");

try {
await commit({
repo,
Expand All @@ -47,6 +50,11 @@ describe("commit", () => {
content: new Blob([lfsContent]),
path: "test.lfs.txt",
},
{
operation: "addOrUpdate",
content: lazyBlob,
path: "package.json",
},
{
operation: "delete",
path: "README.md",
Expand All @@ -62,6 +70,10 @@ describe("commit", () => {
assert.strictEqual(lfsFileContent?.status, 200);
assert.strictEqual(await lfsFileContent?.text(), lfsContent);

const packageJsonContent = await downloadFile({ repo, path: "package.json" });
assert.strictEqual(packageJsonContent?.status, 200);
assert.strictEqual(await packageJsonContent?.text(), await lazyBlob.text());

const lfsFilePointer = await fetch(`${HUB_URL}/${repoName}/raw/main/test.lfs.txt`);
assert.strictEqual(lfsFilePointer.status, 200);
assert.strictEqual(
Expand Down
2 changes: 1 addition & 1 deletion packages/hub/src/lib/commit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ export interface CommitDeletedEntry {
path: string;
}

type ContentSource = Blob; // Todo: offer a smart Blob wrapper around (filePath + size) for Node.js
type ContentSource = Blob;

export interface CommitFile {
operation: "addOrUpdate";
Expand Down
45 changes: 45 additions & 0 deletions packages/hub/src/utils/LazyBlob.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { open, stat } from "node:fs/promises";
import { TextDecoder } from "node:util";
import { describe, expect, it } from "vitest";
import { LazyBlob } from "./LazyBlob";

// Tests for LazyBlob. Each test reads the expected bytes through its own
// FileHandle (independently of LazyBlob) and closes that handle in `finally`
// so no file descriptor is leaked, even when an assertion fails.
describe("LazyBlob", () => {
  it("should create a LazyBlob with a slice on the entire file", async () => {
    const file = await open("package.json", "r");

    try {
      const { size } = await stat("package.json");

      const lazyBlob = await LazyBlob.create("package.json");

      // A freshly created LazyBlob spans the whole file.
      expect(lazyBlob).toMatchObject({
        path: "package.json",
        start: 0,
        end: size,
      });
      expect(lazyBlob.size).toBe(size);
      expect(lazyBlob.type).toBe("");

      const text = await lazyBlob.text();
      const expectedText = (await file.read(Buffer.alloc(size), 0, size)).buffer.toString("utf8");
      expect(text).toBe(expectedText);

      // Only the first chunk of the stream is compared with the expected text.
      const result = await lazyBlob.stream().getReader().read();
      expect(new TextDecoder().decode(result.value)).toBe(expectedText);
    } finally {
      await file.close();
    }
  });

  it("should create a slice on the file", async () => {
    const file = await open("package.json", "r");

    try {
      const lazyBlob = await LazyBlob.create("package.json");

      const slice = lazyBlob.slice(10, 20);

      expect(slice).toMatchObject({
        path: "package.json",
        start: 10,
        end: 20,
      });
      expect(slice.size).toBe(10);

      const sliceText = await slice.text();
      // Read 10 bytes starting at file offset 10 to build the expected value.
      const expectedText = (await file.read(Buffer.alloc(10), 0, 10, 10)).buffer.toString("utf8");
      expect(sliceText).toBe(expectedText);

      const result = await slice.stream().getReader().read();
      expect(new TextDecoder().decode(result.value)).toBe(expectedText);
    } finally {
      await file.close();
    }
  });
});
123 changes: 123 additions & 0 deletions packages/hub/src/utils/LazyBlob.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import { createReadStream } from "node:fs";
import { open, stat } from "node:fs/promises";
import { Readable } from "node:stream";
import type { FileHandle } from "node:fs/promises";

/**
 * @internal
 *
 * A LazyBlob is a replacement for the Blob class that reads a file lazily
 * in order to preserve memory: the file content is only read from disk when
 * `arrayBuffer()`, `text()` or `stream()` is called.
 *
 * It is a drop-in replacement for the Blob class, so you can use it as a Blob.
 *
 * The main difference is the instantiation, which is done asynchronously using the `LazyBlob.create` method.
 *
 * @example
 * const lazyBlob = await LazyBlob.create("path/to/package.json");
 *
 * await fetch("https://aschen.tech", { method: "POST", body: lazyBlob });
 */
export class LazyBlob extends Blob {
  /**
   * Creates a new LazyBlob covering the whole file at `path`.
   *
   * Only the file size is looked up here (through `stat`); the content is not read.
   *
   * @param path Path to the file to be lazily read
   * @returns a LazyBlob spanning bytes `[0, size)` of the file
   */
  static async create(path: string): Promise<LazyBlob> {
    const { size } = await stat(path);

    return new LazyBlob(path, 0, size);
  }

  /** Path of the underlying file. */
  private path: string;
  /** Offset in the file of the first byte of this blob (inclusive). */
  private start: number;
  /** Offset in the file just past the last byte of this blob (exclusive). */
  private end: number;

  private constructor(path: string, start: number, end: number) {
    // The base Blob stays empty: every accessor below is overridden to read from the file.
    super();

    this.path = path;
    this.start = start;
    this.end = end;
  }

  /**
   * Returns the size of the blob, in bytes.
   */
  get size(): number {
    return this.end - this.start;
  }

  /**
   * Returns an empty string.
   * (This is a required property of the Blob class)
   */
  get type(): string {
    return "";
  }

  /**
   * Returns a new instance of LazyBlob that is a slice of the current one.
   *
   * The slice is inclusive of the start and exclusive of the end. Both offsets
   * are relative to this blob and are clamped to its bounds, so a slice that
   * starts past the end (or whose end precedes its start) is empty.
   *
   * The slice method does not support negative start/end.
   *
   * @param start beginning of the slice
   * @param end end of the slice
   * @throws TypeError when `start` or `end` is negative
   */
  slice(start = 0, end = this.size): LazyBlob {
    if (start < 0 || end < 0) {
      throw new TypeError("Unsupported negative start/end on LazyBlob.slice");
    }

    // Clamp both bounds so the slice never reaches outside this blob and never has a negative size.
    const sliceStart = Math.min(this.start + start, this.end);
    const sliceEnd = Math.max(sliceStart, Math.min(this.start + end, this.end));

    return new LazyBlob(this.path, sliceStart, sliceEnd);
  }

  /**
   * Read the part of the file delimited by the LazyBlob and returns it as an ArrayBuffer.
   *
   * If fewer bytes than expected could be read, the returned buffer only
   * contains the bytes actually read.
   */
  async arrayBuffer(): Promise<ArrayBuffer> {
    const length = this.size;

    if (length === 0) {
      // Nothing to read: avoid opening the file at all.
      return new ArrayBuffer(0);
    }

    // Read through a view over a real ArrayBuffer so that what we return is an
    // ArrayBuffer, as declared, and not a Node.js Buffer.
    const data = new ArrayBuffer(length);
    const { bytesRead } = await this.execute((file) => file.read(new Uint8Array(data), 0, length, this.start));

    return bytesRead === length ? data : data.slice(0, bytesRead);
  }

  /**
   * Read the part of the file delimited by the LazyBlob and returns it as a string (decoded as UTF-8).
   */
  async text(): Promise<string> {
    // Buffer.from(ArrayBuffer) creates a view over the same memory, it does not copy.
    return Buffer.from(await this.arrayBuffer()).toString("utf8");
  }

  /**
   * Returns a stream around the part of the file delimited by the LazyBlob.
   */
  stream(): ReturnType<Blob["stream"]> {
    if (this.size === 0) {
      // `createReadStream` takes an inclusive `end`, which cannot describe an empty range.
      return new Blob([]).stream();
    }

    // `end` is inclusive for createReadStream, hence the `- 1`.
    return Readable.toWeb(createReadStream(this.path, { start: this.start, end: this.end - 1 })) as ReturnType<
      Blob["stream"]
    >;
  }

  /**
   * We are opening and closing the file for each action to prevent file descriptor leaks.
   *
   * It is an intended choice of developer experience over performances.
   *
   * @param action callback receiving the opened file handle
   * @returns whatever `action` resolves to
   */
  private async execute<T>(action: (file: FileHandle) => Promise<T>): Promise<T> {
    const file = await open(this.path, "r");

    try {
      return await action(file);
    } finally {
      // Close the handle even when the action rejects.
      await file.close();
    }
  }
}
1 change: 1 addition & 0 deletions packages/hub/src/utils/sha256.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ export async function sha256(buffer: Blob): Promise<string> {
if (!cryptoModule) {
cryptoModule = await import("./sha256-node");
}

return cryptoModule.sha256Node(buffer);
}

Expand Down