From 52278fb2677537c30b04b4b58f31ac2d53cd9230 Mon Sep 17 00:00:00 2001 From: Aschen Date: Fri, 17 Mar 2023 21:41:20 +0100 Subject: [PATCH 01/14] Add LazyBlob helper class to lazy load file from filesystem --- packages/hub/src/lib/LazyBlob.ts | 84 +++++++++++++++++++++++++++++ packages/hub/src/lib/commit.spec.ts | 13 +++++ packages/hub/src/lib/commit.ts | 15 +++--- packages/hub/src/utils/sha256.ts | 11 ++-- 4 files changed, 114 insertions(+), 9 deletions(-) create mode 100644 packages/hub/src/lib/LazyBlob.ts diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts new file mode 100644 index 000000000..821bf08d3 --- /dev/null +++ b/packages/hub/src/lib/LazyBlob.ts @@ -0,0 +1,84 @@ +import { createReadStream } from "fs"; +import { open, FileHandle } from "fs/promises"; +import { Readable } from "stream"; + +export class LazyBlob { + private path: string; + private file: FileHandle | null; + private totalSize: number; + + constructor(path: string) { + this.path = path; + + this.file = null; + this.totalSize = 0; + } + + get size(): any { + return this.totalSize; + } + + get lentgh() { + return this.size; + } + + get type() { + return ""; + } + + async init() { + this.file = await open(this.path, "r"); + + const { size } = await this.file.stat(); + this.totalSize = size; + } + + async dispose() { + if (this.file === null) { + return; + } + + await this.file.close(); + } + + async slice(start: number, end: number): Promise { + if (this.file === null) { + throw new Error("LazyBlob has not been initialized"); + } + + const size = end - start; + const slice = await this.file.read(Buffer.alloc(size), 0, size, start); + + return new Blob([slice.buffer]); + } + + async blob(): Promise { + if (this.file === null) { + throw new Error("LazyBlob has not been initialized"); + } + + return this.slice(0, this.size); + } + + async arrayBuffer(): Promise { + if (this.file === null) { + throw new Error("LazyBlob has not been initialized"); + } + + const slice = await this.file.read(Buffer.alloc(this.size), 0, this.size, 0); + + return slice.buffer; + } + + async text(): Promise { + const buffer = (await this.arrayBuffer()) as Buffer; + + return buffer.toString("utf8"); + } + + stream(): ReadableStream { + const stream = createReadStream(this.path); + + return stream as unknown as ReadableStream; + } +} diff --git a/packages/hub/src/lib/commit.spec.ts b/packages/hub/src/lib/commit.spec.ts index 696c6bae8..9030f9444 100644 --- a/packages/hub/src/lib/commit.spec.ts +++ b/packages/hub/src/lib/commit.spec.ts @@ -7,6 +7,7 @@ import { commit } from "./commit"; import { createRepo } from "./create-repo"; import { deleteRepo } from "./delete-repo"; import { downloadFile } from "./download-file"; +import { LazyBlob } from "./LazyBlob"; const lfsContent = "O123456789".repeat(100_000); @@ -29,6 +30,9 @@ describe("commit", () => { const readme1 = await downloadFile({ repo, path: "README.md" }); assert.strictEqual(readme1?.status, 200); + const lazyBlob = new LazyBlob("./package.json"); + await lazyBlob.init(); + try { await commit({ repo, @@ -47,6 +51,11 @@ describe("commit", () => { content: new Blob([lfsContent]), path: "test.lfs.txt", }, + { + operation: "addOrUpdate", + content: lazyBlob, + path: "package.json", + }, { operation: "delete", path: "README.md", @@ -62,6 +71,10 @@ describe("commit", () => { assert.strictEqual(lfsFileContent?.status, 200); assert.strictEqual(await lfsFileContent?.text(), lfsContent); + const packageJsonContent = await downloadFile({ repo, path: "package.json" }); + assert.strictEqual(packageJsonContent?.status, 200); + assert.strictEqual(await packageJsonContent?.text(), await lazyBlob.text()); + const lfsFilePointer = await fetch(`${HUB_URL}/${repoName}/raw/main/test.lfs.txt`); assert.strictEqual(lfsFilePointer.status, 200); assert.strictEqual( diff --git a/packages/hub/src/lib/commit.ts b/packages/hub/src/lib/commit.ts index 5645bf064..6c0622cbd 100644 --- a/packages/hub/src/lib/commit.ts +++ b/packages/hub/src/lib/commit.ts @@ -17,6 +17,7 @@ import { chunk } from "../utils/chunk"; import { promisesQueue } from "../utils/promisesQueue"; import { promisesQueueStreaming } from "../utils/promisesQueueStreaming"; import { sha256 } from "../utils/sha256"; +import { LazyBlob } from "./LazyBlob"; const CONCURRENT_SHAS = 5; const CONCURRENT_LFS_UPLOADS = 5; @@ -27,7 +28,7 @@ export interface CommitDeletedEntry { path: string; } -type ContentSource = Blob; // Todo: offer a smart Blob wrapper around (filePath + size) for Node.js +type ContentSource = Blob | LazyBlob; export interface CommitFile { operation: "addOrUpdate"; @@ -100,7 +101,7 @@ async function* commitIter(params: CommitParams): AsyncGenerator ({ path: operation.path, size: operation.content.size, - sample: base64FromBytes(new Uint8Array(await operation.content.slice(0, 512).arrayBuffer())), + sample: base64FromBytes(new Uint8Array(await (await operation.content.slice(0, 512)).arrayBuffer())), })) ), }; @@ -215,7 +216,7 @@ async function* commitIter(params: CommitParams): AsyncGenerator /^[0-9]+$/.test(key)); - if (parts.length !== Math.ceil(content.length / chunkSize)) { + if (parts.length !== Math.ceil(content.size / chunkSize)) { throw new Error("Invalid server response to upload large LFS file, wrong number of parts"); } @@ -232,7 +233,7 @@ async function* commitIter(params: CommitParams): AsyncGenerator { +export async function sha256(buffer: Blob | LazyBlob): Promise { if (buffer.size < 10_000_000 && globalThis.crypto?.subtle) { return hexFromBytes( new Uint8Array( - await globalThis.crypto.subtle.digest("SHA-256", buffer instanceof Blob ? await buffer.arrayBuffer() : buffer) + await globalThis.crypto.subtle.digest( + "SHA-256", + buffer instanceof Blob || buffer instanceof LazyBlob ? await buffer.arrayBuffer() : buffer + ) ) ); } @@ -44,7 +48,8 @@ export async function sha256(buffer: Blob): Promise { if (!cryptoModule) { cryptoModule = await import("./sha256-node"); } - return cryptoModule.sha256Node(buffer); + const clearText = buffer instanceof Blob || buffer instanceof LazyBlob ? await buffer.arrayBuffer() : buffer; + return cryptoModule.sha256Node(clearText); } // eslint-disable-next-line @typescript-eslint/consistent-type-imports From debbb9ddbb87d6079dd8fbfe506239de59424ced Mon Sep 17 00:00:00 2001 From: Aschen Date: Fri, 17 Mar 2023 21:45:39 +0100 Subject: [PATCH 02/14] dispose lazyblob in tests --- packages/hub/src/lib/commit.spec.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/hub/src/lib/commit.spec.ts b/packages/hub/src/lib/commit.spec.ts index 9030f9444..a4c09054a 100644 --- a/packages/hub/src/lib/commit.spec.ts +++ b/packages/hub/src/lib/commit.spec.ts @@ -89,6 +89,7 @@ size ${lfsContent.length} const readme2 = await downloadFile({ repo, path: "README.md" }); assert.strictEqual(readme2, null); } finally { + await lazyBlob.dispose(); await deleteRepo({ repo: { name: repoName, From 2d50d1e3bfd0ab3037fdee64e095f8d20a9c9e13 Mon Sep 17 00:00:00 2001 From: Aschen Date: Fri, 17 Mar 2023 21:49:55 +0100 Subject: [PATCH 03/14] fix lint --- packages/hub/src/lib/LazyBlob.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts index 821bf08d3..7ba845c33 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/lib/LazyBlob.ts @@ -1,6 +1,6 @@ import { createReadStream } from "fs"; -import { open, FileHandle } from "fs/promises"; -import { Readable } from "stream"; +import { open } from "fs/promises"; +import type { FileHandle } from "fs/promises"; export class LazyBlob { private path: string; @@ -14,26 +14,26 @@ export class LazyBlob { this.totalSize = 0; } - get size(): any { + get size(): number { return this.totalSize; } - get lentgh() { + get lentgh(): number { return this.size; } - get type() { + get type(): string { return ""; } - async init() { + async init(): Promise { this.file = await open(this.path, "r"); const { size } = await this.file.stat(); this.totalSize = size; } - async dispose() { + async dispose(): Promise { if (this.file === null) { return; } From 7e7945a56fd5b94c6715be72192b624a24e97487 Mon Sep 17 00:00:00 2001 From: Adrien Maret Date: Fri, 17 Mar 2023 22:09:42 +0100 Subject: [PATCH 04/14] Update packages/hub/src/lib/LazyBlob.ts Co-authored-by: Eliott C. --- packages/hub/src/lib/LazyBlob.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts index 7ba845c33..c950045dc 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/lib/LazyBlob.ts @@ -77,7 +77,7 @@ export class LazyBlob { } stream(): ReadableStream { - const stream = createReadStream(this.path); + return Readable.toWeb(createReadStream(this.path)); return stream as unknown as ReadableStream; } From 0210f84691357f85d2f050d678c23e6ee2e50549 Mon Sep 17 00:00:00 2001 From: Adrien Maret Date: Fri, 17 Mar 2023 22:09:50 +0100 Subject: [PATCH 05/14] Update packages/hub/src/lib/LazyBlob.ts Co-authored-by: Eliott C. --- packages/hub/src/lib/LazyBlob.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts index c950045dc..9a96d4a0c 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/lib/LazyBlob.ts @@ -1,6 +1,6 @@ -import { createReadStream } from "fs"; -import { open } from "fs/promises"; -import type { FileHandle } from "fs/promises"; +import { createReadStream } from "node:fs"; +import { open } from "node:fs/promises"; +import type { FileHandle } from "node:fs/promises"; export class LazyBlob { private path: string; From 13627aeab62d0a40398a6fe2893111d369d755ee Mon Sep 17 00:00:00 2001 From: Aschen Date: Fri, 17 Mar 2023 22:25:10 +0100 Subject: [PATCH 06/14] LazyBlob.create async method --- packages/hub/src/lib/LazyBlob.ts | 30 ++++++++++++++++++----------- packages/hub/src/lib/commit.spec.ts | 3 +-- packages/hub/test.js | 14 ++++++++++++++ 3 files changed, 34 insertions(+), 13 deletions(-) create mode 100644 packages/hub/test.js diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts index 9a96d4a0c..71fb0c030 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/lib/LazyBlob.ts @@ -1,13 +1,22 @@ import { createReadStream } from "node:fs"; import { open } from "node:fs/promises"; import type { FileHandle } from "node:fs/promises"; +import { Readable } from "node:stream"; export class LazyBlob { + static async create(path: string) { + const lazyBlob = new LazyBlob(path); + + await lazyBlob.init(); + + return lazyBlob; + } + private path: string; private file: FileHandle | null; private totalSize: number; - constructor(path: string) { + private constructor(path: string) { this.path = path; this.file = null; @@ -26,13 +35,6 @@ export class LazyBlob { return ""; } - async init(): Promise { - this.file = await open(this.path, "r"); - - const { size } = await this.file.stat(); - this.totalSize = size; - } - async dispose(): Promise { if (this.file === null) { return; @@ -41,12 +43,13 @@ export class LazyBlob { await this.file.close(); } - async slice(start: number, end: number): Promise { + async slice(start = 0, end = this.size): Promise { if (this.file === null) { throw new Error("LazyBlob has not been initialized"); } - const size = end - start; + const size = Math.abs(end - start) > this.size ? this.size : Math.abs(end - start); + const slice = await this.file.read(Buffer.alloc(size), 0, size, start); return new Blob([slice.buffer]); @@ -78,7 +81,12 @@ export class LazyBlob { stream(): ReadableStream { return Readable.toWeb(createReadStream(this.path)); + } - return stream as unknown as ReadableStream; + private async init(): Promise { + this.file = await open(this.path, "r"); + + const { size } = await this.file.stat(); + this.totalSize = size; } } diff --git a/packages/hub/src/lib/commit.spec.ts b/packages/hub/src/lib/commit.spec.ts index a4c09054a..47723ade8 100644 --- a/packages/hub/src/lib/commit.spec.ts +++ b/packages/hub/src/lib/commit.spec.ts @@ -30,8 +30,7 @@ describe("commit", () => { const readme1 = await downloadFile({ repo, path: "README.md" }); assert.strictEqual(readme1?.status, 200); - const lazyBlob = new LazyBlob("./package.json"); - await lazyBlob.init(); + const lazyBlob = await LazyBlob.create("./package.json"); try { await commit({ diff --git a/packages/hub/test.js b/packages/hub/test.js new file mode 100644 index 000000000..cd09efedf --- /dev/null +++ b/packages/hub/test.js @@ -0,0 +1,14 @@ +if (!process.env.HF_ACCESS_TOKEN) { + const originalFetch = global.fetch; + + global.fetch = (...args) => { + console.log(args); + return originalFetch(...args); + }; +} + +async function run() { + await fetch("https://aschen.tech"); +} + +run(); From 16c0fedd7f9a4b47836aa2931f7705427278f870 Mon Sep 17 00:00:00 2001 From: Aschen Date: Fri, 17 Mar 2023 23:06:11 +0100 Subject: [PATCH 07/14] Blob compatible API --- packages/hub/src/lib/LazyBlob.ts | 74 ++++++++++++++--------------- packages/hub/src/lib/commit.spec.ts | 1 - packages/hub/src/lib/commit.ts | 2 +- packages/hub/src/utils/sha256.ts | 11 ++--- 4 files changed, 42 insertions(+), 46 deletions(-) diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts index 71fb0c030..96b85523f 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/lib/LazyBlob.ts @@ -3,9 +3,9 @@ import { open } from "node:fs/promises"; import type { FileHandle } from "node:fs/promises"; import { Readable } from "node:stream"; -export class LazyBlob { - static async create(path: string) { - const lazyBlob = new LazyBlob(path); +export class LazyBlob extends Blob { + static async create(path: string, start?: number, end?: number): Promise { + const lazyBlob = new LazyBlob(path, start, end); await lazyBlob.init(); @@ -13,62 +13,53 @@ export class LazyBlob { } private path: string; - private file: FileHandle | null; + private start: number | null; + private end: number | null; private totalSize: number; - private constructor(path: string) { + private constructor(path: string, start?: number, end?: number) { + super(); + this.path = path; + this.start = start || null; + this.end = end || null; - this.file = null; this.totalSize = 0; } get size(): number { - return this.totalSize; - } + if (this.start !== null) { + if (this.end !== null) { + return Math.abs(this.end - this.start); + } - get lentgh(): number { - return this.size; + return this.totalSize - this.start; + } + + return this.totalSize; } get type(): string { return ""; } - async dispose(): Promise { - if (this.file === null) { - return; - } + slice(start = 0, end = this.size): LazyBlob { + const slice = new LazyBlob(this.path, start, end); - await this.file.close(); - } - - async slice(start = 0, end = this.size): Promise { - if (this.file === null) { - throw new Error("LazyBlob has not been initialized"); - } - - const size = Math.abs(end - start) > this.size ? this.size : Math.abs(end - start); - - const slice = await this.file.read(Buffer.alloc(size), 0, size, start); - - return new Blob([slice.buffer]); + return slice; } async blob(): Promise { - if (this.file === null) { - throw new Error("LazyBlob has not been initialized"); - } - return this.slice(0, this.size); } async arrayBuffer(): Promise { - if (this.file === null) { - throw new Error("LazyBlob has not been initialized"); - } + const start = this.start || 0; + const end = this.end || this.size; + const size = Math.abs(end - start); + const cappedSize = size > this.size ? this.size : size; - const slice = await this.file.read(Buffer.alloc(this.size), 0, this.size, 0); + const slice = await this.execute((file) => file.read(Buffer.alloc(cappedSize), 0, cappedSize, 0)); return slice.buffer; } @@ -84,9 +75,18 @@ export class LazyBlob { } private async init(): Promise { - this.file = await open(this.path, "r"); + const { size } = await this.execute((file) => file.stat()); - const { size } = await this.file.stat(); this.totalSize = size; } + + private async execute(action: (file: FileHandle) => Promise) { + const file = await open(this.path, "r"); + + const ret = await action(file); + + await file.close(); + + return ret; + } } diff --git a/packages/hub/src/lib/commit.spec.ts b/packages/hub/src/lib/commit.spec.ts index 47723ade8..c5eb4a951 100644 --- a/packages/hub/src/lib/commit.spec.ts +++ b/packages/hub/src/lib/commit.spec.ts @@ -88,7 +88,6 @@ size ${lfsContent.length} const readme2 = await downloadFile({ repo, path: "README.md" }); assert.strictEqual(readme2, null); } finally { - await lazyBlob.dispose(); await deleteRepo({ repo: { name: repoName, diff --git a/packages/hub/src/lib/commit.ts b/packages/hub/src/lib/commit.ts index 6c0622cbd..387c132d9 100644 --- a/packages/hub/src/lib/commit.ts +++ b/packages/hub/src/lib/commit.ts @@ -28,7 +28,7 @@ export interface CommitDeletedEntry { path: string; } -type ContentSource = Blob | LazyBlob; +type ContentSource = Blob; export interface CommitFile { operation: "addOrUpdate"; diff --git a/packages/hub/src/utils/sha256.ts b/packages/hub/src/utils/sha256.ts index 48846d95f..3c36e11e6 100644 --- a/packages/hub/src/utils/sha256.ts +++ b/packages/hub/src/utils/sha256.ts @@ -1,4 +1,4 @@ -import { LazyBlob } from "../lib/LazyBlob"; +import type { LazyBlob } from "../lib/LazyBlob"; import { hexFromBytes } from "./hexFromBytes"; /** @@ -9,10 +9,7 @@ export async function sha256(buffer: Blob | LazyBlob): Promise { if (buffer.size < 10_000_000 && globalThis.crypto?.subtle) { return hexFromBytes( new Uint8Array( - await globalThis.crypto.subtle.digest( - "SHA-256", - buffer instanceof Blob || buffer instanceof LazyBlob ? await buffer.arrayBuffer() : buffer - ) + await globalThis.crypto.subtle.digest("SHA-256", buffer instanceof Blob ? await buffer.arrayBuffer() : buffer) ) ); } @@ -48,8 +45,8 @@ export async function sha256(buffer: Blob | LazyBlob): Promise { if (!cryptoModule) { cryptoModule = await import("./sha256-node"); } - const clearText = buffer instanceof Blob || buffer instanceof LazyBlob ? await buffer.arrayBuffer() : buffer; - return cryptoModule.sha256Node(clearText); + + return cryptoModule.sha256Node(buffer instanceof Blob ? await buffer.arrayBuffer() : buffer); } // eslint-disable-next-line @typescript-eslint/consistent-type-imports From aa3316fc07cb8f95516f21b4e0d6b7f9804fdce9 Mon Sep 17 00:00:00 2001 From: Aschen Date: Sat, 18 Mar 2023 00:42:14 +0100 Subject: [PATCH 08/14] revers files --- packages/hub/src/lib/LazyBlob.ts | 4 ---- packages/hub/src/lib/commit.ts | 9 +++------ packages/hub/src/utils/sha256.ts | 5 ++--- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts index 96b85523f..411c154ab 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/lib/LazyBlob.ts @@ -49,10 +49,6 @@ export class LazyBlob extends Blob { return slice; } - async blob(): Promise { - return this.slice(0, this.size); - } - async arrayBuffer(): Promise { const start = this.start || 0; const end = this.end || this.size; diff --git a/packages/hub/src/lib/commit.ts b/packages/hub/src/lib/commit.ts index 387c132d9..73728f2a1 100644 --- a/packages/hub/src/lib/commit.ts +++ b/packages/hub/src/lib/commit.ts @@ -17,7 +17,6 @@ import { chunk } from "../utils/chunk"; import { promisesQueue } from "../utils/promisesQueue"; import { promisesQueueStreaming } from "../utils/promisesQueueStreaming"; import { sha256 } from "../utils/sha256"; -import { LazyBlob } from "./LazyBlob"; const CONCURRENT_SHAS = 5; const CONCURRENT_LFS_UPLOADS = 5; @@ -101,7 +100,7 @@ async function* commitIter(params: CommitParams): AsyncGenerator ({ path: operation.path, size: operation.content.size, - sample: base64FromBytes(new Uint8Array(await (await operation.content.slice(0, 512)).arrayBuffer())), + sample: base64FromBytes(new Uint8Array(await operation.content.slice(0, 512).arrayBuffer())), })) ), }; @@ -233,7 +232,7 @@ async function* commitIter(params: CommitParams): AsyncGenerator { +export async function sha256(buffer: Blob): Promise { if (buffer.size < 10_000_000 && globalThis.crypto?.subtle) { return hexFromBytes( new Uint8Array( @@ -46,7 +45,7 @@ export async function sha256(buffer: Blob | LazyBlob): Promise { cryptoModule = await import("./sha256-node"); } - return cryptoModule.sha256Node(buffer instanceof Blob ? await buffer.arrayBuffer() : buffer); + return cryptoModule.sha256Node(buffer); } // eslint-disable-next-line @typescript-eslint/consistent-type-imports From 2ce190c1d398644b7301b4c4dcecff000b2b3959 Mon Sep 17 00:00:00 2001 From: Aschen Date: Sat, 18 Mar 2023 00:47:02 +0100 Subject: [PATCH 09/14] fix reviews and delete oopsi --- packages/hub/src/lib/LazyBlob.ts | 31 +++++++++++++++---------------- packages/hub/test.js | 14 -------------- 2 files changed, 15 insertions(+), 30 deletions(-) delete mode 100644 packages/hub/test.js diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts index 411c154ab..1453168ab 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/lib/LazyBlob.ts @@ -1,28 +1,28 @@ import { createReadStream } from "node:fs"; -import { open } from "node:fs/promises"; +import { open, stat } from "node:fs/promises"; import type { FileHandle } from "node:fs/promises"; import { Readable } from "node:stream"; export class LazyBlob extends Blob { - static async create(path: string, start?: number, end?: number): Promise { - const lazyBlob = new LazyBlob(path, start, end); + static async create(path: string, start: number, end: number): Promise { + const { size } = await stat(path); - await lazyBlob.init(); + const lazyBlob = new LazyBlob(path, 0, size); return lazyBlob; } private path: string; - private start: number | null; - private end: number | null; + private start: number; + private end: number; private totalSize: number; - private constructor(path: string, start?: number, end?: number) { + private constructor(path: string, start: number, end: number) { super(); this.path = path; - this.start = start || null; - this.end = end || null; + this.start = start; + this.end = end; this.totalSize = 0; } @@ -44,18 +44,17 @@ export class LazyBlob extends Blob { } slice(start = 0, end = this.size): LazyBlob { - const slice = new LazyBlob(this.path, start, end); + if (start < 0 || end < 0) { + new TypeError("Unsupported negative start/end on LazyBlob.slice"); + } + + const slice = new LazyBlob(this.path, this.start + start, Math.min(this.start + end, this.end)); return slice; } async arrayBuffer(): Promise { - const start = this.start || 0; - const end = this.end || this.size; - const size = Math.abs(end - start); - const cappedSize = size > this.size ? this.size : size; - - const slice = await this.execute((file) => file.read(Buffer.alloc(cappedSize), 0, cappedSize, 0)); + const slice = await this.execute((file) => file.read(Buffer.alloc(this.size), 0, this.size, this.start)); return slice.buffer; } diff --git a/packages/hub/test.js b/packages/hub/test.js deleted file mode 100644 index cd09efedf..000000000 --- a/packages/hub/test.js +++ /dev/null @@ -1,14 +0,0 @@ -if (!process.env.HF_ACCESS_TOKEN) { - const originalFetch = global.fetch; - - global.fetch = (...args) => { - console.log(args); - return originalFetch(...args); - }; -} - -async function run() { - await fetch("https://aschen.tech"); -} - -run(); From 820f5d78ef059cd91940398f3565de111a25c7d2 Mon Sep 17 00:00:00 2001 From: Aschen Date: Sat, 18 Mar 2023 00:48:48 +0100 Subject: [PATCH 10/14] Lazyblob.create fix --- packages/hub/src/lib/LazyBlob.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts index 1453168ab..18f5b14b5 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/lib/LazyBlob.ts @@ -4,7 +4,7 @@ import type { FileHandle } from "node:fs/promises"; import { Readable } from "node:stream"; export class LazyBlob extends Blob { - static async create(path: string, start: number, end: number): Promise { + static async create(path: string): Promise { const { size } = await stat(path); const lazyBlob = new LazyBlob(path, 0, size); From 59f98e3d198948f020d70fcda1e6913bc0e1a951 Mon Sep 17 00:00:00 2001 From: Aschen Date: Sat, 18 Mar 2023 00:55:03 +0100 Subject: [PATCH 11/14] try to fix ts error --- packages/hub/src/lib/LazyBlob.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts index 18f5b14b5..a262eea80 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/lib/LazyBlob.ts @@ -66,7 +66,7 @@ export class LazyBlob extends Blob { } stream(): ReadableStream { - return Readable.toWeb(createReadStream(this.path)); + return Readable.toWeb(createReadStream(this.path)) as ReadableStream; } private async init(): Promise { From 48d4fec0e5b527b0d06386d68a085aaa6b7a6e9c Mon Sep 17 00:00:00 2001 From: Aschen Date: Sat, 18 Mar 2023 22:43:02 +0100 Subject: [PATCH 12/14] add tests, fix stream end bug and export class --- packages/hub/src/lib/LazyBlob.spec.ts | 45 ++++++++++++++++ packages/hub/src/lib/LazyBlob.ts | 76 +++++++++++++++++++-------- packages/hub/src/lib/index.ts | 1 + 3 files changed, 101 insertions(+), 21 deletions(-) create mode 100644 packages/hub/src/lib/LazyBlob.spec.ts diff --git a/packages/hub/src/lib/LazyBlob.spec.ts b/packages/hub/src/lib/LazyBlob.spec.ts new file mode 100644 index 000000000..c4dae5a39 --- /dev/null +++ b/packages/hub/src/lib/LazyBlob.spec.ts @@ -0,0 +1,45 @@ +import { describe, expect, it } from "vitest"; +import { open, stat } from "node:fs/promises"; +import { LazyBlob } from "./LazyBlob"; +import { TextDecoder } from "node:util"; + +describe("LazyBlob", () => { + it("should create a LazyBlob with a slice on the entire file", async () => { + const file = await open("package.json", "r"); + const { size } = await stat("package.json"); + + const lazyBlob = await LazyBlob.create("package.json"); + + expect(lazyBlob).toMatchObject({ + path: "package.json", + start: 0, + end: size, + }); + expect(lazyBlob.size).toBe(size); + expect(lazyBlob.type).toBe(""); + const text = await lazyBlob.text(); + const expectedText = (await file.read(Buffer.alloc(size), 0, size)).buffer.toString("utf8"); + expect(text).toBe(expectedText); + const result = await lazyBlob.stream().getReader().read(); + expect(new TextDecoder().decode(result.value)).toBe(expectedText); + }); + + it("should create a slice on the file", async () => { + const file = await open("package.json", "r"); + const lazyBlob = await LazyBlob.create("package.json"); + + const slice = lazyBlob.slice(10, 20); + + expect(slice).toMatchObject({ + path: "package.json", + start: 10, + end: 20, + }); + expect(slice.size).toBe(10); + const sliceText = await slice.text(); + const expectedText = (await file.read(Buffer.alloc(10), 0, 10, 10)).buffer.toString("utf8"); + expect(sliceText).toBe(expectedText); + const result = await slice.stream().getReader().read(); + expect(new TextDecoder().decode(result.value)).toBe(expectedText); + }); +}); diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts index a262eea80..6b8887d24 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/lib/LazyBlob.ts @@ -1,9 +1,27 @@ import { createReadStream } from "node:fs"; import { open, stat } from "node:fs/promises"; -import type { FileHandle } from "node:fs/promises"; import { Readable } from "node:stream"; +import type { FileHandle } from "node:fs/promises"; +/** + * A LazyBlob is a replacement for the Blob class that allows to partially read files + * in order to preserve memory. + * + * It is a drop-in replacement for the Blob class, so you can use it as a Blob. + * + * The main difference is the instantiation, which is done asynchronously using the `LazyBlob.create` method. + * + * @example + * const lazyBlob = await LazyBlob.create("path/to/package.json"); + * + * await fetch("https://aschen.tech", { method: "POST", body: lazyBlob }); + */ export class LazyBlob extends Blob { + /** + * Creates a new LazyBlob on the provided file. + * + * @param path Path to the file to be lazy readed + */ static async create(path: string): Promise { const { size } = await stat(path); @@ -15,7 +33,6 @@ export class LazyBlob extends Blob { private path: string; private start: number; private end: number; - private totalSize: number; private constructor(path: string, start: number, end: number) { super(); @@ -23,26 +40,33 @@ export class LazyBlob extends Blob { this.path = path; this.start = start; this.end = end; - - this.totalSize = 0; } + /** + * Returns the size of the blob. + */ get size(): number { - if (this.start !== null) { - if (this.end !== null) { - return Math.abs(this.end - this.start); - } - - return this.totalSize - this.start; - } - - return this.totalSize; + return this.end - this.start; } + /** + * Returns an empty string. + * (This is a required property of the Blob class) + */ get type(): string { return ""; } + /** + * Returns a new instance of LazyBlob that is a slice of the current one. + * + * The slice is inclusive of the start and exclusive of the end. + * + * The slice method does not supports negative start/end. + * + * @param start beginning of the slice + * @param end end of the slice + */ slice(start = 0, end = this.size): LazyBlob { if (start < 0 || end < 0) { new TypeError("Unsupported negative start/end on LazyBlob.slice"); @@ -53,28 +77,38 @@ export class LazyBlob extends Blob { return slice; } + /** + * Read the part of the file delimited by the LazyBlob and returns it as an ArrayBuffer. + */ async arrayBuffer(): Promise { const slice = await this.execute((file) => file.read(Buffer.alloc(this.size), 0, this.size, this.start)); return slice.buffer; } + /** + * Read the part of the file delimited by the LazyBlob and returns it as a string. + */ async text(): Promise { const buffer = (await this.arrayBuffer()) as Buffer; return buffer.toString("utf8"); } - stream(): ReadableStream { - return Readable.toWeb(createReadStream(this.path)) as ReadableStream; - } - - private async init(): Promise { - const { size } = await this.execute((file) => file.stat()); - - this.totalSize = size; + /** + * Returns a stream around the part of the file delimited by the LazyBlob. + */ + stream(): ReturnType { + return Readable.toWeb(createReadStream(this.path, { start: this.start, end: this.end - 1 })) as ReturnType< + Blob["stream"] + >; } + /** + * We are opening and closing the file for each action to prevent file descriptor leaks. + * + * It is an intended choice of developer experience over performances. + */ private async execute(action: (file: FileHandle) => Promise) { const file = await open(this.path, "r"); diff --git a/packages/hub/src/lib/index.ts b/packages/hub/src/lib/index.ts index 64916a422..6ff33436f 100644 --- a/packages/hub/src/lib/index.ts +++ b/packages/hub/src/lib/index.ts @@ -8,3 +8,4 @@ export * from "./list-models"; export * from "./list-spaces"; export * from "./list-files"; export * from "./who-am-i"; +export * from "./LazyBlob"; From 245bc9e2bb490846f58be436be8a89c5443bf5fa Mon Sep 17 00:00:00 2001 From: Aschen Date: Sat, 18 Mar 2023 23:16:51 +0100 Subject: [PATCH 13/14] nit --- packages/hub/src/lib/LazyBlob.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/lib/LazyBlob.ts index 6b8887d24..0c9118fee 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/lib/LazyBlob.ts @@ -4,7 +4,7 @@ import { Readable } from "node:stream"; import type { FileHandle } from "node:fs/promises"; /** - * A LazyBlob is a replacement for the Blob class that allows to partially read files + * A LazyBlob is a replacement for the Blob class that allows to lazy read files * in order to preserve memory. * * It is a drop-in replacement for the Blob class, so you can use it as a Blob. From 7a15ed25aa08639a279406f989a8712442090d29 Mon Sep 17 00:00:00 2001 From: Aschen Date: Mon, 20 Mar 2023 18:09:40 +0100 Subject: [PATCH 14/14] move LazyBlob in utils and dont export it --- packages/hub/src/lib/commit.spec.ts | 2 +- packages/hub/src/lib/index.ts | 1 - packages/hub/src/{lib => utils}/LazyBlob.spec.ts | 4 ++-- packages/hub/src/{lib => utils}/LazyBlob.ts | 2 ++ 4 files changed, 5 insertions(+), 4 deletions(-) rename packages/hub/src/{lib => utils}/LazyBlob.spec.ts (100%) rename packages/hub/src/{lib => utils}/LazyBlob.ts (99%) diff --git a/packages/hub/src/lib/commit.spec.ts b/packages/hub/src/lib/commit.spec.ts index c5eb4a951..06e21febb 100644 --- a/packages/hub/src/lib/commit.spec.ts +++ b/packages/hub/src/lib/commit.spec.ts @@ -7,7 +7,7 @@ import { commit } from "./commit"; import { createRepo } from "./create-repo"; import { deleteRepo } from "./delete-repo"; import { downloadFile } from "./download-file"; -import { LazyBlob } from "./LazyBlob"; +import { LazyBlob } from "../utils/LazyBlob"; const lfsContent = "O123456789".repeat(100_000); diff --git a/packages/hub/src/lib/index.ts b/packages/hub/src/lib/index.ts index 6ff33436f..64916a422 100644 --- a/packages/hub/src/lib/index.ts +++ b/packages/hub/src/lib/index.ts @@ -8,4 +8,3 @@ export * from "./list-models"; export * from "./list-spaces"; export * from "./list-files"; export * from "./who-am-i"; -export * from "./LazyBlob"; diff --git a/packages/hub/src/lib/LazyBlob.spec.ts b/packages/hub/src/utils/LazyBlob.spec.ts similarity index 100% rename from packages/hub/src/lib/LazyBlob.spec.ts rename to packages/hub/src/utils/LazyBlob.spec.ts index c4dae5a39..c024233ac 100644 --- a/packages/hub/src/lib/LazyBlob.spec.ts +++ b/packages/hub/src/utils/LazyBlob.spec.ts @@ -1,7 +1,7 @@ -import { describe, expect, it } from "vitest"; import { open, stat } from "node:fs/promises"; -import { LazyBlob } from "./LazyBlob"; import { TextDecoder } from "node:util"; +import { describe, expect, it } from "vitest"; +import { LazyBlob } from "./LazyBlob"; describe("LazyBlob", () => { it("should create a LazyBlob with a slice on the entire file", async () => { diff --git a/packages/hub/src/lib/LazyBlob.ts b/packages/hub/src/utils/LazyBlob.ts similarity index 99% rename from packages/hub/src/lib/LazyBlob.ts rename to packages/hub/src/utils/LazyBlob.ts index 0c9118fee..714c0ddbf 100644 --- a/packages/hub/src/lib/LazyBlob.ts +++ b/packages/hub/src/utils/LazyBlob.ts @@ -4,6 +4,8 @@ import { Readable } from "node:stream"; import type { FileHandle } from "node:fs/promises"; /** + * @internal + * * A LazyBlob is a replacement for the Blob class that allows to lazy read files * in order to preserve memory. *