From 39ab99c3d0c819094b7eb39edd22c81322ca4627 Mon Sep 17 00:00:00 2001 From: Alex Gherghisan Date: Fri, 3 May 2024 09:11:42 +0100 Subject: [PATCH] feat: add proving retries (#6145) This PR adds retries to proving requests. This is especially needed when building real proofs as those touch the filesystem (to generate keys, move bytecode, write public inputs etc.) and that could fail for various reasons. --- yarn-project/prover-client/package.json | 2 +- yarn-project/prover-client/package.local.json | 2 +- .../prover-pool/memory-proving-queue.test.ts | 17 +++++++++++++-- .../src/prover-pool/memory-proving-queue.ts | 21 +++++++++++++++++-- 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/yarn-project/prover-client/package.json b/yarn-project/prover-client/package.json index 3bdf4749602..fff69ca9878 100644 --- a/yarn-project/prover-client/package.json +++ b/yarn-project/prover-client/package.json @@ -27,7 +27,7 @@ "formatting": "run -T prettier --check ./src && run -T eslint ./src", "formatting:fix": "run -T eslint --fix ./src && run -T prettier -w ./src", "bb": "node --no-warnings ./dest/bb/index.js", - "test": "LOG_LEVEL=${LOG_LEVEL:-silent} DEBUG_COLORS=1 NODE_NO_WARNINGS=1 node --experimental-vm-modules ../node_modules/.bin/jest --testTimeout=600000 --forceExit" + "test": "LOG_LEVEL=${LOG_LEVEL:-silent} DEBUG_COLORS=1 NODE_NO_WARNINGS=1 node --experimental-vm-modules ../node_modules/.bin/jest --testTimeout=750000 --forceExit" }, "jest": { "moduleNameMapper": { diff --git a/yarn-project/prover-client/package.local.json b/yarn-project/prover-client/package.local.json index 5eeed2af06a..04b562bf213 100644 --- a/yarn-project/prover-client/package.local.json +++ b/yarn-project/prover-client/package.local.json @@ -1,5 +1,5 @@ { "scripts": { - "test": "LOG_LEVEL=${LOG_LEVEL:-silent} DEBUG_COLORS=1 NODE_NO_WARNINGS=1 node --experimental-vm-modules ../node_modules/.bin/jest --testTimeout=600000 --forceExit" + "test": "LOG_LEVEL=${LOG_LEVEL:-silent} DEBUG_COLORS=1 NODE_NO_WARNINGS=1 node --experimental-vm-modules ../node_modules/.bin/jest --testTimeout=750000 --forceExit" } } diff --git a/yarn-project/prover-client/src/prover-pool/memory-proving-queue.test.ts b/yarn-project/prover-client/src/prover-pool/memory-proving-queue.test.ts index b9520f333a5..dc3d77ab7dc 100644 --- a/yarn-project/prover-client/src/prover-pool/memory-proving-queue.test.ts +++ b/yarn-project/prover-client/src/prover-pool/memory-proving-queue.test.ts @@ -45,14 +45,27 @@ describe('MemoryProvingQueue', () => { await expect(promise).resolves.toEqual(new RootParityInput(proof, vk, publicInputs)); }); - it('notifies of errors', async () => { + it('retries failed jobs', async () => { const inputs = makeBaseParityInputs(); - const promise = queue.getBaseParityProof(inputs); + void queue.getBaseParityProof(inputs); + const job = await queue.getProvingJob(); expect(job?.request.inputs).toEqual(inputs); const error = new Error('test error'); + await queue.rejectProvingJob(job!.id, error); + await expect(queue.getProvingJob()).resolves.toEqual(job); + }); + + it('notifies errors', async () => { + const promise = queue.getBaseParityProof(makeBaseParityInputs()); + + const error = new Error('test error'); + await queue.rejectProvingJob((await queue.getProvingJob())!.id, error); + await queue.rejectProvingJob((await queue.getProvingJob())!.id, error); + await queue.rejectProvingJob((await queue.getProvingJob())!.id, error); + await expect(promise).rejects.toEqual(error); }); }); diff --git a/yarn-project/prover-client/src/prover-pool/memory-proving-queue.ts b/yarn-project/prover-client/src/prover-pool/memory-proving-queue.ts index 88afa9859ed..c3e5194f027 100644 --- a/yarn-project/prover-client/src/prover-pool/memory-proving-queue.ts +++ b/yarn-project/prover-client/src/prover-pool/memory-proving-queue.ts @@ -33,8 +33,11 @@ type ProvingJobWithResolvers = { id: string; request: T; signal?: AbortSignal; + attempts: number; } & PromiseWithResolvers>; +const MAX_RETRIES = 3; + export class MemoryProvingQueue implements CircuitProver, ProvingJobSource { private jobId = 0; private log = createDebugLogger('aztec:prover-client:prover-pool:queue'); @@ -95,7 +98,18 @@ export class MemoryProvingQueue implements CircuitProver, ProvingJobSource { return Promise.resolve(); } - job.reject(err); + if (job.attempts < MAX_RETRIES) { + job.attempts++; + this.log.warn( + `Job id=${job.id} type=${ProvingRequestType[job.request.type]} failed with error: ${err}. Retry ${ + job.attempts + }/${MAX_RETRIES}`, + ); + this.queue.put(job); + } else { + this.log.error(`Job id=${job.id} type=${ProvingRequestType[job.request.type]} failed with error: ${err}`); + job.reject(err); + } return Promise.resolve(); } @@ -111,13 +125,16 @@ export class MemoryProvingQueue implements CircuitProver, ProvingJobSource { promise, resolve, reject, + attempts: 1, }; if (signal) { signal.addEventListener('abort', () => reject(new AbortedError('Operation has been aborted'))); } - this.log.info(`Adding ${ProvingRequestType[request.type]} proving job to queue`); + this.log.info( + `Adding id=${item.id} type=${ProvingRequestType[request.type]} proving job to queue depth=${this.queue.length()}`, + ); // TODO (alexg) remove the `any` if (!this.queue.put(item as any)) { throw new Error();