From f634660945e185cee1fad20b6070a96d043bb248 Mon Sep 17 00:00:00 2001 From: Joel Zhu <66701969+tianzhu007@users.noreply.github.com> Date: Tue, 30 May 2023 20:56:18 -0700 Subject: [PATCH] Add the Fix for MongoPoolClearedError (#15764) Recently, we hit a MongoPoolClearedError. It means the Mongo connection pool is destroyed and cleared whenever the scribe crashes. ![Screenshot 2023-05-30 at 8 18 04 PM](https://github.com/microsoft/FluidFramework/assets/66701969/36169b1e-0722-4610-ba81-e91e0c1f9ea0) To mitigate this issue, we wrap the read of the document from the document repository in runWithRetry, retrying up to 3 times at 1-second intervals (maxRetries = 3, retryAfterMs = 1000). That way, once the scribe pod has resumed, we can connect to the document collection again. --- .../packages/lambdas/src/scribe/lambdaFactory.ts | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/server/routerlicious/packages/lambdas/src/scribe/lambdaFactory.ts b/server/routerlicious/packages/lambdas/src/scribe/lambdaFactory.ts index 772918a91cc5..f3385b6e9e55 100644 --- a/server/routerlicious/packages/lambdas/src/scribe/lambdaFactory.ts +++ b/server/routerlicious/packages/lambdas/src/scribe/lambdaFactory.ts @@ -25,6 +25,7 @@ import { ITenantManager, LambdaName, MongoManager, + runWithRetry, } from "@fluidframework/server-services-core"; import { IDocumentSystemMessage, @@ -112,7 +113,15 @@ export class ScribeLambdaFactory const lumberProperties = getLumberBaseProperties(documentId, tenantId); try { - document = await this.documentRepository.readOne({ documentId, tenantId }); + document = (await runWithRetry( + async () => this.documentRepository.readOne({ documentId, tenantId }), + "readIDocumentInScribeLambdaFactory", + 3 /* maxRetries */, + 1000 /* retryAfterMs */, + lumberProperties, + undefined /* shouldIgnoreError */, + (error) => true /* shouldRetry */, + )) as IDocument; if (!isDocumentValid(document)) { // Document sessions can be joined (via Alfred) after a document is functionally deleted.