From 97603f031b30b8a289519ff48f2c2c39b1396656 Mon Sep 17 00:00:00 2001 From: Daniel Walsh Date: Wed, 15 Jan 2025 00:11:10 +0000 Subject: [PATCH] On KV 404, re-insert with 60s TTL to avoid long 404 cache HITs (#7768) --- .changeset/lovely-years-wash.md | 5 +++ .../workers-shared/asset-worker/src/index.ts | 19 +++++++++-- .../asset-worker/src/utils/kv.ts | 34 ++++++++++++++++--- .../asset-worker/tests/kv.test.ts | 29 +++++++++++++++- 4 files changed, 80 insertions(+), 7 deletions(-) create mode 100644 .changeset/lovely-years-wash.md diff --git a/.changeset/lovely-years-wash.md b/.changeset/lovely-years-wash.md new file mode 100644 index 000000000000..72a58e70a6b1 --- /dev/null +++ b/.changeset/lovely-years-wash.md @@ -0,0 +1,5 @@ +--- +"@cloudflare/workers-shared": patch +--- + +fix: on a 404 from KV, we do not want the asset to stay in cache for the normal 1 year TTL. Instead we want to re-insert with a 60s TTL to revalidate and prevent a bad 404 from persisting. diff --git a/packages/workers-shared/asset-worker/src/index.ts b/packages/workers-shared/asset-worker/src/index.ts index 7f1068cc5bd8..e2f3aa16216c 100644 --- a/packages/workers-shared/asset-worker/src/index.ts +++ b/packages/workers-shared/asset-worker/src/index.ts @@ -14,6 +14,7 @@ import type { UnsafePerformanceTimer, } from "../../utils/types"; import type { ColoMetadata, Environment, ReadyAnalytics } from "./types"; +import type { Toucan } from "toucan-js"; export type Env = { /* @@ -100,7 +101,7 @@ export default class extends WorkerEntrypoint { }); } - return await this.env.JAEGER.enterSpan("handleRequest", async (span) => { + return this.env.JAEGER.enterSpan("handleRequest", async (span) => { span.setTags({ hostname: url.hostname, eyeballPath: url.pathname, @@ -114,8 +115,22 @@ export default class extends WorkerEntrypoint { this.unstable_exists.bind(this), this.unstable_getByETag.bind(this) ); - }); + }).catch((err) => + this.handleError(sentry, analytics, performance, startTimeMs, err) + ); } catch (err) { + return this.handleError(sentry, analytics, performance, startTimeMs, err); + } + } + + handleError( + sentry: Toucan | undefined, + analytics: Analytics, + performance: PerformanceTimer, + startTimeMs: number, + err: unknown + ) { + try { const response = new InternalServerErrorResponse(err as Error); // Log to Sentry if we can diff --git a/packages/workers-shared/asset-worker/src/utils/kv.ts b/packages/workers-shared/asset-worker/src/utils/kv.ts index 31142606b1a8..ef8eec3ce5fe 100644 --- a/packages/workers-shared/asset-worker/src/utils/kv.ts +++ b/packages/workers-shared/asset-worker/src/utils/kv.ts @@ -1,3 +1,5 @@ +import type { Toucan } from "toucan-js"; + export type AssetMetadata = { contentType: string; }; @@ -5,16 +7,40 @@ export type AssetMetadata = { export async function getAssetWithMetadataFromKV( assetsKVNamespace: KVNamespace, assetKey: string, + sentry?: Toucan, retries = 1 ) { let attempts = 0; while (attempts <= retries) { try { - return await assetsKVNamespace.getWithMetadata(assetKey, { - type: "stream", - cacheTtl: 31536000, // 1 year - }); + const asset = await assetsKVNamespace.getWithMetadata( + assetKey, + { + type: "stream", + cacheTtl: 31536000, // 1 year + } + ); + + if (asset.value === null) { + // Don't cache a 404 for a year by re-requesting with a minimum cacheTtl + const retriedAsset = + await assetsKVNamespace.getWithMetadata(assetKey, { + type: "stream", + cacheTtl: 60, // Minimum value allowed + }); + + if (retriedAsset.value !== null && sentry) { + sentry.captureException( + new Error( + `Initial request for asset ${assetKey} failed, but subsequent request succeeded.` + ) + ); + } + + return retriedAsset; + } + return asset; } catch (err) { if (attempts >= retries) { throw new Error( diff --git a/packages/workers-shared/asset-worker/tests/kv.test.ts b/packages/workers-shared/asset-worker/tests/kv.test.ts index 1435b1c390c0..ac9151dcfc85 100644 --- a/packages/workers-shared/asset-worker/tests/kv.test.ts +++ b/packages/workers-shared/asset-worker/tests/kv.test.ts @@ -65,11 +65,38 @@ describe("[Asset Worker] Fetching assets from KV", () => { spy.mockReturnValue(Promise.reject("Oeps! Something went wrong")); await expect(() => - getAssetWithMetadataFromKV(mockKVNamespace, "abcd", 2) + getAssetWithMetadataFromKV(mockKVNamespace, "abcd", undefined, 2) ).rejects.toThrowError( "Requested asset abcd could not be fetched from KV namespace." ); expect(spy).toHaveBeenCalledTimes(3); }); + + it("should retry on 404 and cache with 30s ttl", async () => { + let attempts = 0; + spy.mockImplementation(() => { + if (attempts++ === 0) { + return Promise.resolve({ + value: null, + }) as unknown as Promise< + KVNamespaceGetWithMetadataResult + >; + } else { + return Promise.resolve({ + value: "Hello world", + metadata: { + contentType: "text/html", + }, + }) as unknown as Promise< + KVNamespaceGetWithMetadataResult + >; + } + }); + + const asset = await getAssetWithMetadataFromKV(mockKVNamespace, "abcd"); + expect(asset?.value).toBeTruthy(); + // Once for the initial call, once for the 404 + expect(spy).toHaveBeenCalledTimes(2); + }); }); });