Skip to content

Commit

Permalink
fix: ensure unused KV and Cache blobs cleaned up (#4466)
Browse files Browse the repository at this point in the history
cloudflare/miniflare#656 introduced a bug in the SQL statement used
for getting the old blob ID of entries to delete when overriding keys.
This meant if a key was overridden, the old blob pointed to by that
key was not deleted. This lead to an accumulation of garbage when
`kvPersist` and `cachePersist` were enabled.
  • Loading branch information
mrbbot authored Dec 4, 2023
1 parent 6c5bc70 commit 71fb0b8
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 2 deletions.
9 changes: 9 additions & 0 deletions .changeset/slow-pants-fetch.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
"miniflare": patch
---

fix: ensure unused KV and Cache blobs cleaned up

When storing data in KV, Cache and R2, Miniflare uses both an SQL database and separate blob store. When writing a key/value pair, a blob is created for the new value and the old blob for the previous value (if any) is deleted. A few months ago, we introduced a change that prevented old blobs being deleted for KV and Cache. R2 was unaffected. This shouldn't have caused any problems, but could lead to persistence directories growing unnecessarily as they filled up with garbage blobs. This change ensures garbage blobs are deleted.

Note existing garbage will not be cleaned up. If you'd like to do this, download this Node script (https://gist.github.com/mrbbot/68787e19dcde511bd99aa94997b39076). If you're using the default Wrangler persistence directory, run `node gc.mjs kv .wrangler/state/v3/kv <namespace_id_1> <namespace_id_2> ...` and `node gc.mjs cache .wrangler/state/v3/cache default named:<cache_name_1> named:<cache_name_2> ...` with each of your KV namespace IDs (not binding names) and named caches.
2 changes: 1 addition & 1 deletion packages/miniflare/src/workers/shared/keyvalue.worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ CREATE INDEX IF NOT EXISTS _mf_entries_expiration_idx ON _mf_entries(expiration)
`;
function sqlStmts(db: TypedSql) {
const stmtGetBlobIdByKey = db.stmt<Pick<Row, "key">, Pick<Row, "blob_id">>(
"SELECT blob_id FROM _mf_entries WHERE :key"
"SELECT blob_id FROM _mf_entries WHERE key = :key"
);
const stmtPut = db.stmt<Row>(
`INSERT OR REPLACE INTO _mf_entries (key, blob_id, expiration, metadata)
Expand Down
37 changes: 37 additions & 0 deletions packages/miniflare/test/plugins/cache/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ async function getControlStub(
return stub;
}

function sqlStmts(object: MiniflareDurableObjectControlStub) {
return {
getBlobIdByKey: async (key: string): Promise<string | undefined> => {
const rows = await object.sqlQuery<{ blob_id: string }>(
"SELECT blob_id FROM _mf_entries WHERE key = ?",
key
);
return rows[0]?.blob_id;
},
};
}

test.beforeEach(async (t) => {
t.context.caches = await t.context.mf.getCaches();

Expand Down Expand Up @@ -230,6 +242,31 @@ test("match respects Range header", async (t) => {
assert(res !== undefined);
t.is(res.status, 416);
});
test("put overrides existing responses", async (t) => {
const cache = t.context.caches.default;
const defaultObject = t.context.defaultObject;
const stmts = sqlStmts(defaultObject);

const resToCache = (body: string) =>
new Response(body, { headers: { "Cache-Control": "max-age=3600" } });

const key = "http://localhost/cache-override";
await cache.put(key, resToCache("body1"));
const blobId = await stmts.getBlobIdByKey(key);
assert(blobId !== undefined);
await cache.put(key, resToCache("body2"));
const res = await cache.match(key);
t.is(await res?.text(), "body2");

// Check deletes old blob
await defaultObject.waitForFakeTasks();
t.is(await defaultObject.getBlob(blobId), null);

// Check created new blob
const newBlobId = await stmts.getBlobIdByKey(key);
assert(newBlobId !== undefined);
t.not(blobId, newBlobId);
});

// Note this macro must be used with `test.serial` to avoid races.
const expireMacro = test.macro({
Expand Down
26 changes: 25 additions & 1 deletion packages/miniflare/test/plugins/kv/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ export const TIME_NOW = 1000;
// Expiration value to signal a key that will expire in the future
export const TIME_FUTURE = 1500;

function sqlStmts(object: MiniflareDurableObjectControlStub) {
return {
getBlobIdByKey: async (key: string): Promise<string | undefined> => {
const rows = await object.sqlQuery<{ blob_id: string }>(
"SELECT blob_id FROM _mf_entries WHERE key = ?",
key
);
return rows[0]?.blob_id;
},
};
}

interface Context extends MiniflareTestContext {
ns: string;
kv: Namespaced<ReplaceWorkersTypes<KVNamespace>>; // :D
Expand Down Expand Up @@ -160,15 +172,27 @@ test("put: puts empty value", async (t) => {
t.is(value, "");
});
test("put: overrides existing keys", async (t) => {
const { kv } = t.context;
const { kv, ns, object } = t.context;
const stmts = sqlStmts(object);
await kv.put("key", "value1");
const blobId = await stmts.getBlobIdByKey(`${ns}key`);
assert(blobId !== undefined);
await kv.put("key", "value2", {
expiration: TIME_FUTURE,
metadata: { testing: true },
});
const result = await kv.getWithMetadata("key");
t.is(result.value, "value2");
t.deepEqual(result.metadata, { testing: true });

// Check deletes old blob
await object.waitForFakeTasks();
t.is(await object.getBlob(blobId), null);

// Check created new blob
const newBlobId = await stmts.getBlobIdByKey(`${ns}key`);
assert(newBlobId !== undefined);
t.not(blobId, newBlobId);
});
test("put: keys are case-sensitive", async (t) => {
const { kv } = t.context;
Expand Down

0 comments on commit 71fb0b8

Please sign in to comment.