gristlabs · Spoffy · Dec 17, 2024 · Dec 18, 2024 · Dec 30, 2024 · Dec 30, 2024
diff --git a/app/server/lib/ActiveDoc.ts b/app/server/lib/ActiveDoc.ts
@@ -943,6 +943,26 @@ export class ActiveDoc extends EventEmitter {
     return data;
   }
 
+  public async startTransferringAllAttachmentsToDefaultStore() {
+    const attachmentStoreId = (await this._getDocumentSettings()).attachmentStoreId;
+    // If no attachment store is set on the doc, it should transfer everything to internal storage
+    await this._attachmentFileManager.startTransferringAllFilesToOtherStore(attachmentStoreId);
+  }
+
+  /**
+   * Returns a summary of pending attachment transfers between attachment stores.
+   */
+  public attachmentTransferStatus() {
+    return this._attachmentFileManager.transferStatus();
+  }
+
+  /**
+   * Returns a summary of where attachments on this doc are stored.
+   */
+  public async attachmentLocationSummary() {
+    return await this._attachmentFileManager.locationSummary();
+  }
+
   /**
    * Fetches the meta tables to return to the client when first opening a document.
    */

diff --git a/app/server/lib/AttachmentFileManager.ts b/app/server/lib/AttachmentFileManager.ts
diff --git a/app/server/lib/AttachmentStore.ts b/app/server/lib/AttachmentStore.ts
@@ -76,6 +76,9 @@ export interface IAttachmentStore {
   // implementation and gives them control over local buffering.
   download(docPoolId: DocPoolId, fileId: FileId, outputStream: stream.Writable): Promise<void>;
 
+  // Remove attachment from the store
+  delete(docPoolId: DocPoolId, fileId: FileId): Promise<void>;
+
   // Remove attachments for all documents in the given document pool.
   removePool(docPoolId: DocPoolId): Promise<void>;
 
@@ -120,6 +123,10 @@ export class ExternalStorageAttachmentStore implements IAttachmentStore {
     await this._storage.downloadStream(this._getKey(docPoolId, fileId), outputStream);
   }
 
+  public async delete(docPoolId: string, fileId: string): Promise<void> {
+    await this._storage.remove(this._getKey(docPoolId, fileId));
+  }
+
   public async removePool(docPoolId: string): Promise<void> {
     // Null assertion is safe because this should be checked before this class is instantiated.
     await this._storage.removeAllWithPrefix!(this._getPoolPrefix(docPoolId));
@@ -164,6 +171,10 @@ export class FilesystemAttachmentStore implements IAttachmentStore {
     );
   }
 
+  public async delete(docPoolId: string, fileId: string): Promise<void> {
+    await fse.remove(this._createPath(docPoolId, fileId));
+  }
+
   public async removePool(docPoolId: DocPoolId): Promise<void> {
     await fse.remove(this._createPath(docPoolId));
   }

diff --git a/app/server/lib/DocApi.ts b/app/server/lib/DocApi.ts
@@ -548,6 +548,25 @@ export class DocWorkerApi {
         .send(fileData);
     }));
 
+    // Starts transferring all attachments to the document's default attachment store.
+    this._app.post('/api/docs/:docId/attachments/transferAll', isOwner, withDoc(async (activeDoc, req, res) => {
+      await activeDoc.startTransferringAllAttachmentsToDefaultStore();
+      const locationSummary = await activeDoc.attachmentLocationSummary();
+
+      // Respond with the current status to allow for immediate UI updates.
+      res.json({
+        status: activeDoc.attachmentTransferStatus(),
+        locationSummary,
+      });
+    }));
+
+    // Returns the status of any current / pending attachment transfers
+    this._app.get('/api/docs/:docId/attachments/transferStatus', isOwner, withDoc(async (activeDoc, req, res) => {
+      res.json({
+        status: activeDoc.attachmentTransferStatus(),
+      });
+    }));
+
     // Mostly for testing
     this._app.post('/api/docs/:docId/attachments/updateUsed', canEdit, withDoc(async (activeDoc, req, res) => {
       await activeDoc.updateUsedAttachmentsIfNeeded();

diff --git a/app/server/lib/DocStorage.ts b/app/server/lib/DocStorage.ts
@@ -32,7 +32,6 @@ import cloneDeep = require('lodash/cloneDeep');
 import groupBy = require('lodash/groupBy');
 import { MinDBOptions } from './SqliteCommon';
 
-
 // Run with environment variable NODE_DEBUG=db (may include additional comma-separated sections)
 // for verbose logging.
 const debuglog = util.debuglog('db');
@@ -780,44 +779,64 @@ export class DocStorage implements ISQLiteDB, OnDemandStorage {
    *    checksum of the file's contents with the original extension.
    * @param {Buffer | undefined} fileData - Contents of the file.
    * @param {string | undefined} storageId - Identifier of the store that file is stored in.
+   * @param {boolean} shouldUpdate - If true, also overwrite data and storageId when the ident already exists.
    * @returns {Promise[Boolean]} True if the file got attached; false if this ident already exists.
    */
   public findOrAttachFile(
     fileIdent: string,
     fileData: Buffer | undefined,
     storageId?: string,
+    shouldUpdate: boolean = false,
   ): Promise<boolean> {
-    return this.execTransaction(db => {
-      // Try to insert a new record with the given ident. It'll fail UNIQUE constraint if exists.
-      return db.run('INSERT INTO _gristsys_Files (ident) VALUES (?)', fileIdent)
-      // Only if this succeeded, do the work of reading the file and inserting its data.
-        .then(() =>
-              db.run('UPDATE _gristsys_Files SET data=?, storageId=? WHERE ident=?', fileData, storageId, fileIdent))
-        .then(() => true)
-      // If UNIQUE constraint failed, this ident must already exists, so return false.
-        .catch(err => {
-          if (/^(SQLITE_CONSTRAINT: )?UNIQUE constraint failed/.test(err.message)) {
-            return false;
-          }
+    return this.execTransaction(async (db) => {
+      let isNewFile = true;
+
+      try {
+        // Try to insert a new record with the given ident. It'll fail UNIQUE constraint if exists.
+        await db.run('INSERT INTO _gristsys_Files (ident) VALUES (?)', fileIdent);
+      } catch(err) {
+        // If UNIQUE constraint failed, this ident must already exist.
+        if (/^(SQLITE_CONSTRAINT: )?UNIQUE constraint failed/.test(err.message)) {
+          isNewFile = false;
+        } else {
           throw err;
-        });
+        }
+      }
+      if (isNewFile || shouldUpdate) {
+        await db.run('UPDATE _gristsys_Files SET data=?, storageId=? WHERE ident=?', fileData, storageId, fileIdent);
+      }
+
+      return isNewFile;
     });
   }
 
   /**
    * Reads and returns the data for the given attachment.
    * @param {string} fileIdent - The unique identifier of a file, as used by findOrAttachFile.
-   * @returns {Promise[Buffer]} The data buffer associated with fileIdent.
+   * @param {boolean} includeData - Also load the file contents; when false, `data` is an empty buffer.
+   * @returns {Promise[FileInfo | null]} - File information, or null if no record exists for that file identifier.
    */
-  public getFileInfo(fileIdent: string): Promise<FileInfo | null> {
-    return this.get('SELECT ident, storageId, data FROM _gristsys_Files WHERE ident=?', fileIdent)
+  public getFileInfo(fileIdent: string, includeData: boolean = true): Promise<FileInfo | null> {
+    const columns = includeData ? 'ident, storageId, data' : 'ident, storageId';
+    return this.get(`SELECT ${columns} FROM _gristsys_Files WHERE ident=?`, fileIdent)
       .then(row => row ? ({
         ident: row.ident as string,
         storageId: (row.storageId ?? null) as (string | null),
-        data: row.data as Buffer,
+        // Use a zero-length buffer for now if there is no data. Should be refactored to allow null.
+        data: row.data ? row.data as Buffer : Buffer.alloc(0),
       }) : null);
   }
 
+  public async listAllFiles(): Promise<FileInfo[]> {
+    const rows = await this.all(`SELECT ident, storageId FROM _gristsys_Files`);
+
+    return rows.map(row => ({
+      ident: row.ident as string,
+      storageId: (row.storageId ?? null) as (string | null),
+      // Use a zero-length buffer for now to represent no data. Should be refactored to allow null.
+      data: Buffer.alloc(0),
+    }));
+  }
 
   /**
    * Fetches the given table from the database. See fetchQuery() for return value.

diff --git a/test/server/lib/AttachmentFileManager.ts b/test/server/lib/AttachmentFileManager.ts
@@ -1,16 +1,19 @@
 import { DocStorage, FileInfo } from "app/server/lib/DocStorage";
 import {
-  AttachmentFileManager, AttachmentRetrievalError,
+  AttachmentFileManager,
+  AttachmentRetrievalError,
   StoreNotAvailableError,
   StoresNotConfiguredError
 } from "app/server/lib/AttachmentFileManager";
+import { getDocPoolIdFromDocInfo } from "app/server/lib/AttachmentStore";
 import { AttachmentStoreProvider, IAttachmentStoreProvider } from "app/server/lib/AttachmentStoreProvider";
 import { makeTestingFilesystemStoreSpec } from "./FilesystemAttachmentStore";
 import { assert } from "chai";
 import * as sinon from "sinon";
+import * as stream from "node:stream";
 
 // Minimum features of doc storage that are needed to make AttachmentFileManager work.
-type IMinimalDocStorage = Pick<DocStorage, 'docName' | 'getFileInfo' | 'findOrAttachFile'>
+type IMinimalDocStorage = Pick<DocStorage, 'docName' | 'getFileInfo' | 'findOrAttachFile' | 'listAllFiles'>
 
 // Implements the minimal functionality needed for the AttachmentFileManager to work.
 class DocStorageFake implements IMinimalDocStorage {
@@ -19,15 +22,24 @@ class DocStorageFake implements IMinimalDocStorage {
   constructor(public docName: string) {
   }
 
+  public async listAllFiles(): Promise<FileInfo[]> {
+    const fileInfoPromises = Object.keys(this._files).map(key => this.getFileInfo(key));
+    const fileInfo = await Promise.all(fileInfoPromises);
+
+    const isFileInfo = (item: FileInfo | null): item is FileInfo => item !== null;
+
+    return fileInfo.filter(isFileInfo);
+  }
+
   public async getFileInfo(fileIdent: string): Promise<FileInfo | null> {
     return this._files[fileIdent] ?? null;
   }
 
-  // Return value is true if the file was newly added.
+  // Should match DocStorage's semantics. NOTE(review): shouldUpdate defaults to true here, false there.
   public async findOrAttachFile(
-    fileIdent: string, fileData: Buffer | undefined, storageId?: string | undefined
+    fileIdent: string, fileData: Buffer | undefined, storageId?: string | undefined, shouldUpdate: boolean = true
   ): Promise<boolean> {
-    if (fileIdent in this._files) {
+    if (fileIdent in this._files && !shouldUpdate) {
       return false;
     }
     this._files[fileIdent] = {
@@ -48,7 +60,10 @@ function createDocStorageFake(docName: string): DocStorage {
 
 async function createFakeAttachmentStoreProvider(): Promise<IAttachmentStoreProvider> {
   return new AttachmentStoreProvider(
-    [await makeTestingFilesystemStoreSpec("filesystem")],
+    [
+      await makeTestingFilesystemStoreSpec("filesystem"),
+      await makeTestingFilesystemStoreSpec("filesystem-alt"),
+    ],
     "TEST-INSTALLATION-UUID"
   );
 }
@@ -126,6 +141,28 @@ describe("AttachmentFileManager", function() {
     assert.isTrue(await store!.exists(docId, result.fileIdent), "file does not exist in store");
   });
 
+  it("shouldn't do anything when trying to add an existing attachment to a new store", async function() {
+    const docId = "12345";
+    const manager = new AttachmentFileManager(
+      defaultDocStorageFake,
+      defaultProvider,
+      { id: docId, trunkId: null  },
+    );
+
+    const allStoreIds = defaultProvider.listAllStoreIds();
+    const result1 = await manager.addFile(allStoreIds[0], ".txt", Buffer.from(defaultTestFileContent));
+    const store1 = await defaultProvider.getStore(allStoreIds[0]);
+    assert.isTrue(await store1!.exists(docId, result1.fileIdent), "file does not exist in store");
+
+    const result2 = await manager.addFile(allStoreIds[1], ".txt", Buffer.from(defaultTestFileContent));
+    const store2 = await defaultProvider.getStore(allStoreIds[1]);
+    // File shouldn't exist in the new store
+    assert.isFalse(await store2!.exists(docId, result2.fileIdent));
+
+    const fileInfo = await defaultDocStorageFake.getFileInfo(result2.fileIdent);
+    assert.equal(fileInfo?.storageId, allStoreIds[0], "file record should not refer to the new store");
+  });
+
   it("should get a file from local storage", async function() {
     const docId = "12345";
     const manager = new AttachmentFileManager(
@@ -210,4 +247,116 @@ describe("AttachmentFileManager", function() {
     assert(fileData);
     assert.equal(fileData.toString(), defaultTestFileContent);
   });
+
+  async function testStoreTransfer(sourceStore?: string, destStore?: string) {
+    const docInfo = { id: "12345", trunkId: null  };
+    const manager = new AttachmentFileManager(
+      defaultDocStorageFake,
+      defaultProvider,
+      docInfo,
+    );
+
+    const fileAddResult = await manager.addFile(sourceStore, ".txt", Buffer.from(defaultTestFileContent));
+    manager.startTransferringFileToOtherStore(fileAddResult.fileIdent, destStore);
+
+    await manager.allTransfersCompleted();
+
+    if (!destStore) {
+      await defaultDocStorageFake.getFileInfo(fileAddResult.fileIdent);
+      assert.equal(
+        (await defaultDocStorageFake.getFileInfo(fileAddResult.fileIdent))?.data?.toString(),
+        defaultTestFileContent,
+      );
+      return;
+    }
+
+    const store = (await defaultProvider.getStore(destStore))!;
+
+    assert(
+      await store.exists(getDocPoolIdFromDocInfo(docInfo), fileAddResult.fileIdent),
+      "file does not exist in new store"
+    );
+  }
+
+  it("can transfer a file from internal to external storage", async function() {
+    await testStoreTransfer(undefined, defaultProvider.listAllStoreIds()[0]);
+  });
+
+  it("can transfer a file from external to internal storage", async function() {
+    await testStoreTransfer(defaultProvider.listAllStoreIds()[0], undefined);
+  });
+
+  it("can transfer a file from external to a different external storage", async function() {
+    await testStoreTransfer(defaultProvider.listAllStoreIds()[0], defaultProvider.listAllStoreIds()[1]);
+  });
+
+  it("throws an error if the downloaded file is corrupted", async function() {
+    const docInfo = { id: "12345", trunkId: null  };
+    const manager = new AttachmentFileManager(
+      defaultDocStorageFake,
+      defaultProvider,
+      docInfo,
+    );
+
+    const sourceStoreId = defaultProvider.listAllStoreIds()[0];
+    const fileAddResult = await manager.addFile(sourceStoreId, ".txt", Buffer.from(defaultTestFileContent));
+
+    const sourceStore = await defaultProvider.getStore(defaultProvider.listAllStoreIds()[0]);
+    const badData = stream.Readable.from(Buffer.from("I am corrupted"));
+    await sourceStore?.upload(getDocPoolIdFromDocInfo(docInfo), fileAddResult.fileIdent, badData);
+
+    const transferPromise =
+      manager.transferFileToOtherStore(fileAddResult.fileIdent, defaultProvider.listAllStoreIds()[1]);
+    await assert.isRejected(transferPromise, AttachmentRetrievalError, "checksum verification failed");
+  });
+
+  it("transfers all files in the background", async function() {
+    const docInfo = { id: "12345", trunkId: null  };
+    const manager = new AttachmentFileManager(
+      defaultDocStorageFake,
+      defaultProvider,
+      docInfo,
+    );
+
+    const allStoreIds = defaultProvider.listAllStoreIds();
+    const sourceStoreId = allStoreIds[0];
+    const fileAddResult1 = await manager.addFile(sourceStoreId, ".txt", Buffer.from("A"));
+    const fileAddResult2 = await manager.addFile(sourceStoreId, ".txt", Buffer.from("B"));
+    const fileAddResult3 = await manager.addFile(sourceStoreId, ".txt", Buffer.from("C"));
+
+    await manager.startTransferringAllFilesToOtherStore(allStoreIds[1]);
+    // A promise is never strictly true/false, so assert on the status (assumes an isRunning flag; confirm).
+    assert.isTrue(manager.transferStatus().isRunning);
+    await manager.allTransfersCompleted();
+    assert.isFalse(manager.transferStatus().isRunning);
+
+    const destStore = (await defaultProvider.getStore(allStoreIds[1]))!;
+    const poolId = getDocPoolIdFromDocInfo(docInfo);
+    assert.isTrue(await destStore.exists(poolId, fileAddResult1.fileIdent));
+    assert.isTrue(await destStore.exists(poolId, fileAddResult2.fileIdent));
+    assert.isTrue(await destStore.exists(poolId, fileAddResult3.fileIdent));
+  });
+
+  it("uses the most recent transfer destination", async function() {
+    const docInfo = { id: "12345", trunkId: null  };
+    const manager = new AttachmentFileManager(
+      defaultDocStorageFake,
+      defaultProvider,
+      docInfo,
+    );
+
+    const allStoreIds = defaultProvider.listAllStoreIds();
+    const fileAddResult1 = await manager.addFile(allStoreIds[0], ".txt", Buffer.from("A"));
+
+    manager.startTransferringFileToOtherStore(fileAddResult1.fileIdent, allStoreIds[1]);
+    manager.startTransferringFileToOtherStore(fileAddResult1.fileIdent, allStoreIds[0]);
+    manager.startTransferringFileToOtherStore(fileAddResult1.fileIdent, allStoreIds[1]);
+    manager.startTransferringFileToOtherStore(fileAddResult1.fileIdent, allStoreIds[0]);
+    await manager.allTransfersCompleted();
+
+    const fileInfo = await defaultDocStorageFake.getFileInfo(fileAddResult1.fileIdent);
+    assert.equal(fileInfo?.storageId, allStoreIds[0], "the file should be in the original store");
+    // We can't assert on whether the file exists in a particular store, as it might be transferred from A to B
+    // and back to A, and so exist in both stores.
+  });
 });