Add option to generate summaries of each note and index them. #43

Merged: 6 commits, Jul 14, 2024
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -23,6 +23,7 @@ If you are new here, start with either the `AI: Chat on current page` command or
- **Rename a note based on Note Context**: Sends the note, including enriched data, to the LLM and asks for a new note title. Custom rules or examples can also be provided to generate better titles.
- **Generate vector embeddings**: Chunks each page, generates vector embeddings of the text, and indexes those embeddings. No external database required.
- **Similarity search**: Allows doing a similarity search based on indexed embeddings.
- **Note Summary generation and search**: **Experimental.** Generates a summary of each note, then generates and indexes embeddings of that summary so it can be found with a similarity/semantic search.
<!-- end-features -->

### Available commands
4 changes: 3 additions & 1 deletion docs/Commands/AI: Search.md
@@ -4,4 +4,6 @@ commandName: "AI: Search"
commandSummary: "Ask the user for a search query, and then navigate to the search results page.
Search results are provided by calculating the cosine similarity between the
query embedding and each indexed embedding."
---

Requires configuring [[Configuration/Embedding Models]] first. Once properly configured, this command can be used to do a similarity or semantic search against all of the notes in a space.
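Under the hood this is plain cosine similarity between the query embedding and each stored embedding. The sketch below is a standard reimplementation matching the signature of `cosineSimilarity` in `src/embeddings.ts`, not the plug's exact body:

```typescript
// Cosine similarity: dot(a, b) / (|a| * |b|).
// 1 means identical direction, 0 means orthogonal (unrelated).
function cosineSimilarity(vecA: number[], vecB: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < vecA.length; i++) {
    dot += vecA[i] * vecB[i];
    normA += vecA[i] * vecA[i];
    normB += vecB[i] * vecB[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

console.log(cosineSimilarity([1, 0], [1, 0])); // 1
console.log(cosineSimilarity([1, 0], [0, 1])); // 0
```

Because the score only depends on direction, not magnitude, documents of very different lengths can still be compared fairly.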
9 changes: 9 additions & 0 deletions docs/Configuration.md
@@ -2,6 +2,15 @@ To change the text generation model used by all commands, or other configurable

```yaml
ai:
# Disabled by default. Set indexEmbeddings and indexSummary to true
# to enable the AI: Search command.
# Be sure to read the relevant documentation and warnings first.
indexEmbeddings: false
indexEmbeddingsExcludePages: []
indexEmbeddingsExcludeStrings: []
indexSummaryModelName: ollama-gemma2
indexSummary: false

# Configure one or more image models. Only OpenAI's API is currently supported.
imageModels:
- name: dall-e-3
39 changes: 38 additions & 1 deletion docs/Configuration/Embedding Models.md
@@ -56,4 +56,41 @@ Options:
- **indexEmbeddingsExcludeStrings**: List of exact strings to exclude from indexing. If a paragraph or line contains only one of these strings, it won’t be indexed. This helps keep search results from being polluted in some cases.
- **embeddingModels**: Explained above. Only the first model in the list is used for indexing.

After setting **indexEmbeddings** to **true** OR changing the **first embeddingModels model**, you must run the `Space: Reindex` command.

## Generating and indexing note summaries

> **warning** This is an experimental feature, mostly due to the amount of extra time and resources it takes during the indexing process. If you try it out, please report your experience!

In addition to generating embeddings for each paragraph of a note, the configured LLM can also generate a summary of the entire note, which is then embedded and indexed.

This can be helpful for larger notes or notes where each paragraph may not contain enough context by itself.

To enable this feature, ensure you have these options in your SETTINGS:

```yaml
aiSettings:
indexSummaryModelName: ollama-gemma2
indexSummary: true
textModels:
- name: ollama-gemma2
modelName: gemma2
provider: openai
baseUrl: http://localhost:11434/v1
requireAuth: false
```

Options:
- **indexSummary**: Off by default. Set to true to start generating page summaries and indexing their embeddings.
- **indexSummaryModelName**: Which [[Configuration/Text Models|text model]] to use for generating summaries. It’s recommended to use a locally hosted model since every note in your space will be sent to it.

> **warning** If you are not comfortable sending all of your notes to a third party, do not use a third-party API for embeddings or summary generation.

### Suggested models for summary generation

> **info** Please report your experiences with using different models!

The following models have been tested with Ollama for generating note summaries. Please report any other models you test and your success (or not) with them.

- **phi3**: Can generate summaries relatively quickly, but often includes hallucinations and weird changes that don’t match the source material.
- **gemma2**: This model is a bit bigger, but generates much better summaries than phi3.
3 changes: 2 additions & 1 deletion docs/Features.md
@@ -9,4 +9,5 @@
- **Generate and Insert Image using Dall-E**: Generates an image based on a prompt and inserts it into the note.
- **Rename a note based on Note Context**: Sends the note, including enriched data, to the LLM and asks for a new note title. Custom rules or examples can also be provided to generate better titles.
- **Generate vector embeddings**: Chunks each page, generates vector embeddings of the text, and indexes those embeddings. No external database required.
- **Similarity search**: Allows doing a similarity search based on indexed embeddings.
- **Note Summary generation and search**: **Experimental.** Generates a summary of each note, then generates and indexes embeddings of that summary so it can be found with a similarity/semantic search.
4 changes: 4 additions & 0 deletions silverbullet-ai.plug.yaml
@@ -70,6 +70,10 @@ functions:
path: src/embeddings.ts:indexEmbeddings
events:
- page:index
indexSummaryEmbeddings:
path: src/embeddings.ts:indexSummary
events:
- page:index
debugSearchEmbeddings:
path: src/embeddings.ts:debugSearchEmbeddings
command:
33 changes: 33 additions & 0 deletions src/cache.ts
@@ -0,0 +1,33 @@
// Simple module-level in-memory cache; not persisted across reloads.
let cache: { [key: string]: any } = {};

export function setCache(key: string, value: any) {
cache[key] = value;
}

export function getCache(key: string): any {
return cache[key];
}

export function clearCache() {
cache = {};
}

export function removeCache(key: string) {
delete cache[key];
}

export function hasCache(key: string): boolean {
return Object.prototype.hasOwnProperty.call(cache, key);
}

// https://stackoverflow.com/questions/59777670/how-can-i-hash-a-string-with-sha256
export async function hashStrings(...inputs: string[]): Promise<string> {
const concatenatedInput = inputs.join("");
const textAsBuffer = new TextEncoder().encode(concatenatedInput);
const hashBuffer = await crypto.subtle.digest("SHA-256", textAsBuffer);
const hashArray = Array.from(new Uint8Array(hashBuffer));
const hash = hashArray
.map((item) => item.toString(16).padStart(2, "0"))
.join("");
return hash;
}
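One caveat worth noting about `hashStrings`: inputs are joined with no separator, so distinct input tuples can produce the same cache key. The sketch below illustrates this with Node's synchronous `node:crypto` hash API instead of the Web Crypto API the plug uses, purely so the example stays synchronous; `hashStringsSync` is an illustrative name, not part of the plug:

```typescript
import { createHash } from "node:crypto";

// Same joining behavior as hashStrings: inputs are concatenated with "".
function hashStringsSync(...inputs: string[]): string {
  return createHash("sha256").update(inputs.join("")).digest("hex");
}

// ("ab") and ("a", "b") collapse to the same cache key:
console.log(hashStringsSync("ab") === hashStringsSync("a", "b")); // true
```

In practice this is harmless here because the inputs (model name, page text, prompt) are unlikely to collide meaningfully, but a delimiter such as `"\u0000"` between inputs would rule it out entirely.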
132 changes: 130 additions & 2 deletions src/embeddings.ts
@@ -1,5 +1,6 @@
import type { FileMeta, IndexTreeEvent } from "$sb/types.ts";
import type {
AISummaryObject,
CombinedEmbeddingResult,
EmbeddingObject,
EmbeddingResult,
@@ -9,8 +10,13 @@ import { renderToText } from "$sb/lib/tree.ts";
import { currentEmbeddingProvider, initIfNeeded } from "../src/init.ts";
import { log } from "./utils.ts";
import { editor } from "$sb/syscalls.ts";
import { aiSettings } from "./init.ts";
import { aiSettings, configureSelectedModel } from "./init.ts";
import * as cache from "./cache.ts";

/**
* Generate embeddings for each paragraph in a page, and then indexes
* them.
*/
export async function indexEmbeddings({ name: page, tree }: IndexTreeEvent) {
await initIfNeeded();

@@ -57,7 +63,6 @@ export async function indexEmbeddings({ name: page, tree }: IndexTreeEvent) {
continue;
}

// TODO: Would it help to cache embeddings? e.g. someone reloading the same search page over and over, or updating the same page causing the same paragraphs to be re-indexed
const embedding = await currentEmbeddingProvider.generateEmbeddings({
text: paragraphText,
});
@@ -85,10 +90,93 @@ export async function indexEmbeddings({ name: page, tree }: IndexTreeEvent) {
);
}

/**
* Generate a summary for a page, and then indexes it.
*/
export async function indexSummary({ name: page, tree }: IndexTreeEvent) {
await initIfNeeded();

// Only index pages if the user enabled it, and skip anything they want to exclude
const excludePages = [
"SETTINGS",
"SECRETS",
...aiSettings.indexEmbeddingsExcludePages,
];
if (
!aiSettings.indexEmbeddings ||
!aiSettings.indexSummary ||
excludePages.includes(page) ||
page.startsWith("_")
) {
return;
}

if (!tree.children) {
return;
}

const pageText = renderToText(tree);
const summaryModel = aiSettings.textModels.find((model) =>
model.name === aiSettings.indexSummaryModelName
);
if (!summaryModel) {
throw new Error(
`Could not find summary model ${aiSettings.indexSummaryModelName}`,
);
}
const summaryProvider = await configureSelectedModel(summaryModel);
let summaryPrompt;

if (aiSettings.promptInstructions.indexSummaryPrompt !== "") {
summaryPrompt = aiSettings.promptInstructions.indexSummaryPrompt;
} else {
summaryPrompt =
"Provide a concise and informative summary of the above page. The summary should capture the key points and be useful for search purposes. Avoid any formatting or extraneous text. No more than one paragraph. Summary:\n";
}

const cacheKey = await cache.hashStrings(
summaryModel.name,
pageText,
summaryPrompt,
);
let summary = cache.getCache(cacheKey);
if (!summary) {
summary = await summaryProvider.singleMessageChat(
"Contents of " + page + ":\n" + pageText + "\n\n" + summaryPrompt,
);
cache.setCache(cacheKey, summary);
}

// console.log("summary", summary);

const summaryEmbeddings = await currentEmbeddingProvider.generateEmbeddings({
text: summary,
});

const summaryObject: AISummaryObject = {
ref: `${page}@0`,
page: page,
embedding: summaryEmbeddings,
text: summary,
tag: "aiSummary",
};

await indexObjects<AISummaryObject>(page, [summaryObject]);

log(
"any",
`AI: Indexed summary for page ${page}`,
);
}
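The request sent to the summary model is simply the page contents followed by the prompt. A hypothetical helper showing that assembly (`buildSummaryRequest` is illustrative; `indexSummary` builds this string inline):

```typescript
// Illustrative helper mirroring how indexSummary assembles its LLM request.
function buildSummaryRequest(
  page: string,
  pageText: string,
  customPrompt: string,
): string {
  // An empty indexSummaryPrompt setting falls back to the built-in prompt.
  const defaultPrompt =
    "Provide a concise and informative summary of the above page. " +
    "The summary should capture the key points and be useful for search purposes. " +
    "Avoid any formatting or extraneous text. No more than one paragraph. Summary:\n";
  const summaryPrompt = customPrompt !== "" ? customPrompt : defaultPrompt;
  return "Contents of " + page + ":\n" + pageText + "\n\n" + summaryPrompt;
}

console.log(buildSummaryRequest("Notes/Todo", "- buy milk", "Summarize briefly:\n"));
```

Putting the page contents first and the instruction last keeps the instruction closest to where the model starts generating, which tends to help smaller local models follow it.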

export async function getAllEmbeddings(): Promise<EmbeddingObject[]> {
return (await queryObjects<EmbeddingObject>("embedding", {}));
}

export async function getAllAISummaries(): Promise<AISummaryObject[]> {
return (await queryObjects<AISummaryObject>("aiSummary", {}));
}

// Full disclosure, I don't really understand how this part works - thanks chatgpt!
// ^ If anyone can make it better, please do.
function cosineSimilarity(vecA: number[], vecB: number[]): number {
Expand Down Expand Up @@ -119,13 +207,50 @@ export async function searchEmbeddings(
similarity: cosineSimilarity(queryEmbedding, embedding.embedding),
}));

if (aiSettings.indexSummary) {
const summaries = await getAllAISummaries();
const summaryResults: EmbeddingResult[] = summaries.map((summary) => ({
page: summary.page,
ref: summary.ref,
text: `Page Summary: ${summary.text}`,
similarity: cosineSimilarity(queryEmbedding, summary.embedding),
}));
results.push(...summaryResults);
}

// log("client", "AI: searchEmbeddings", results);

return results
.sort((a, b) => b.similarity - a.similarity)
.slice(0, numResults);
}

/**
* Loop over every single summary object and calculate the cosine similarity between the query embedding and each summary object.
* Return the most similar summary objects.
*/
export async function searchSummaryEmbeddings(
query: string,
numResults = 10,
): Promise<EmbeddingResult[]> {
await initIfNeeded();
const queryEmbedding = await currentEmbeddingProvider.generateEmbeddings({
text: query,
});
const summaries = await getAllAISummaries();

const results: EmbeddingResult[] = summaries.map((summary) => ({
page: summary.page,
ref: summary.ref,
text: summary.text,
similarity: cosineSimilarity(queryEmbedding, summary.embedding),
}));

return results
.sort((a, b) => b.similarity - a.similarity)
.slice(0, numResults);
}
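Both search functions above follow the same shape: embed the query, score every candidate, sort descending by similarity, and take the top N. A standalone sketch of that ranking step (`rankBySimilarity` and the sample data are illustrative; the plug inlines this map/sort/slice pipeline):

```typescript
interface EmbeddingResult {
  page: string;
  ref: string;
  text: string;
  similarity: number;
}

// Score each candidate, then keep the numResults highest-scoring entries.
function rankBySimilarity(
  candidates: Omit<EmbeddingResult, "similarity">[],
  score: (text: string) => number,
  numResults = 10,
): EmbeddingResult[] {
  return candidates
    .map((c) => ({ ...c, similarity: score(c.text) }))
    .sort((a, b) => b.similarity - a.similarity)
    .slice(0, numResults);
}

// Toy scorer: text length stands in for cosine similarity to a query.
const ranked = rankBySimilarity(
  [
    { page: "a", ref: "a@0", text: "short" },
    { page: "b", ref: "b@0", text: "a much longer summary" },
  ],
  (t) => t.length,
  1,
);
console.log(ranked[0].page); // b
```

Note this is a full scan over every stored embedding on each query; it stays fast for a personal note space but would want an approximate nearest-neighbor index at much larger scale.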

/**
* Combine and group similar embeddings into one object per page.
* Without this, we could easily use up the results limit from a single page.
Expand Down Expand Up @@ -188,6 +313,9 @@ export async function debugSearchEmbeddings() {

const searchPrefix = "🤖 ";

/**
* Display "AI: Search" results.
*/
export async function readFileEmbeddings(
name: string,
): Promise<{ data: Uint8Array; meta: FileMeta }> {
7 changes: 6 additions & 1 deletion src/init.ts
@@ -172,6 +172,8 @@ function setupAIProvider(model: ModelConfig) {
`Unsupported AI provider: ${model.provider}. Please configure a supported provider.`,
);
}

return currentAIProvider;
}

function setupEmbeddingProvider(model: EmbeddingModelConfig) {
@@ -225,7 +227,7 @@ export async function configureSelectedModel(model: ModelConfig) {
}

currentModel = model;
setupAIProvider(model);
return setupAIProvider(model);
}

export async function configureSelectedImageModel(model: ImageModelConfig) {
@@ -287,6 +289,8 @@ async function loadAndMergeSettings() {
embeddingModels: [],
textModels: [],
indexEmbeddings: false,
indexSummary: false,
indexSummaryModelName: "",
indexEmbeddingsExcludePages: [],
indexEmbeddingsExcludeStrings: ["**user**:"],
};
@@ -302,6 +306,7 @@
pageRenameSystem: "",
pageRenameRules: "",
tagRules: "",
indexSummaryPrompt: "",
};
const newSettings = await readSetting("ai", {});
const newCombinedSettings = { ...defaultSettings, ...newSettings };
25 changes: 24 additions & 1 deletion src/interfaces/EmbeddingProvider.ts
@@ -1,10 +1,15 @@
import { EmbeddingGenerationOptions } from "../types.ts";
import * as cache from "../cache.ts";

export interface EmbeddingProviderInterface {
name: string;
apiKey: string;
baseUrl: string;
modelName: string;
_generateEmbeddings: (
options: EmbeddingGenerationOptions,
) => Promise<Array<number>>;

generateEmbeddings: (
options: EmbeddingGenerationOptions,
) => Promise<Array<number>>;
@@ -32,7 +37,25 @@ export abstract class AbstractEmbeddingProvider
this.requireAuth = requireAuth;
}

abstract generateEmbeddings(
abstract _generateEmbeddings(
options: EmbeddingGenerationOptions,
): Promise<Array<number>>;

async generateEmbeddings(options: EmbeddingGenerationOptions) {
const cacheKey = await cache.hashStrings(
this.modelName,
options.text,
);

// Check if we've already generated these embeddings
const cachedEmbedding = cache.getCache(cacheKey);
if (cachedEmbedding) {
return cachedEmbedding;
}

// Not in cache
const embedding = await this._generateEmbeddings(options);
cache.setCache(cacheKey, embedding);
return embedding;
}
}
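The pattern above, where the public `generateEmbeddings` wraps the abstract `_generateEmbeddings` with a cache check, means every concrete provider gets caching for free. A synchronous sketch of the same template-method idea (class and method names here are illustrative; the real code is async and keys the cache on a SHA-256 of model name plus text):

```typescript
// Base class: subclasses implement only the expensive _embed step.
abstract class CachedEmbedder {
  private cache = new Map<string, number[]>();
  callCount = 0; // how many times the expensive path actually ran

  protected abstract _embed(text: string): number[];

  embed(text: string): number[] {
    const cached = this.cache.get(text);
    if (cached) return cached;
    const result = this._embed(text);
    this.cache.set(text, result);
    return result;
  }
}

class DummyProvider extends CachedEmbedder {
  protected _embed(text: string): number[] {
    this.callCount++;
    return [text.length]; // stand-in for a real embedding vector
  }
}

const p = new DummyProvider();
p.embed("hello");
p.embed("hello"); // served from the cache, no second _embed call
console.log(p.callCount); // 1
```

This matters most during reindexing, when the same paragraphs and search queries are embedded repeatedly; the in-memory cache avoids redundant round trips to the embedding API within a session.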