diff --git a/README.md b/README.md
index edcf950..6f5ee56 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ If you are new here, start with either the `AI: Chat on current page` command or
 - **Rename a note based on Note Context**: Sends the note, including enriched data, to the LLM and asks for a new note title. Custom rules or examples can also be provided to generate better titles.
 - **Generate vector embeddings**: Chunks each page, generates vector embeddings of the text, and indexes those embeddings. No external database required.
 - **Similarity search**: Allows doing a similarity search based on indexed embeddings.
+- **Note Summary generation and search**: **Experimental.** Generates a summary of each note, then generates and indexes embeddings of that summary so it can be found via similarity/semantic search.
 
 ### Available commands
 
diff --git a/docs/Commands/AI: Search.md b/docs/Commands/AI: Search.md
index 0c52568..f4a706a 100644
--- a/docs/Commands/AI: Search.md
+++ b/docs/Commands/AI: Search.md
@@ -4,4 +4,6 @@
 commandName: "AI: Search"
 commandSummary: "Ask the user for a search query, and then navigate to the search results page. Search results are provided by calculating the cosine similarity between the query embedding and each indexed embedding."
----
\ No newline at end of file
+---
+
+Requires configuring [[Configuration/Embedding Models]] first. Once configured, this command performs a similarity (semantic) search across all of the notes in a space.
\ No newline at end of file
diff --git a/docs/Configuration.md b/docs/Configuration.md
index e757d47..4087340 100644
--- a/docs/Configuration.md
+++ b/docs/Configuration.md
@@ -2,6 +2,15 @@ To change the text generation model used by all commands, or other configurable
 
 ```yaml
 ai:
+  # Disabled by default. Set indexEmbeddings and indexSummary to true
+  # to enable the AI: Search command.
+  # Be sure to read the relevant documentation and warnings first.
+  indexEmbeddings: false
+  indexEmbeddingsExcludePages: []
+  indexEmbeddingsExcludeStrings: []
+  indexSummaryModelName: ollama-gemma2
+  indexSummary: false
+
   # configure one or more image models. Only OpenAI's api is currently supported
   imageModels:
   - name: dall-e-3
diff --git a/docs/Configuration/Embedding Models.md b/docs/Configuration/Embedding Models.md
index 03bd805..f9a2302 100644
--- a/docs/Configuration/Embedding Models.md
+++ b/docs/Configuration/Embedding Models.md
@@ -56,4 +56,41 @@ Options:
 - **indexEmbeddingsExcludeStrings**: List of exact strings to exclude from indexing. If a paragraph or line contains only one of these strings, it won’t be indexed. This helps from polluting search results in some cases.
 - **embeddingModels**: Explained above. Only the first model in the list is used for indexing.
 
-After setting **indexEmbeddings** to **true** OR changing the **first embeddingModels model**, you must run the `Space: Reindex` command.
\ No newline at end of file
+After setting **indexEmbeddings** to **true** OR changing the **first embeddingModels model**, you must run the `Space: Reindex` command.
+
+## Generating and indexing note summaries
+
+> **warning** This is an experimental feature, mostly due to the amount of extra time and resources it takes during the indexing process. If you try it out, please report your experience!
+
+In addition to generating embeddings for each paragraph of a note, we can also use an LLM to generate a summary of the entire note and then index that.
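+
+> **info** The prompt used for each summary can be overridden by setting `promptInstructions.indexSummaryPrompt`; when it is left empty, a built-in default prompt is used. A minimal sketch (the exact nesting under `ai:` is assumed, following the other `promptInstructions` options):
+
+```yaml
+ai:
+  promptInstructions:
+    indexSummaryPrompt: "Summarize this note in one short paragraph, optimized for search."
+```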
+
+Summarizing the whole note can be helpful for larger notes, or for notes where a single paragraph may not contain enough context by itself.
+
+To enable this feature, ensure you have these options in your SETTINGS:
+
+```yaml
+ai:
+  indexEmbeddings: true
+  indexSummaryModelName: ollama-gemma2
+  indexSummary: true
+  textModels:
+    - name: ollama-gemma2
+      modelName: gemma2
+      provider: openai
+      baseUrl: http://localhost:11434/v1
+      requireAuth: false
+```
+
+Options:
+- **indexSummary**: Off by default. Set to true to start generating page summaries and indexing their embeddings. Summaries are only generated while **indexEmbeddings** is also enabled.
+- **indexSummaryModelName**: Which [[Configuration/Text Models|text model]] to use for generating summaries. A locally hosted model is recommended, since every note in your space will be sent to it.
+
+> **warning** If you are not comfortable sending all of your notes to a third party, do not use a third-party API for embeddings or summary generation.
+
+### Suggested models for summary generation
+
+> **info** Please report your experiences with using different models!
+
+The following models have been tested with Ollama for generating note summaries, with notes on their quality. Please report any other models you test and how well they work.
+
+- **phi3**: Generates summaries relatively quickly, but they often include hallucinations and odd changes that don’t match the source material.
+- **gemma2**: A bit bigger than phi3, but generates much better summaries.
\ No newline at end of file
diff --git a/docs/Features.md b/docs/Features.md
index 2c5e541..f0f3d25 100644
--- a/docs/Features.md
+++ b/docs/Features.md
@@ -9,4 +9,5 @@
 - **Generate and Insert Image using Dall-E**: Generates an image based on a prompt and inserts it into the note.
 - **Rename a note based on Note Context**: Sends the note, including enriched data, to the LLM and asks for a new note title. Custom rules or examples can also be provided to generate better titles.
 - **Generate vector embeddings**: Chunks each page, generates vector embeddings of the text, and indexes those embeddings. No external database required.
-- **Similarity search**: Allows doing a similarity search based on indexed embeddings.
\ No newline at end of file
+- **Similarity search**: Allows doing a similarity search based on indexed embeddings.
+- **Note Summary generation and search**: **Experimental.** Generates a summary of each note, then generates and indexes embeddings of that summary so it can be found via similarity/semantic search.
\ No newline at end of file
diff --git a/silverbullet-ai.plug.yaml b/silverbullet-ai.plug.yaml
index 6c6d0e6..7203066 100644
--- a/silverbullet-ai.plug.yaml
+++ b/silverbullet-ai.plug.yaml
@@ -70,6 +70,10 @@ functions:
     path: src/embeddings.ts:indexEmbeddings
     events:
       - page:index
+  indexSummaryEmbeddings:
+    path: src/embeddings.ts:indexSummary
+    events:
+      - page:index
   debugSearchEmbeddings:
     path: src/embeddings.ts:debugSearchEmbeddings
     command:
diff --git a/src/cache.ts b/src/cache.ts
new file mode 100644
index 0000000..738621c
--- /dev/null
+++ b/src/cache.ts
@@ -0,0 +1,33 @@
+let cache: { [key: string]: any } = {};
+
+export function setCache(key: string, value: any) {
+  cache[key] = value;
+}
+
+export function getCache(key: string): any {
+  return cache[key];
+}
+
+export function clearCache() {
+  cache = {};
+}
+
+export function removeCache(key: string) {
+  delete cache[key];
+}
+
+export function hasCache(key: string): boolean {
+  return Object.prototype.hasOwnProperty.call(cache, key);
+}
+
+// https://stackoverflow.com/questions/59777670/how-can-i-hash-a-string-with-sha256
+export async function hashStrings(...inputs: string[]): Promise<string> {
+  const concatenatedInput = inputs.join("");
+  const textAsBuffer = new TextEncoder().encode(concatenatedInput);
+  const hashBuffer = await crypto.subtle.digest("SHA-256", textAsBuffer);
+  const hashArray = Array.from(new Uint8Array(hashBuffer));
+  const hash = hashArray
+    .map((item) => item.toString(16).padStart(2, "0"))
+    .join("");
+  return hash;
+}
diff --git a/src/embeddings.ts b/src/embeddings.ts
index e3dff8e..094cf4c 100644
--- a/src/embeddings.ts
+++ b/src/embeddings.ts
@@ -1,5 +1,6 @@
 import type { FileMeta, IndexTreeEvent } from "$sb/types.ts";
 import type {
+  AISummaryObject,
   CombinedEmbeddingResult,
   EmbeddingObject,
   EmbeddingResult,
@@ -9,8 +10,13 @@
 import { renderToText } from "$sb/lib/tree.ts";
 import { currentEmbeddingProvider, initIfNeeded } from "../src/init.ts";
 import { log } from "./utils.ts";
 import { editor } from "$sb/syscalls.ts";
-import { aiSettings } from "./init.ts";
+import { aiSettings, configureSelectedModel } from "./init.ts";
+import * as cache from "./cache.ts";
 
+/**
+ * Generates embeddings for each paragraph in a page, and then
+ * indexes them.
+ */
 export async function indexEmbeddings({ name: page, tree }: IndexTreeEvent) {
   await initIfNeeded();
@@ -57,7 +63,6 @@ export async function indexEmbeddings({ name: page, tree }: IndexTreeEvent) {
       continue;
     }
 
-    // TODO: Would it help to cache embeddings? e.g. someone reloading the same search page over and over, or updating the same page causing the same paragraphs to be re-indexed
     const embedding = await currentEmbeddingProvider.generateEmbeddings({
       text: paragraphText,
     });
@@ -85,10 +90,93 @@
   );
 }
 
+/**
+ * Generates a summary for a page, and then indexes it.
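+ * Skips excluded pages, and caches each summary keyed on a hash of the
+ * model name, page text, and prompt, so unchanged pages are not
+ * re-summarized on subsequent index runs.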
+ */
+export async function indexSummary({ name: page, tree }: IndexTreeEvent) {
+  await initIfNeeded();
+
+  // Only index pages if the user enabled it, and skip anything they want to exclude
+  const excludePages = [
+    "SETTINGS",
+    "SECRETS",
+    ...aiSettings.indexEmbeddingsExcludePages,
+  ];
+  if (
+    !aiSettings.indexEmbeddings ||
+    !aiSettings.indexSummary ||
+    excludePages.includes(page) ||
+    page.startsWith("_")
+  ) {
+    return;
+  }
+
+  if (!tree.children) {
+    return;
+  }
+
+  const pageText = renderToText(tree);
+  const summaryModel = aiSettings.textModels.find((model) =>
+    model.name === aiSettings.indexSummaryModelName
+  );
+  if (!summaryModel) {
+    throw new Error(
+      `Could not find summary model ${aiSettings.indexSummaryModelName}`,
+    );
+  }
+  const summaryProvider = await configureSelectedModel(summaryModel);
+  let summaryPrompt;
+
+  if (aiSettings.promptInstructions.indexSummaryPrompt !== "") {
+    summaryPrompt = aiSettings.promptInstructions.indexSummaryPrompt;
+  } else {
+    summaryPrompt =
+      "Provide a concise and informative summary of the above page. The summary should capture the key points and be useful for search purposes. Avoid any formatting or extraneous text. No more than one paragraph. Summary:\n";
+  }
+
+  const cacheKey = await cache.hashStrings(
+    summaryModel.name,
+    pageText,
+    summaryPrompt,
+  );
+  let summary = cache.getCache(cacheKey);
+  if (!summary) {
+    summary = await summaryProvider.singleMessageChat(
+      "Contents of " + page + ":\n" + pageText + "\n\n" + summaryPrompt,
+    );
+    cache.setCache(cacheKey, summary);
+  }
+
+  // console.log("summary", summary);
+
+  const summaryEmbeddings = await currentEmbeddingProvider.generateEmbeddings({
+    text: summary,
+  });
+
+  const summaryObject: AISummaryObject = {
+    ref: `${page}@0`,
+    page: page,
+    embedding: summaryEmbeddings,
+    text: summary,
+    tag: "aiSummary",
+  };
+
+  await indexObjects(page, [summaryObject]);
+
+  log(
+    "any",
+    `AI: Indexed summary for page ${page}`,
+  );
+}
+
+export async function getAllAISummaries(): Promise<AISummaryObject[]> {
+  return (await queryObjects<AISummaryObject>("aiSummary", {}));
+}
+
 export async function getAllEmbeddings(): Promise<EmbeddingObject[]> {
   return (await queryObjects<EmbeddingObject>("embedding", {}));
 }
 
 // Full disclosure, I don't really understand how this part works - thanks chatgpt!
 // ^ If anyone can make it better, please do.
 function cosineSimilarity(vecA: number[], vecB: number[]): number {
@@ -119,6 +207,17 @@ export async function searchEmbeddings(
     similarity: cosineSimilarity(queryEmbedding, embedding.embedding),
   }));
 
+  if (aiSettings.indexSummary) {
+    const summaries = await getAllAISummaries();
+    const summaryResults: EmbeddingResult[] = summaries.map((summary) => ({
+      page: summary.page,
+      ref: summary.ref,
+      text: `Page Summary: ${summary.text}`,
+      similarity: cosineSimilarity(queryEmbedding, summary.embedding),
+    }));
+    results.push(...summaryResults);
+  }
+
   // log("client", "AI: searchEmbeddings", results);
 
   return results
@@ -126,6 +225,32 @@
     .sort((a, b) => b.similarity - a.similarity)
     .slice(0, numResults);
 }
 
+/**
+ * Loops over every summary object, calculates the cosine similarity
+ * between the query embedding and each one, and returns the most
+ * similar summary objects.
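+ * Unlike searchEmbeddings, this only considers indexed note summaries
+ * (aiSummary objects), not the per-paragraph embeddings.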
+ */
+export async function searchSummaryEmbeddings(
+  query: string,
+  numResults = 10,
+): Promise<EmbeddingResult[]> {
+  await initIfNeeded();
+  const queryEmbedding = await currentEmbeddingProvider.generateEmbeddings({
+    text: query,
+  });
+  const summaries = await getAllAISummaries();
+
+  const results: EmbeddingResult[] = summaries.map((summary) => ({
+    page: summary.page,
+    ref: summary.ref,
+    text: summary.text,
+    similarity: cosineSimilarity(queryEmbedding, summary.embedding),
+  }));
+
+  return results
+    .sort((a, b) => b.similarity - a.similarity)
+    .slice(0, numResults);
+}
+
 /**
  * Combine and group similar embeddings into one object per page.
  * Without this, we could easily use up the results limit from a single page.
@@ -188,6 +313,9 @@ export async function debugSearchEmbeddings() {
 
 const searchPrefix = "🤖 ";
 
+/**
+ * Display "AI: Search" results.
+ */
 export async function readFileEmbeddings(
   name: string,
 ): Promise<{ data: Uint8Array; meta: FileMeta }> {
diff --git a/src/init.ts b/src/init.ts
index c469d0c..0c047bf 100644
--- a/src/init.ts
+++ b/src/init.ts
@@ -172,6 +172,8 @@ function setupAIProvider(model: ModelConfig) {
       `Unsupported AI provider: ${model.provider}. Please configure a supported provider.`,
     );
   }
+
+  return currentAIProvider;
 }
 
 function setupEmbeddingProvider(model: EmbeddingModelConfig) {
@@ -225,7 +227,7 @@ export async function configureSelectedModel(model: ModelConfig) {
   }
 
   currentModel = model;
-  setupAIProvider(model);
+  return setupAIProvider(model);
 }
 
 export async function configureSelectedImageModel(model: ImageModelConfig) {
@@ -287,6 +289,8 @@ async function loadAndMergeSettings() {
     embeddingModels: [],
     textModels: [],
     indexEmbeddings: false,
+    indexSummary: false,
+    indexSummaryModelName: "",
     indexEmbeddingsExcludePages: [],
     indexEmbeddingsExcludeStrings: ["**user**:"],
   };
@@ -302,6 +306,7 @@ async function loadAndMergeSettings() {
     pageRenameSystem: "",
     pageRenameRules: "",
     tagRules: "",
+    indexSummaryPrompt: "",
   };
   const newSettings = await readSetting("ai", {});
   const newCombinedSettings = { ...defaultSettings, ...newSettings };
diff --git a/src/interfaces/EmbeddingProvider.ts b/src/interfaces/EmbeddingProvider.ts
index 38d8d6e..5f0fddf 100644
--- a/src/interfaces/EmbeddingProvider.ts
+++ b/src/interfaces/EmbeddingProvider.ts
@@ -1,10 +1,15 @@
 import { EmbeddingGenerationOptions } from "../types.ts";
+import * as cache from "../cache.ts";
 
 export interface EmbeddingProviderInterface {
   name: string;
   apiKey: string;
   baseUrl: string;
   modelName: string;
+  _generateEmbeddings: (
+    options: EmbeddingGenerationOptions,
+  ) => Promise<Array<number>>;
+
   generateEmbeddings: (
     options: EmbeddingGenerationOptions,
   ) => Promise<Array<number>>;
@@ -32,7 +37,25 @@
     this.requireAuth = requireAuth;
   }
 
-  abstract generateEmbeddings(
+  abstract _generateEmbeddings(
     options: EmbeddingGenerationOptions,
   ): Promise<Array<number>>;
+
+  async generateEmbeddings(options: EmbeddingGenerationOptions) {
+    const cacheKey = await cache.hashStrings(
+      this.modelName,
+      options.text,
+    );
+
+    // Check if we've already generated these embeddings
+    const cachedEmbedding = cache.getCache(cacheKey);
+    if (cachedEmbedding) {
+      return cachedEmbedding;
+    }
+
+    // Not in cache
+    const embedding = await this._generateEmbeddings(options);
+    cache.setCache(cacheKey, embedding);
+    return embedding;
+  }
 }
diff --git a/src/interfaces/Provider.ts b/src/interfaces/Provider.ts
index 7150ab1..dc022e2 100644
--- a/src/interfaces/Provider.ts
+++ b/src/interfaces/Provider.ts
@@ -1,6 +1,6 @@
from "$sb/syscalls.ts"; import { getPageLength } from "../editorUtils.ts"; -import { StreamChatOptions } from "../types.ts"; +import { ChatMessage, StreamChatOptions } from "../types.ts"; export interface ProviderInterface { name: string; @@ -12,6 +12,10 @@ export interface ProviderInterface { options: StreamChatOptions, cursorStart: number, ) => Promise; + singleMessageChat: ( + userMessage: string, + systemPrompt?: string, + ) => Promise; } export abstract class AbstractProvider implements ProviderInterface { @@ -79,4 +83,28 @@ export abstract class AbstractProvider implements ProviderInterface { await this.chatWithAI({ ...options, onDataReceived: onData }); } + + async singleMessageChat( + userMessage: string, + systemPrompt?: string, + ): Promise { + const messages: ChatMessage[] = [ + { + role: "user", + content: userMessage, + }, + ]; + + if (systemPrompt) { + messages.unshift({ + role: "system", + content: systemPrompt, + }); + } + + return await this.chatWithAI({ + messages, + stream: false, + }); + } } diff --git a/src/prompts.ts b/src/prompts.ts index 4b8d9e5..7b4d09a 100644 --- a/src/prompts.ts +++ b/src/prompts.ts @@ -2,20 +2,21 @@ import { extractFrontmatter } from "$sb/lib/frontmatter.ts"; import { editor, markdown, space } from "$sb/syscalls.ts"; import { queryObjects } from "$sbplugs/index/plug_api.ts"; import { renderTemplate } from "$sbplugs/template/api.ts"; -import { +import type { CompleteEvent, - SlashCompletion, SlashCompletionOption, - TemplateObject, -} from "$type/types.ts"; + SlashCompletions, +} from "$sb/types.ts"; +import type { TemplateObject } from "$sbplugs/template/types.ts"; +import type { ChatMessage } from "./types.ts"; import { getPageLength } from "./editorUtils.ts"; -import { ChatMessage, currentAIProvider, initIfNeeded } from "./init.ts"; +import { currentAIProvider, initIfNeeded } from "./init.ts"; import { supportsPlugSlashComplete } from "./utils.ts"; // TODO: This only works in edge (0.7.2+), see https://github.com/silverbulletmd/silverbullet/issues/742 export async function aiPromptSlashComplete( completeEvent: CompleteEvent, -): Promise<{ options: SlashCompletion[] } | void> { +): Promise<{ options: SlashCompletions[] } | void> { if (!supportsPlugSlashComplete()) { return; } @@ -44,11 +45,11 @@ export async function aiPromptSlashComplete( * Valid templates must have a value for aiprompt.description in the frontmatter. */ export async function insertAiPromptFromTemplate( - slashCompletion: SlashCompletionOption | undefined, + SlashCompletions: SlashCompletionOption | undefined, ) { let selectedTemplate; - if (!slashCompletion || !slashCompletion.templatePage) { + if (!SlashCompletions || !SlashCompletions.templatePage) { // TODO: I don't really understand how this filter works. I'd rather have it check for a #aiPrompt tag instead of an aiprompt.description property const aiPromptTemplates = await queryObjects("template", { filter: ["attr", ["attr", "aiprompt"], "description"], @@ -71,13 +72,13 @@ export async function insertAiPromptFromTemplate( `Select the template to use as the prompt. 
       `Select the template to use as the prompt. The prompt will be rendered and sent to the LLM model.`,
     );
   } else {
-    console.log("selectedTemplate from slash completion: ", slashCompletion);
-    const templatePage = await space.readPage(slashCompletion.templatePage);
+    console.log("selectedTemplate from slash completion: ", slashCompletions);
+    const templatePage = await space.readPage(slashCompletions.templatePage);
     const tree = await markdown.parseMarkdown(templatePage);
     const { aiprompt } = await extractFrontmatter(tree);
     console.log("templatePage from slash completion: ", templatePage);
     selectedTemplate = {
-      ref: slashCompletion.templatePage,
+      ref: slashCompletions.templatePage,
       systemPrompt: aiprompt.systemPrompt || aiprompt.system ||
         "You are an AI note assistant. Please follow the prompt instructions.",
       insertAt: aiprompt.insertAt || "cursor",
diff --git a/src/providers/gemini.ts b/src/providers/gemini.ts
index bb32425..5562cc0 100644
--- a/src/providers/gemini.ts
+++ b/src/providers/gemini.ts
@@ -195,7 +195,7 @@ export class GeminiEmbeddingProvider extends AbstractEmbeddingProvider {
     super(apiKey, baseUrl, "Gemini", modelName, requireAuth);
   }
 
-  async generateEmbeddings(
+  async _generateEmbeddings(
     options: { text: string },
   ): Promise<Array<number>> {
     const body = JSON.stringify({
diff --git a/src/providers/ollama.ts b/src/providers/ollama.ts
index 4beb86d..5d67d8e 100644
--- a/src/providers/ollama.ts
+++ b/src/providers/ollama.ts
@@ -17,7 +17,7 @@ export class OllamaEmbeddingProvider extends AbstractEmbeddingProvider {
   }
 
   // Ollama doesn't have an openai compatible api for embeddings yet, so it gets its own provider
-  async generateEmbeddings(
+  async _generateEmbeddings(
    options: EmbeddingGenerationOptions,
   ): Promise<Array<number>> {
     const body = JSON.stringify({
diff --git a/src/providers/openai.ts b/src/providers/openai.ts
index 0bddc42..6e1b6c5 100644
--- a/src/providers/openai.ts
+++ b/src/providers/openai.ts
@@ -168,7 +168,7 @@ export class OpenAIEmbeddingProvider extends AbstractEmbeddingProvider {
     super(apiKey, baseUrl, "OpenAI", modelName, requireAuth);
   }
 
-  async generateEmbeddings(
+  async _generateEmbeddings(
     options: EmbeddingGenerationOptions,
   ): Promise<Array<number>> {
     const body = JSON.stringify({
diff --git a/src/types.ts b/src/types.ts
index 50c2abd..eed6bbf 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -32,6 +32,15 @@ export type EmbeddingObject = ObjectValue<
   } & Record<string, any>
 >;
 
+export type AISummaryObject = ObjectValue<
+  {
+    text: string;
+    page: string;
+    embedding: number[];
+    tag: "aiSummary";
+  } & Record<string, any>
+>;
+
 export type EmbeddingResult = {
   page: string;
   ref: string;
@@ -78,6 +87,7 @@ export type PromptInstructions = {
   pageRenameSystem: string;
   pageRenameRules: string;
   tagRules: string;
+  indexSummaryPrompt: string;
 };
 
 export type AISettings = {
@@ -89,6 +99,8 @@ export type AISettings = {
   indexEmbeddings: boolean;
   indexEmbeddingsExcludePages: string[];
   indexEmbeddingsExcludeStrings: string[];
+  indexSummary: boolean;
+  indexSummaryModelName: string;
 
   // These are deprecated and will be removed in a future release
   openAIBaseUrl: string;