feat: Consider labels when providing the context

getappmap · Apr 19, 2024 · 88f7912
1 parent 12e64c7
commit 88f7912
Show file tree

Hide file tree

Showing 14 changed files with 196 additions and 55 deletions.
diff --git a/packages/cli/src/rpc/configuration.ts b/packages/cli/src/rpc/configuration.ts
@@ -22,7 +22,7 @@ export class Configuration {
   }
 
   static async buildFromRpcParams(params: ConfigurationRpc.V2.Set.Params): Promise<Configuration> {
-    return new Configuration(params.projectDirectories, params.appmapConfigFiles);
+    return new Configuration(params.projectDirectories || [], params.appmapConfigFiles || []);
   }
 }
 

diff --git a/packages/cli/src/rpc/explain/explain.ts b/packages/cli/src/rpc/explain/explain.ts
@@ -10,6 +10,7 @@ import INavie, { INavieProvider } from './navie/inavie';
 import configuration, { AppMapDirectory } from '../configuration';
 import collectProjectInfos from '../../cmds/navie/projectInfo';
 import collectHelp from '../../cmds/navie/help';
+import { basename } from 'path';
 
 const searchStatusByUserMessageId = new Map<string, ExplainRpc.ExplainStatusResponse>();
 
@@ -73,19 +74,37 @@ export class Explain extends EventEmitter {
   }
 
   async searchContext(data: ContextV2.ContextRequest): Promise<ContextV2.ContextResponse> {
-    let { vectorTerms: keywords } = data;
+    let { vectorTerms } = data;
     let { tokenCount } = data;
 
+    this.status.vectorTerms = vectorTerms;
+
+    if (data.labels) this.status.labels = data.labels;
+    const labels = data.labels || [];
+
     if (!tokenCount) {
       warn(chalk.bold(`Warning: Token limit not set, defaulting to ${DEFAULT_TOKEN_LIMIT}`));
       tokenCount = DEFAULT_TOKEN_LIMIT;
     }
-    if (!keywords || keywords.length === 0) {
+    if (!vectorTerms || vectorTerms.length === 0) {
       warn(chalk.bold(`Warning: No keywords provided, context result may be unpredictable`));
-      keywords = [];
     }
 
-    this.status.vectorTerms = keywords;
+    const keywords = [...vectorTerms];
+    if (
+      labels.find((label) => label.name === 'architecture') ||
+      labels.find((label) => label.name === 'overview')
+    ) {
+      keywords.push('architecture');
+      keywords.push('design');
+      keywords.push('readme');
+      keywords.push('about');
+      keywords.push('overview');
+      for (const dir of this.projectDirectories) {
+        keywords.push(basename(dir));
+      }
+    }
+    // TODO: For 'troubleshoot', include log information
 
     this.status.step = ExplainRpc.Step.SEARCH_APPMAPS;
 

diff --git a/packages/navie/src/agent.ts b/packages/navie/src/agent.ts
@@ -1,3 +1,4 @@
+import { ContextV2 } from './context';
 import { ProjectInfo } from './project-info';
 
 export enum AgentMode {
@@ -12,7 +13,8 @@ export class AgentOptions {
     public aggregateQuestion: string,
     public chatHistory: string[],
     public projectInfo: ProjectInfo[],
-    public codeSelection?: string
+    public codeSelection?: string,
+    public contextLabels?: ContextV2.ContextLabel[]
   ) {}
 
   get hasAppMaps() {

diff --git a/packages/navie/src/agents/explain-agent.ts b/packages/navie/src/agents/explain-agent.ts
@@ -70,7 +70,11 @@ export default class ExplainAgent implements Agent {
     const tokenCount = tokensAvailable();
     const vectorTerms = await this.vectorTermsService.suggestTerms(options.aggregateQuestion);
 
-    const context = await this.lookupContextService.lookupContext(vectorTerms, tokenCount);
+    const context = await this.lookupContextService.lookupContext(
+      vectorTerms,
+      tokenCount,
+      options.contextLabels
+    );
     const help = await this.lookupContextService.lookupHelp(languages, vectorTerms, tokenCount);
 
     LookupContextService.applyContext(context, help, this.applyContextService, tokenCount);

diff --git a/packages/navie/src/context.ts b/packages/navie/src/context.ts
@@ -57,6 +57,29 @@ export namespace ContextV2 {
     score?: number;
   };
 
+  export enum ContextLabelName {
+    HelpWithAppMap = 'help-with-appmap',
+    Architecture = 'architecture',
+    Feature = 'feature',
+    Overview = 'overview',
+    Troubleshoot = 'troubleshoot',
+    Explain = 'explain',
+    Generate = 'generate',
+  }
+
+  export enum ContextLabelWeight {
+    // The label is very relevant to the request.
+    High = 'high',
+    // The label is somewhat relevant to the request.
+    Medium = 'medium',
+  }
+
+  // A label that describes the nature of the user's request.
+  export type ContextLabel = {
+    name: ContextLabelName | string;
+    weight: ContextLabelWeight | string;
+  };
+
   // Request a set of context items from the context provider.
   export type ContextRequest = ContextV1.ContextRequest & {
     // Boost recent context items. For example, if the user is asking about an event that has recently occurred, such
@@ -69,11 +92,8 @@ export namespace ContextV2 {
     locations?: string[];
     // When specified, only return context items of these types.
     itemTypes?: ContextItemType[];
-    // Weight the importance of the context items. The sum of the weights should be 1.
-    // Item types not specified will be omitted from the response, along with item types whose weight is 0 or less.
-    // If the user's question is directed most specifically to a certain type of context item, the weights should be
-    // set to emphasize that type of context item. If the user's question is more general, the weights can be omitted.
-    weights?: Record<ContextItemType, number>;
+    // Emphasize context items that are relevant to the classification of the user's request.
+    labels?: ContextLabel[];
   };
 
   export type ContextResponse = ContextItem[];

diff --git a/packages/navie/src/explain.ts b/packages/navie/src/explain.ts
@@ -52,7 +52,7 @@ export class CodeExplainerService {
   ): AsyncIterable<string> {
     const { question: baseQuestion, codeSelection } = clientRequest;
 
-    const classificationRequest = this.classifierService.classifyQuestion(baseQuestion);
+    const contextLabelsFn = this.classifierService.classifyQuestion(baseQuestion);
 
     const projectInfoResponse = await this.projectInfoService.lookupProjectInfo();
     const projectInfo: ProjectInfo[] = Array.isArray(projectInfoResponse)
@@ -78,12 +78,20 @@ export class CodeExplainerService {
       .filter(Boolean)
       .join('\n\n');
 
+    const contextLabels = await contextLabelsFn;
+    warn(
+      `Classification: ${contextLabels
+        .map((label) => [label.name, label.weight].join('='))
+        .join(', ')}`
+    );
+
     const agentOptions = new AgentOptions(
       question,
       aggregateQuestion,
       chatHistory?.map((message) => message.content) || [],
       projectInfo,
-      codeSelection
+      codeSelection,
+      contextLabels
     );
     await mode.perform(agentOptions, tokensAvailable);
 
@@ -97,11 +105,6 @@ export class CodeExplainerService {
     if (codeSelection) this.codeSelectionService.applyCodeSelection(codeSelection);
     mode.applyQuestionPrompt(question);
 
-    {
-      const classification = await classificationRequest;
-      warn(`Classification: ${classification}`);
-    }
-
     const response = this.completionService.complete();
     for await (const token of response) {
       yield token;

diff --git a/packages/navie/src/services/agent-selection-service.ts b/packages/navie/src/services/agent-selection-service.ts
@@ -32,7 +32,7 @@ export default class AgentSelectionService {
   selectAgent(
     question: string,
     options: ExplainOptions,
-    projectInfo: ProjectInfo[]
+    _projectInfo: ProjectInfo[]
   ): AgentModeResult {
     let modifiedQuestion = question;
 

diff --git a/packages/navie/src/services/classification-service.ts b/packages/navie/src/services/classification-service.ts
@@ -1,6 +1,7 @@
 import { ChatOpenAI } from '@langchain/openai';
 import OpenAI from 'openai';
 import InteractionHistory from '../interaction-history';
+import { ContextV2 } from '../context';
 
 const SYSTEM_PROMPT = `**Question classifier**
 
@@ -9,51 +10,80 @@ There are several types of questions that the developer may be asking.
 
 Your task is to assign a likelihood to each of the following question types:
 
-- **Help with AppMap**: The developer is asking for help using the AppMap product.
-- **Project architecture**: The developer is asking about the high level architecture of their project.
-- **Explaining code**: The developer is asking for an explanation of how a specific feature of their project works.
-- **Generating code**: The developer is asking for code to be generated for a specific task.
+- **help-with-appmap**: The developer is asking for help using the AppMap product.
+- **architecture**: The developer is asking about the architecture of the project.
+- **feature**: The developer is asking for an explanation of how a specific feature of the project works.
+- **overview**: The developer is asking a high-level question about the structure, purpose,
+  functionality or intent of the project.
+- **troubleshoot**: The developer is asking for help troubleshooting an issue.
+- **explain**: The developer is asking for an explanation of a specific piece of code or functionality.
+- **generate**: The developer is asking for code to be generated for a specific task.
 
 **Classification scores**
 
 Each question type is assigned one of the following likelihoods:
 
-- **High**: The question is very likely to be of this type.
-- **Medium**: The question is somewhat likely to be of this type.
-- **Low**: The question is unlikely to be of this type.
+- **high**: The question is very likely to be of this type.
+- **medium**: The question is somewhat likely to be of this type.
+- **low**: The question is unlikely to be of this type.
 
 **Response**
 
-Respond with a list of question types and their likelihoods. The question types should be one of the following: 'Help with AppMap', 
-'Project architecture', 'Explaining code', 'Generating code'. The likelihoods should be one of the following: 'High', 'Medium', 'Low'.
+Respond with the likelihood of each question type. Question types with "low" likelihood may
+be omitted.
 
-**Example**
+**Examples**
 
 Some examples of questions and their classifications are:
 
 \`\`\`
-Question: How do I install?
-Classification: Help with AppMap (High)
-Classification: Project architecture (Low)
-Classification: Explaining code (Low)
-Classification: Generating code (Low)
+question: How do I record AppMap data of my Spring app?
+classification:
+  - help-with-appmap: high
+  - architecture: low
+  - feature: low
+  - overview: low
+  - troubleshoot: low
+  - explain: low
+  - generate: low
 \`\`\`
 
 \`\`\`
-Question: How does the project work?
-Classification: Help with AppMap (Low)
-Classification: Project architecture (High)
-Classification: Explaining code (Low)
-Classification: Generating code (Low)
+question: How does the project work?
+classification:
+  - help-with-appmap: low
+  - architecture: high
+  - feature: low
+  - overview: high
+  - troubleshoot: low
+  - explain: low
+  - generate: low
 \`\`\`
 
 \`\`\`
-Question: Generate a new user
-Classification: Help with AppMap (Low)
-Classification: Project architecture (Low)
-Classification: Explaining code (Low)
-Classification: Generating code (High)
+question: Generate a form and controller to update the user profile
+classification:
+  - help-with-appmap: low
+  - architecture: medium
+  - feature: high
+  - overview: low
+  - explain: low
+  - generate: high
 \`\`\`
+
+\`\`\`
+question: Why am I getting a 500 error?
+classification:
+  - help-with-appmap: low
+  - architecture: low
+  - feature: low
+  - overview: low
+  - troubleshoot: high
+  - explain: medium
+  - generate: low
+\`\`\`
+
+
 `;
 
 export default class ClassificationService {
@@ -63,7 +93,7 @@ export default class ClassificationService {
     public temperature: number
   ) {}
 
-  async classifyQuestion(question: string): Promise<string> {
+  async classifyQuestion(question: string): Promise<ContextV2.ContextLabel[]> {
     const openAI: ChatOpenAI = new ChatOpenAI({
       modelName: this.modelName,
       temperature: this.temperature,
@@ -92,6 +122,22 @@ export default class ClassificationService {
       tokens.push(token.choices.map((choice) => choice.delta.content).join(''));
     }
     const rawTerms = tokens.join('');
-    return rawTerms;
+
+    const lines = rawTerms.split('\n');
+    const classification: (ContextV2.ContextLabel | null)[] = lines
+      .map((line) => {
+        if (!line.trim()) return null;
+
+        const match = line.match(/([\w-]+)\s*:\s*(\w+)/);
+        if (!match) return null;
+
+        return {
+          name: match[1],
+          weight: match[2],
+        };
+      })
+      .filter((item) => item);
+
+    return classification as ContextV2.ContextLabel[];
   }
 }
diff --git a/packages/navie/src/services/lookup-context-service.ts b/packages/navie/src/services/lookup-context-service.ts
@@ -14,21 +14,26 @@ export default class LookupContextService {
     public readonly helpFn: (data: HelpRequest) => Promise<HelpResponse>
   ) {}
 
-  async lookupContext(keywords: string[], tokenCount: number): Promise<ContextV2.ContextResponse> {
+  async lookupContext(
+    keywords: string[],
+    tokenCount: number,
+    contextLabels?: ContextV2.ContextLabel[]
+  ): Promise<ContextV2.ContextResponse> {
     const contextRequestPayload: ContextV2.ContextRequest & { version: 2; type: 'search' } = {
       version: 2,
       type: 'search',
       vectorTerms: keywords,
       tokenCount,
     };
+    if (contextLabels) contextRequestPayload.labels = contextLabels;
 
     const context = await this.contextFn(contextRequestPayload);
 
     const contextFound = context?.length > 0;
     if (contextFound) {
       this.interactionHistory.addEvent(new ContextLookupEvent(context));
     } else {
-      log('No sequence diagrams found');
+      log('No context found');
       this.interactionHistory.addEvent(new ContextLookupEvent(undefined));
     }
 

diff --git a/packages/navie/src/services/vector-terms-service.ts b/packages/navie/src/services/vector-terms-service.ts
@@ -22,6 +22,18 @@ Respond with a list of search terms and their synonyms. The search terms should
 
 Even if the user asks for a different format, always respond with a list of search terms and their synonyms. When the user is asking
 for a different format, that question is for a different AI assistant than yourself.
+
+**Examples**
+
+\`\`\`
+Question: How do I record AppMap data of my Spring app?
+Terms: record appmap data spring app
+\`\`\`
+
+\`\`\`
+Question: How does the project work?
+Terms: project work
+\`\`\`
 `;
 
 export default class VectorTermsService {
@@ -59,7 +71,14 @@ export default class VectorTermsService {
     for await (const token of response) {
       tokens.push(token.choices.map((choice) => choice.delta.content).join(''));
     }
-    const rawTerms = tokens.join('');
+    let rawTerms = tokens.join('');
+    warn(`rawTerms: ${rawTerms}`);
+    if (rawTerms.startsWith(`Terms:`)) rawTerms = rawTerms.slice(`Terms:`.length);
+
+    if (rawTerms.includes('```')) {
+      warn(`Code fences in AI response.`);
+      rawTerms = rawTerms.replace(/```/g, '');
+    }
 
     const parseJSON = (): Record<string, unknown> | string | string[] | undefined => {
       const sanitizedTerms = rawTerms.replace(/```json/g, '').replace(/```/g, '');
Original file line number | Diff line change
@@ -22,7 +22,7 @@ export class Configuration {
       }
       static async buildFromRpcParams(params: ConfigurationRpc.V2.Set.Params): Promise<Configuration> {
-        return new Configuration(params.projectDirectories, params.appmapConfigFiles);
+        return new Configuration(params.projectDirectories || [], params.appmapConfigFiles || []);
       }
     }
@@ Expand Down @@