From 64169e07460d1bf611c7ad679bd72d0ef9f577b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Kopyci=C5=84ski?= Date: Fri, 10 Jan 2025 16:51:38 +0100 Subject: [PATCH] [Security Assistant] Migrate semantic_text to use highlighter instead of inner_hits (#204962) ## Summary Switch to the `semantic` highlighter (https://github.com/elastic/elasticsearch/pull/118064) instead of `inner_hits` when retrieving Knowledge Base index entry docs Followed the testing instructions from https://github.com/elastic/kibana/pull/198020 Results: Zrzut ekranu 2024-12-19 o 16 32 28 Zrzut ekranu 2024-12-19 o 16 32 38 Zrzut ekranu 2024-12-19 o 16 32 43 Zrzut ekranu 2024-12-19 o 16 32 47 Zrzut ekranu 2024-12-19 o 16 32 50 (cherry picked from commit 55390001adf8ea1eb1f50d46a4a8bb925a8a33d4) --- .../knowledge_base/helpers.test.tsx | 14 +----- .../knowledge_base/helpers.ts | 48 +++++++------------ .../knowledge_base/index.ts | 2 - 3 files changed, 19 insertions(+), 45 deletions(-) diff --git a/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.test.tsx b/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.test.tsx index 69b142bdac6be..275e2be56e39b 100644 --- a/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.test.tsx +++ b/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.test.tsx @@ -159,7 +159,6 @@ describe('getStructuredToolForIndexEntry', () => { indexEntry: mockIndexEntry, esClient: mockEsClient, logger: mockLogger, - elserId: 'elser123', }); expect(tool).toBeInstanceOf(DynamicStructuredTool); @@ -181,15 +180,8 @@ describe('getStructuredToolForIndexEntry', () => { field1: 'value1', field2: 2, }, - inner_hits: { - 'test.test': { - hits: { - hits: [ - { _source: { text: 'Inner text 1' } }, - { _source: { text: 'Inner text 2' } }, - ], - }, - }, + highlight: { + test: ['Inner text 1', 'Inner text 2'], }, }, ], @@ 
-202,7 +194,6 @@ describe('getStructuredToolForIndexEntry', () => { indexEntry: mockIndexEntry, esClient: mockEsClient, logger: mockLogger, - elserId: 'elser123', }); const input = { query: 'testQuery', field1: 'value1', field2: 2 }; @@ -220,7 +211,6 @@ describe('getStructuredToolForIndexEntry', () => { indexEntry: mockIndexEntry, esClient: mockEsClient, logger: mockLogger, - elserId: 'elser123', }); const input = { query: 'testQuery', field1: 'value1', field2: 2 }; diff --git a/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts b/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts index a0d3afb355b4b..a7c30690fdba7 100644 --- a/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts +++ b/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/helpers.ts @@ -6,7 +6,6 @@ */ import { z } from '@kbn/zod'; -import { get } from 'lodash'; import { DynamicStructuredTool } from '@langchain/core/tools'; import { errors } from '@elastic/elasticsearch'; import { QueryDslQueryContainer, SearchRequest } from '@elastic/elasticsearch/lib/api/types'; @@ -140,12 +139,10 @@ export const getStructuredToolForIndexEntry = ({ indexEntry, esClient, logger, - elserId, }: { indexEntry: IndexEntry; esClient: ElasticsearchClient; logger: Logger; - elserId: string; }): DynamicStructuredTool => { const inputSchema = indexEntry.inputSchema?.reduce((prev, input) => { const fieldType = @@ -182,28 +179,27 @@ export const getStructuredToolForIndexEntry = ({ const params: SearchRequest = { index: indexEntry.index, size: 10, - retriever: { - standard: { - query: { - nested: { - path: `${indexEntry.field}.inference.chunks`, - query: { - sparse_vector: { - inference_id: elserId, - field: `${indexEntry.field}.inference.chunks.embeddings`, - query: input.query, - }, - }, - inner_hits: { - 
size: 2, - name: `${indexEntry.name}.${indexEntry.field}`, - _source: [`${indexEntry.field}.inference.chunks.text`], + query: { + bool: { + must: [ + { + semantic: { + field: indexEntry.field, + query: input.query, }, }, - }, + ], filter, }, }, + highlight: { + fields: { + [indexEntry.field]: { + type: 'semantic', + number_of_fragments: 2, + }, + }, + }, }; try { @@ -217,18 +213,8 @@ export const getStructuredToolForIndexEntry = ({ }, {}); } - // We want to send relevant inner hits (chunks) to the LLM as a context - const innerHitPath = `${indexEntry.name}.${indexEntry.field}`; - if (hit.inner_hits?.[innerHitPath]) { - return { - text: hit.inner_hits[innerHitPath].hits.hits - .map((innerHit) => innerHit._source.text) - .join('\n --- \n'), - }; - } - return { - text: get(hit._source, `${indexEntry.field}.inference.chunks[0].text`), + text: hit.highlight?.[indexEntry.field].join('\n --- \n'), }; }); diff --git a/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts b/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts index 0065066455475..89a2b834bf906 100644 --- a/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts +++ b/x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.ts @@ -652,7 +652,6 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { } try { - const elserId = ASSISTANT_ELSER_INFERENCE_ID; const userFilter = getKBUserFilter(user); const results = await this.findDocuments({ // Note: This is a magic number to set some upward bound as to not blow the context with too @@ -682,7 +681,6 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient { indexEntry, esClient, logger: this.options.logger, - elserId, }); }) );