Skip to content

Commit

Permalink
[Security Assistant] Migrate semantic_text to use highlighter instead…
Browse files Browse the repository at this point in the history
… of inner_hits (elastic#204962)

## Summary

Switch to the semantic highlighter added in elastic/elasticsearch#118064
(instead of `inner_hits`) when retrieving Knowledge Base index entry docs.

Tested by following the instructions in
elastic#198020.

Results:
<img width="1498" alt="Zrzut ekranu 2024-12-19 o 16 32 28"
src="https://github.com/user-attachments/assets/a16bf729-ac30-4ea7-9b11-6e9ecca842dc"
/>

<img width="1495" alt="Zrzut ekranu 2024-12-19 o 16 32 38"
src="https://github.com/user-attachments/assets/016c08c3-9865-4461-86a5-638e9559b202"
/>

<img width="1502" alt="Zrzut ekranu 2024-12-19 o 16 32 43"
src="https://github.com/user-attachments/assets/37a14a2d-191d-420c-940d-1de649e082fd"
/>

<img width="1491" alt="Zrzut ekranu 2024-12-19 o 16 32 47"
src="https://github.com/user-attachments/assets/e2be1e95-6fc8-4149-b1ff-2e8b8a9a0a8d"
/>

<img width="1494" alt="Zrzut ekranu 2024-12-19 o 16 32 50"
src="https://github.com/user-attachments/assets/38b17f44-e349-46ab-8069-80d1a3fd42ae"
/>

(cherry picked from commit 5539000)
  • Loading branch information
patrykkopycinski committed Jan 10, 2025
1 parent ac4699b commit 64169e0
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,6 @@ describe('getStructuredToolForIndexEntry', () => {
indexEntry: mockIndexEntry,
esClient: mockEsClient,
logger: mockLogger,
elserId: 'elser123',
});

expect(tool).toBeInstanceOf(DynamicStructuredTool);
Expand All @@ -181,15 +180,8 @@ describe('getStructuredToolForIndexEntry', () => {
field1: 'value1',
field2: 2,
},
inner_hits: {
'test.test': {
hits: {
hits: [
{ _source: { text: 'Inner text 1' } },
{ _source: { text: 'Inner text 2' } },
],
},
},
highlight: {
test: ['Inner text 1', 'Inner text 2'],
},
},
],
Expand All @@ -202,7 +194,6 @@ describe('getStructuredToolForIndexEntry', () => {
indexEntry: mockIndexEntry,
esClient: mockEsClient,
logger: mockLogger,
elserId: 'elser123',
});

const input = { query: 'testQuery', field1: 'value1', field2: 2 };
Expand All @@ -220,7 +211,6 @@ describe('getStructuredToolForIndexEntry', () => {
indexEntry: mockIndexEntry,
esClient: mockEsClient,
logger: mockLogger,
elserId: 'elser123',
});

const input = { query: 'testQuery', field1: 'value1', field2: 2 };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
*/

import { z } from '@kbn/zod';
import { get } from 'lodash';
import { DynamicStructuredTool } from '@langchain/core/tools';
import { errors } from '@elastic/elasticsearch';
import { QueryDslQueryContainer, SearchRequest } from '@elastic/elasticsearch/lib/api/types';
Expand Down Expand Up @@ -140,12 +139,10 @@ export const getStructuredToolForIndexEntry = ({
indexEntry,
esClient,
logger,
elserId,
}: {
indexEntry: IndexEntry;
esClient: ElasticsearchClient;
logger: Logger;
elserId: string;
}): DynamicStructuredTool => {
const inputSchema = indexEntry.inputSchema?.reduce((prev, input) => {
const fieldType =
Expand Down Expand Up @@ -182,28 +179,27 @@ export const getStructuredToolForIndexEntry = ({
const params: SearchRequest = {
index: indexEntry.index,
size: 10,
retriever: {
standard: {
query: {
nested: {
path: `${indexEntry.field}.inference.chunks`,
query: {
sparse_vector: {
inference_id: elserId,
field: `${indexEntry.field}.inference.chunks.embeddings`,
query: input.query,
},
},
inner_hits: {
size: 2,
name: `${indexEntry.name}.${indexEntry.field}`,
_source: [`${indexEntry.field}.inference.chunks.text`],
query: {
bool: {
must: [
{
semantic: {
field: indexEntry.field,
query: input.query,
},
},
},
],
filter,
},
},
highlight: {
fields: {
[indexEntry.field]: {
type: 'semantic',
number_of_fragments: 2,
},
},
},
};

try {
Expand All @@ -217,18 +213,8 @@ export const getStructuredToolForIndexEntry = ({
}, {});
}

// We want to send relevant inner hits (chunks) to the LLM as a context
const innerHitPath = `${indexEntry.name}.${indexEntry.field}`;
if (hit.inner_hits?.[innerHitPath]) {
return {
text: hit.inner_hits[innerHitPath].hits.hits
.map((innerHit) => innerHit._source.text)
.join('\n --- \n'),
};
}

return {
text: get(hit._source, `${indexEntry.field}.inference.chunks[0].text`),
text: hit.highlight?.[indexEntry.field].join('\n --- \n'),
};
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,6 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
}

try {
const elserId = ASSISTANT_ELSER_INFERENCE_ID;
const userFilter = getKBUserFilter(user);
const results = await this.findDocuments<EsIndexEntry>({
// Note: This is a magic number to set some upward bound as to not blow the context with too
Expand Down Expand Up @@ -682,7 +681,6 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
indexEntry,
esClient,
logger: this.options.logger,
elserId,
});
})
);
Expand Down

0 comments on commit 64169e0

Please sign in to comment.