Skip to content

Commit

Permalink
Fix embeddings not being returned in Azure AI Search memory (#867)
Browse files Browse the repository at this point in the history
## Motivation and Context (Why the change? What's the scenario?)

The `withEmbeddings` parameter of the `GetSimilarListAsync` and
`GetListAsync` methods on the `AzureAISearchMemory` class is not being
applied because the `vector` field in the search index is not
retrievable.

## High level description (Approach, Design)

Ensure that the index is created with the vector field as retrievable so
the option can be applied.

- Add a `VectorSearchField` to the schema with `IsHidden` set to `false`
and `IsStored` set to `true`
- Create a private method that sets the `Select` fields on the
`SearchOptions` to ensure only needed fields are returned

---------

Co-authored-by: Devis Lucato <[email protected]>
Co-authored-by: Devis Lucato <[email protected]>
  • Loading branch information
3 people authored Oct 30, 2024
1 parent e5adeeb commit b00d1a0
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 11 deletions.
26 changes: 17 additions & 9 deletions extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ await client.IndexDocumentsAsync(
FilterMode = VectorFilterMode.PreFilter
}
};
DefineFieldsToSelect(options, withEmbeddings);

if (limit > 0)
{
Expand Down Expand Up @@ -253,6 +254,7 @@ public async IAsyncEnumerable<MemoryRecord> GetListAsync(
var client = this.GetSearchClient(index);

SearchOptions options = new();
DefineFieldsToSelect(options, withEmbeddings);

if (limit > 0)
{
Expand Down Expand Up @@ -515,7 +517,7 @@ private SearchIndex PrepareIndexSchema(string index, MemoryDbSchema schema)
* - searchable: Full-text searchable, subject to lexical analysis such as word-breaking during indexing.
* - filterable: Filterable fields of type Edm.String or Collection(Edm.String) don't undergo word-breaking.
* - facetable: Used for counting. Fields of type Edm.String that are filterable, "sortable", or "facetable" can be at most 32kb. */
SearchField? vectorField = null;
VectorSearchField? vectorField = null;
foreach (var field in schema.Fields)
{
switch (field.Type)
Expand All @@ -525,15 +527,10 @@ private SearchIndex PrepareIndexSchema(string index, MemoryDbSchema schema)
throw new AzureAISearchMemoryException($"Unsupported field type {field.Type:G}");

case MemoryDbField.FieldType.Vector:
vectorField = new SearchField(field.Name, SearchFieldDataType.Collection(SearchFieldDataType.Single))
vectorField = new VectorSearchField(field.Name, field.VectorSize, VectorSearchProfileName)
{
IsKey = false,
IsFilterable = false,
IsSearchable = true,
IsFacetable = false,
IsSortable = false,
VectorSearchDimensions = field.VectorSize,
VectorSearchProfileName = VectorSearchProfileName,
IsHidden = false,
IsStored = true
};

break;
Expand Down Expand Up @@ -630,6 +627,17 @@ at Azure.Search.Documents.SearchClient.SearchInternal[T](SearchOptions options,
return indexSchema;
}

/// <summary>
/// Adds to the search options the names of the index fields that should be
/// returned with each result, so that only the needed fields are retrieved.
/// </summary>
/// <param name="options">Search options whose <c>Select</c> list is extended.</param>
/// <param name="withEmbeddings">
/// When true, the vector field is selected in addition to the id, tags and payload fields.
/// </param>
private static void DefineFieldsToSelect(SearchOptions options, bool withEmbeddings)
{
// Fields that are always requested, in the order they are added to the list.
string[] alwaysSelected =
{
AzureAISearchMemoryRecord.IdField,
AzureAISearchMemoryRecord.TagsField,
AzureAISearchMemoryRecord.PayloadField
};

foreach (string fieldName in alwaysSelected)
{
options.Select.Add(fieldName);
}

if (!withEmbeddings)
{
return;
}

// The embedding is only requested when the caller explicitly asks for it.
options.Select.Add(AzureAISearchMemoryRecord.VectorField);
}

private static double ScoreToCosineSimilarity(double score)
{
return 2 - (1 / score);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ internal sealed class AzureAISearchMemoryRecord
{
internal const string IdField = "id";
internal const string VectorField = "embedding";
private const string TagsField = "tags";
private const string PayloadField = "payload";
internal const string TagsField = "tags";
internal const string PayloadField = "payload";

private static readonly JsonSerializerOptions s_jsonOptions = new()
{
Expand Down

0 comments on commit b00d1a0

Please sign in to comment.