Skip to content

Commit

Permalink
.Net: Added implementation of SQLite connector for new memory design (#…
Browse files Browse the repository at this point in the history
…9164)

### Motivation and Context

<!-- Thank you for your contribution to the semantic-kernel repo!
Please help reviewers and future users, providing the following
information:
  1. Why is this change required?
  2. What problem does it solve?
  3. What scenario does it contribute to?
  4. If it fixes an open issue, please link to the issue here.
-->

Related: #8089

In this PR:
- Implemented `IVectorStore`
- Implemented `IVectorizedSearch`
- Implemented `IVectorStoreRecordCollection<TKey, TRecord>`
- SQLite default record mapper
- SQLite generic data model mapper
- `Options` classes
- Extension methods for DI
- Integration tests  
- Unit tests (in progress)

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄

---------

Co-authored-by: Mark Wallace <[email protected]>
Co-authored-by: westey <[email protected]>
Co-authored-by: Weihan Li <[email protected]>
Co-authored-by: Roger Barreto <[email protected]>
  • Loading branch information
5 people authored Oct 17, 2024
1 parent dbf1819 commit e9499b6
Show file tree
Hide file tree
Showing 38 changed files with 5,204 additions and 1 deletion.
11 changes: 11 additions & 0 deletions dotnet/SK-dotnet.sln
Original file line number Diff line number Diff line change
Expand Up @@ -361,16 +361,20 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Process.UnitTests", "src\Ex
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "GettingStartedWithProcesses", "samples\GettingStartedWithProcesses\GettingStartedWithProcesses.csproj", "{C057ACDF-DDD8-496B-BAF9-1C6E4E1248D7}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connectors.Sqlite.UnitTests", "src\Connectors\Connectors.Sqlite.UnitTests\Connectors.Sqlite.UnitTests.csproj", "{0846F178-7EC3-4FE6-AC5C-7146E9FD9A3E}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "VectorData.Abstractions", "src\Connectors\VectorData.Abstractions\VectorData.Abstractions.csproj", "{CF09AE39-67BA-4FE0-A0CD-A49BED956311}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connectors.Memory.InMemory", "src\Connectors\Connectors.Memory.InMemory\Connectors.Memory.InMemory.csproj", "{265B8B6E-B9C4-44C7-BE96-59750F9A1DC7}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "MemoryUnitTests", "MemoryUnitTests", "{5A7028A7-4DDF-4E4F-84A9-37CE8F8D7E89}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connectors.InMemory.UnitTests", "src\Connectors\Connectors.InMemory.UnitTests\Connectors.InMemory.UnitTests.csproj", "{A0B91C05-B3A6-436B-8F49-3427EC1CFC42}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "GettingStartedWithTextSearch", "samples\GettingStartedWithTextSearch\GettingStartedWithTextSearch.csproj", "{16AFA226-E417-490D-9311-9F2099A1EEC8}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VectorStoreRAG", "samples\Demos\VectorStoreRAG\VectorStoreRAG.csproj", "{28DFAF27-8FF3-4373-AAA4-2A6969C86246}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Process.Runtime.Dapr", "src\Experimental\Process.Runtime.Dapr\Process.Runtime.Dapr.csproj", "{9D5B4B53-0E97-42D9-B37E-CD263B6A1892}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ProcessWithDapr", "samples\Demos\ProcessWithDapr\ProcessWithDapr.csproj", "{95163AA2-1ED5-412A-990B-C40B81934BFD}"
Expand Down Expand Up @@ -940,6 +944,12 @@ Global
{C057ACDF-DDD8-496B-BAF9-1C6E4E1248D7}.Publish|Any CPU.Build.0 = Debug|Any CPU
{C057ACDF-DDD8-496B-BAF9-1C6E4E1248D7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C057ACDF-DDD8-496B-BAF9-1C6E4E1248D7}.Release|Any CPU.Build.0 = Release|Any CPU
{0846F178-7EC3-4FE6-AC5C-7146E9FD9A3E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{0846F178-7EC3-4FE6-AC5C-7146E9FD9A3E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0846F178-7EC3-4FE6-AC5C-7146E9FD9A3E}.Publish|Any CPU.ActiveCfg = Debug|Any CPU
{0846F178-7EC3-4FE6-AC5C-7146E9FD9A3E}.Publish|Any CPU.Build.0 = Debug|Any CPU
{0846F178-7EC3-4FE6-AC5C-7146E9FD9A3E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0846F178-7EC3-4FE6-AC5C-7146E9FD9A3E}.Release|Any CPU.Build.0 = Release|Any CPU
{CF09AE39-67BA-4FE0-A0CD-A49BED956311}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{CF09AE39-67BA-4FE0-A0CD-A49BED956311}.Debug|Any CPU.Build.0 = Debug|Any CPU
{CF09AE39-67BA-4FE0-A0CD-A49BED956311}.Publish|Any CPU.ActiveCfg = Publish|Any CPU
Expand Down Expand Up @@ -1110,6 +1120,7 @@ Global
{27AF60D6-86F5-4591-A700-4F8C93F41B11} = {0D8C6358-5DAA-4EA6-A924-C268A9A21BC9}
{21A32285-8443-4A75-B2E8-27E6090EC562} = {0D8C6358-5DAA-4EA6-A924-C268A9A21BC9}
{C057ACDF-DDD8-496B-BAF9-1C6E4E1248D7} = {FA3720F1-C99A-49B2-9577-A940257098BF}
{0846F178-7EC3-4FE6-AC5C-7146E9FD9A3E} = {5A7028A7-4DDF-4E4F-84A9-37CE8F8D7E89}
{CF09AE39-67BA-4FE0-A0CD-A49BED956311} = {24503383-A8C4-4255-9998-28D70FE8E99A}
{265B8B6E-B9C4-44C7-BE96-59750F9A1DC7} = {24503383-A8C4-4255-9998-28D70FE8E99A}
{5A7028A7-4DDF-4E4F-84A9-37CE8F8D7E89} = {0247C2C9-86C3-45BA-8873-28B0948EDC0C}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;

namespace Microsoft.SemanticKernel.Connectors.Sqlite;

/// <summary>
/// Base type for a single WHERE-clause condition: an operand, the values compared against it,
/// and an optional table name used to qualify the operand.
/// </summary>
/// <param name="operand">The operand the condition applies to.</param>
/// <param name="values">The values associated with the condition.</param>
internal abstract class SqliteWhereCondition(string operand, List<object> values)
{
    /// <summary>Gets or sets the operand the condition applies to.</summary>
    public string Operand { get; set; } = operand;

    /// <summary>Gets or sets the values associated with the condition.</summary>
    public List<object> Values { get; set; } = values;

    /// <summary>Gets or sets the table name used to qualify <see cref="Operand"/>; when null or whitespace the operand is used unqualified.</summary>
    public string? TableName { get; set; }

    /// <summary>
    /// Builds the textual form of this condition.
    /// </summary>
    /// <param name="parameterNames">The parameter names to place in the condition.</param>
    /// <returns>The condition text.</returns>
    public abstract string BuildQuery(List<string> parameterNames);

    /// <summary>
    /// Returns <see cref="Operand"/>, prefixed with "<see cref="TableName"/>." when a table name is set.
    /// </summary>
    protected string GetOperand()
    {
        // No usable table name: the operand is emitted as-is.
        if (string.IsNullOrWhiteSpace(this.TableName))
        {
            return this.Operand;
        }

        return $"{this.TableName}.{this.Operand}";
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;

namespace Microsoft.SemanticKernel.Connectors.Sqlite;

/// <summary>
/// WHERE condition of the form "operand = parameter".
/// </summary>
/// <param name="operand">The operand on the left-hand side of the comparison.</param>
/// <param name="value">The single value stored in <see cref="SqliteWhereCondition.Values"/>.</param>
internal sealed class SqliteWhereEqualsCondition(string operand, object value)
    : SqliteWhereCondition(operand, [value])
{
    /// <summary>
    /// Builds the equality condition using the first entry of <paramref name="parameterNames"/>.
    /// </summary>
    /// <param name="parameterNames">Parameter names; must contain at least one entry, only the first is used.</param>
    /// <returns>The text "operand = parameterName".</returns>
    public override string BuildQuery(List<string> parameterNames)
    {
        Verify.True(parameterNames.Count > 0, $"Cannot build '{nameof(SqliteWhereEqualsCondition)}' condition without parameter name.");

        var column = this.GetOperand();
        var parameter = parameterNames[0];

        return $"{column} = {parameter}";
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;

namespace Microsoft.SemanticKernel.Connectors.Sqlite;

/// <summary>
/// WHERE condition of the form "operand IN (p1, p2, ...)".
/// </summary>
/// <param name="operand">The operand on the left-hand side of the IN operator.</param>
/// <param name="values">The values stored in <see cref="SqliteWhereCondition.Values"/>.</param>
internal sealed class SqliteWhereInCondition(string operand, List<object> values)
    : SqliteWhereCondition(operand, values)
{
    /// <summary>
    /// Builds the IN condition, listing every entry of <paramref name="parameterNames"/> separated by ", ".
    /// </summary>
    /// <param name="parameterNames">Parameter names; must contain at least one entry.</param>
    /// <returns>The text "operand IN (name1, name2, ...)".</returns>
    public override string BuildQuery(List<string> parameterNames)
    {
        Verify.True(parameterNames.Count > 0, $"Cannot build '{nameof(SqliteWhereInCondition)}' condition without parameter names.");

        var column = this.GetOperand();
        var placeholders = string.Join(", ", parameterNames);

        return $"{column} IN ({placeholders})";
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;

namespace Microsoft.SemanticKernel.Connectors.Sqlite;

/// <summary>
/// WHERE condition of the form "operand MATCH parameter".
/// </summary>
/// <param name="operand">The operand on the left-hand side of the MATCH operator.</param>
/// <param name="value">The single value stored in <see cref="SqliteWhereCondition.Values"/>.</param>
internal sealed class SqliteWhereMatchCondition(string operand, object value)
    : SqliteWhereCondition(operand, [value])
{
    /// <summary>
    /// Builds the MATCH condition using the first entry of <paramref name="parameterNames"/>.
    /// </summary>
    /// <param name="parameterNames">Parameter names; must contain at least one entry, only the first is used.</param>
    /// <returns>The text "operand MATCH parameterName".</returns>
    public override string BuildQuery(List<string> parameterNames)
    {
        Verify.True(parameterNames.Count > 0, $"Cannot build '{nameof(SqliteWhereMatchCondition)}' condition without parameter name.");

        var column = this.GetOperand();
        var parameter = parameterNames[0];

        return $"{column} MATCH {parameter}";
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<AssemblyName>Microsoft.SemanticKernel.Connectors.Sqlite</AssemblyName>
<RootNamespace>$(AssemblyName)</RootNamespace>
<TargetFrameworks>net8.0;netstandard2.0</TargetFrameworks>
<VersionSuffix>alpha</VersionSuffix>
<VersionSuffix>preview</VersionSuffix>
</PropertyGroup>

<!-- IMPORT NUGET PACKAGE SHARED PROPERTIES -->
Expand All @@ -27,4 +27,8 @@
<ProjectReference Include="..\..\SemanticKernel.Core\SemanticKernel.Core.csproj" />
</ItemGroup>

<ItemGroup>
<InternalsVisibleTo Include="SemanticKernel.Connectors.Sqlite.UnitTests" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Data.Common;
using Microsoft.Extensions.VectorData;

namespace Microsoft.SemanticKernel.Connectors.Sqlite;

/// <summary>
/// Factory abstraction for constructing SQLite <see cref="IVectorStoreRecordCollection{TKey, TRecord}"/> instances
/// when collections are retrieved through an <see cref="IVectorStore"/>.
/// </summary>
public interface ISqliteVectorStoreRecordCollectionFactory
{
/// <summary>
/// Constructs a new instance of the <see cref="IVectorStoreRecordCollection{TKey, TRecord}"/>.
/// </summary>
/// <typeparam name="TKey">The data type of the record key. Constrained to non-nullable types.</typeparam>
/// <typeparam name="TRecord">The data model to use for adding, updating and retrieving data from storage.</typeparam>
/// <param name="connection"><see cref="DbConnection"/> that will be used to manage the data in SQLite.</param>
/// <param name="name">The name of the collection to connect to.</param>
/// <param name="vectorStoreRecordDefinition">
/// An optional record definition that defines the schema of the record type; may be <see langword="null"/>.
/// If not present, attributes on <typeparamref name="TRecord"/> will be used.
/// </param>
/// <returns>The new instance of <see cref="IVectorStoreRecordCollection{TKey, TRecord}"/>.</returns>
IVectorStoreRecordCollection<TKey, TRecord> CreateVectorStoreRecordCollection<TKey, TRecord>(
DbConnection connection,
string name,
VectorStoreRecordDefinition? vectorStoreRecordDefinition)
where TKey : notnull;
}
22 changes: 22 additions & 0 deletions dotnet/src/Connectors/Connectors.Memory.Sqlite/SqliteColumn.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Collections.Generic;

namespace Microsoft.SemanticKernel.Connectors.Sqlite;

/// <summary>
/// Describes a single SQLite column: its name, its type, whether it is the primary key,
/// and an optional bag of additional configuration entries.
/// </summary>
internal sealed class SqliteColumn
{
    /// <summary>
    /// Initializes a new instance of the <see cref="SqliteColumn"/> class.
    /// </summary>
    /// <param name="name">The column name.</param>
    /// <param name="type">The column type.</param>
    /// <param name="isPrimary">Whether the column is flagged as primary.</param>
    public SqliteColumn(string name, string type, bool isPrimary)
    {
        this.Name = name;
        this.Type = type;
        this.IsPrimary = isPrimary;
    }

    /// <summary>Gets or sets the column name.</summary>
    public string Name { get; set; }

    /// <summary>Gets or sets the column type.</summary>
    public string Type { get; set; }

    /// <summary>Gets or sets a value indicating whether the column is flagged as primary.</summary>
    public bool IsPrimary { get; set; }

    /// <summary>Gets or sets additional configuration entries for the column; <see langword="null"/> unless assigned.</summary>
    public Dictionary<string, object>? Configuration { get; set; }
}
54 changes: 54 additions & 0 deletions dotnet/src/Connectors/Connectors.Memory.Sqlite/SqliteConstants.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;

namespace Microsoft.SemanticKernel.Connectors.Sqlite;

/// <summary>
/// Constants shared by the SQLite connector: the vector search extension name and the
/// sets of CLR types accepted for key, data and vector properties.
/// </summary>
internal static class SqliteConstants
{
    /// <summary>
    /// SQLite extension name for vector search.
    /// More information here: <see href="https://github.com/asg017/sqlite-vec"/>.
    /// </summary>
    public const string VectorSearchExtensionName = "vec0";

    /// <summary>The set of types that a key on the provided model may have.</summary>
    public static readonly HashSet<Type> SupportedKeyTypes = new()
    {
        typeof(ulong),
        typeof(string),
    };

    /// <summary>The set of types that data properties on the provided model may have.</summary>
    public static readonly HashSet<Type> SupportedDataTypes = new()
    {
        typeof(int),
        typeof(int?),
        typeof(long),
        typeof(long?),
        typeof(ulong),
        typeof(ulong?),
        typeof(short),
        typeof(short?),
        typeof(ushort),
        typeof(ushort?),
        typeof(string),
        typeof(bool),
        typeof(bool?),
        typeof(float),
        typeof(float?),
        typeof(double),
        typeof(double?),
        typeof(decimal),
        typeof(decimal?),
        typeof(byte[]),
    };

    /// <summary>The set of types that vector properties on the provided model may have.</summary>
    public static readonly HashSet<Type> SupportedVectorTypes = new()
    {
        typeof(ReadOnlyMemory<float>),
        typeof(ReadOnlyMemory<float>?),
    };
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using Microsoft.Extensions.VectorData;

namespace Microsoft.SemanticKernel.Connectors.Sqlite;

/// <summary>
/// Maps <see cref="VectorStoreGenericDataModel{TKey}"/> records, keyed by <see cref="ulong"/> or <see cref="string"/>,
/// to and from the dictionary-based storage model used for SQLite.
/// </summary>
internal sealed class SqliteGenericDataModelMapper :
    IVectorStoreRecordMapper<VectorStoreGenericDataModel<ulong>, Dictionary<string, object?>>,
    IVectorStoreRecordMapper<VectorStoreGenericDataModel<string>, Dictionary<string, object?>>
{
    /// <summary>Reader used to enumerate the key, data and vector properties and to resolve their storage names.</summary>
    private readonly VectorStoreRecordPropertyReader _propertyReader;

    /// <summary>
    /// Initializes a new instance of the <see cref="SqliteGenericDataModelMapper"/> class.
    /// </summary>
    /// <param name="propertyReader">
    /// A <see cref="VectorStoreRecordPropertyReader"/> giving access to the record's properties and their storage names.
    /// Its data and vector properties are verified against the types supported by the SQLite connector.
    /// </param>
    public SqliteGenericDataModelMapper(VectorStoreRecordPropertyReader propertyReader)
    {
        Verify.NotNull(propertyReader);

        // Verify property types against the supported sets before the reader is stored.
        propertyReader.VerifyDataProperties(SqliteConstants.SupportedDataTypes, supportEnumerable: false);
        propertyReader.VerifyVectorProperties(SqliteConstants.SupportedVectorTypes);

        this._propertyReader = propertyReader;
    }

    #region Implementation of IVectorStoreRecordMapper<VectorStoreGenericDataModel<string>, Dictionary<string, object?>>

    /// <summary>Maps a string-keyed generic record to its storage dictionary.</summary>
    public Dictionary<string, object?> MapFromDataToStorageModel(VectorStoreGenericDataModel<string> dataModel)
        => this.InternalMapFromDataToStorageModel(dataModel);

    /// <summary>Maps a storage dictionary back to a string-keyed generic record.</summary>
    public VectorStoreGenericDataModel<string> MapFromStorageToDataModel(Dictionary<string, object?> storageModel, StorageToDataModelMapperOptions options)
        => this.InternalMapFromStorageToDataModel<string>(storageModel, options);

    #endregion

    #region Implementation of IVectorStoreRecordMapper<VectorStoreGenericDataModel<ulong>, Dictionary<string, object?>>

    /// <summary>Maps a ulong-keyed generic record to its storage dictionary.</summary>
    public Dictionary<string, object?> MapFromDataToStorageModel(VectorStoreGenericDataModel<ulong> dataModel)
        => this.InternalMapFromDataToStorageModel(dataModel);

    /// <summary>Maps a storage dictionary back to a ulong-keyed generic record.</summary>
    // Implemented explicitly: the string-keyed overload above already has the same parameter list.
    VectorStoreGenericDataModel<ulong> IVectorStoreRecordMapper<VectorStoreGenericDataModel<ulong>, Dictionary<string, object?>>.MapFromStorageToDataModel(Dictionary<string, object?> storageModel, StorageToDataModelMapperOptions options)
        => this.InternalMapFromStorageToDataModel<ulong>(storageModel, options);

    #endregion

    #region private

    /// <summary>
    /// Builds the storage dictionary for a record: the key first, then every data and vector
    /// property that is present in the record, each stored under its storage property name.
    /// </summary>
    private Dictionary<string, object?> InternalMapFromDataToStorageModel<TKey>(VectorStoreGenericDataModel<TKey> dataModel)
        where TKey : notnull
    {
        var storageModel = new Dictionary<string, object?>();

        // The key is always written.
        storageModel.Add(this._propertyReader.KeyPropertyStoragePropertyName, dataModel.Key);

        // Data properties are copied as-is; properties absent from the record are skipped.
        if (dataModel.Data is { } dataValues)
        {
            foreach (var dataProperty in this._propertyReader.DataProperties)
            {
                if (!dataValues.TryGetValue(dataProperty.DataModelPropertyName, out var dataValue))
                {
                    continue;
                }

                storageModel.Add(this._propertyReader.GetStoragePropertyName(dataProperty.DataModelPropertyName), dataValue);
            }
        }

        // Vector properties are converted to their storage form; a null vector is stored as null.
        if (dataModel.Vectors is { } vectorValues)
        {
            foreach (var vectorProperty in this._propertyReader.VectorProperties)
            {
                if (!vectorValues.TryGetValue(vectorProperty.DataModelPropertyName, out var vectorValue))
                {
                    continue;
                }

                // Converted before the storage name is resolved, so a failing cast surfaces first.
                object? storageVector = null;

                if (vectorValue is not null)
                {
                    storageVector = SqliteVectorStoreRecordPropertyMapping.MapVectorForStorageModel((ReadOnlyMemory<float>)vectorValue);
                }

                storageModel.Add(this._propertyReader.GetStoragePropertyName(vectorProperty.DataModelPropertyName), storageVector);
            }
        }

        return storageModel;
    }

    /// <summary>
    /// Rebuilds a generic record from a storage dictionary. Throws
    /// <see cref="VectorStoreRecordMappingException"/> when the key is missing or null; vectors are
    /// only mapped when <see cref="StorageToDataModelMapperOptions.IncludeVectors"/> is set.
    /// </summary>
    private VectorStoreGenericDataModel<TKey> InternalMapFromStorageToDataModel<TKey>(Dictionary<string, object?> storageModel, StorageToDataModelMapperOptions options)
        where TKey : notnull
    {
        var data = new Dictionary<string, object?>();
        var vectors = new Dictionary<string, object?>();

        // A record without a non-null key cannot be mapped.
        if (!storageModel.TryGetValue(this._propertyReader.KeyPropertyStoragePropertyName, out var storedKey) || storedKey is null)
        {
            throw new VectorStoreRecordMappingException("No key property was found in the record retrieved from storage.");
        }

        TKey key = (TKey)storedKey;

        // Data properties: copy whatever storage returned under the property's storage name.
        foreach (var dataProperty in this._propertyReader.DataProperties)
        {
            if (!storageModel.TryGetValue(this._propertyReader.GetStoragePropertyName(dataProperty.DataModelPropertyName), out var storedValue))
            {
                continue;
            }

            data.Add(dataProperty.DataModelPropertyName, storedValue);
        }

        // Vector properties: only values stored as byte[] are converted; anything else is skipped.
        if (options.IncludeVectors)
        {
            foreach (var vectorProperty in this._propertyReader.VectorProperties)
            {
                if (!storageModel.TryGetValue(this._propertyReader.GetStoragePropertyName(vectorProperty.DataModelPropertyName), out var storedVector) ||
                    storedVector is not byte[] vectorBytes)
                {
                    continue;
                }

                vectors.Add(vectorProperty.DataModelPropertyName, SqliteVectorStoreRecordPropertyMapping.MapVectorForDataModel(vectorBytes));
            }
        }

        return new VectorStoreGenericDataModel<TKey>(key) { Data = data, Vectors = vectors };
    }

    #endregion
}
Loading

0 comments on commit e9499b6

Please sign in to comment.