Skip to content

Commit

Permalink
Complete Index creation and deletion, test insert (#5)
Browse files Browse the repository at this point in the history
Update README including installation steps
Fix/Improve SQL statements
Update DI method names
Extend test application
Add build script
Upgrade to latest KM nuget
  • Loading branch information
dluc authored Dec 18, 2023
1 parent 8fc7850 commit 869416e
Show file tree
Hide file tree
Showing 14 changed files with 427 additions and 231 deletions.
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,6 @@ PublishScripts/
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
Expand Down Expand Up @@ -442,7 +440,6 @@ global.json
# doxfx
**/DROP/
**/TEMP/
**/packages/
**/bin/
**/obj/
_site
Expand Down
2 changes: 1 addition & 1 deletion Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
</PropertyGroup>
<ItemGroup>
<PackageVersion Include="Microsoft.KernelMemory.Abstractions" Version="0.22.231215.1" />
<PackageVersion Include="Microsoft.KernelMemory.Abstractions" Version="0.22.231216.1" />
<PackageVersion Include="Microsoft.Extensions.DependencyInjection" Version="8.0.0" />
<PackageVersion Include="Pgvector" Version="0.2.0" />
</ItemGroup>
Expand Down
1 change: 1 addition & 0 deletions KernelMemoryPostgres.sln
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "root", "root", "{6EF76FD8-4
code-analysis.props = code-analysis.props
nuget-package.props = nuget-package.props
NUGET.md = NUGET.md
build.sh = build.sh
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PostgresMemoryStorage", "PostgresMemoryStorage\PostgresMemoryStorage.csproj", "{239A6E9B-614F-4A17-9DBA-DAB54F89C703}"
Expand Down
16 changes: 8 additions & 8 deletions PostgresMemoryStorage/DependencyInjection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public static partial class KernelMemoryBuilderExtensions
/// <param name="config">Postgres configuration</param>
public static IKernelMemoryBuilder WithPostgres(this IKernelMemoryBuilder builder, PostgresConfig config)
{
builder.Services.AddPostgresAsVectorDb(config);
builder.Services.AddPostgresAsMemoryDb(config);
return builder;
}

Expand All @@ -28,7 +28,7 @@ public static IKernelMemoryBuilder WithPostgres(this IKernelMemoryBuilder builde
/// <param name="connString">Postgres connection string</param>
public static IKernelMemoryBuilder WithPostgres(this IKernelMemoryBuilder builder, string connString)
{
builder.Services.AddPostgresAsVectorDb(connString);
builder.Services.AddPostgresAsMemoryDb(connString);
return builder;
}
}
Expand All @@ -39,25 +39,25 @@ public static IKernelMemoryBuilder WithPostgres(this IKernelMemoryBuilder builde
public static partial class DependencyInjection
{
/// <summary>
/// Inject Postgres as the default implementation of IVectorDb
/// Inject Postgres as the default implementation of IMemoryDb
/// </summary>
/// <param name="services">Service collection</param>
/// <param name="config">Postgres configuration</param>
public static IServiceCollection AddPostgresAsVectorDb(this IServiceCollection services, PostgresConfig config)
public static IServiceCollection AddPostgresAsMemoryDb(this IServiceCollection services, PostgresConfig config)
{
return services
.AddSingleton<PostgresConfig>(config)
.AddSingleton<IMemoryDb, PostgresMemory>();
}

/// <summary>
/// Inject Postgres as the default implementation of IVectorDb
/// Inject Postgres as the default implementation of IMemoryDb
/// </summary>
/// <param name="services">Service collection</param>
/// <param name="connString">Postgres connection string</param>
public static IServiceCollection AddPostgresAsVectorDb(this IServiceCollection services, string connString)
public static IServiceCollection AddPostgresAsMemoryDb(this IServiceCollection services, string connString)
{
var config = new PostgresConfig { ConnString = connString };
return services.AddPostgresAsVectorDb(config);
var config = new PostgresConfig { ConnectionString = connString };
return services.AddPostgresAsMemoryDb(config);
}
}
2 changes: 1 addition & 1 deletion PostgresMemoryStorage/PostgresConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class PostgresConfig
/// <summary>
/// Connection string required to connect to Postgres
/// </summary>
public string ConnString { get; set; } = string.Empty;
public string ConnectionString { get; set; } = string.Empty;

/// <summary>
/// Name of the schema where to read and write records.
Expand Down
307 changes: 105 additions & 202 deletions PostgresMemoryStorage/PostgresDbClient.cs

Large diffs are not rendered by default.

15 changes: 11 additions & 4 deletions PostgresMemoryStorage/PostgresMemory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
Expand Down Expand Up @@ -41,7 +42,7 @@ public PostgresMemory(
throw new PostgresException("Embedding generator not configured");
}

this._db = new PostgresDbClient(config.ConnString, config.Schema);
this._db = new PostgresDbClient(config.ConnectionString, config.Schema);
}

/// <inheritdoc />
Expand All @@ -65,7 +66,7 @@ public async Task<IEnumerable<string>> GetIndexesAsync(
CancellationToken cancellationToken = default)
{
var result = new List<string>();
var tables = this._db.GetTablesAsync(cancellationToken).ConfigureAwait(false);
var tables = this._db.GetIndexTablesAsync(cancellationToken).ConfigureAwait(false);
await foreach (string name in tables)
{
result.Add(name);
Expand All @@ -75,13 +76,19 @@ public async Task<IEnumerable<string>> GetIndexesAsync(
}

/// <inheritdoc />
public Task DeleteIndexAsync(
public async Task DeleteIndexAsync(
string index,
CancellationToken cancellationToken = default)
{
index = NormalizeIndexName(index);

return this._db.DeleteTableAsync(index, cancellationToken);
// GetIndexes filters out tables that are not indexes
// to avoid deleting a table used for something else.
var list = await this.GetIndexesAsync(cancellationToken).ConfigureAwait(false);
if (list.Contains(index, StringComparer.OrdinalIgnoreCase))
{
await this._db.DeleteTableAsync(index, cancellationToken).ConfigureAwait(false);
}
}

/// <inheritdoc />
Expand Down
71 changes: 71 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Kernel Memory with Postgres

[//]: # ([![Nuget package]&#40;https://img.shields.io/nuget/vpre/Microsoft.KernelMemory.Postgres&#41;]&#40;https://www.nuget.org/packages/Microsoft.KernelMemory.Postgres/&#41;)
[![License: MIT](https://img.shields.io/github/license/microsoft/kernel-memory)](https://github.com/microsoft/kernel-memory/blob/main/LICENSE)
[![Discord](https://img.shields.io/discord/1063152441819942922?label=Discord&logo=discord&logoColor=white&color=d82679)](https://aka.ms/SKDiscord)

Expand All @@ -8,3 +9,73 @@ is an open-source service and plugin specialized in the efficient indexing of da
through custom continuous data hybrid pipelines.

This repository contains the Postgres adapter that allows you to use Kernel Memory with Postgres.

To use Postgres with Kernel Memory:

1. Verify your Postgres instance supports vectors, e.g. run `SELECT * FROM pg_extension` and check that the `vector` extension is listed.

[//]: # (2. install the [Microsoft.KernelMemory.Postgres]&#40;https://www.nuget.org/packages/Microsoft.KernelMemory.Postgres&#41; package)

2. Add the Postgres connection string to appsettings.json (or appsettings.development.json), for example:

```json
{
"KernelMemory": {
"Services": {
"Postgres": {
"ConnectionString": "Host=localhost;Port=5432;Username=myuser;Password=mypassword"
}
}
}
}
```
3. Configure the KM builder to store memories in Postgres, for example:
```csharp
// using Microsoft.KernelMemory;
// using Microsoft.KernelMemory.Postgres;
// using Microsoft.Extensions.Configuration;

var postgresConfig = new PostgresConfig();

new ConfigurationBuilder()
.AddJsonFile("appsettings.json")
.AddJsonFile("appsettings.Development.json", optional: true)
.Build()
.BindSection("KernelMemory:Services:Postgres", postgresConfig);

var memory = new KernelMemoryBuilder()
.WithPostgres(postgresConfig)
.WithAzureOpenAITextGeneration(azureOpenAIConfig)
.WithAzureOpenAITextEmbeddingGeneration(azureOpenAIConfig)
.Build();
```

## Neighbor search indexes, quality and performance

The connector does not create IVFFlat or HNSW indexes on Postgres tables, and uses exact nearest neighbor search.

Depending on your scenario you might want to create these indexes manually, considering precision and performance
trade-offs. We welcome PRs to make this aspect configurable.

> An **IVFFlat** index divides vectors into lists, and then searches a subset of those lists that are closest to the
> query vector. It has **faster build times** and uses **less memory** than HNSW, but has **lower query performance**
> (in terms of speed-recall tradeoff).

> An **HNSW** index creates a multilayer graph. It has **slower build times** and uses **more memory** than IVFFlat,
> but has **better query performance** (in terms of speed-recall tradeoff). There’s no training step like IVFFlat, so
> the index can be created without any data in the table.

See https://github.com/pgvector/pgvector for more information.

## Memory Indexes and Postgres tables

The Postgres memory connector will create "memory indexes" automatically, one DB table for each memory index.

Tables have one hard-coded **comment** attached, used to filter out other tables that might be present.
Tables without such a comment are ignored when **Listing Tables** (Memory Index List) and when **Deleting Tables**
(Delete Memory Index), to avoid exposing extraneous tables, or deleting them.

However, Insert and Update operations do not check for such a comment, so there's a small risk of attempting to read
records from or write records into extraneous tables in the DB, which would most likely result in errors.

Overall we recommend not mixing external tables in the same DB used for Kernel Memory.
4 changes: 4 additions & 0 deletions TestApplication/.editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[*.cs]
dotnet_diagnostic.CA2007.severity = none # no need of ConfigureAwait(false) in examples
dotnet_diagnostic.CA1303.severity = none # Passing literal strings as values
resharper_inconsistent_naming_highlighting = none
54 changes: 43 additions & 11 deletions TestApplication/Program.cs
Original file line number Diff line number Diff line change
@@ -1,33 +1,65 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.Extensions.Configuration;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.Postgres;

namespace TestApplication;

internal class Program
{
public static void Main(string[] args)
public static async Task Main(string[] args)
{
var postgresConfig = new PostgresConfig();
var azureOpenAIEmbeddingConfig = new AzureOpenAIConfig();
var azureOpenAITextConfig = new AzureOpenAIConfig();

new ConfigurationBuilder()
.AddJsonFile("appsettings.json")
.AddJsonFile("appsettings.Development.json", optional: true)
.Build()
.BindSection("KernelMemory:Services:Postgres", postgresConfig)
.BindSection("KernelMemory:Services:AzureOpenAIEmbedding", azureOpenAIEmbeddingConfig)
.BindSection("KernelMemory:Services:AzureOpenAIText", azureOpenAITextConfig);

// Concatenate our 'WithPostgres()' after 'WithOpenAIDefaults()' from the core nuget
var test1 = new KernelMemoryBuilder()
.WithOpenAIDefaults("api key")
.WithPostgres("conn string")
var mem1 = new KernelMemoryBuilder()
.WithAzureOpenAITextGeneration(azureOpenAITextConfig)
.WithAzureOpenAITextEmbeddingGeneration(azureOpenAIEmbeddingConfig)
.WithPostgres(postgresConfig)
.Build();

// Concatenate our 'WithPostgres()' before 'WithOpenAIDefaults()' from the core nuget
var test2 = new KernelMemoryBuilder()
.WithPostgres("conn string")
.WithOpenAIDefaults("api key")
var mem2 = new KernelMemoryBuilder()
.WithPostgres(postgresConfig)
.WithAzureOpenAITextGeneration(azureOpenAITextConfig)
.WithAzureOpenAITextEmbeddingGeneration(azureOpenAIEmbeddingConfig)
.Build();

// Concatenate our 'WithPostgres()' before and after KM builder extension methods from the core nuget
var test3 = new KernelMemoryBuilder()
var mem3 = new KernelMemoryBuilder()
.WithSimpleFileStorage()
.WithPostgres("conn string")
.WithOpenAIDefaults("api key")
.WithAzureOpenAITextGeneration(azureOpenAITextConfig)
.WithPostgres(postgresConfig)
.WithAzureOpenAITextEmbeddingGeneration(azureOpenAIEmbeddingConfig)
.Build();

Console.WriteLine("Test complete");
await mem1.DeleteIndexAsync("index1");
await mem2.DeleteIndexAsync("index2");
await mem3.DeleteIndexAsync("index3");

await mem1.ImportTextAsync("this is a test 1", index: "index1");
await mem1.ImportTextAsync("this is a test 2", index: "index2");
await mem1.ImportTextAsync("this is a test 3", index: "index3");

foreach (var s in await mem1.ListIndexesAsync())
{
Console.WriteLine(s.Name);
}

await mem1.DeleteIndexAsync("index2");
await mem3.DeleteIndexAsync("index3");

Console.WriteLine("\n=== Test complete ===");
}
}
14 changes: 13 additions & 1 deletion TestApplication/TestApplication.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,34 @@
<Nullable>enable</Nullable>
<ManagePackageVersionsCentrally>false</ManagePackageVersionsCentrally>
<NoWarn>CA1303,CA1852</NoWarn>
<RestoreNoCache>true</RestoreNoCache>
</PropertyGroup>

<ItemGroup>
<!-- This package is built locally for demo purposes, see nuget.config -->
<PackageReference Include="Microsoft.KernelMemory.Postgres" Version="0.1.0"/>
</ItemGroup>

<!-- <ItemGroup>-->
<!-- <ProjectReference Include="..\PostgresMemoryStorage\PostgresMemoryStorage.csproj"/>-->
<!-- </ItemGroup>-->

<ItemGroup>
<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.22.231215.1" />
<PackageReference Include="Microsoft.KernelMemory.Core" Version="0.22.231216.1" />
</ItemGroup>

<ItemGroup>
<None Remove="appsettings.json"/>
<Content Include="appsettings.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
<None Remove="appsettings.development.json"/>
<Content Include="appsettings.development.json" Condition="Exists('appsettings.development.json')">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
<Content Update="appsettings.development.json">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
</ItemGroup>

</Project>
Loading

0 comments on commit 869416e

Please sign in to comment.