Skip to content

.Net: IVectorStore implementation for Azure SQL #10623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
160e08e
add new test project, move the existing tests to it
adamsitnik Feb 11, 2025
186fda2
port existing tests to Testcontainers.MsSql and re-enable them
adamsitnik Feb 11, 2025
22495b9
Revert "port existing tests to Testcontainers.MsSql and re-enable them"
adamsitnik Feb 12, 2025
3c3fd4a
Merge remote-tracking branch 'upstream/feature-vector-data-preb1' int…
adamsitnik Feb 12, 2025
179f56e
implement the tests using the new pattern, provide implementation tha…
adamsitnik Feb 12, 2025
486d028
implement collection removal, existence check and creation
adamsitnik Feb 14, 2025
a41cac4
implement record insert and update (upsert)
adamsitnik Feb 16, 2025
6dfb04a
implement delete operations
adamsitnik Feb 17, 2025
e905409
GetAsync and GetBatchAsync
adamsitnik Feb 18, 2025
7c212da
refactor
adamsitnik Feb 18, 2025
8d71b11
implement UpsertBatchAsync
adamsitnik Feb 18, 2025
7f18352
implement SelectTableNames, read the code again and add TODOs for thi…
adamsitnik Feb 19, 2025
32605da
ensure that parameter names are always valid
adamsitnik Feb 19, 2025
b4a73ee
add some comments
adamsitnik Feb 19, 2025
e8584be
support storing more types, support auto-generated keys
adamsitnik Feb 19, 2025
f397f3f
simplify: don't use a dedicated query for inserting a single record
adamsitnik Feb 19, 2025
ffc4b14
Merge remote-tracking branch 'upstream/feature-vector-data-preb1' int…
adamsitnik Feb 20, 2025
7c8d2dc
vector search
adamsitnik Feb 20, 2025
9e5ef1c
implement filtering by reusing a lot of code implemented by @roji
adamsitnik Feb 20, 2025
080811f
reduce code duplication
adamsitnik Feb 20, 2025
c17021e
skip some tests, some polishing
adamsitnik Feb 20, 2025
4669e91
remove a comment added by Copilot
adamsitnik Feb 20, 2025
ba0486f
Update dotnet/src/Connectors/VectorData.Abstractions/RecordAttributes…
adamsitnik Feb 24, 2025
5bdaa8e
address code review feedback:
adamsitnik Feb 24, 2025
1902c0b
address remaining feedback:
adamsitnik Feb 25, 2025
3081305
implement IndexKind support for SqlServer and fix it for PostgreSQL:
adamsitnik Feb 26, 2025
5b843aa
fix the build
adamsitnik Feb 26, 2025
8bb8aea
throw for null inputs, do nothing for empty ones
adamsitnik Feb 26, 2025
f76b573
address code review feedback:
adamsitnik Feb 28, 2025
c40f341
Update dotnet/src/Connectors/VectorData.Abstractions/RecordDefinition…
adamsitnik Feb 28, 2025
2fe49c0
Apply suggestions from code review
adamsitnik Mar 6, 2025
4639a17
Merge remote-tracking branch 'upstream/feature-vector-data-preb1' int…
adamsitnik Mar 6, 2025
0bdca76
address code review feedback:
adamsitnik Mar 6, 2025
88419c3
Merge remote-tracking branch 'upstream/feature-vector-data-preb1' int…
adamsitnik Mar 6, 2025
9ed18ab
remove AutoGenerate
adamsitnik Mar 7, 2025
c42c6cb
Apply suggestions from code review
adamsitnik Mar 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions dotnet/SK-dotnet.sln
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CosmosMongoDBIntegrationTes
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AzureAISearchIntegrationTests", "src\VectorDataIntegrationTests\AzureAISearchIntegrationTests\AzureAISearchIntegrationTests.csproj", "{06181F0F-A375-43AE-B45F-73CBCFC30C14}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SqlServerIntegrationTests", "src\VectorDataIntegrationTests\SqlServerIntegrationTests\SqlServerIntegrationTests.csproj", "{A5E6193C-8431-4C6E-B674-682CB41EAA0C}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -1102,12 +1104,6 @@ Global
{6F591D05-5F7F-4211-9042-42D8BCE60415}.Publish|Any CPU.Build.0 = Debug|Any CPU
{6F591D05-5F7F-4211-9042-42D8BCE60415}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6F591D05-5F7F-4211-9042-42D8BCE60415}.Release|Any CPU.Build.0 = Release|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Debug|Any CPU.Build.0 = Debug|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Publish|Any CPU.ActiveCfg = Debug|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Publish|Any CPU.Build.0 = Debug|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Release|Any CPU.ActiveCfg = Release|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Release|Any CPU.Build.0 = Release|Any CPU
{E82B640C-1704-430D-8D71-FD8ED3695468}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E82B640C-1704-430D-8D71-FD8ED3695468}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E82B640C-1704-430D-8D71-FD8ED3695468}.Publish|Any CPU.ActiveCfg = Debug|Any CPU
Expand All @@ -1126,6 +1122,12 @@ Global
{39EAB599-742F-417D-AF80-95F90376BB18}.Publish|Any CPU.Build.0 = Publish|Any CPU
{39EAB599-742F-417D-AF80-95F90376BB18}.Release|Any CPU.ActiveCfg = Release|Any CPU
{39EAB599-742F-417D-AF80-95F90376BB18}.Release|Any CPU.Build.0 = Release|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Debug|Any CPU.Build.0 = Debug|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Publish|Any CPU.ActiveCfg = Debug|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Publish|Any CPU.Build.0 = Debug|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Release|Any CPU.ActiveCfg = Release|Any CPU
{232E1153-6366-4175-A982-D66B30AAD610}.Release|Any CPU.Build.0 = Release|Any CPU
{DAC54048-A39A-4739-8307-EA5A291F2EA0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{DAC54048-A39A-4739-8307-EA5A291F2EA0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DAC54048-A39A-4739-8307-EA5A291F2EA0}.Publish|Any CPU.ActiveCfg = Debug|Any CPU
Expand Down Expand Up @@ -1198,6 +1200,12 @@ Global
{78785CB1-66CF-4895-D7E5-A440DD84BE86}.Publish|Any CPU.Build.0 = Debug|Any CPU
{78785CB1-66CF-4895-D7E5-A440DD84BE86}.Release|Any CPU.ActiveCfg = Release|Any CPU
{78785CB1-66CF-4895-D7E5-A440DD84BE86}.Release|Any CPU.Build.0 = Release|Any CPU
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Publish|Any CPU.ActiveCfg = Debug|Any CPU
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Publish|Any CPU.Build.0 = Debug|Any CPU
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Release|Any CPU.Build.0 = Release|Any CPU
{27D33AB3-4DFF-48BC-8D76-FB2CDF90B707}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{27D33AB3-4DFF-48BC-8D76-FB2CDF90B707}.Debug|Any CPU.Build.0 = Debug|Any CPU
{27D33AB3-4DFF-48BC-8D76-FB2CDF90B707}.Publish|Any CPU.ActiveCfg = Debug|Any CPU
Expand Down Expand Up @@ -1437,6 +1445,7 @@ Global
{A0E65043-6B00-4836-850F-000A52238914} = {4F381919-F1BE-47D8-8558-3187ED04A84F}
{11DFBF14-6FBA-41F0-B7F3-A288952D6FDB} = {4F381919-F1BE-47D8-8558-3187ED04A84F}
{06181F0F-A375-43AE-B45F-73CBCFC30C14} = {4F381919-F1BE-47D8-8558-3187ED04A84F}
{A5E6193C-8431-4C6E-B674-682CB41EAA0C} = {4F381919-F1BE-47D8-8558-3187ED04A84F}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {FBDC56A3-86AD-4323-AA0F-201E59123B83}
Expand Down
327 changes: 327 additions & 0 deletions dotnet/src/Connectors/Connectors.Memory.Common/SqlFilterTranslator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,327 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Linq.Expressions;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Text;

namespace Microsoft.SemanticKernel.Connectors;

/// <summary>
/// Translates a LINQ filter lambda over a record type into a SQL boolean expression,
/// appending the generated text to a <see cref="StringBuilder"/>.
/// </summary>
/// <remarks>
/// This is one part of a partial class. Members invoked here but not declared in this part
/// (<c>TranslateLambdaVariables</c>, <c>TranslateContainsOverArrayColumn</c> and
/// <c>TranslateContainsOverCapturedArray</c>) must be supplied by another part. The
/// <c>GenerateLiteral</c> calls made for <c>bool</c>, <see cref="DateTime"/> and
/// <see cref="DateTimeOffset"/> values are presumably meant to bind to overloads declared there too.
/// </remarks>
internal partial class SqlFilterTranslator
{
    private readonly IReadOnlyDictionary<string, string> _storagePropertyNames;
    private readonly LambdaExpression _lambdaExpression;
    private readonly ParameterExpression _recordParameter;
    private readonly StringBuilder _sql;

    /// <summary>Initializes a translator for a single filter lambda.</summary>
    /// <param name="storagePropertyNames">Maps record property names, as they appear in the lambda, to the column names written into the SQL.</param>
    /// <param name="lambdaExpression">The filter lambda. It is expected to declare exactly one parameter; this is verified by a debug assertion only.</param>
    /// <param name="sql">Optional builder to append to. A new builder is created when this is <see langword="null"/>.</param>
    internal SqlFilterTranslator(
        IReadOnlyDictionary<string, string> storagePropertyNames,
        LambdaExpression lambdaExpression,
        StringBuilder? sql = null)
    {
        this._storagePropertyNames = storagePropertyNames;
        this._lambdaExpression = lambdaExpression;
        Debug.Assert(lambdaExpression.Parameters.Count == 1);
        this._recordParameter = lambdaExpression.Parameters[0];
        this._sql = sql ?? new();
    }

    /// <summary>Gets the builder that receives the generated SQL.</summary>
    internal StringBuilder Clause => this._sql;

    /// <summary>Translates the body of the filter lambda and appends the result to <see cref="Clause"/>.</summary>
    /// <param name="appendWhere">When <see langword="true"/>, the text <c>WHERE </c> is appended before the translated condition.</param>
    /// <exception cref="NotSupportedException">The lambda contains an expression this translator cannot handle.</exception>
    /// <exception cref="InvalidOperationException">The lambda accesses a record property that has no entry in the storage property map.</exception>
    internal void Translate(bool appendWhere)
    {
        if (appendWhere)
        {
            this._sql.Append("WHERE ");
        }

        this.Translate(this._lambdaExpression.Body);
    }

    /// <summary>Dispatches an expression node to the translation routine for its kind.</summary>
    /// <exception cref="NotSupportedException">The node is <see langword="null"/> or of an unsupported kind.</exception>
    private void Translate(Expression? node)
    {
        switch (node)
        {
            case BinaryExpression binary:
                this.TranslateBinary(binary);
                return;

            case ConstantExpression constant:
                this.TranslateConstant(constant);
                return;

            case MemberExpression member:
                this.TranslateMember(member);
                return;

            case MethodCallExpression methodCall:
                this.TranslateMethodCall(methodCall);
                return;

            case UnaryExpression unary:
                this.TranslateUnary(unary);
                return;

            default:
                throw new NotSupportedException("Unsupported NodeType in filter: " + node?.NodeType);
        }
    }

    /// <summary>
    /// Emits a parenthesized binary condition. Equality and inequality against <see langword="null"/>
    /// (a null constant or a captured variable whose value is null) become <c>IS NULL</c> / <c>IS NOT NULL</c>.
    /// </summary>
    /// <exception cref="NotSupportedException">The binary operator has no SQL mapping here.</exception>
    private void TranslateBinary(BinaryExpression binary)
    {
        // SQL comparison operators never match NULL, so null checks need the dedicated IS [NOT] NULL form.
        if (binary.NodeType is ExpressionType.Equal or ExpressionType.NotEqual)
        {
            // The right-hand side is inspected first, then the left-hand side.
            Expression? comparedOperand =
                IsNull(binary.Right) ? binary.Left
                : IsNull(binary.Left) ? binary.Right
                : null;

            if (comparedOperand is not null)
            {
                this._sql.Append('(');
                this.Translate(comparedOperand);
                this._sql.Append(binary.NodeType is ExpressionType.Equal ? " IS NULL)" : " IS NOT NULL)");
                return;
            }
        }

        this._sql.Append('(');
        this.Translate(binary.Left);

        this._sql.Append(binary.NodeType switch
        {
            ExpressionType.Equal => " = ",
            ExpressionType.NotEqual => " <> ",

            ExpressionType.GreaterThan => " > ",
            ExpressionType.GreaterThanOrEqual => " >= ",
            ExpressionType.LessThan => " < ",
            ExpressionType.LessThanOrEqual => " <= ",

            ExpressionType.AndAlso => " AND ",
            ExpressionType.OrElse => " OR ",

            _ => throw new NotSupportedException("Unsupported binary expression node type: " + binary.NodeType)
        });

        this.Translate(binary.Right);
        this._sql.Append(')');

        static bool IsNull(Expression expression)
            => expression is ConstantExpression { Value: null }
                || (TryGetCapturedValue(expression, out _, out var capturedValue) && capturedValue is null);
    }

    /// <summary>Emits the value of a constant node as a SQL literal.</summary>
    private void TranslateConstant(ConstantExpression constant)
        => this.GenerateLiteral(constant.Value);

    /// <summary>Appends <paramref name="value"/> to the SQL as an inline literal.</summary>
    /// <exception cref="NotImplementedException"><paramref name="value"/> is an array.</exception>
    /// <exception cref="NotSupportedException"><paramref name="value"/> is of a type with no literal form here.</exception>
    private void GenerateLiteral(object? value)
    {
        // A boxed Nullable<T> is either null or the boxed underlying value, so nullable values
        // need no dedicated case in this switch.
        switch (value)
        {
            case byte or short or int or long:
                // Format with the invariant culture: StringBuilder.Append(int) and friends use the
                // current culture, whose negative sign is not guaranteed to be the ASCII '-'.
                this._sql.Append(Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture));
                return;

            case string s:
                // Single quotes are escaped by doubling them.
                this._sql.Append('\'').Append(s.Replace("'", "''")).Append('\'');
                return;
            case bool b:
                this.GenerateLiteral(b);
                return;
            case Guid g:
                this._sql.Append('\'').Append(g.ToString()).Append('\'');
                return;

            case DateTime dateTime:
                this.GenerateLiteral(dateTime);
                return;

            case DateTimeOffset dateTimeOffset:
                this.GenerateLiteral(dateTimeOffset);
                return;

            case Array:
                throw new NotImplementedException();

            case null:
                this._sql.Append("NULL");
                return;

            default:
                throw new NotSupportedException("Unsupported constant type: " + value.GetType().Name);
        }
    }

    /// <summary>
    /// Translates a member access: a property of the record parameter becomes a quoted column name,
    /// and a captured variable is handed to <c>TranslateLambdaVariables</c>.
    /// </summary>
    /// <exception cref="NotSupportedException">The member access is neither of the above.</exception>
    private void TranslateMember(MemberExpression memberExpression)
    {
        if (this.TryGetColumn(memberExpression, out var column))
        {
            // Double any embedded quote so that the column name cannot terminate the delimited identifier.
            this._sql.Append('"').Append(column.Replace("\"", "\"\"")).Append('"');
            return;
        }

        // Captured lambda variables: how they are rendered is decided by TranslateLambdaVariables,
        // which is declared outside this part of the class.
        if (TryGetCapturedValue(memberExpression, out var name, out var value))
        {
            this.TranslateLambdaVariables(name, value);
            return;
        }

        throw new NotSupportedException($"Member access for '{memberExpression.Member.Name}' is unsupported - only member access over the filter parameter are supported");
    }

    /// <summary>Translates a method call; only <c>Enumerable.Contains</c> and <c>List&lt;T&gt;.Contains</c> are recognized.</summary>
    /// <exception cref="NotSupportedException">Any other method is called.</exception>
    private void TranslateMethodCall(MethodCallExpression methodCall)
    {
        switch (methodCall)
        {
            // Enumerable.Contains()
            case { Method.Name: nameof(Enumerable.Contains), Arguments: [var source, var item] } contains
                when contains.Method.DeclaringType == typeof(Enumerable):
                this.TranslateContains(source, item);
                return;

            // List.Contains()
            case
            {
                Method:
                {
                    Name: nameof(Enumerable.Contains),
                    DeclaringType: { IsGenericType: true } declaringType
                },
                Object: Expression source,
                Arguments: [var item]
            } when declaringType.GetGenericTypeDefinition() == typeof(List<>):
                this.TranslateContains(source, item);
                return;

            default:
                throw new NotSupportedException($"Unsupported method call: {methodCall.Method.DeclaringType?.Name}.{methodCall.Method.Name}");
        }
    }

    /// <summary>Translates a <c>Contains</c> call according to what kind of collection <paramref name="source"/> is.</summary>
    /// <param name="source">The collection being searched.</param>
    /// <param name="item">The value being looked for.</param>
    /// <exception cref="NotSupportedException">The collection is not a column, an inline array initializer or a captured variable.</exception>
    private void TranslateContains(Expression source, Expression item)
    {
        switch (source)
        {
            // Contains over array column (r => r.Strings.Contains("foo"))
            case var _ when this.TryGetColumn(source, out _):
                this.TranslateContainsOverArrayColumn(source, item);
                return;

            // Contains over inline array (r => new[] { "foo", "bar" }.Contains(r.String)).
            // Only array initializers qualify: for NewArrayBounds (new string[3]) the Expressions
            // collection holds the dimension sizes, not elements, so it must not become an IN list.
            case NewArrayExpression { NodeType: ExpressionType.NewArrayInit } newArray:
                this.Translate(item);
                this._sql.Append(" IN (");

                for (var index = 0; index < newArray.Expressions.Count; index++)
                {
                    if (index > 0)
                    {
                        this._sql.Append(", ");
                    }

                    this.Translate(newArray.Expressions[index]);
                }

                this._sql.Append(')');
                return;

            // Contains over captured array (r => arrayLocalVariable.Contains(r.String))
            case var _ when TryGetCapturedValue(source, out _, out var value):
                this.TranslateContainsOverCapturedArray(source, item, value);
                return;

            default:
                throw new NotSupportedException("Unsupported Contains expression");
        }
    }

    /// <summary>Translates a unary expression: logical negation, or a conversion to the nullable form of the operand's type.</summary>
    /// <exception cref="NotSupportedException">Any other unary operator is used.</exception>
    private void TranslateUnary(UnaryExpression unary)
    {
        switch (unary.NodeType)
        {
            case ExpressionType.Not:
                // Special handling for !(a == b) and !(a != b): flip the operator so that the
                // null-aware logic in TranslateBinary applies.
                if (unary.Operand is BinaryExpression { NodeType: ExpressionType.Equal or ExpressionType.NotEqual } binary)
                {
                    this.TranslateBinary(
                        Expression.MakeBinary(
                            binary.NodeType is ExpressionType.Equal ? ExpressionType.NotEqual : ExpressionType.Equal,
                            binary.Left,
                            binary.Right));
                    return;
                }

                this._sql.Append("(NOT ");
                this.Translate(unary.Operand);
                this._sql.Append(')');
                return;

            // The compiler wraps an operand in Convert(T -> T?) when it is compared with a nullable
            // value (e.g. r => r.NullableInt == 8). The conversion has no SQL counterpart, so the
            // operand is translated as-is.
            case ExpressionType.Convert when Nullable.GetUnderlyingType(unary.Type) == unary.Operand.Type:
                this.Translate(unary.Operand);
                return;

            default:
                throw new NotSupportedException("Unsupported unary expression node type: " + unary.NodeType);
        }
    }

    /// <summary>Resolves a property access on the record parameter to its column name.</summary>
    /// <param name="expression">The expression to inspect.</param>
    /// <param name="column">The mapped column name when the method returns <see langword="true"/>; otherwise <see langword="null"/>.</param>
    /// <returns><see langword="true"/> when <paramref name="expression"/> is a member access directly on the record parameter.</returns>
    /// <exception cref="InvalidOperationException">The accessed member has no entry in the storage property map.</exception>
    private bool TryGetColumn(Expression expression, [NotNullWhen(true)] out string? column)
    {
        if (expression is MemberExpression member && member.Expression == this._recordParameter)
        {
            if (!this._storagePropertyNames.TryGetValue(member.Member.Name, out column))
            {
                throw new InvalidOperationException($"Property name '{member.Member.Name}' provided as part of the filter clause is not a valid property name.");
            }

            return true;
        }

        column = null;
        return false;
    }

    /// <summary>
    /// Recognizes a variable captured by the lambda: a field read on a constant instance whose type is
    /// a private nested, compiler-generated class.
    /// </summary>
    /// <param name="expression">The expression to inspect.</param>
    /// <param name="name">The name of the captured field when the method returns <see langword="true"/>.</param>
    /// <param name="value">The current value of the captured field, read via reflection; it may be <see langword="null"/>.</param>
    /// <returns><see langword="true"/> when <paramref name="expression"/> is a captured variable access.</returns>
    private static bool TryGetCapturedValue(Expression expression, [NotNullWhen(true)] out string? name, out object? value)
    {
        // IsNestedPrivate compares the masked visibility bits. Attributes.HasFlag(NestedPrivate) would
        // also accept other visibilities whose bit pattern merely contains the NestedPrivate bits.
        if (expression is MemberExpression { Expression: ConstantExpression constant, Member: FieldInfo fieldInfo }
            && constant.Type.IsNestedPrivate
            && Attribute.IsDefined(constant.Type, typeof(CompilerGeneratedAttribute), inherit: true))
        {
            name = fieldInfo.Name;
            value = fieldInfo.GetValue(constant.Value);
            return true;
        }

        name = null;
        value = null;
        return false;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
<Description>Postgres(with pgvector extension) connector for Semantic Kernel plugins and semantic memory</Description>
</PropertyGroup>

<ItemGroup>
<Compile Include="..\Connectors.Memory.Common\SqlFilterTranslator.cs" Link="SqlFilterTranslator.cs" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="Microsoft.Bcl.AsyncInterfaces" />
<PackageReference Include="Npgsql" />
Expand Down
Loading
Loading