Skip to content

.Net: Support records with no key, fix issue with Relevance #6393

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 12, 2024
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,22 @@ public AzureCosmosDBNoSQLMemoryStore(
VectorEmbeddingPolicy vectorEmbeddingPolicy,
IndexingPolicy indexingPolicy)
{
if (!vectorEmbeddingPolicy.Embeddings.Any(e => e.Path == "/embedding"))
var embedding = vectorEmbeddingPolicy.Embeddings.FirstOrDefault(e => e.Path == "/embedding");
if (embedding is null)
{
throw new InvalidOperationException($"""
In order for {nameof(GetNearestMatchAsync)} to function, {nameof(vectorEmbeddingPolicy)} should
contain an embedding path at /embedding. It's also recommended to include a that path in the
{nameof(indexingPolicy)} to improve performance and reduce cost for searches.
""");
}
else if (embedding.DistanceFunction != DistanceFunction.Cosine)
{
throw new InvalidOperationException($"""
In order for {nameof(GetNearestMatchAsync)} to reliably return relevance information, the {nameof(DistanceFunction)} should
be specified as {nameof(DistanceFunction)}.{nameof(DistanceFunction.Cosine)}.
""");
}
this._cosmosClient = cosmosClient;
this._databaseName = databaseName;
this._vectorEmbeddingPolicy = vectorEmbeddingPolicy;
Expand Down Expand Up @@ -164,6 +172,12 @@ public async Task<string> UpsertAsync(
MemoryRecord record,
CancellationToken cancellationToken = default)
{
// In some cases we're expected to generate the key to use. Do so if one isn't provided.
if (string.IsNullOrEmpty(record.Key))
{
record.Key = Guid.NewGuid().ToString();
}

var result = await this._cosmosClient
.GetDatabase(this._databaseName)
.GetContainer(collectionName)
Expand Down Expand Up @@ -193,6 +207,7 @@ public async IAsyncEnumerable<string> UpsertBatchAsync(
bool withEmbedding = false,
CancellationToken cancellationToken = default)
{
// TODO: Consider using a query when `withEmbedding` is false to avoid passing it over the wire.
var result = await this._cosmosClient
.GetDatabase(this._databaseName)
.GetContainer(collectionName)
Expand Down Expand Up @@ -330,9 +345,10 @@ ORDER BY VectorDistance(x.embedding, @embedding)
{
foreach (var memoryRecord in await feedIterator.ReadNextAsync(cancellationToken).ConfigureAwait(false))
{
if (memoryRecord.SimilarityScore >= minRelevanceScore)
var relevanceScore = (memoryRecord.SimilarityScore + 1) / 2;
if (relevanceScore >= minRelevanceScore)
{
yield return (memoryRecord, memoryRecord.SimilarityScore);
yield return (memoryRecord, relevanceScore);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Connectors.AzureCosmosDBNoSQL;
using Microsoft.SemanticKernel.Embeddings;
using Microsoft.SemanticKernel.Memory;
using MongoDB.Driver;
using Xunit;
Expand Down Expand Up @@ -117,6 +122,54 @@ public async Task ItCanGetNearestMatchesAsync(int limit, bool withEmbeddings)
await memoryStore.DeleteCollectionAsync(collectionName);
}

/// <summary>
/// Saves a reference through <see cref="SemanticTextMemory"/>, then verifies it can be read back by
/// its key and found again by searching for the original text.
/// </summary>
/// <param name="withEmbedding">
/// Whether embeddings are requested from the store; embeddings are only compared when they were requested.
/// </param>
[Theory(Skip = SkipReason)]
[InlineData(true)]
[InlineData(false)]
public async Task ItCanSaveReferenceGetTextAndSearchTextAsync(bool withEmbedding)
{
    var collectionName = this._fixture.CollectionName;
    var memoryStore = this._fixture.MemoryStore;
    var textMemory = new SemanticTextMemory(memoryStore, new MockTextEmbeddingGenerationService());
    var textToStore = "SampleText";
    var id = "MyExternalId";
    var source = "MyExternalSource";
    var refId = await textMemory.SaveReferenceAsync(collectionName, textToStore, id, source);
    Assert.NotNull(refId);

    try
    {
        var expectedQueryResult = new MemoryQueryResult(
            new MemoryRecordMetadata(isReference: true, id, text: "", description: "", source, additionalMetadata: ""),
            1.0,
            withEmbedding ? DataHelper.VectorSearchTestEmbedding : null);

        var queryResult = await textMemory.GetAsync(collectionName, refId, withEmbedding);
        AssertQueryResultEqual(expectedQueryResult, queryResult, withEmbedding);

        var searchResults = await textMemory.SearchAsync(collectionName, textToStore, withEmbeddings: withEmbedding).ToListAsync();
        var searchResult = Assert.Single(searchResults);

        // The expected embedding is null when embeddings were not requested, so comparing
        // embeddings unconditionally would fail on the expected value itself.
        AssertQueryResultEqual(expectedQueryResult, searchResult, compareEmbeddings: withEmbedding);
    }
    finally
    {
        // Remove the saved record even when an assertion fails, so it is not left behind in the fixture's collection.
        await textMemory.RemoveAsync(collectionName, refId);
    }
}

/// <summary>
/// Asserts that <paramref name="actual"/> is not null and carries the same relevance and metadata
/// as <paramref name="expected"/>, optionally comparing the embeddings as well.
/// </summary>
/// <param name="expected">The query result the test expects.</param>
/// <param name="actual">The query result produced by the code under test; must not be null.</param>
/// <param name="compareEmbeddings">
/// When true, both results must have an embedding and the embedding contents must match.
/// </param>
private static void AssertQueryResultEqual(MemoryQueryResult expected, MemoryQueryResult? actual, bool compareEmbeddings)
{
    Assert.NotNull(actual);
    Assert.Equal(expected.Relevance, actual.Relevance);

    var expectedMetadata = expected.Metadata;
    var actualMetadata = actual.Metadata;
    Assert.Equal(expectedMetadata.Id, actualMetadata.Id);
    Assert.Equal(expectedMetadata.Text, actualMetadata.Text);
    Assert.Equal(expectedMetadata.Description, actualMetadata.Description);
    Assert.Equal(expectedMetadata.ExternalSourceName, actualMetadata.ExternalSourceName);
    Assert.Equal(expectedMetadata.AdditionalMetadata, actualMetadata.AdditionalMetadata);
    Assert.Equal(expectedMetadata.IsReference, actualMetadata.IsReference);

    if (!compareEmbeddings)
    {
        return;
    }

    // Both sides must have an embedding before their contents can be compared.
    Assert.NotNull(expected.Embedding);
    Assert.NotNull(actual.Embedding);
    Assert.Equal(expected.Embedding.Value.Span, actual.Embedding.Value.Span);
}

private static void AssertMemoryRecordEqual(
MemoryRecord expectedRecord,
MemoryRecord actualRecord,
Expand Down Expand Up @@ -147,4 +200,15 @@ private static void AssertMemoryRecordEqual(
Assert.True(actualRecord.Embedding.Span.IsEmpty);
}
}

/// <summary>
/// Embedding generation stub for tests: it ignores the supplied text and always produces a list
/// containing the single fixed embedding <c>DataHelper.VectorSearchTestEmbedding</c>.
/// </summary>
private sealed class MockTextEmbeddingGenerationService : ITextEmbeddingGenerationService
{
    /// <summary>Gets the service attributes; always an empty read-only dictionary.</summary>
    public IReadOnlyDictionary<string, object?> Attributes { get; } = ReadOnlyDictionary<string, object?>.Empty;

    /// <summary>
    /// Returns an already-completed task holding one fixed embedding, regardless of the contents of <paramref name="data"/>.
    /// </summary>
    /// <param name="data">The input texts; not inspected.</param>
    /// <param name="kernel">Unused.</param>
    /// <param name="cancellationToken">Unused.</param>
    /// <returns>A completed task whose result is a one-element list with the fixed test embedding.</returns>
    public Task<IList<ReadOnlyMemory<float>>> GenerateEmbeddingsAsync(IList<string> data, Kernel? kernel = null, CancellationToken cancellationToken = default)
        => Task.FromResult<IList<ReadOnlyMemory<float>>>(
            new List<ReadOnlyMemory<float>> { DataHelper.VectorSearchTestEmbedding });
}
}
Loading