Skip to content

Commit 164d02d

Browse files
authored
.Net: Add hybrid search abstraction and azure ai search implementation (#10441)
### Motivation and Context #9399 Adding the ADR, Keyword Hybrid Search abstractions and an implementation with integration tests for Azure AI Search. ### Description Adding the ADR, Keyword Hybrid Search abstractions and an implementation with integration tests for Azure AI Search. More implementations to follow. ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone 😄
1 parent a9e0c09 commit 164d02d

File tree

10 files changed

+1004
-43
lines changed

10 files changed

+1004
-43
lines changed

docs/decisions/00NN-hybrid-search.md

Lines changed: 395 additions & 0 deletions
Large diffs are not rendered by default.

dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs

Lines changed: 68 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ namespace Microsoft.SemanticKernel.Connectors.AzureAISearch;
2323
/// </summary>
2424
/// <typeparam name="TRecord">The data model to use for adding, updating and retrieving data from storage.</typeparam>
2525
#pragma warning disable CA1711 // Identifiers should not have incorrect suffix
26-
public sealed class AzureAISearchVectorStoreRecordCollection<TRecord> : IVectorStoreRecordCollection<string, TRecord>, IVectorizableTextSearch<TRecord>
26+
public sealed class AzureAISearchVectorStoreRecordCollection<TRecord> :
27+
IVectorStoreRecordCollection<string, TRecord>,
28+
IVectorizableTextSearch<TRecord>,
29+
IKeywordVectorizedHybridSearch<TRecord>
2730
#pragma warning restore CA1711 // Identifiers should not have incorrect suffix
2831
{
2932
/// <summary>The name of this database for telemetry purposes.</summary>
@@ -68,6 +71,9 @@ public sealed class AzureAISearchVectorStoreRecordCollection<TRecord> : IVectorS
6871
/// <summary>The default options for vector search.</summary>
6972
private static readonly VectorData.VectorSearchOptions s_defaultVectorSearchOptions = new();
7073

74+
/// <summary>The default options for hybrid vector search.</summary>
75+
private static readonly KeywordVectorizedHybridSearchOptions s_defaultKeywordVectorizedHybridSearchOptions = new();
76+
7177
/// <summary>Azure AI Search client that can be used to manage the list of indices in an Azure AI Search Service.</summary>
7278
private readonly SearchIndexClient _searchIndexClient;
7379

@@ -316,25 +322,16 @@ public async IAsyncEnumerable<string> UpsertBatchAsync(IEnumerable<TRecord> reco
316322
/// <inheritdoc />
317323
public Task<VectorSearchResults<TRecord>> VectorizedSearchAsync<TVector>(TVector vector, VectorData.VectorSearchOptions? options = null, CancellationToken cancellationToken = default)
318324
{
319-
Verify.NotNull(vector);
320-
321-
if (this._propertyReader.FirstVectorPropertyName is null)
322-
{
323-
throw new InvalidOperationException("The collection does not have any vector fields, so vector search is not possible.");
324-
}
325-
326-
if (vector is not ReadOnlyMemory<float> floatVector)
327-
{
328-
throw new NotSupportedException($"The provided vector type {vector.GetType().FullName} is not supported by the Azure AI Search connector.");
329-
}
325+
var floatVector = VerifyVectorParam(vector);
330326

331327
// Resolve options.
332328
var internalOptions = options ?? s_defaultVectorSearchOptions;
333-
string? vectorFieldName = this.ResolveVectorFieldName(internalOptions.VectorPropertyName);
329+
var vectorProperty = this._propertyReader.GetVectorPropertyOrFirst(internalOptions.VectorPropertyName);
330+
var vectorPropertyName = this._propertyReader.GetJsonPropertyName(vectorProperty!.DataModelPropertyName);
334331

335332
// Configure search settings.
336333
var vectorQueries = new List<VectorQuery>();
337-
vectorQueries.Add(new VectorizedQuery(floatVector) { KNearestNeighborsCount = internalOptions.Top, Fields = { vectorFieldName } });
334+
vectorQueries.Add(new VectorizedQuery(floatVector) { KNearestNeighborsCount = internalOptions.Top, Fields = { vectorPropertyName } });
338335
var filterString = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(internalOptions.Filter, this._propertyReader.JsonPropertyNamesMap);
339336

340337
// Build search options.
@@ -370,11 +367,12 @@ public Task<VectorSearchResults<TRecord>> VectorizableTextSearchAsync(string sea
370367

371368
// Resolve options.
372369
var internalOptions = options ?? s_defaultVectorSearchOptions;
373-
string? vectorFieldName = this.ResolveVectorFieldName(internalOptions.VectorPropertyName);
370+
var vectorProperty = this._propertyReader.GetVectorPropertyOrFirst(internalOptions.VectorPropertyName);
371+
var vectorPropertyName = this._propertyReader.GetJsonPropertyName(vectorProperty!.DataModelPropertyName);
374372

375373
// Configure search settings.
376374
var vectorQueries = new List<VectorQuery>();
377-
vectorQueries.Add(new VectorizableTextQuery(searchText) { KNearestNeighborsCount = internalOptions.Top, Fields = { vectorFieldName } });
375+
vectorQueries.Add(new VectorizableTextQuery(searchText) { KNearestNeighborsCount = internalOptions.Top, Fields = { vectorPropertyName } });
378376
var filterString = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(internalOptions.Filter, this._propertyReader.JsonPropertyNamesMap);
379377

380378
// Build search options.
@@ -398,6 +396,48 @@ public Task<VectorSearchResults<TRecord>> VectorizableTextSearchAsync(string sea
398396
return this.SearchAndMapToDataModelAsync(null, searchOptions, internalOptions.IncludeVectors, cancellationToken);
399397
}
400398

399+
/// <inheritdoc />
public Task<VectorSearchResults<TRecord>> KeywordVectorizedHybridSearch<TVector>(TVector vector, ICollection<string> keywords, KeywordVectorizedHybridSearchOptions? options = null, CancellationToken cancellationToken = default)
{
    // Validate the inputs; VerifyVectorParam only accepts ReadOnlyMemory<float> vectors.
    Verify.NotNull(keywords);
    ReadOnlyMemory<float> queryVector = VerifyVectorParam(vector);

    // Fall back to the shared default options when the caller supplies none.
    var searchSettings = options ?? s_defaultKeywordVectorizedHybridSearchOptions;

    // Map the requested (or default) data model properties to their JSON property names.
    var targetVectorProperty = this._propertyReader.GetVectorPropertyOrFirst(searchSettings.VectorPropertyName);
    var vectorFieldName = this._propertyReader.GetJsonPropertyName(targetVectorProperty.DataModelPropertyName);
    var targetTextProperty = this._propertyReader.GetFullTextDataPropertyOrSingle(searchSettings.FullTextPropertyName);
    var textFieldName = this._propertyReader.GetJsonPropertyName(targetTextProperty.DataModelPropertyName);

    // Vector half of the hybrid query, plus the filter shared by both halves.
    var vectorQuery = new VectorizedQuery(queryVector)
    {
        KNearestNeighborsCount = searchSettings.Top,
        Fields = { vectorFieldName },
    };
    var filter = AzureAISearchVectorStoreCollectionSearchMapping.BuildFilterString(searchSettings.Filter, this._propertyReader.JsonPropertyNamesMap);

    // Assemble the request: paging, filter, the vector query and the text field to run the keywords against.
    var searchOptions = new SearchOptions
    {
        VectorSearch = new(),
        Size = searchSettings.Top,
        Skip = searchSettings.Skip,
        Filter = filter,
        IncludeTotalCount = searchSettings.IncludeTotalCount,
    };
    searchOptions.VectorSearch.Queries.Add(vectorQuery);
    searchOptions.SearchFields.Add(textFieldName);

    // When vectors are not requested, select only the key and data properties.
    if (!searchSettings.IncludeVectors)
    {
        searchOptions.Select.Add(this._propertyReader.KeyPropertyJsonName);
        searchOptions.Select.AddRange(this._propertyReader.DataPropertyJsonNames);
    }

    // The keywords are passed as a single space separated search text.
    return this.SearchAndMapToDataModelAsync(string.Join(" ", keywords), searchOptions, searchSettings.IncludeVectors, cancellationToken);
}
440+
401441
/// <summary>
402442
/// Get the document with the given key and map it to the data model using the configured mapper type.
403443
/// </summary>
@@ -556,31 +596,6 @@ private GetDocumentOptions ConvertGetDocumentOptions(GetRecordOptions? options)
556596
return innerOptions;
557597
}
558598

559-
/// <summary>
560-
/// Resolve the vector field name to use for a search by using the storage name for the field name from options
561-
/// if available, and falling back to the first vector field name if not.
562-
/// </summary>
563-
/// <param name="optionsVectorFieldName">The vector field name provided via options.</param>
564-
/// <returns>The resolved vector field name.</returns>
565-
/// <exception cref="InvalidOperationException">Thrown if the provided field name is not a valid field name.</exception>
566-
private string ResolveVectorFieldName(string? optionsVectorFieldName)
567-
{
568-
string? vectorFieldName;
569-
if (!string.IsNullOrWhiteSpace(optionsVectorFieldName))
570-
{
571-
if (!this._propertyReader.JsonPropertyNamesMap.TryGetValue(optionsVectorFieldName!, out vectorFieldName))
572-
{
573-
throw new InvalidOperationException($"The collection does not have a vector field named '{optionsVectorFieldName}'.");
574-
}
575-
}
576-
else
577-
{
578-
vectorFieldName = this._propertyReader.FirstVectorPropertyJsonName;
579-
}
580-
581-
return vectorFieldName!;
582-
}
583-
584599
/// <summary>
585600
/// Get a document with the given key, and return null if it is not found.
586601
/// </summary>
@@ -638,4 +653,16 @@ private async Task<T> RunOperationAsync<T>(string operationName, Func<Task<T>> o
638653
};
639654
}
640655
}
656+
657+
/// <summary>
/// Checks that the supplied vector is of the vector type accepted by this connector and returns it in that type.
/// </summary>
/// <typeparam name="TVector">The type of the vector passed in by the caller.</typeparam>
/// <param name="vector">The vector to check. It is passed through <c>Verify.NotNull</c> first.</param>
/// <returns>The supplied vector as a <see cref="ReadOnlyMemory{T}"/> of <see cref="float"/>.</returns>
/// <exception cref="NotSupportedException">Thrown when <paramref name="vector"/> is not a <see cref="ReadOnlyMemory{T}"/> of <see cref="float"/>.</exception>
private static ReadOnlyMemory<float> VerifyVectorParam<TVector>(TVector vector)
{
    Verify.NotNull(vector);

    if (vector is ReadOnlyMemory<float> supportedVector)
    {
        return supportedVector;
    }

    throw new NotSupportedException($"The provided vector type {vector.GetType().FullName} is not supported by the Azure AI Search connector.");
}
641668
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System.Collections.Generic;
4+
using System.Threading;
5+
using System.Threading.Tasks;
6+
7+
namespace Microsoft.Extensions.VectorData;
8+
9+
/// <summary>
/// Defines a hybrid search operation that combines a vector with a set of keywords.
/// </summary>
/// <typeparam name="TRecord">The record data model to use for retrieving data from the store.</typeparam>
public interface IKeywordVectorizedHybridSearch<TRecord>
{
    /// <summary>
    /// Runs a hybrid search for records matching both the given embedding and the given keywords, after applying the provided filters.
    /// </summary>
    /// <typeparam name="TVector">The type of the vector.</typeparam>
    /// <param name="vector">The vector to search the store with.</param>
    /// <param name="keywords">The keywords to search the store with.</param>
    /// <param name="options">Options controlling the search; may be omitted.</param>
    /// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param>
    /// <returns>The records found by the hybrid search, including their result scores.</returns>
    Task<VectorSearchResults<TRecord>> KeywordVectorizedHybridSearch<TVector>(TVector vector, ICollection<string> keywords, KeywordVectorizedHybridSearchOptions? options = null, CancellationToken cancellationToken = default);
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
namespace Microsoft.Extensions.VectorData;
4+
5+
/// <summary>
/// Options for a hybrid search that is driven by a dense vector together with string keywords.
/// </summary>
public class KeywordVectorizedHybridSearchOptions
{
    /// <summary>
    /// Gets or initializes the filter to apply before the hybrid search is done.
    /// </summary>
    public VectorSearchFilter? Filter { get; init; }

    /// <summary>
    /// Gets or initializes the name of the dense vector property to search on.
    /// This is the vector property name from the data model, or the one provided in the record definition.
    /// When not provided, the first vector property in the schema is used.
    /// </summary>
    public string? VectorPropertyName { get; init; }

    /// <summary>
    /// Gets or initializes the name of the text property to run the text/keyword search on.
    /// The property must have full text search enabled.
    /// This is the data property name from the data model, or the one provided in the record definition.
    /// When not provided, the single text property that has full text search enabled is looked up,
    /// and the search will throw if there is none or more than one.
    /// </summary>
    public string? FullTextPropertyName { get; init; }

    /// <summary>
    /// Gets or initializes the maximum number of results to return. The default is 3.
    /// </summary>
    public int Top { get; init; } = 3;

    /// <summary>
    /// Gets or initializes the number of results to skip, i.e. the index of the first result to return. The default is 0.
    /// </summary>
    public int Skip { get; init; }

    /// <summary>
    /// Gets or initializes a value indicating whether vectors are included in the retrieval result. The default is false.
    /// </summary>
    public bool IncludeVectors { get; init; }

    /// <summary>
    /// Gets or initializes a value indicating whether the total count should be included in the results.
    /// </summary>
    /// <remarks>
    /// The default is false.
    /// Not every vector search implementation supports this option; where it is unsupported the total
    /// count will be null even when it was requested here.
    /// </remarks>
    public bool IncludeTotalCount { get; init; }
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System;
4+
using System.Threading.Tasks;
5+
using SemanticKernel.IntegrationTests.Connectors.Memory.Xunit;
6+
7+
namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch;
8+
9+
/// <summary>
/// Attribute used to skip tests when the settings for Azure AI Search are not configured.
/// </summary>
[AttributeUsage(AttributeTargets.Method | AttributeTargets.Class)]
public sealed class AzureAISearchConfigConditionAttribute : Attribute, ITestCondition
{
    /// <summary>
    /// Checks whether both the Azure AI Search service URL and API key are available in the test configuration.
    /// </summary>
    /// <returns><see langword="true"/> when both settings are non-blank; otherwise <see langword="false"/>.</returns>
    public ValueTask<bool> IsMetAsync()
    {
        bool isMet;

        try
        {
            var config = AzureAISearchVectorStoreFixture.GetAzureAISearchConfiguration();
            isMet = config is not null && !string.IsNullOrWhiteSpace(config.ServiceUrl) && !string.IsNullOrWhiteSpace(config.ApiKey);
        }
        catch (InvalidOperationException)
        {
            // The fixture reads the settings with GetRequiredSection, which throws when the
            // "AzureAISearch" section does not exist at all. Missing settings must result in
            // a skipped test rather than a failing condition check.
            isMet = false;
        }

        return ValueTask.FromResult(isMet);
    }

    /// <summary>
    /// Gets the message reported when the tests are skipped because the settings are missing.
    /// </summary>
    public string SkipReason
        => "Azure AI Search ServiceUrl or ApiKey was not specified in user secrets. Use the following command to set them: dotnet user-secrets set \"AzureAISearch:ServiceUrl\" \"your_service_url\" and dotnet user-secrets set \"AzureAISearch:ApiKey\" \"your_api_key\"";
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using Microsoft.Extensions.VectorData;
4+
using Microsoft.SemanticKernel.Connectors.AzureAISearch;
5+
using Xunit;
6+
7+
namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch;
8+
9+
/// <summary>
/// Runs the shared integration tests for <see cref="IKeywordVectorizedHybridSearch{TRecord}"/> against Azure AI Search.
/// </summary>
/// <param name="fixture">Azure AI Search setup and teardown.</param>
[Collection("AzureAISearchVectorStoreCollection")]
[AzureAISearchConfigCondition]
public class AzureAISearchKeywordVectorizedHybridSearchTests(AzureAISearchVectorStoreFixture fixture) : BaseKeywordVectorizedHybridSearchTests<string>
{
    // Record keys supplied to the base test class.
    protected override string Key1 => "1";

    protected override string Key2 => "2";

    protected override string Key3 => "3";

    protected override string Key4 => "4";

    // NOTE(review): presumably the wait the base tests apply after uploading records - confirm against the base class.
    protected override int DelayAfterUploadInMilliseconds => 2000;

    // Creates the Azure AI Search collection under test; the index name is the requested name plus the fixture's postfix.
    protected override IVectorStoreRecordCollection<string, TRecord> GetTargetRecordCollection<TRecord>(string recordCollectionName, VectorStoreRecordDefinition? vectorStoreRecordDefinition)
    {
        var indexName = recordCollectionName + AzureAISearchVectorStoreFixture.TestIndexPostfix;

        return new AzureAISearchVectorStoreRecordCollection<TRecord>(
            fixture.SearchIndexClient,
            indexName,
            new() { VectorStoreRecordDefinition = vectorStoreRecordDefinition });
    }
}

dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,13 @@ public class AzureAISearchVectorStoreFixture : IAsyncLifetime
2929
/// <summary>
3030
/// Test index name which consists of "hotels-" and the machine name with any non-alphanumeric characters removed.
3131
/// </summary>
32+
private readonly string _testIndexName = "hotels-" + TestIndexPostfix;
33+
34+
/// <summary>
35+
/// Gets the test index name postfix that is derived from the local machine name used to avoid clashes between test runs from different callers.
36+
/// </summary>
3237
#pragma warning disable CA1308 // Normalize strings to uppercase
33-
private readonly string _testIndexName = "hotels-" + new Regex("[^a-zA-Z0-9]").Replace(Environment.MachineName.ToLowerInvariant(), "");
38+
public static string TestIndexPostfix { get; private set; } = new Regex("[^a-zA-Z0-9]").Replace(Environment.MachineName.ToLowerInvariant(), "");
3439
#pragma warning restore CA1308 // Normalize strings to uppercase
3540

3641
/// <summary>
@@ -43,12 +48,20 @@ public class AzureAISearchVectorStoreFixture : IAsyncLifetime
4348
.AddUserSecrets<AzureAISearchVectorStoreFixture>()
4449
.Build();
4550

51+
/// <summary>
/// Reads the "AzureAISearch" section of the test configuration and binds it to an <see cref="AzureAISearchConfiguration"/>.
/// </summary>
/// <returns>The bound Azure AI Search test configuration, which may be null.</returns>
public static AzureAISearchConfiguration? GetAzureAISearchConfiguration()
    => s_configuration.GetRequiredSection("AzureAISearch").Get<AzureAISearchConfiguration>();
58+
4659
/// <summary>
4760
/// Initializes a new instance of the <see cref="AzureAISearchVectorStoreFixture"/> class.
4861
/// </summary>
4962
public AzureAISearchVectorStoreFixture()
5063
{
51-
var config = s_configuration.GetRequiredSection("AzureAISearch").Get<AzureAISearchConfiguration>();
64+
var config = GetAzureAISearchConfiguration();
5265
Assert.NotNull(config);
5366
this.Config = config;
5467
this.SearchIndexClient = new SearchIndexClient(new Uri(config.ServiceUrl), new AzureKeyCredential(config.ApiKey));

0 commit comments

Comments
 (0)