Skip to content

Commit 66ad52c

Browse files
authored
.Net: Add qdrant and weaviate keyword hybrid search implementations and tests (#10505)
### Motivation and Context

#9399

### Description

- Add Qdrant keyword hybrid search
- Add Weaviate keyword hybrid search
- Add integration tests for both.

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄
1 parent 164d02d commit 66ad52c

9 files changed

+317
-81
lines changed

dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs

Lines changed: 97 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@ namespace Microsoft.SemanticKernel.Connectors.Qdrant;
1818
/// </summary>
1919
/// <typeparam name="TRecord">The data model to use for adding, updating and retrieving data from storage.</typeparam>
2020
#pragma warning disable CA1711 // Identifiers should not have incorrect suffix
21-
public sealed class QdrantVectorStoreRecordCollection<TRecord> : IVectorStoreRecordCollection<ulong, TRecord>, IVectorStoreRecordCollection<Guid, TRecord>
21+
public sealed class QdrantVectorStoreRecordCollection<TRecord> :
22+
IVectorStoreRecordCollection<ulong, TRecord>,
23+
IVectorStoreRecordCollection<Guid, TRecord>,
24+
IKeywordVectorizedHybridSearch<TRecord>
2225
#pragma warning restore CA1711 // Identifiers should not have incorrect suffix
2326
{
2427
/// <summary>A set of types that a key on the provided model may have.</summary>
@@ -31,6 +34,9 @@ public sealed class QdrantVectorStoreRecordCollection<TRecord> : IVectorStoreRec
3134
/// <summary>The default options for vector search.</summary>
3235
private static readonly VectorSearchOptions s_defaultVectorSearchOptions = new();
3336

37+
/// <summary>The default options for hybrid vector search.</summary>
38+
private static readonly KeywordVectorizedHybridSearchOptions s_defaultKeywordVectorizedHybridSearchOptions = new();
39+
3440
/// <summary>The name of this database for telemetry purposes.</summary>
3541
private const string DatabaseName = "Qdrant";
3642

@@ -459,19 +465,11 @@ private async IAsyncEnumerable<TRecord> GetBatchByPointIdAsync<TKey>(
459465
/// <inheritdoc />
460466
public async Task<VectorSearchResults<TRecord>> VectorizedSearchAsync<TVector>(TVector vector, VectorSearchOptions? options = null, CancellationToken cancellationToken = default)
461467
{
462-
Verify.NotNull(vector);
463-
464-
if (this._propertyReader.FirstVectorPropertyName is null)
465-
{
466-
throw new InvalidOperationException("The collection does not have any vector fields, so vector search is not possible.");
467-
}
468-
469-
if (vector is not ReadOnlyMemory<float> floatVector)
470-
{
471-
throw new NotSupportedException($"The provided vector type {vector.GetType().FullName} is not supported by the Qdrant connector.");
472-
}
468+
var floatVector = VerifyVectorParam(vector);
473469

470+
// Resolve options.
474471
var internalOptions = options ?? s_defaultVectorSearchOptions;
472+
var vectorProperty = this._propertyReader.GetVectorPropertyOrFirst(internalOptions.VectorPropertyName);
475473

476474
// Build filter object.
477475
var filter = QdrantVectorStoreCollectionSearchMapping.BuildFilter(internalOptions.Filter, this._propertyReader.StoragePropertyNamesMap);
@@ -480,7 +478,7 @@ public async Task<VectorSearchResults<TRecord>> VectorizedSearchAsync<TVector>(T
480478
string? vectorName = null;
481479
if (this._options.HasNamedVectors)
482480
{
483-
vectorName = this.ResolveVectorFieldName(internalOptions.VectorPropertyName);
481+
vectorName = this._propertyReader.GetStoragePropertyName(vectorProperty.DataModelPropertyName);
484482
}
485483

486484
// Specify whether to include vectors in the search results.
@@ -517,29 +515,87 @@ public async Task<VectorSearchResults<TRecord>> VectorizedSearchAsync<TVector>(T
517515
return new VectorSearchResults<TRecord>(mappedResults.ToAsyncEnumerable());
518516
}
519517

520-
/// <summary>
521-
/// Resolve the vector field name to use for a search by using the storage name for the field name from options
522-
/// if available, and falling back to the first vector field name if not.
523-
/// </summary>
524-
/// <param name="optionsVectorFieldName">The vector field name provided via options.</param>
525-
/// <returns>The resolved vector field name.</returns>
526-
/// <exception cref="InvalidOperationException">Thrown if the provided field name is not a valid field name.</exception>
527-
private string ResolveVectorFieldName(string? optionsVectorFieldName)
518+
/// <inheritdoc />
/// <remarks>
/// Sends two prefetch queries to Qdrant (a nearest-vector query and a keyword-filter query) and combines
/// them with a <c>Fusion.Rrf</c> fusion query. The filter taken from the options is applied to both prefetches.
/// NOTE(review): the method returns a Task but its name has no Async suffix; the name presumably comes from
/// the hybrid search interface this class implements, so it cannot be changed here alone.
/// </remarks>
519+
public async Task<VectorSearchResults<TRecord>> KeywordVectorizedHybridSearch<TVector>(TVector vector, ICollection<string> keywords, KeywordVectorizedHybridSearchOptions? options = null, CancellationToken cancellationToken = default)
528520
{
529-
string? vectorFieldName;
530-
if (!string.IsNullOrWhiteSpace(optionsVectorFieldName))
521+
// Validate the vector before anything else; VerifyVectorParam throws unless it is a ReadOnlyMemory<float>.
var floatVector = VerifyVectorParam(vector);
522+
523+
// Resolve options: fall back to the shared default instance when the caller passes none,
// then pick the vector property named in the options (or the first one, per the helper's name).
524+
var internalOptions = options ?? s_defaultKeywordVectorizedHybridSearchOptions;
525+
var vectorProperty = this._propertyReader.GetVectorPropertyOrFirst(internalOptions.VectorPropertyName);
526+
527+
// Build the filter object from the caller's filter and resolve the storage name of the
// text property that the keywords will be matched against.
528+
var filter = QdrantVectorStoreCollectionSearchMapping.BuildFilter(internalOptions.Filter, this._propertyReader.StoragePropertyNamesMap);
529+
var textDataProperty = this._propertyReader.GetFullTextDataPropertyOrSingle(internalOptions.FullTextPropertyName);
530+
var textDataPropertyName = this._propertyReader.GetStoragePropertyName(textDataProperty.DataModelPropertyName);
531+
532+
// Specify the vector name if named vectors are used; otherwise it stays null.
533+
string? vectorName = null;
534+
if (this._options.HasNamedVectors)
531535
{
532-
if (!this._propertyReader.StoragePropertyNamesMap.TryGetValue(optionsVectorFieldName!, out vectorFieldName))
536+
vectorName = this._propertyReader.GetStoragePropertyName(vectorProperty.DataModelPropertyName);
537+
}
538+
539+
// Specify whether to include vectors in the search results.
540+
var vectorsSelector = new WithVectorsSelector();
541+
vectorsSelector.Enable = internalOptions.IncludeVectors;
542+
543+
// Build the vector query: a prefetch that ranks by nearness to the supplied vector,
// restricted by the caller's filter.
544+
var vectorQuery = new PrefetchQuery
545+
{
546+
Filter = filter,
547+
Query = new Query
533548
{
534-
throw new InvalidOperationException($"The collection does not have a vector field named '{optionsVectorFieldName}'.");
535-
}
549+
Nearest = new VectorInput(floatVector.ToArray()),
550+
},
551+
};
552+
553+
// Using is only set when named vectors are enabled, mirroring the vectorName resolution above.
if (this._options.HasNamedVectors)
554+
{
555+
vectorQuery.Using = vectorName;
536556
}
537-
else
557+
558+
// Build the keyword query: start from a copy of the caller's filter so the vector prefetch's
// filter is left untouched, then require a match on the keyword sub-filter.
// NOTE(review): assumes BuildFilter never returns null; filter.Clone() would fail otherwise - TODO confirm.
559+
var keywordFilter = filter.Clone();
560+
var keywordSubFilter = new Filter();
// One full-text Match condition per keyword goes into the sub-filter's Should list
// (presumably "at least one should match" in Qdrant filter terms - confirm against the Qdrant docs).
// NOTE(review): keywords is not null-checked in this method; a null collection would fail at this foreach.
561+
foreach (string keyword in keywords)
538562
{
539-
vectorFieldName = this._propertyReader.FirstVectorPropertyStoragePropertyName;
563+
keywordSubFilter.Should.Add(new Condition() { Field = new FieldCondition() { Key = textDataPropertyName, Match = new Match { Text = keyword } } });
540564
}
565+
keywordFilter.Must.Add(new Condition() { Filter = keywordSubFilter });
566+
// The keyword prefetch carries only a filter; no Query is set on it.
var keywordQuery = new PrefetchQuery
567+
{
568+
Filter = keywordFilter,
569+
};
570+
571+
// Build the fusion query that combines the results of the two prefetches.
572+
var fusionQuery = new Query
573+
{
574+
Fusion = Fusion.Rrf,
575+
};
576+
577+
// Execute Search.
// NOTE(review): Top and Skip are cast to ulong without a range check; presumably they are
// validated as non-negative elsewhere - confirm.
578+
var points = await this.RunOperationAsync(
579+
"Query",
580+
() => this._qdrantClient.QueryAsync(
581+
this.CollectionName,
582+
prefetch: new List<PrefetchQuery>() { vectorQuery, keywordQuery },
583+
query: fusionQuery,
584+
limit: (ulong)internalOptions.Top,
585+
offset: (ulong)internalOptions.Skip,
586+
vectorsSelector: vectorsSelector,
587+
cancellationToken: cancellationToken)).ConfigureAwait(false);
541588

542-
return vectorFieldName!;
589+
// Map to data model: each scored point is converted through the record mapper, passing the
// database name, collection name and operation name along with it.
590+
var mappedResults = points.Select(point => QdrantVectorStoreCollectionSearchMapping.MapScoredPointToVectorSearchResult(
591+
point,
592+
this._mapper,
593+
internalOptions.IncludeVectors,
594+
DatabaseName,
595+
this._collectionName,
596+
"Query"));
597+
598+
return new VectorSearchResults<TRecord>(mappedResults.ToAsyncEnumerable());
543599
}
544600

545601
/// <summary>
@@ -588,4 +644,16 @@ private async Task<T> RunOperationAsync<T>(string operationName, Func<Task<T>> o
588644
};
589645
}
590646
}
647+
648+
/// <summary>
/// Checks the vector passed to a search method and returns it typed as a <see cref="ReadOnlyMemory{T}"/> of <see cref="float"/>.
/// </summary>
/// <typeparam name="TVector">The type of the vector supplied by the caller.</typeparam>
/// <param name="vector">The vector to check.</param>
/// <returns>The supplied vector as a <see cref="ReadOnlyMemory{T}"/> of <see cref="float"/>.</returns>
/// <exception cref="NotSupportedException">Thrown when <paramref name="vector"/> is not a <see cref="ReadOnlyMemory{T}"/> of <see cref="float"/>.</exception>
private static ReadOnlyMemory<float> VerifyVectorParam<TVector>(TVector vector)
649+
{
650+
// Verify.NotNull is defined outside this view; presumably it throws when the vector is null.
Verify.NotNull(vector);
651+
652+
// The type pattern both tests the runtime type and binds the typed value that is returned below.
if (vector is not ReadOnlyMemory<float> floatVector)
653+
{
654+
throw new NotSupportedException($"The provided vector type {vector.GetType().FullName} is not supported by the Qdrant connector.");
655+
}
656+
657+
return floatVector;
658+
}
591659
}

dotnet/src/Connectors/Connectors.Memory.Weaviate/WeaviateConstants.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ internal sealed class WeaviateConstants
1919
/// <summary>Score property name in Weaviate.</summary>
2020
internal const string ScorePropertyName = "distance";
2121

22+
/// <summary>Score property name for hybrid search in Weaviate.</summary>
23+
internal const string HybridScorePropertyName = "score";
24+
2225
/// <summary>Additional properties property name in Weaviate.</summary>
2326
internal const string AdditionalPropertiesPropertyName = "_additional";
2427
}

dotnet/src/Connectors/Connectors.Memory.Weaviate/WeaviateVectorStoreCollectionSearchMapping.cs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// Copyright (c) Microsoft. All rights reserved.
22

3+
using System.Text.Json;
34
using System.Text.Json.Nodes;
45

56
namespace Microsoft.SemanticKernel.Connectors.Weaviate;
@@ -12,11 +13,17 @@ internal static class WeaviateVectorStoreCollectionSearchMapping
1213
/// <summary>
1314
/// Maps vector search result to the format, which is processable by <see cref="WeaviateVectorStoreRecordMapper{TRecord}"/>.
1415
/// </summary>
15-
public static (JsonObject StorageModel, double? Score) MapSearchResult(JsonNode result)
16+
public static (JsonObject StorageModel, double? Score) MapSearchResult(JsonNode result, string scorePropertyName)
1617
{
1718
var additionalProperties = result[WeaviateConstants.AdditionalPropertiesPropertyName];
1819

19-
var score = additionalProperties?[WeaviateConstants.ScorePropertyName]?.GetValue<double>();
20+
var scoreProperty = additionalProperties?[scorePropertyName];
21+
double? score = scoreProperty?.GetValueKind() switch
22+
{
23+
JsonValueKind.Number => scoreProperty.GetValue<double>(),
24+
JsonValueKind.String => double.Parse(scoreProperty.GetValue<string>()),
25+
_ => null
26+
};
2027

2128
var id = additionalProperties?[WeaviateConstants.ReservedKeyPropertyName];
2229
var vectors = additionalProperties?[WeaviateConstants.ReservedVectorPropertyName];

dotnet/src/Connectors/Connectors.Memory.Weaviate/WeaviateVectorStoreRecordCollection.cs

Lines changed: 61 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ namespace Microsoft.SemanticKernel.Connectors.Weaviate;
2222
/// </summary>
2323
/// <typeparam name="TRecord">The data model to use for adding, updating and retrieving data from storage.</typeparam>
2424
#pragma warning disable CA1711 // Identifiers should not have incorrect suffix
25-
public sealed class WeaviateVectorStoreRecordCollection<TRecord> : IVectorStoreRecordCollection<Guid, TRecord>
25+
public sealed class WeaviateVectorStoreRecordCollection<TRecord> : IVectorStoreRecordCollection<Guid, TRecord>, IKeywordVectorizedHybridSearch<TRecord>
2626
#pragma warning restore CA1711 // Identifiers should not have incorrect suffix
2727
{
2828
/// <summary>The name of this database for telemetry purposes.</summary>
@@ -86,6 +86,9 @@ public sealed class WeaviateVectorStoreRecordCollection<TRecord> : IVectorStoreR
8686
/// <summary>The default options for vector search.</summary>
8787
private static readonly VectorSearchOptions s_defaultVectorSearchOptions = new();
8888

89+
/// <summary>The default options for hybrid vector search.</summary>
90+
private static readonly KeywordVectorizedHybridSearchOptions s_defaultKeywordVectorizedHybridSearchOptions = new();
91+
8992
/// <summary><see cref="HttpClient"/> that is used to interact with Weaviate API.</summary>
9093
private readonly HttpClient _httpClient;
9194

@@ -343,39 +346,63 @@ public async Task<VectorSearchResults<TRecord>> VectorizedSearchAsync<TVector>(
343346
{
344347
const string OperationName = "VectorSearch";
345348

346-
Verify.NotNull(vector);
349+
VerifyVectorParam(vector);
347350

348-
var vectorType = vector.GetType();
351+
var searchOptions = options ?? s_defaultVectorSearchOptions;
352+
var vectorProperty = this._propertyReader.GetVectorPropertyOrFirst(searchOptions.VectorPropertyName);
349353

350-
if (!s_supportedVectorTypes.Contains(vectorType))
351-
{
352-
throw new NotSupportedException(
353-
$"The provided vector type {vectorType.FullName} is not supported by the Azure CosmosDB NoSQL connector. " +
354-
$"Supported types are: {string.Join(", ", s_supportedVectorTypes.Select(l => l.FullName))}");
355-
}
354+
var vectorPropertyName = this._propertyReader.GetJsonPropertyName(vectorProperty.DataModelPropertyName);
355+
var fields = this._propertyReader.DataPropertyJsonNames;
356356

357-
var searchOptions = options ?? s_defaultVectorSearchOptions;
358-
var vectorProperty = this.GetVectorPropertyForSearch(searchOptions.VectorPropertyName);
357+
var query = WeaviateVectorStoreRecordCollectionQueryBuilder.BuildSearchQuery(
358+
vector,
359+
this.CollectionName,
360+
vectorPropertyName,
361+
this._propertyReader.KeyPropertyName,
362+
s_jsonSerializerOptions,
363+
searchOptions,
364+
this._propertyReader.JsonPropertyNamesMap,
365+
this._propertyReader.VectorPropertyJsonNames,
366+
this._propertyReader.DataPropertyJsonNames);
359367

360-
if (vectorProperty is null)
361-
{
362-
throw new InvalidOperationException("The collection does not have any vector properties, so vector search is not possible.");
363-
}
368+
return await this.ExecuteQueryAsync(query, searchOptions.IncludeVectors, WeaviateConstants.ScorePropertyName, OperationName, cancellationToken).ConfigureAwait(false);
369+
}
370+
371+
/// <inheritdoc />
/// <remarks>
/// Builds a hybrid search query from the vector and the space-joined keywords, then hands it to
/// ExecuteQueryAsync together with the hybrid score property name.
/// </remarks>
372+
public async Task<VectorSearchResults<TRecord>> KeywordVectorizedHybridSearch<TVector>(TVector vector, ICollection<string> keywords, KeywordVectorizedHybridSearchOptions? options = null, CancellationToken cancellationToken = default)
373+
{
374+
const string OperationName = "HybridSearch";
375+
376+
// Validate the vector before any query is built; VerifyVectorParam returns nothing and only throws on bad input.
VerifyVectorParam(vector);
377+
378+
// Fall back to the shared default options when the caller passes none, then resolve the
// vector property and the text property the search should target.
var searchOptions = options ?? s_defaultKeywordVectorizedHybridSearchOptions;
379+
var vectorProperty = this._propertyReader.GetVectorPropertyOrFirst(searchOptions.VectorPropertyName);
380+
var textDataProperty = this._propertyReader.GetFullTextDataPropertyOrSingle(searchOptions.FullTextPropertyName);
364381

365382
// Translate the data model property names into the JSON property names used in the query.
var vectorPropertyName = this._propertyReader.GetJsonPropertyName(vectorProperty.DataModelPropertyName);
383+
var textDataPropertyName = this._propertyReader.GetJsonPropertyName(textDataProperty.DataModelPropertyName);
366384
// NOTE(review): 'fields' is not referenced again in this method; DataPropertyJsonNames is passed directly below.
var fields = this._propertyReader.DataPropertyJsonNames;
367385

368-
var query = WeaviateVectorStoreRecordCollectionQueryBuilder.BuildSearchQuery(
386+
var query = WeaviateVectorStoreRecordCollectionQueryBuilder.BuildHybridSearchQuery(
369387
vector,
388+
// The keywords are collapsed into a single space-separated string for the query builder.
// NOTE(review): keywords is not null-checked here; string.Join presumably throws ArgumentNullException for a null collection - confirm.
string.Join(" ", keywords),
370389
this.CollectionName,
371390
vectorPropertyName,
372391
this._propertyReader.KeyPropertyName,
392+
textDataPropertyName,
373393
s_jsonSerializerOptions,
374394
searchOptions,
375395
this._propertyReader.JsonPropertyNamesMap,
376396
this._propertyReader.VectorPropertyJsonNames,
377397
this._propertyReader.DataPropertyJsonNames);
378398

399+
// The score is read from WeaviateConstants.HybridScorePropertyName for this operation.
return await this.ExecuteQueryAsync(query, searchOptions.IncludeVectors, WeaviateConstants.HybridScorePropertyName, OperationName, cancellationToken).ConfigureAwait(false);
400+
}
401+
402+
#region private
403+
404+
private async Task<VectorSearchResults<TRecord>> ExecuteQueryAsync(string query, bool includeVectors, string scorePropertyName, string operationName, CancellationToken cancellationToken)
405+
{
379406
using var request = new WeaviateVectorSearchRequest(query).Build();
380407

381408
var (responseModel, content) = await this.ExecuteRequestWithResponseContentAsync<WeaviateVectorSearchResponse>(request, cancellationToken).ConfigureAwait(false);
@@ -388,28 +415,26 @@ public async Task<VectorSearchResults<TRecord>> VectorizedSearchAsync<TVector>(
388415
{
389416
VectorStoreType = DatabaseName,
390417
CollectionName = this.CollectionName,
391-
OperationName = OperationName
418+
OperationName = operationName
392419
};
393420
}
394421

395422
var mappedResults = collectionResults.Where(x => x is not null).Select(result =>
396423
{
397-
var (storageModel, score) = WeaviateVectorStoreCollectionSearchMapping.MapSearchResult(result!);
424+
var (storageModel, score) = WeaviateVectorStoreCollectionSearchMapping.MapSearchResult(result!, scorePropertyName);
398425

399426
var record = VectorStoreErrorHandler.RunModelConversion(
400427
DatabaseName,
401428
this.CollectionName,
402-
OperationName,
403-
() => this._mapper.MapFromStorageToDataModel(storageModel, new() { IncludeVectors = searchOptions.IncludeVectors }));
429+
operationName,
430+
() => this._mapper.MapFromStorageToDataModel(storageModel, new() { IncludeVectors = includeVectors }));
404431

405432
return new VectorSearchResult<TRecord>(record, score);
406433
});
407434

408435
return new VectorSearchResults<TRecord>(mappedResults.ToAsyncEnumerable());
409436
}
410437

411-
#region private
412-
413438
private Task<HttpResponseMessage> ExecuteRequestAsync(HttpRequestMessage request, CancellationToken cancellationToken)
414439
{
415440
request.RequestUri = new Uri(this._endpoint, request.RequestUri!);
@@ -469,33 +494,6 @@ private async Task<T> RunOperationAsync<T>(string operationName, Func<Task<T>> o
469494
}
470495
}
471496

472-
/// <summary>
473-
/// Get vector property to use for a search by using the storage name for the field name from options
474-
/// if available, and falling back to the first vector property in <typeparamref name="TRecord"/> if not.
475-
/// </summary>
476-
/// <param name="vectorFieldName">The vector field name.</param>
477-
/// <exception cref="InvalidOperationException">Thrown if the provided field name is not a valid field name.</exception>
478-
private VectorStoreRecordVectorProperty? GetVectorPropertyForSearch(string? vectorFieldName)
479-
{
480-
// If vector property name is provided in options, try to find it in schema or throw an exception.
481-
if (!string.IsNullOrWhiteSpace(vectorFieldName))
482-
{
483-
// Check vector properties by data model property name.
484-
var vectorProperty = this._propertyReader.VectorProperties
485-
.FirstOrDefault(l => l.DataModelPropertyName.Equals(vectorFieldName, StringComparison.Ordinal));
486-
487-
if (vectorProperty is not null)
488-
{
489-
return vectorProperty;
490-
}
491-
492-
throw new InvalidOperationException($"The {typeof(TRecord).FullName} type does not have a vector property named '{vectorFieldName}'.");
493-
}
494-
495-
// If vector property is not provided in options, return first vector property from schema.
496-
return this._propertyReader.VectorProperty;
497-
}
498-
499497
/// <summary>
500498
/// Returns custom mapper, generic data model mapper or default record mapper.
501499
/// </summary>
@@ -528,5 +526,19 @@ private IVectorStoreRecordMapper<TRecord, JsonObject> InitializeMapper()
528526
s_jsonSerializerOptions);
529527
}
530528

529+
/// <summary>
/// Checks that the vector passed to a search method has a runtime type listed in <c>s_supportedVectorTypes</c>.
/// </summary>
/// <typeparam name="TVector">The type of the vector supplied by the caller.</typeparam>
/// <param name="vector">The vector to check.</param>
/// <exception cref="NotSupportedException">Thrown when the runtime type of <paramref name="vector"/> is not in <c>s_supportedVectorTypes</c>.</exception>
private static void VerifyVectorParam<TVector>(TVector vector)
530+
{
531+
// Verify.NotNull is defined outside this view; presumably it throws when the vector is null.
Verify.NotNull(vector);
532+
533+
// Use the runtime type rather than TVector, so the check reflects the actual value passed in.
var vectorType = vector.GetType();
534+
535+
// s_supportedVectorTypes is declared outside this view; the message below lists its entries by full name.
if (!s_supportedVectorTypes.Contains(vectorType))
536+
{
537+
throw new NotSupportedException(
538+
$"The provided vector type {vectorType.FullName} is not supported by the Weaviate connector. " +
539+
$"Supported types are: {string.Join(", ", s_supportedVectorTypes.Select(l => l.FullName))}");
540+
}
541+
}
542+
531543
#endregion
532544
}

0 commit comments

Comments
 (0)