Skip to content

.Net: [MEVD] Added support for unnamed vectors in Weaviate connector #11454

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ internal sealed class WeaviateGetCollectionsRequest

/// <summary>
/// Builds the HTTP GET request for <c>ApiRoute</c>.
/// </summary>
/// <returns>The <see cref="HttpRequestMessage"/> returned by <c>HttpRequest.CreateGetRequest</c>.</returns>
public HttpRequestMessage Build()
{
    // The method previously contained two consecutive return statements, which left the
    // second one unreachable. Only the route-only call (the later, replacement line) is kept.
    // NOTE(review): assumes the overload without a payload is the intended one — confirm.
    return HttpRequest.CreateGetRequest(ApiRoute);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,13 @@ public WeaviateCollectionSchema(string collectionName)

/// <summary>Collection properties, serialized as the <c>properties</c> JSON member. Starts as an empty list.</summary>
[JsonPropertyName("properties")]
public List<WeaviateCollectionSchemaProperty> Properties { get; set; } = [];

/// <summary>Vectorizer, serialized as the <c>vectorizer</c> JSON member. Defaults to <see cref="WeaviateConstants.DefaultVectorizer"/>.</summary>
[JsonPropertyName("vectorizer")]
public string Vectorizer { get; set; } = WeaviateConstants.DefaultVectorizer;

/// <summary>Vector index type, serialized as the <c>vectorIndexType</c> JSON member. <see langword="null"/> unless assigned.</summary>
[JsonPropertyName("vectorIndexType")]
public string? VectorIndexType { get; set; }

/// <summary>Vector index configuration, serialized as the <c>vectorIndexConfig</c> JSON member. <see langword="null"/> unless assigned.</summary>
[JsonPropertyName("vectorIndexConfig")]
public WeaviateCollectionSchemaVectorIndexConfig? VectorIndexConfig { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@ namespace Microsoft.SemanticKernel.Connectors.Weaviate;

internal sealed class WeaviateCollectionSchemaVectorConfig
{
private const string DefaultVectorizer = "none";

[JsonPropertyName("vectorizer")]
public Dictionary<string, object?> Vectorizer { get; set; } = new() { [DefaultVectorizer] = null };
public Dictionary<string, object?> Vectorizer { get; set; } = new() { [WeaviateConstants.DefaultVectorizer] = null };

[JsonPropertyName("vectorIndexType")]
public string? VectorIndexType { get; set; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ internal sealed class WeaviateConstants
/// <summary>Reserved vector property name in Weaviate.</summary>
internal const string ReservedVectorPropertyName = "vectors";

/// <summary>Reserved single vector property name in Weaviate.</summary>
internal const string ReservedSingleVectorPropertyName = "vector";

/// <summary>Collection property name in Weaviate.</summary>
internal const string CollectionPropertyName = "class";

Expand All @@ -27,4 +30,7 @@ internal sealed class WeaviateConstants

/// <summary>Additional properties property name in Weaviate.</summary>
internal const string AdditionalPropertiesPropertyName = "_additional";

/// <summary>Default vectorizer for vector properties in Weaviate.</summary>
internal const string DefaultVectorizer = "none";
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,59 +23,86 @@ internal sealed class WeaviateDynamicDataModelMapper : IWeaviateMapper<Dictionar
/// <summary>A <see cref="JsonSerializerOptions"/> for serialization/deserialization of record properties.</summary>
private readonly JsonSerializerOptions _jsonSerializerOptions;

/// <summary>Gets a value indicating whether the vectors in the store are named and multiple vectors are supported, or whether there is just a single unnamed vector in Weaviate collection.</summary>
private readonly bool _hasNamedVectors;

/// <summary>Gets the vector property name used in the Weaviate collection.</summary>
private readonly string _vectorPropertyName;

/// <summary>
/// Initializes a new instance of the <see cref="WeaviateDynamicDataModelMapper"/> class.
/// </summary>
/// <param name="collectionName">The name of the Weaviate collection.</param>
/// <param name="hasNamedVectors">
/// <see langword="true"/> when the vectors in the store are named and multiple vectors are supported;
/// <see langword="false"/> when there is just a single unnamed vector in the Weaviate collection.
/// </param>
/// <param name="model">The model.</param>
/// <param name="jsonSerializerOptions">A <see cref="JsonSerializerOptions"/> for serialization/deserialization of record properties.</param>
public WeaviateDynamicDataModelMapper(
    string collectionName,
    bool hasNamedVectors,
    VectorStoreRecordModel model,
    JsonSerializerOptions jsonSerializerOptions)
{
    // Pick the storage property that holds vectors: the named-vectors property
    // or the single-vector property, depending on the collection mode.
    if (hasNamedVectors)
    {
        this._vectorPropertyName = WeaviateConstants.ReservedVectorPropertyName;
    }
    else
    {
        this._vectorPropertyName = WeaviateConstants.ReservedSingleVectorPropertyName;
    }

    this._jsonSerializerOptions = jsonSerializerOptions;
    this._model = model;
    this._hasNamedVectors = hasNamedVectors;
    this._collectionName = collectionName;
}

/// <summary>
/// Maps a dynamic record to the Weaviate storage model.
/// </summary>
/// <param name="dataModel">
/// Record values keyed by model property name. The key property is read through the indexer and
/// must be present; data and vector properties that are missing from the dictionary are skipped.
/// </param>
/// <returns>
/// A <see cref="JsonObject"/> containing the collection name, the key, the data properties and the
/// vector payload. With named vectors the payload is an object keyed by vector storage name;
/// otherwise it is the single serialized vector, or <see langword="null"/> when the vector is absent or null.
/// </returns>
public JsonObject MapFromDataToStorageModel(Dictionary<string, object?> dataModel)
{
    Verify.NotNull(dataModel);

    // Transform generic data model to Weaviate object model.
    var keyNode = JsonSerializer.SerializeToNode(dataModel[this._model.KeyProperty.ModelName]);

    // Populate data properties.
    var dataNode = new JsonObject();
    foreach (var property in this._model.DataProperties)
    {
        if (dataModel.TryGetValue(property.ModelName, out var dataValue))
        {
            dataNode[property.StorageName] = dataValue is null
                ? null
                : JsonSerializer.SerializeToNode(dataValue, property.Type, this._jsonSerializerOptions);
        }
    }

    // Populate vector properties.
    JsonNode? vectorNode = null;

    if (this._hasNamedVectors)
    {
        // Named vectors: one entry per vector property, keyed by its storage name.
        var namedVectors = new JsonObject();
        foreach (var property in this._model.VectorProperties)
        {
            if (dataModel.TryGetValue(property.ModelName, out var namedVectorValue))
            {
                namedVectors[property.StorageName] = namedVectorValue is null
                    ? null
                    : JsonSerializer.SerializeToNode(namedVectorValue, property.Type, this._jsonSerializerOptions);
            }
        }

        vectorNode = namedVectors;
    }
    else
    {
        // Single unnamed vector: the serialized vector itself is the payload.
        var vectorProperty = this._model.VectorProperty;
        if (dataModel.TryGetValue(vectorProperty.ModelName, out var singleVectorValue))
        {
            vectorNode = singleVectorValue is null
                ? null
                : JsonSerializer.SerializeToNode(singleVectorValue, vectorProperty.Type, this._jsonSerializerOptions);
        }
    }

    return new JsonObject
    {
        { WeaviateConstants.CollectionPropertyName, JsonValue.Create(this._collectionName) },
        { WeaviateConstants.ReservedKeyPropertyName, keyNode },
        { WeaviateConstants.ReservedDataPropertyName, dataNode },
        { this._vectorPropertyName, vectorNode },
    };
}

Expand Down Expand Up @@ -109,13 +136,25 @@ public JsonObject MapFromDataToStorageModel(Dictionary<string, object?> dataMode
// Populate vector properties.
if (options.IncludeVectors)
{
foreach (var property in this._model.VectorProperties)
if (this._hasNamedVectors)
{
foreach (var property in this._model.VectorProperties)
{
var jsonObject = storageModel[WeaviateConstants.ReservedVectorPropertyName] as JsonObject;

if (jsonObject is not null && jsonObject.TryGetPropertyValue(property.StorageName, out var vectorValue))
{
result.Add(property.ModelName, vectorValue.Deserialize(property.Type, this._jsonSerializerOptions));
}
}
}
else
{
var jsonObject = storageModel[WeaviateConstants.ReservedVectorPropertyName] as JsonObject;
var jsonNode = storageModel[WeaviateConstants.ReservedSingleVectorPropertyName];

if (jsonObject is not null && jsonObject.TryGetPropertyValue(property.StorageName, out var vectorValue))
if (jsonNode is not null)
{
result.Add(property.ModelName, vectorValue.Deserialize(property.Type, this._jsonSerializerOptions));
result.Add(this._model.VectorProperty.ModelName, jsonNode.Deserialize(this._model.VectorProperty.Type, this._jsonSerializerOptions));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,51 @@

namespace Microsoft.SemanticKernel.Connectors.Weaviate;

internal class WeaviateModelBuilder() : VectorStoreRecordJsonModelBuilder(s_modelBuildingOptions)
internal class WeaviateModelBuilder(bool hasNamedVectors) : VectorStoreRecordJsonModelBuilder(GetModelBuildingOptions(hasNamedVectors))
{
/// <summary>
/// Creates the model building options used by the Weaviate model builder.
/// </summary>
/// <param name="hasNamedVectors">
/// Whether the collection uses named vectors. The value is assigned to
/// <c>SupportsMultipleVectors</c>, so multiple vector properties are only allowed with named vectors.
/// </param>
/// <returns>A new <see cref="VectorStoreRecordModelBuildingOptions"/> instance.</returns>
private static VectorStoreRecordModelBuildingOptions GetModelBuildingOptions(bool hasNamedVectors)
{
    return new()
    {
        RequiresAtLeastOneVector = false,
        SupportsMultipleKeys = false,
        SupportsMultipleVectors = hasNamedVectors,

        SupportedKeyPropertyTypes = [typeof(Guid)],
        SupportedDataPropertyTypes = s_supportedDataTypes,
        SupportedEnumerableDataPropertyElementTypes = s_supportedDataTypes,
        SupportedVectorPropertyTypes = s_supportedVectorTypes,

        UsesExternalSerializer = true,
        ReservedKeyStorageName = WeaviateConstants.ReservedKeyPropertyName
    };
}

/// <summary>
/// Types accepted for data properties. The same set is used for the element types
/// of enumerable data properties.
/// </summary>
private static readonly HashSet<Type> s_supportedDataTypes = new()
{
    typeof(string),
    typeof(bool), typeof(bool?),
    typeof(int), typeof(int?),
    typeof(long), typeof(long?),
    typeof(short), typeof(short?),
    typeof(byte), typeof(byte?),
    typeof(float), typeof(float?),
    typeof(double), typeof(double?),
    typeof(decimal), typeof(decimal?),
    typeof(DateTime), typeof(DateTime?),
    typeof(DateTimeOffset), typeof(DateTimeOffset?),
    typeof(Guid), typeof(Guid?),
};

internal static readonly HashSet<Type> s_supportedVectorTypes =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ public IVectorStoreRecordCollection<TKey, TRecord> GetCollection<TKey, TRecord>(
{
VectorStoreRecordDefinition = vectorStoreRecordDefinition,
Endpoint = this._options.Endpoint,
ApiKey = this._options.ApiKey
ApiKey = this._options.ApiKey,
HasNamedVectors = this._options.HasNamedVectors
}) as IVectorStoreRecordCollection<TKey, TRecord>;

return recordCollection;
Expand All @@ -87,6 +88,9 @@ public async IAsyncEnumerable<string> ListCollectionNamesAsync([EnumeratorCancel
try
{
var httpResponse = await this._httpClient.SendAsync(request, HttpCompletionOption.ResponseContentRead, cancellationToken).ConfigureAwait(false);

httpResponse.EnsureSuccessStatusCode();

var httpResponseContent = await httpResponse.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);

collectionsResponse = JsonSerializer.Deserialize<WeaviateGetCollectionsResponse>(httpResponseContent)!;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ internal static class WeaviateVectorStoreCollectionCreateMapping
/// Maps record type properties to Weaviate collection schema for collection creation.
/// </summary>
/// <param name="collectionName">The name of the vector store collection.</param>
/// <param name="hasNamedVectors">A value indicating whether the vectors in the store are named and multiple vectors are supported, or whether there is just a single unnamed vector in the Weaviate collection.</param>
/// <param name="model">The model.</param>
/// <returns>Weaviate collection schema.</returns>
public static WeaviateCollectionSchema MapToSchema(string collectionName, VectorStoreRecordModel model)
public static WeaviateCollectionSchema MapToSchema(string collectionName, bool hasNamedVectors, VectorStoreRecordModel model)
{
var schema = new WeaviateCollectionSchema(collectionName);

Expand All @@ -38,16 +39,28 @@ public static WeaviateCollectionSchema MapToSchema(string collectionName, Vector
}

// Handle vector properties.
foreach (var property in model.VectorProperties)
if (hasNamedVectors)
{
schema.VectorConfigurations.Add(property.StorageName, new WeaviateCollectionSchemaVectorConfig
foreach (var property in model.VectorProperties)
{
VectorIndexType = MapIndexKind(property.IndexKind, property.StorageName),
VectorIndexConfig = new WeaviateCollectionSchemaVectorIndexConfig
schema.VectorConfigurations.Add(property.StorageName, new WeaviateCollectionSchemaVectorConfig
{
Distance = MapDistanceFunction(property.DistanceFunction, property.StorageName)
}
});
VectorIndexType = MapIndexKind(property.IndexKind, property.StorageName),
VectorIndexConfig = new WeaviateCollectionSchemaVectorIndexConfig
{
Distance = MapDistanceFunction(property.DistanceFunction, property.StorageName)
}
});
}
}
else
{
var vectorProperty = model.VectorProperty;
schema.VectorIndexType = MapIndexKind(vectorProperty.IndexKind, vectorProperty.StorageName);
schema.VectorIndexConfig = new WeaviateCollectionSchemaVectorIndexConfig
{
Distance = MapDistanceFunction(vectorProperty.DistanceFunction, vectorProperty.StorageName)
};
}

return schema;
Expand Down Expand Up @@ -110,7 +123,7 @@ private static string MapDistanceFunction(string? distanceFunction, string vecto
DistanceFunction.EuclideanSquaredDistance => EuclideanSquared,
DistanceFunction.Hamming => Hamming,
DistanceFunction.ManhattanDistance => Manhattan,
_ => throw new InvalidOperationException(
_ => throw new NotSupportedException(
$"Distance function '{distanceFunction}' on {nameof(VectorStoreRecordVectorProperty)} '{vectorPropertyName}' is not supported by the Weaviate VectorStore. " +
$"Supported distance functions: {string.Join(", ",
DistanceFunction.CosineDistance,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ internal static class WeaviateVectorStoreCollectionSearchMapping
/// <summary>
/// Maps vector search result to the format, which is processable by <see cref="WeaviateVectorStoreRecordMapper{TRecord}"/>.
/// </summary>
public static (JsonObject StorageModel, double? Score) MapSearchResult(JsonNode result, string scorePropertyName)
public static (JsonObject StorageModel, double? Score) MapSearchResult(
JsonNode result,
string scorePropertyName,
bool hasNamedVectors)
{
var additionalProperties = result[WeaviateConstants.AdditionalPropertiesPropertyName];

Expand All @@ -25,14 +28,18 @@ public static (JsonObject StorageModel, double? Score) MapSearchResult(JsonNode
_ => null
};

var vectorPropertyName = hasNamedVectors ?
WeaviateConstants.ReservedVectorPropertyName :
WeaviateConstants.ReservedSingleVectorPropertyName;

var id = additionalProperties?[WeaviateConstants.ReservedKeyPropertyName];
var vectors = additionalProperties?[WeaviateConstants.ReservedVectorPropertyName];
var vectors = additionalProperties?[vectorPropertyName];

var storageModel = new JsonObject
{
{ WeaviateConstants.ReservedKeyPropertyName, id?.DeepClone() },
{ WeaviateConstants.ReservedDataPropertyName, result?.DeepClone() },
{ WeaviateConstants.ReservedVectorPropertyName, vectors?.DeepClone() },
{ vectorPropertyName, vectors?.DeepClone() },
};

return (storageModel, score);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,11 @@ public sealed class WeaviateVectorStoreOptions
/// This parameter is optional because authentication may be disabled in local clusters for testing purposes.
/// </remarks>
public string? ApiKey { get; set; } = null;

/// <summary>
/// Gets or sets a value indicating whether the vectors in the store are named and multiple vectors are supported,
/// or whether there is just a single unnamed vector in the Weaviate collection.
/// </summary>
/// <remarks>
/// Defaults to <see langword="true"/> (multiple named vectors).
/// See <see href="https://weaviate.io/developers/weaviate/config-refs/schema/multi-vector"/>.
/// </remarks>
public bool HasNamedVectors { get; set; } = true;
}
Loading
Loading