From d41a1fa22c2cedca199112d33fed869b5732e395 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Wed, 5 Jun 2024 15:16:00 +0100 Subject: [PATCH 01/48] .Net: Add Updated Memory Connector ADR and IMemoryRecordService interface with related classes. (#6364) Adding an ADR for evolving memory connectors. Related work item: #5887 ### Motivation and Context See included ADR for more motivation and context. ### Description See included ADR for more more info. ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- .../0045-updated-vector-store-design.md | 880 ++++++++++++++++++ .../IMemoryRecordService{TKey,TDataModel}.cs | 80 ++ .../MemoryRecordDataAttribute.cs | 25 + .../MemoryRecordKeyAttribute.cs | 15 + .../MemoryRecordVectorAttribute.cs | 15 + .../RecordOptions/DeleteRecordOptions.cs | 33 + .../Memory/RecordOptions/GetRecordOptions.cs | 39 + .../RecordOptions/UpsertRecordOptions.cs | 33 + 8 files changed, 1120 insertions(+) create mode 100644 docs/decisions/0045-updated-vector-store-design.md create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordDataAttribute.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordKeyAttribute.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordVectorAttribute.cs create mode 100644 
dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/DeleteRecordOptions.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/GetRecordOptions.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/UpsertRecordOptions.cs diff --git a/docs/decisions/0045-updated-vector-store-design.md b/docs/decisions/0045-updated-vector-store-design.md new file mode 100644 index 000000000000..f1c0d422e5f0 --- /dev/null +++ b/docs/decisions/0045-updated-vector-store-design.md @@ -0,0 +1,880 @@ +--- +# These are optional elements. Feel free to remove any of them. +status: proposed +contact: westey-m +date: 2024-06-05 +deciders: sergeymenshykh, markwallace, rbarreto, dmytrostruk, westey-m, matthewbolanos, eavanvalkenburg +consulted: stephentoub, dluc, ajcvickers, roji +informed: +--- + +# Updated Memory Connector Design + +## Context and Problem Statement + +Semantic Kernel has a collection of connectors to popular Vector databases e.g. Azure AI Search, Chroma, Milvus, ... +Each Memory connector implements a memory abstraction defined by Semantic Kernel and allows developers to easily integrate Vector databases into their applications. +The current abstractions are experimental and the purpose of this ADR is to progress the design of the abstractions so that they can graduate to non experimental status. + +### Problems with current design + +1. The `IMemoryStore` interface has four responsibilities with different cardinalities. +2. The `IMemoryStore` interface only supports a fixed schema for data storage, retrieval and search, which limits its usability by customers with existing data sets. +2. The `IMemoryStore` implementations are opinionated around key encoding / decoding and collection name sanitization, which limits its usability by customers with existing data sets. 
+ +Responsibilities: + +|Functional Area|Cardinality|Significance to Semantic Kernel| +|-|-|-| +|Collection/Index create|An implementation per store type and model|Valuable when building a store and adding data| +|Collection/Index list names, exists and delete|An implementation per store type|Valuable when building a store and adding data| +|Data Storage and Retrieval|An implementation per store type|Valuable when building a store and adding data| +|Vector Search|An implementation per store type, model and search type|Valuable for many scenarios including RAG, finding contradictory facts based on user input, finding similar memories to merge, etc.| + + +### Memory Store Today +```cs +interface IMemoryStore +{ + // Collection / Index Management + Task CreateCollectionAsync(string collectionName, CancellationToken cancellationToken = default); + IAsyncEnumerable GetCollectionsAsync(CancellationToken cancellationToken = default); + Task DoesCollectionExistAsync(string collectionName, CancellationToken cancellationToken = default); + Task DeleteCollectionAsync(string collectionName, CancellationToken cancellationToken = default); + + // Data Storage and Retrieval + Task UpsertAsync(string collectionName, MemoryRecord record, CancellationToken cancellationToken = default); + IAsyncEnumerable UpsertBatchAsync(string collectionName, IEnumerable records, CancellationToken cancellationToken = default); + Task GetAsync(string collectionName, string key, bool withEmbedding = false, CancellationToken cancellationToken = default); + IAsyncEnumerable GetBatchAsync(string collectionName, IEnumerable keys, bool withVectors = false, CancellationToken cancellationToken = default); + Task RemoveAsync(string collectionName, string key, CancellationToken cancellationToken = default); + Task RemoveBatchAsync(string collectionName, IEnumerable keys, CancellationToken cancellationToken = default); + + // Vector Search + IAsyncEnumerable<(MemoryRecord, double)> GetNearestMatchesAsync( + 
string collectionName, + ReadOnlyMemory embedding, + int limit, + double minRelevanceScore = 0.0, + bool withVectors = false, + CancellationToken cancellationToken = default); + + Task<(MemoryRecord, double)?> GetNearestMatchAsync( + string collectionName, + ReadOnlyMemory embedding, + double minRelevanceScore = 0.0, + bool withEmbedding = false, + CancellationToken cancellationToken = default); +} +``` + +### Actions + +1. The `IMemoryStore` should be split into four different interfaces, one for each responsibility. +2. The **Data Storage and Retrieval** and **Vector Search** areas should allow typed access to data and support any schema that is currently available in the customer's data store. +3. The collection / index create functionality should allow developers to create their own implementations and support creating first party collections for built in functionality. Each implementation would be for a specific schema and data store type. +4. The collection / index list/exists/delete functionality should allow management of any collection regardless of schema. There should be one implementation for each data store type. +5. Remove opinionated behaviors from connectors. The opinionated behavior limits the ability of these connectors to be used with pre-existing vector databases. As far as possible these behaviors should be moved into decorators or be injectable. Examples of opinionated behaviors: + 1. The AzureAISearch connector encodes keys before storing and decodes them after retrieval since keys in Azure AI Search supports a limited set of characters. + 2. The AzureAISearch connector sanitizes collection names before using them, since Azure AI Search supports a limited set of characters. + 3. The Redis connector prepends the collection name on to the front of keys before storing records and also registers the collection name as a prefix for records to be indexed by the index. + +### Non-functional requirements for new connectors +1. 
Ensure all connectors are throwing the same exceptions consistently with data about the request made provided in a consistent manner. +2. Add consistent telemetry for all connectors. +3. As far as possible integration tests should be runnable on build server. + +### New Designs + +The separation between collection/index management and record management. + +```mermaid +--- +title: SK Collection/Index and record management +--- +classDiagram + note for IMemoryRecordService "Can manage records for any scenario" + note for IMemoryCollectionCreateService "Can create collections and\nindexes" + note for IMemoryCollectionUpdateService "Can retrieve/delete any collections and\nindexes" + + namespace SKAbstractions{ + class IMemoryCollectionCreateService{ + <<interface>> + +CreateCollection + } + + class IMemoryCollectionUpdateService{ + <<interface>> + +GetCollectionNames + +CollectionExists + +DeleteCollection + } + + class IMemoryRecordService~TModel~{ + <<interface>> + +Upsert(TModel record) string + +UpsertBatch(TModel record) string + +Get(string key) TModel + +GetBatch(string[] keys) TModel[] + +Delete(string key) + +DeleteBatch(string[] keys) + } + } + + namespace AzureAIMemory{ + class AzureAISearchMemoryCollectionCreateService{ + } + + class AzureAISearchMemoryCollectionUpdateService{ + } + + class AzureAISearchMemoryRecordService{ + } + } + + namespace RedisMemory{ + class RedisMemoryCollectionCreateService{ + } + + class RedisMemoryCollectionUpdateService{ + } + + class RedisMemoryRecordService{ + } + } + + IMemoryCollectionCreateService <|-- AzureAISearchMemoryCollectionCreateService + IMemoryCollectionUpdateService <|-- AzureAISearchMemoryCollectionUpdateService + IMemoryRecordService <|-- AzureAISearchMemoryRecordService + + IMemoryCollectionCreateService <|-- RedisMemoryCollectionCreateService + IMemoryCollectionUpdateService <|-- RedisMemoryCollectionUpdateService + IMemoryRecordService <|-- RedisMemoryRecordService +``` + +How to use your own schema with core sk functionality. 
+ +```mermaid +--- +title: Chat History Break Glass +--- +classDiagram + note for IMemoryRecordService "Can manage records\nfor any scenario" + note for IMemoryCollectionCreateService "Can create collections\nand indexes" + note for IMemoryCollectionUpdateService "Can retrieve/delete any\ncollections and indexes" + note for CustomerHistoryMemoryCollectionCreateService "Creates history collections and indices\nusing Customer requirements" + note for CustomerHistoryMemoryRecordService "Decorator class for IMemoryRecordService that maps\nbetween the customer model and our model" + + namespace SKAbstractions{ + class IMemoryCollectionCreateService{ + <<interface>> + +CreateCollection + } + + class IMemoryCollectionUpdateService{ + <<interface>> + +GetCollectionNames + +CollectionExists + +DeleteCollection + } + + class IMemoryRecordService~TModel~{ + <<interface>> + +Upsert(TModel record) string + +Get(string key) TModel + +Delete(string key) string + } + + class ISemanticTextMemory{ + <<interface>> + +SaveInformationAsync() + +SaveReferenceAsync() + +GetAsync() + +DeleteAsync() + +SearchAsync() + +GetCollectionsAsync() + } + } + + namespace CustomerProject{ + class CustomerHistoryModel{ + +string text + +float[] vector + +Dictionary~string, string~ properties + } + + class CustomerHistoryMemoryCollectionCreateService{ + +CreateCollection + } + + class CustomerHistoryMemoryRecordService{ + -IMemoryRecordService~CustomerHistoryModel~ _store + +Upsert(ChatHistoryModel record) string + +Get(string key) ChatHistoryModel + +Delete(string key) string + } + } + + namespace SKCore{ + class SemanticTextMemory{ + -IMemoryRecordService~ChatHistoryModel~ _MemoryRecordService + -IMemoryCollectionService _collectionsService + -ITextEmbeddingGenerationService _embeddingGenerationService + } + + class ChatHistoryPlugin{ + -ISemanticTextMemory memory + } + + class ChatHistoryModel{ + +string message + +float[] embedding + +Dictionary~string, string~ metadata + } + } + + IMemoryCollectionCreateService <|-- 
CustomerHistoryMemoryCollectionCreateService + + IMemoryRecordService <|-- CustomerHistoryMemoryRecordService + IMemoryRecordService <.. CustomerHistoryMemoryRecordService + CustomerHistoryModel <.. CustomerHistoryMemoryRecordService + ChatHistoryModel <.. CustomerHistoryMemoryRecordService + + ChatHistoryModel <.. SemanticTextMemory + IMemoryRecordService <.. SemanticTextMemory + IMemoryCollectionCreateService <.. SemanticTextMemory + + ISemanticTextMemory <.. ChatHistoryPlugin +``` + +### Vector Store Cross Store support + +A comparison of the different ways in which stores implement storage capabilities to help drive decisions: + +|Feature|Azure AI Search|Weaviate|Redis|Chroma|FAISS|Pinecone|LLamaIndex|PostgreSql|Qdrant|Milvus| +|-|-|-|-|-|-|-|-|-|-|-| +|Get Item Support|Y|Y|Y|Y||Y||Y|Y|Y| +|Batch Operation Support|Y|Y|Y|Y||Y||||Y| +|Per Item Results for Batch Operations|Y|Y|Y|N||N||||| +|Keys of upserted records|Y|Y|N3|N3||N3||||Y| +|Keys of removed records|Y||N3|N||N||||N3| +|Retrieval field selection for gets|Y||Y4|P2||N||Y|Y|Y| +|Include/Exclude Embeddings for gets|P1|Y|Y4,1|Y||N||P1|Y|N| +|Failure reasons when batch partially fails|Y|Y|Y|N||N||||| +|Is Key separate from data|N|Y|Y|Y||Y||N|Y|N| +|Can Generate Ids|N|Y|N|N||Y||Y|N|Y| +|Can Generate Embedding|Not Available Via API yet|Y|N|Client Side Abstraction|||||N|| +|Index allows text search|Y|Y|Y|Y (On Metadata by default)||||Y (with TSVECTOR field)|Y|Y| +|Allows filtering|Y|Y|Y (on TAG)|Y (On Metadata by default)||[Y](https://docs.pinecone.io/guides/indexes/configure-pod-based-indexes#selective-metadata-indexing)||Y|Y|Y| +|Allows scalar index field setup|Y|Y|Y|N||Y|||Y|Y| +|Requires scalar index field setup to filter|Y|Y|Y|N||N (on by default for all)|||N|N (can filter without index)| +|Field Differentiation|Fields|Key, Props, Vectors|Key, Fields|Key, Documents, Metadata, Vectors||Key, Metadata, SparseValues, Vectors||Fields|Key, Props(Payload), Vectors|Fields| +|Index to Collection|1 to 1|1 to 1|1 to 
many|1 to 1|-|1 to 1|-|1 to 1|1 to 1|1 to 1| +|Id Type|String|UUID|string with collection name prefix|string||string|UUID|64Bit Int / UUID / ULID|64Bit Unsigned Int / UUID|Int64 / varchar| +|Supported Vector Types|[Collection(Edm.Byte) / Collection(Edm.Single) / Collection(Edm.Half) / Collection(Edm.Int16) / Collection(Edm.SByte)](https://learn.microsoft.com/en-us/rest/api/searchservice/supported-data-types)|float32|FLOAT32 and FLOAT64|||[Rust f32](https://docs.pinecone.io/troubleshooting/embedding-values-changed-when-upserted)||[single-precision (4 byte float) / half-precision (2 byte float) / binary (1bit) / sparse vectors (4 bytes)](https://github.com/pgvector/pgvector?tab=readme-ov-file#pgvector)|UInt8 / Float32|Binary / Float32 / Float16 / BFloat16 / SparseFloat| +|Supported Distance Functions|[Cosine / dot prod / euclidean dist (l2 norm)](https://learn.microsoft.com/en-us/azure/search/vector-search-ranking#similarity-metrics-used-to-measure-nearness)|[Cosine dist / dot prod / Squared L2 dist / hamming (num of diffs) / manhattan dist](https://weaviate.io/developers/weaviate/config-refs/distances#available-distance-metrics)|[Euclidean dist (L2) / Inner prod (IP) / Cosine dist](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/vectors/)|[Squared L2 / Inner prod / Cosine similarity](https://docs.trychroma.com/guides#changing-the-distance-function)||[cosine sim / euclidean dist / dot prod](https://docs.pinecone.io/reference/api/control-plane/create_index)||[L2 dist / inner prod / cosine dist / L1 dist / Hamming dist / Jaccard dist](https://github.com/pgvector/pgvector?tab=readme-ov-file#pgvector)|[Dot prod / Cosine sim / Euclidean dist (L2) / Manhattan dist](https://qdrant.tech/documentation/concepts/search/)|[Cosine sim / Euclidean dist / Inner Prod](https://milvus.io/docs/index-vector-fields.md)| +|Supported index types|[Exhaustive KNN / 
HNSW](https://learn.microsoft.com/en-us/azure/search/vector-search-ranking#algorithms-used-in-vector-search)|[HNSW / Flat / Dynamic](https://weaviate.io/developers/weaviate/config-refs/schema/vector-index)|[HNSW / FLAT](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/vectors/#create-a-vector-field)|[HNSW not configurable](https://cookbook.chromadb.dev/core/concepts/#vector-index-hnsw-index)||[PGA](https://www.pinecone.io/blog/hnsw-not-enough/)||[HNSW / IVFFlat](https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing)|[HNSW for dense](https://qdrant.tech/documentation/concepts/indexing/#vector-index)|[In Memory: FLAT / IVF_FLAT / IVF_SQ8 / IVF_PQ / HNSW / SCANN](https://milvus.io/docs/index.md)<br>[On Disk: DiskANN](https://milvus.io/docs/disk_index.md)<br>[GPU: GPU_CAGRA / GPU_IVF_FLAT / GPU_IVF_PQ / GPU_BRUTE_FORCE](https://milvus.io/docs/gpu_index.md)| + +Footnotes: +- P = Partial Support +- 1 Only if you have the schema, to select the appropriate fields. +- 2 Supports broad categories of fields only. +- 3 Id is required in request, so can be returned if needed. +- 4 No strong typed support when specifying field list. +- HNSW = Hierarchical Navigable Small World (HNSW performs an [approximate nearest neighbor (ANN)](https://learn.microsoft.com/en-us/azure/search/vector-search-overview#approximate-nearest-neighbors) search) +- KNN = k-nearest neighbors (performs a brute-force search that scans the entire vector space) +- IVFFlat = Inverted File with Flat Compression (This index type uses approximate nearest neighbor search (ANNS) to provide fast searches) +- Weaviate Dynamic = Starts as flat and switches to HNSW if the number of objects exceeds a limit +- PGA = [Pinecone Graph Algorithm](https://www.pinecone.io/blog/hnsw-not-enough/) + +### Support for different mappers + +Mapping between data models and the storage models can also require custom logic depending on the type of data model and storage model involved. + +I'm therefore proposing that we allow mappers to be injectable for each `MemoryRecordService` instance. The interfaces for these would vary depending +on the storage models used by each vector store and any unique capabilities that each vector store may have, e.g. qdrant can operate in `single` or +`multiple named vector` modes, which means the mapper needs to know whether to set a single vector or fill a vector map. + +In addition to this, we should build first party mappers for each of the vector stores, which will cater for built in, generic models or use metadata to perform the mapping. + +### Support for different storage schemas + +The different stores vary in many ways around how data is organized. +- Some just store a record with fields on it, where fields can be a key or a data field or a vector and their type is determined at collection creation time. 
+- Others separate fields by type when interacting with the api, e.g. you have to specify a key explicitly, put metadata into a metadata dictionary and put vectors into a vector array. + +I'm proposing that we allow two ways in which to provide the information required to map data between the consumer data model and storage data model. +First is a set of configuration objects that capture the types of each field. Second would be a set of attributes that can be used to decorate the model itself +and can be converted to the configuration objects, allowing a single execution path. +Additional configuration properties can easily be added for each type of field as required, e.g. IsFilterable or IsFullTextSearchable, allowing us to also create an index from the provided configuration. + +I'm also proposing that even though similar attributes already exist in other systems, e.g. System.ComponentModel.DataAnnotations.KeyAttribute, we create our own. +We will likely require additional properties on all these attributes that are not currently supported on the existing attributes, e.g. whether a field is or +should be filterable. Requiring users to switch to new attributes later will be disruptive. + +Here is what the attributes would look like, plus a sample use case. + +```cs +sealed class MemoryRecordKeyAttribute : Attribute +{ +} +sealed class MemoryRecordDataAttribute : Attribute +{ + public bool HasEmbedding { get; set; } + public string EmbeddingPropertyName { get; set; } +} +sealed class MemoryRecordVectorAttribute : Attribute +{ +} + +public record HotelInfo( + [property: MemoryRecordKey, JsonPropertyName("hotel-id")] string HotelId, + [property: MemoryRecordData, JsonPropertyName("hotel-name")] string HotelName, + [property: MemoryRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbeddings"), JsonPropertyName("description")] string Description, + [property: MemoryRecordVector, JsonPropertyName("description-embeddings")] ReadOnlyMemory? 
DescriptionEmbeddings); +``` + +Here is what the configuration objects would look like. + +```cs +abstract class Field(string fieldName); + +sealed class KeyField(string fieldName): Field(fieldName) +{ +} +sealed class DataField(string fieldName): Field(fieldName) +{ + bool HasEmbedding; + string EmbeddingPropertyName; +} +sealed class VectorField(string fieldName): Field(fieldName) +{ +} + +sealed class MemoryRecordDefinition +{ + IReadOnlyList Fields; +} +``` + +### Notable method signature changes from existing interface + +All methods currently existing on IMemoryStore will be ported to new interfaces, but in places I am proposing that we make changes to improve +consistency and scalability. + +1. `RemoveAsync` and `RemoveBatchAsync` renamed to `DeleteAsync` and `DeleteBatchAsync`, since records are actually deleted, and this also matches the verb used for collections. +2. `GetCollectionsAsync` renamed to `GetCollectionNamesAsync`, since we are only retrieving names and no other information about collections. +3. `DoesCollectionExistAsync` renamed to `CollectionExistsAsync` since this is shorter and is more commonly used in other APIs. 
+ +### Comparison with other AI frameworks + +|Criteria|Current SK Implementation|Proposed SK Implementation|Spring AI|LlamaIndex|Langchain| +|-|-|-|-|-|-| +|Support for Custom Schemas|N|Y|N|N|N| +|Naming of store|MemoryStore|MemoryRecordService, MemoryCollectionCreateService, MemoryCollectionUpdateService|VectorStore|VectorStore|VectorStore| +|MultiVector support|N|Y|N|N|N| +|Support Multiple Collections via SDK params|Y|Y|N (via app config)|Y|Y| + +## Decision Drivers + +From GitHub Issue: +- API surface must be easy to use and intuitive +- Alignment with other patterns in the SK +- Design must allow Memory Plugins to be easily instantiated with any connector +- Design must support all Kernel content types +- Design must allow for database specific configuration +- All NFRs to be production ready are implemented (see Roadmap for more detail) +- Basic CRUD operations must be supported so that connectors can be used in a polymorphic manner +- Official Database Clients must be used where available +- Dynamic database schema must be supported +- Dependency injection must be supported +- Azure-ML YAML format must be supported +- Break glass scenarios must be supported + +## Considered Questions + +1. Combined collection and record management vs separated. +2. Collection name and key value normalization in decorator or main class. +3. Collection name as method param or constructor param. +4. How to normalize ids across different vector stores where different types are supported. +5. Store Interface/Class Naming + +### Question 1: Combined collection and record management vs separated. 
+ +#### Option 1 - Combined collection and record management + +```cs +interface IMemoryRecordService +{ + Task CreateCollectionAsync(CollectionCreateConfig collectionConfig, CancellationToken cancellationToken = default); + IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default); + Task CollectionExistsAsync(string name, CancellationToken cancellationToken = default); + Task DeleteCollectionAsync(string name, CancellationToken cancellationToken = default); + + Task UpsertAsync(TDataModel data, CancellationToken cancellationToken = default); + IAsyncEnumerable UpsertBatchAsync(IEnumerable dataSet, CancellationToken cancellationToken = default); + Task GetAsync(string key, bool withEmbedding = false, CancellationToken cancellationToken = default); + IAsyncEnumerable GetBatchAsync(IEnumerable keys, bool withVectors = false, CancellationToken cancellationToken = default); + Task DeleteAsync(string key, CancellationToken cancellationToken = default); + Task DeleteBatchAsync(IEnumerable keys, CancellationToken cancellationToken = default); +} + +class AzureAISearchMemoryRecordService( + Azure.Search.Documents.Indexes.SearchIndexClient client, + Schema schema): IMemoryRecordService; + +class WeaviateMemoryRecordService( + WeaviateClient client, + Schema schema): IMemoryRecordService; + +class RedisMemoryRecordService( + StackExchange.Redis.IDatabase database, + Schema schema): IMemoryRecordService; +``` + +#### Option 2 - Separated collection and record management with opinionated create implementations + +```cs + +interface IMemoryCollectionService +{ + virtual Task CreateChatHistoryCollectionAsync(string name, CancellationToken cancellationToken = default); + virtual Task CreateSemanticCacheCollectionAsync(string name, CancellationToken cancellationToken = default); + + IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default); + Task CollectionExistsAsync(string name, CancellationToken cancellationToken = 
default); + Task DeleteCollectionAsync(string name, CancellationToken cancellationToken = default); +} + +class AzureAISearchMemoryCollectionService: IMemoryCollectionService; +class RedisMemoryCollectionService: IMemoryCollectionService; +class WeaviateMemoryCollectionService: IMemoryCollectionService; + +// Customers can inherit from our implementations and replace just the creation scenarios to match their schemas. +class CustomerCollectionsService: AzureAISearchMemoryCollectionService, IMemoryCollectionService; + +// We can also create implementations that create indices based on an MLIndex specification. +class MLIndexAzureAISearchMemoryCollectionService(MLIndex mlIndexSpec): AzureAISearchMemoryCollectionService, IMemoryCollectionService; + +interface IMemoryRecordService +{ + Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + Task DeleteAsync(string key, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); + Task UpsertAsync(TDataModel record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default); +} + +class AzureAISearchMemoryRecordService(): IMemoryRecordService; +``` + +#### Option 3 - Separated collection and record management with collection create separate from other operations. + +Vector store same as option 2 so not repeated for brevity. + +```cs + +interface IMemoryCollectionCreateService +{ + virtual Task CreateCollectionAsync(string name, CancellationToken cancellationToken = default); +} + +// Implement a generic version of create that takes a configuration that should work for 80% of cases. +class AzureAISearchConfiguredCollectionCreateService(CollectionCreateConfig collectionConfig): IMemoryCollectionCreateService; + +// Allow custom implementations of create for break glass scenarios for outside the 80% case. 
+class AzureAISearchChatHistoryCollectionCreateService: IMemoryCollectionCreateService; +class AzureAISearchSemanticCacheCollectionCreateService: IMemoryCollectionCreateService; + +// Customers can create their own creation scenarios to match their schemas, but can continue to use our get, does exist and delete class. +class CustomerChatHistoryCollectionCreateService: IMemoryCollectionCreateService; + +interface IMemoryCollectionUpdateService +{ + IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default); + Task CollectionExistsAsync(string name, CancellationToken cancellationToken = default); + Task DeleteCollectionAsync(string name, CancellationToken cancellationToken = default); +} + +class AzureAISearchMemoryCollectionUpdateService: IMemoryCollectionUpdateService; +class RedisMemoryCollectionUpdateService: IMemoryCollectionUpdateService; +class WeaviateMemoryCollectionUpdateService: IMemoryCollectionUpdateService; + +``` + +#### Option 4 - Separated collection and record management with collection create separate from other operations, with collection management aggregation class on top. + +Variation on option 3. 
+ +```cs + +interface IMemoryCollectionCreateService +{ + virtual Task CreateCollectionAsync(string name, CancellationToken cancellationToken = default); +} + +interface IMemoryCollectionUpdateService +{ + IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default); + Task CollectionExistsAsync(string name, CancellationToken cancellationToken = default); + Task DeleteCollectionAsync(string name, CancellationToken cancellationToken = default); +} + +// DB Specific Update implementations +class AzureAISearchMemoryCollectionUpdateService: IMemoryCollectionUpdateService; +class RedisMemoryCollectionUpdateService: IMemoryCollectionUpdateService; + +// Combined Create + Update Interface +interface IMemoryCollectionService: IMemoryCollectionCreateService, IMemoryCollectionUpdateService {} + +// Base abstract class that forwards non-create operations to provided service. +abstract class MemoryCollectionService(IMemoryCollectionUpdateService collectionsUpdateService): IMemoryCollectionService +{ + public abstract Task CreateCollectionAsync(string name, CancellationToken cancellationToken = default); + public IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default) { return collectionsUpdateService.ListCollectionNamesAsync(cancellationToken); } + public Task CollectionExistsAsync(string name, CancellationToken cancellationToken = default) { return collectionsUpdateService.CollectionExistsAsync(name, cancellationToken); } + public Task DeleteCollectionAsync(string name, CancellationToken cancellationToken = default) { return collectionsUpdateService.DeleteCollectionAsync(name, cancellationToken); } +} + +// Collections service implementations, that inherit from base class, and just adds the different creation implementations. 
+class AzureAISearchChatHistoryMemoryService(AzureAISearchMemoryCollectionUpdateService updateService): MemoryCollectionService(updateService); +class AzureAISearchSemanticCacheMemoryService(AzureAISearchMemoryCollectionUpdateService updateService): MemoryCollectionService(updateService); +class AzureAISearchMLIndexMemoryService(AzureAISearchMemoryCollectionUpdateService updateService): MemoryCollectionService(updateService); + +// Customer collections service implementation, that uses the base Azure AI Search implementation for get, doesExist and delete, but adds its own creation. +class ContosoProductsMemoryCollectionService(AzureAISearchMemoryCollectionUpdateService updateService): MemoryCollectionService(updateService); + +``` + +#### Option 5 - Separated collection and record management with collection create separate from other operations, with overall aggregation class on top. + +Same as option 3 / 4, plus: + +```cs + +interface IMemoryService : IMemoryCollectionCreateService, IMemoryCollectionService, IMemoryRecordService +{ +} + +// Create a static factory that produces one of these, so only the interface is public, not the class. +internal class CombinedMemoryService(IMemoryCollectionCreateService creation, IMemoryCollectionService collections, IMemoryRecordService records): IMemoryService +{ +} + +``` + +#### Decision Outcome + +Option 1 is problematic on its own, since we have to allow consumers to create custom implementations of collection create for break glass scenarios. With +a single interface like this, it will require them to implement many methods that they do not want to change. Options 4 & 5 give us more flexibility while +still preserving the ease of use of an aggregated interface as described in Option 1. + +Option 2 doesn't give us the flexibility we need for break glass scenarios, since it only allows certain types of collections to be created. 
It also means +that each time a new collection type is required it introduces a breaking change, so it is not a viable option. + +Since collection create and configuration and the possible options vary considerably across different database types, we will need to support an easy +to use break glass scenario for collection creation. While we would be able to develop a basic configurable create option, for complex create scenarios +users will need to implement their own. We will also need to support multiple create implementations out of the box, e.g. a configuration based option using +our own configuration, create implementations that re-create the current model for backward compatibility, create implementations that use other configuration +as input, e.g. Azure-ML YAML. Therefore separating create, which may have many implementations, from exists, list and delete, which require only a single implementation per database type is useful. +Option 3 provides us this separation, but Option 4 + 5 builds on top of this, and allows us to combine different implementations together for simpler +consumption. + +Chosen option: 4 + 5. + +- Collection create, configuration and supported options vary considerably across different schemas and database types. +- Collection list, exists and delete is the same across different schemas, but varies by database type. +- Vector storage, even with custom schemas can be supported using a single implementation per database type. +- We will need to support multiple collection create service implementations per store type, a single collection update service implementation per store type, and a single vector store implementation per store type. +- At the same time we can layer interfaces on top that allow easy combined access to collection and record management. + + +### Question 2: Collection name and key value normalization in store, decorator or via injection. 
+ +#### Option 1 - Normalization in main record service + +- Pros: Simple +- Cons: The normalization needs to vary separately from the record service, so this will not work + +```cs + public class AzureAISearchMemoryRecordService : IMemoryRecordService + { + ... + + // On input. + var normalizedCollectionName = this.NormalizeCollectionName(collectionName); + var encodedId = AzureAISearchMemoryRecord.EncodeId(key); + + ... + + // On output. + DecodeId(this.Id) + + ... + } +``` + +#### Option 2 - Normalization in decorator + +- Pros: Allows normalization to vary separately from the record service. +- Pros: No code executed when no normalization required. +- Pros: Easy to package matching encoders/decoders together. +- Pros: Easier to obsolete encoding/normalization as a concept. +- Cons: Not a major con, but need to implement the full MemoryRecordService interface, instead of e.g. just providing the two translation functions, if we go with option 3. +- Cons: Hard to have a generic implementation that can work with any model, without either changing the data in the provided object on upsert or doing cloning in an expensive way. + +```cs + new KeyNormalizingAISearchMemoryRecordService( + "keyField", + new AzureAISearchMemoryRecordService(...)); +``` + +#### Option 3 - Normalization via optional function parameters to record service constructor + +- Pros: Allows normalization to vary separately from the record service. +- Pros: No need to implement the full MemoryRecordService interface. +- Pros: Can modify values on serialization without changing the incoming record, if supported by DB SDK. +- Cons: Harder to package matching encoders/decoders together. + +```cs +public class AzureAISearchMemoryRecordService(StoreOptions options); + +public class StoreOptions +{ + public Func? EncodeKey { get; init; } + public Func? DecodeKey { get; init; } + public Func? 
SanitizeCollectionName { get; init; } +} +``` + +#### Option 4 - Normalization via custom mapper + +If developer wants to change any values they can do so by creating a custom mapper. + +- Cons: Developer needs to implement a mapper if they want to do normalization. +- Cons: Developer cannot change collection name as part of the mapping. +- Pros: No new extension points required to support normalization. +- Pros: Developer can change any field in the record. + +#### Decision Outcome + +Chosen option 3, since it is similar to how we are doing mapper injection and would also work well in python. + +Option 1 won't work because if e.g. the data was written using another tool, it may be unlikely that it was encoded using the same mechanism as supported here +and therefore this functionality may not be appropriate. The developer should have the ability to not use this functionality or +provide their own encoding / decoding behavior. + +### Question 3: Collection name as method param or via constructor or either + +#### Option 1 - Collection name as method param + +```cs +public class MyMemoryStore() +{ + public async Task GetAsync(string collectionName, string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); +} +``` + +#### Option 2 - Collection name via constructor + +```cs +public class MyMemoryStore(string defaultCollectionName) +{ + public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); +} +``` + +#### Option 3 - Collection name via either + +```cs +public class MyMemoryStore(string defaultCollectionName) +{ + public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); +} + +public class GetRecordOptions +{ + public string CollectionName { get; init; }; +} +``` + +#### Decision Outcome + +Chosen option 3, to allow developers more choice. 
+ +### Question 4: How to normalize ids across different vector stores where different types are supported. + +#### Option 1 - Take a string and convert to a type that was specified on the constructor + +```cs +public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) +{ + var convertedKey = this.keyType switch + { + KeyType.Int => int.parse(key), + KeyType.GUID => Guid.parse(key) + } + + ... +} +``` + +- No additional overloads are required over time so no breaking changes. +- Most data types can easily be represented in string form and converted to/from it. + +#### Option 2 - Take an object and cast to a type that was specified on the constructor. + +```cs +public async Task GetAsync(object key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) +{ + var convertedKey = this.keyType switch + { + KeyType.Int => key as int, + KeyType.GUID => key as Guid + } + + if (convertedKey is null) + { + throw new InvalidOperationException($"The provided key must be of type {this.keyType}") + } + + ... +} + +``` + +- No additional overloads are required over time so no breaking changes. +- Any data types can be represented as object. + +#### Option 3 - Multiple overloads where we convert where possible, throw when not possible. + +```cs +public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) +{ + var convertedKey = this.keyType switch + { + KeyType.Int => int.Parse(key), + KeyType.String => key, + KeyType.GUID => Guid.Parse(key) + } +} +public async Task GetAsync(int key, GetRecordOptions? 
options = default, CancellationToken cancellationToken = default) +{ + var convertedKey = this.keyType switch + { + KeyType.Int => key, + KeyType.String => key.ToString(), + KeyType.GUID => throw new InvalidOperationException($"The provided key must be convertible to a GUID.") + } +} +public async Task GetAsync(GUID key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) +{ + var convertedKey = this.keyType switch + { + KeyType.Int => throw new InvalidOperationException($"The provided key must be convertible to an int.") + KeyType.String => key.ToString(), + KeyType.GUID => key + } +} +``` + +- Additional overloads are required over time if new key types are found on new connectors, causing breaking changes. +- You can still call a method that causes a runtime error, when the type isn't supported. + +#### Option 4 - Add key type as generic to interface + +```cs +interface IMemoryRecordService +{ + Task GetAsync(TKeyType key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); +} + +class AzureAISearchMemoryRecordService: IMemoryRecordService +{ + public AzureAISearchMemoryRecordService() + { + // Check if TKeyType matches the type of the field marked as a key on TDataModel and throw if they don't match. + // Also check if keytype is one of the allowed types for Azure AI Search and throw if it isn't. + } +} + +``` + +- No runtime issues after construction. +- More cumbersome interface. + +#### Decision Outcome + +Chosen option 4, since it is forwards compatible with any complex key types we may need to support but still allows +each implementation to hardcode allowed key types if the vector db only supports certain key types. + +### Question 5: Store Interface/Class Naming. 
+ +#### Option 1 - VectorDB + +```cs +IVectorDBRecordService +IVectorDBCollectionUpdateService +IVectorDBCollectionCreateService +``` + +#### Option 2 - Memory + +```cs +IMemoryRecordService +IMemoryCollectionUpdateService +IMemoryCollectionCreateService +``` + +#### Decision Outcome + +Chosen option 2. Memory constrains the scope of these classes to be for memory storage and retrieval in the context of an AI system. Since almost all +databases are currently adding vector support, including relational, it's important to clarify the purpose of these abstractions compared to others. +Here, the purpose is not to provide generic database access to all databases that support vectors, but rather for memory storage and retrieval. The +concern with using a term such as VectorDB is that it opens up the scope of the feature set to include anything that stores a vector, without +constraining it to any specific purpose. + +## Roadmap + +### Record Management + +1. Release RecordService public interface and implementations for Azure AI Search, Qdrant and Redis. +2. Add support for registering record services with SK container to allow automatic dependency injection. +3. Add RecordService implementations for remaining stores. + +### Collection Management + +4. Release Collection Management public interface and implementations for Azure AI Search, Qdrant and Redis. +5. Add support for registering collection management with SK container to allow automatic dependency injection. +6. Add Collection Management implementations for remaining stores. + +### Collection Creation + +7. Release Collection Creation public interface. +8. Create cross db collection creation config that supports common functionality, and per database implementation that supports this configuration. +9. Add support for registering collection creation with SK container to allow automatic dependency injection. + +### First Party Memory Features and well known model support + +10. 
Add model and mappers for legacy SK MemoryStore interface, so that consumers using this have an upgrade path to the new memory storage stack. +11. Add model and mappers for popular loader systems, like Kernel Memory or LlamaIndex. +12. Explore adding first party implementations for common scenarios, e.g. semantic caching. Specifics TBD. + +### Cross Cutting Requirements + +Need the following for all features: + +- Unit tests +- Integration tests +- Logging / Telemetry +- Common Exception Handling +- Samples, including: + - Usage scenario for collection and record management using custom model and configured collection creation. + - A simple consumption example like semantic caching, specifics TBD. + - Adding your own collection creation implementation. + - Adding your own custom model mapper. +- Documentation, including: + - How to create models and annotate/describe them to use with the storage system. + - How to define configuration for creating collections using common create implementation. + - How to use record and collection management apis. + - How to implement your own collection create implementation for break glass scenario. + - How to implement your own mapper. + - How to upgrade from the current storage system to the new one. diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs new file mode 100644 index 000000000000..9419eef1e81c --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs @@ -0,0 +1,80 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// An interface for adding, updating, deleting and retrieving records from a memory store. +/// +/// The data type of the record key. 
+/// The data model to use for adding, updating and retrieving data from storage. +[Experimental("SKEXP0001")] +public interface IMemoryRecordService + where TDataModel : class +{ + /// + /// Gets a memory record from the data store. Does not guarantee that the collection exists. + /// Throws if the record is not found. + /// + /// The unique id associated with the memory record to get. + /// Optional options for retrieving the record. + /// The to monitor for cancellation requests. The default is . + /// The memory record if found, otherwise null. + Task GetAsync(TKey key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + + /// + /// Gets a batch of memory records from the data store. Does not guarantee that the collection exists. + /// Throws if any of the records are not found. + /// Gets will be made in a single request or in a single parallel batch depending on the available store functionality. + /// + /// The unique ids associated with the memory records to get. + /// Optional options for retrieving the records. + /// The to monitor for cancellation requests. The default is . + /// The memory records associated with the unique keys provided. + IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + + /// + /// Deletes a memory record from the data store. Does not guarantee that the collection exists. + /// + /// The unique id associated with the memory record to remove. + /// Optional options for removing the record. + /// The to monitor for cancellation requests. The default is . + /// The unique identifier for the memory record. + Task DeleteAsync(TKey key, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); + + /// + /// Deletes a batch of memory records from the data store. Does not guarantee that the collection exists. 
+ /// Deletes will be made in a single request or in a single parallel batch depending on the available store functionality. + /// + /// The unique ids associated with the memory records to remove. + /// Optional options for removing the records. + /// The to monitor for cancellation requests. The default is . + Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); + + /// + /// Upserts a memory record into the data store. Does not guarantee that the collection exists. + /// If the record already exists, it will be updated. + /// If the record does not exist, it will be created. + /// + /// The memory record to upsert. + /// Optional options for upserting the record. + /// The to monitor for cancellation requests. The default is . + /// The unique identifier for the memory record. + Task UpsertAsync(TDataModel record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default); + + /// + /// Upserts a group of memory records into the data store. Does not guarantee that the collection exists. + /// If the record already exists, it will be updated. + /// If the record does not exist, it will be created. + /// Upserts will be made in a single request or in a single parallel batch depending on the available store functionality. + /// + /// The memory records to upsert. + /// Optional options for upserting the records. + /// The to monitor for cancellation requests. The default is . + /// The unique identifiers for the memory records. + IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? 
options = default, CancellationToken cancellationToken = default); +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordDataAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordDataAttribute.cs new file mode 100644 index 000000000000..fbcfd7087722 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordDataAttribute.cs @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Attribute to mark a property on a vector model class as the data that is being indexed. +/// +[Experimental("SKEXP0001")] +[AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] +public sealed class MemoryRecordDataAttribute : Attribute +{ + /// + /// Gets or sets a value indicating whether this data field has an associated embedding field. + /// + /// Defaults to + public bool HasEmbedding { get; init; } + + /// + /// Gets or sets the name of the property that contains the embedding for this data field. + /// + public string? EmbeddingPropertyName { get; init; } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordKeyAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordKeyAttribute.cs new file mode 100644 index 000000000000..72c1575acff4 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordKeyAttribute.cs @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Attribute to mark a property on a class as the key under which data is stored in a vector store. 
+/// +[Experimental("SKEXP0001")] +[AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] +public sealed class MemoryRecordKeyAttribute : Attribute +{ +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordVectorAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordVectorAttribute.cs new file mode 100644 index 000000000000..c09b47f5ddea --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordVectorAttribute.cs @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Attribute to mark a property on a vector model class as the vector. +/// +[Experimental("SKEXP0001")] +[AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] +public sealed class MemoryRecordVectorAttribute : Attribute +{ +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/DeleteRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/DeleteRecordOptions.cs new file mode 100644 index 000000000000..d34f921975d5 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/DeleteRecordOptions.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Optional options when calling . +/// +[Experimental("SKEXP0001")] +public class DeleteRecordOptions +{ + /// + /// Initializes a new instance of the class. + /// + public DeleteRecordOptions() + { + } + + /// + /// Initializes a new instance of the class by cloning the given options. 
+ /// + /// The options to clone + public DeleteRecordOptions(DeleteRecordOptions source) + { + this.CollectionName = source.CollectionName; + } + + /// + /// Gets or sets an optional collection name to use for this operation that is different to the default. + /// + public string? CollectionName { get; init; } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/GetRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/GetRecordOptions.cs new file mode 100644 index 000000000000..6f284fb4328f --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/GetRecordOptions.cs @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Optional options when calling . +/// +[Experimental("SKEXP0001")] +public class GetRecordOptions +{ + /// + /// Initializes a new instance of the class. + /// + public GetRecordOptions() + { + } + + /// + /// Initializes a new instance of the class by cloning the given options. + /// + /// The options to clone + public GetRecordOptions(GetRecordOptions source) + { + this.CollectionName = source.CollectionName; + this.IncludeVectors = source.IncludeVectors; + } + + /// + /// Gets or sets an optional collection name to use for this operation that is different to the default. + /// + public string? CollectionName { get; init; } + + /// + /// Gets or sets a value indicating whether to include vectors in the retrieval result. + /// + public bool IncludeVectors { get; init; } = false; +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/UpsertRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/UpsertRecordOptions.cs new file mode 100644 index 000000000000..59bcdadbfa15 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/UpsertRecordOptions.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft. 
All rights reserved. + +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Optional options when calling . +/// +[Experimental("SKEXP0001")] +public class UpsertRecordOptions +{ + /// + /// Initializes a new instance of the class. + /// + public UpsertRecordOptions() + { + } + + /// + /// Initializes a new instance of the class by cloning the given options. + /// + /// The options to clone + public UpsertRecordOptions(UpsertRecordOptions source) + { + this.CollectionName = source.CollectionName; + } + + /// + /// Gets or sets an optional collection name to use for this operation that is different to the default. + /// + public string? CollectionName { get; init; } +} From cc59b040130076997d37d385580856d511888a22 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 13 Jun 2024 16:20:45 +0100 Subject: [PATCH 02/48] .Net: Add new Azure AI Search memory connector implementation (#6585) ### Motivation and Context Adding a reference implementation using Azure AI Search for the new memory connector record interface. ### Description For more information see the included ADR document. 
Issue: #6519 ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- .../0045-updated-vector-store-design.md | 159 ++++++- .../AzureAISearchMemoryRecordMapperType.cs | 21 + .../AzureAISearchMemoryRecordService.cs | 391 ++++++++++++++++++ ...AzureAISearchMemoryRecordServiceOptions.cs | 42 ++ .../AzureAISearchMemoryCollectionFixture.cs | 10 + .../AzureAISearchMemoryFixture.cs | 277 +++++++++++++ .../AzureAISearchMemoryRecordServiceTests.cs | 300 ++++++++++++++ .../IntegrationTests/IntegrationTests.csproj | 1 + .../Memory/AzureAISearchConfiguration.cs | 14 + .../Memory/AzureAISearchSetup.psm1 | 74 ++++ .../Schema/VectorStoreModelPropertyReader.cs | 214 ++++++++++ .../Memory/IMemoryRecordMapper.cs | 27 ++ .../IMemoryRecordService{TKey,TDataModel}.cs | 14 +- .../Memory/MemoryDataModelMappingException.cs | 35 ++ .../MemoryServiceCommandExecutionException.cs | 35 ++ .../MemoryRecordDataProperty.cs | 43 ++ .../MemoryRecordDefinition.cs | 21 + .../MemoryRecordKeyProperty.cs | 30 ++ .../RecordDefinition/MemoryRecordProperty.cs | 26 ++ .../MemoryRecordVectorProperty.cs | 30 ++ 20 files changed, 1746 insertions(+), 18 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordMapperType.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordService.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordServiceOptions.cs create mode 100644 
dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryCollectionFixture.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryFixture.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryRecordServiceTests.cs create mode 100644 dotnet/src/IntegrationTests/TestSettings/Memory/AzureAISearchConfiguration.cs create mode 100644 dotnet/src/IntegrationTests/TestSettings/Memory/AzureAISearchSetup.psm1 create mode 100644 dotnet/src/InternalUtilities/src/Schema/VectorStoreModelPropertyReader.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordMapper.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/MemoryDataModelMappingException.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/MemoryServiceCommandExecutionException.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDataProperty.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDefinition.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordKeyProperty.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordProperty.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordVectorProperty.cs diff --git a/docs/decisions/0045-updated-vector-store-design.md b/docs/decisions/0045-updated-vector-store-design.md index f1c0d422e5f0..876e0c7b8664 100644 --- a/docs/decisions/0045-updated-vector-store-design.md +++ b/docs/decisions/0045-updated-vector-store-design.md @@ -247,7 +247,7 @@ classDiagram ISemanticTextMemory <.. 
ChatHistoryPlugin ``` -### Vector Store Cross Store support +### Vector Store Cross Store support - General Features A comparison of the different ways in which stores implement storage capabilities to help drive decisions: @@ -264,16 +264,6 @@ A comparison of the different ways in which stores implement storage capabilitie |Is Key separate from data|N|Y|Y|Y||Y||N|Y|N| |Can Generate Ids|N|Y|N|N||Y||Y|N|Y| |Can Generate Embedding|Not Available Via API yet|Y|N|Client Side Abstraction|||||N|| -|Index allows text search|Y|Y|Y|Y (On Metadata by default)||||Y (with TSVECTOR field)|Y|Y| -|Allows filtering|Y|Y|Y (on TAG)|Y (On Metadata by default)||[Y](https://docs.pinecone.io/guides/indexes/configure-pod-based-indexes#selective-metadata-indexing)||Y|Y|Y| -|Allows scalar index field setup|Y|Y|Y|N||Y|||Y|Y| -|Requires scalar index field setup to filter|Y|Y|Y|N||N (on by default for all)|||N|N (can filter without index)| -|Field Differentiation|Fields|Key, Props, Vectors|Key, Fields|Key, Documents, Metadata, Vectors||Key, Metadata, SparseValues, Vectors||Fields|Key, Props(Payload), Vectors|Fields| -|Index to Collection|1 to 1|1 to 1|1 to many|1 to 1|-|1 to 1|-|1 to 1|1 to 1|1 to 1| -|Id Type|String|UUID|string with collection name prefix|string||string|UUID|64Bit Int / UUID / ULID|64Bit Unsigned Int / UUID|Int64 / varchar| -|Supported Vector Types|[Collection(Edm.Byte) / Collection(Edm.Single) / Collection(Edm.Half) / Collection(Edm.Int16) / Collection(Edm.SByte)](https://learn.microsoft.com/en-us/rest/api/searchservice/supported-data-types)|float32|FLOAT32 and FLOAT64|||[Rust f32](https://docs.pinecone.io/troubleshooting/embedding-values-changed-when-upserted)||[single-precision (4 byte float) / half-precision (2 byte float) / binary (1bit) / sparse vectors (4 bytes)](https://github.com/pgvector/pgvector?tab=readme-ov-file#pgvector)|UInt8 / Float32|Binary / Float32 / Float16 / BFloat16 / SparseFloat| -|Supported Distance Functions|[Cosine / dot prod / euclidean dist (l2 
norm)](https://learn.microsoft.com/en-us/azure/search/vector-search-ranking#similarity-metrics-used-to-measure-nearness)|[Cosine dist / dot prod / Squared L2 dist / hamming (num of diffs) / manhattan dist](https://weaviate.io/developers/weaviate/config-refs/distances#available-distance-metrics)|[Euclidean dist (L2) / Inner prod (IP) / Cosine dist](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/vectors/)|[Squared L2 / Inner prod / Cosine similarity](https://docs.trychroma.com/guides#changing-the-distance-function)||[cosine sim / euclidean dist / dot prod](https://docs.pinecone.io/reference/api/control-plane/create_index)||[L2 dist / inner prod / cosine dist / L1 dist / Hamming dist / Jaccard dist](https://github.com/pgvector/pgvector?tab=readme-ov-file#pgvector)|[Dot prod / Cosine sim / Euclidean dist (L2) / Manhattan dist](https://qdrant.tech/documentation/concepts/search/)|[Cosine sim / Euclidean dist / Inner Prod](https://milvus.io/docs/index-vector-fields.md)| -|Supported index types|[Exhaustive KNN / HNSW](https://learn.microsoft.com/en-us/azure/search/vector-search-ranking#algorithms-used-in-vector-search)|[HNSW / Flat / Dynamic](https://weaviate.io/developers/weaviate/config-refs/schema/vector-index)|[HNSW / FLAT](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/vectors/#create-a-vector-field)|[HNSW not configurable](https://cookbook.chromadb.dev/core/concepts/#vector-index-hnsw-index)||[PGA](https://www.pinecone.io/blog/hnsw-not-enough/)||[HNSW / IVFFlat](https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing)|[HNSW for dense](https://qdrant.tech/documentation/concepts/indexing/#vector-index)|

[In Memory: FLAT / IVF_FLAT / IVF_SQ8 / IVF_PQ / HNSW / SCANN](https://milvus.io/docs/index.md)

[On Disk: DiskANN](https://milvus.io/docs/disk_index.md)

[GPU: GPU_CAGRA / GPU_IVF_FLAT / GPU_IVF_PQ / GPU_BRUTE_FORCE](https://milvus.io/docs/gpu_index.md)

| Footnotes: - P = Partial Support @@ -281,12 +271,41 @@ Footnotes: - 2 Supports broad categories of fields only. - 3 Id is required in request, so can be returned if needed. - 4 No strong typed support when specifying field list. + +### Vector Store Cross Store support - Fields, types and indexing + +|Feature|Azure AI Search|Weaviate|Redis|Chroma|FAISS|Pinecone|LLamaIndex|PostgreSql|Qdrant|Milvus| +|-|-|-|-|-|-|-|-|-|-|-| +|Field Differentiation|Fields|Key, Props, Vectors|Key, Fields|Key, Document, Metadata, Vector||Key, Metadata, SparseValues, Vector||Fields|Key, Props(Payload), Vectors|Fields| +|Multiple Vector per record support|Y|Y|Y|N||[N](https://docs.pinecone.io/guides/data/upsert-data#upsert-records-with-metadata)||Y|Y|Y| +|Index to Collection|1 to 1|1 to 1|1 to many|1 to 1|-|1 to 1|-|1 to 1|1 to 1|1 to 1| +|Id Type|String|UUID|string with collection name prefix|string||string|UUID|64Bit Int / UUID / ULID|64Bit Unsigned Int / UUID|Int64 / varchar| +|Supported Vector Types|[Collection(Edm.Byte) / Collection(Edm.Single) / Collection(Edm.Half) / Collection(Edm.Int16) / Collection(Edm.SByte)](https://learn.microsoft.com/en-us/rest/api/searchservice/supported-data-types)|float32|FLOAT32 and FLOAT64|||[Rust f32](https://docs.pinecone.io/troubleshooting/embedding-values-changed-when-upserted)||[single-precision (4 byte float) / half-precision (2 byte float) / binary (1bit) / sparse vectors (4 bytes)](https://github.com/pgvector/pgvector?tab=readme-ov-file#pgvector)|UInt8 / Float32|Binary / Float32 / Float16 / BFloat16 / SparseFloat| +|Supported Distance Functions|[Cosine / dot prod / euclidean dist (l2 norm)](https://learn.microsoft.com/en-us/azure/search/vector-search-ranking#similarity-metrics-used-to-measure-nearness)|[Cosine dist / dot prod / Squared L2 dist / hamming (num of diffs) / manhattan dist](https://weaviate.io/developers/weaviate/config-refs/distances#available-distance-metrics)|[Euclidean dist (L2) / Inner prod (IP) / Cosine 
dist](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/vectors/)|[Squared L2 / Inner prod / Cosine similarity](https://docs.trychroma.com/guides#changing-the-distance-function)||[cosine sim / euclidean dist / dot prod](https://docs.pinecone.io/reference/api/control-plane/create_index)||[L2 dist / inner prod / cosine dist / L1 dist / Hamming dist / Jaccard dist (NB: Specified at query time, not index creation time)](https://github.com/pgvector/pgvector?tab=readme-ov-file#pgvector)|[Dot prod / Cosine sim / Euclidean dist (L2) / Manhattan dist](https://qdrant.tech/documentation/concepts/search/)|[Cosine sim / Euclidean dist / Inner Prod](https://milvus.io/docs/index-vector-fields.md)| +|Supported index types|[Exhaustive KNN / HNSW](https://learn.microsoft.com/en-us/azure/search/vector-search-ranking#algorithms-used-in-vector-search)|[HNSW / Flat / Dynamic](https://weaviate.io/developers/weaviate/config-refs/schema/vector-index)|[HNSW / FLAT](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/vectors/#create-a-vector-field)|[HNSW not configurable](https://cookbook.chromadb.dev/core/concepts/#vector-index-hnsw-index)||[PGA](https://www.pinecone.io/blog/hnsw-not-enough/)||[HNSW / IVFFlat](https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing)|[HNSW for dense](https://qdrant.tech/documentation/concepts/indexing/#vector-index)|

[In Memory: FLAT / IVF_FLAT / IVF_SQ8 / IVF_PQ / HNSW / SCANN](https://milvus.io/docs/index.md)

[On Disk: DiskANN](https://milvus.io/docs/disk_index.md)

[GPU: GPU_CAGRA / GPU_IVF_FLAT / GPU_IVF_PQ / GPU_BRUTE_FORCE](https://milvus.io/docs/gpu_index.md)

| + +Footnotes: - HNSW = Hierarchical Navigable Small World (HNSW performs an [approximate nearest neighbor (ANN)](https://learn.microsoft.com/en-us/azure/search/vector-search-overview#approximate-nearest-neighbors) search) - KNN = k-nearest neighbors (performs a brute-force search that scans the entire vector space) - IVFFlat = Inverted File with Flat Compression (This index type uses approximate nearest neighbor search (ANNS) to provide fast searches) - Weaviate Dynamic = Starts as flat and switches to HNSW if the number of objects exceed a limit - PGA = [Pinecone Graph Algorithm](https://www.pinecone.io/blog/hnsw-not-enough/) +### Vector Store Cross Store support - Search and filtering + +|Feature|Azure AI Search|Weaviate|Redis|Chroma|FAISS|Pinecone|LLamaIndex|PostgreSql|Qdrant|Milvus| +|-|-|-|-|-|-|-|-|-|-|-| +|Index allows text search|Y|Y|Y|Y (On Metadata by default)||[Only in combination with Vector](https://docs.pinecone.io/guides/data/understanding-hybrid-search)||Y (with TSVECTOR field)|Y|Y| +|Text search query format|[Simple or Full Lucene](https://learn.microsoft.com/en-us/azure/search/search-query-create?tabs=portal-text-query#choose-a-query-type-simple--full)|[wildcard](https://weaviate.io/developers/weaviate/search/filters#filter-text-on-partial-matches)|wildcard & fuzzy|[contains & not contains](https://docs.trychroma.com/guides#filtering-by-document-contents)||Text only||[wildcard & binary operators](https://www.postgresql.org/docs/16/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES)|[Text only](https://qdrant.tech/documentation/concepts/filtering/#full-text-match)|[wildcard](https://milvus.io/docs/single-vector-search.md#Filtered-search)| +|Multi Field Vector Search Support|Y|[N](https://weaviate.io/developers/weaviate/search/similarity)||N (no multi vector support)||N||[Unclear due to order by 
syntax](https://github.com/pgvector/pgvector?tab=readme-ov-file#querying)|[N](https://qdrant.tech/documentation/concepts/search/)|[Y](https://milvus.io/api-reference/restful/v2.4.x/v2/Vector%20(v2)/Hybrid%20Search.md)| +|Targeted Multi Field Text Search Support|Y|[Y](https://weaviate.io/developers/weaviate/search/hybrid#set-weights-on-property-values)|[Y](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/query_syntax/#field-modifiers)|N (only on document)||N||Y|Y|Y| +|Vector per Vector Field for Search|Y|N/A||N/A|||N/A||N/A|N/A|[Y](https://milvus.io/docs/multi-vector-search.md#Step-1-Create-Multiple-AnnSearchRequest-Instances)| +|Separate text search query from vectors|Y|[Y](https://weaviate.io/developers/weaviate/search/hybrid#specify-a-search-vector)|Y|Y||Y||Y|Y|[Y](https://milvus.io/api-reference/restful/v2.4.x/v2/Vector%20(v2)/Hybrid%20Search.md)| +|Allows filtering|Y|Y|Y (on TAG)|Y (On Metadata by default)||[Y](https://docs.pinecone.io/guides/indexes/configure-pod-based-indexes#selective-metadata-indexing)||Y|Y|Y| +|Allows filter grouping|Y (Odata)|[Y](https://weaviate.io/developers/weaviate/search/filters#nested-filters)||[Y](https://docs.trychroma.com/guides#using-logical-operators)||Y||Y|[Y](https://qdrant.tech/documentation/concepts/filtering/#clauses-combination)|[Y](https://milvus.io/docs/get-and-scalar-query.md#Use-Basic-Operators)| +|Allows scalar index field setup|Y|Y|Y|N||Y||Y|Y|Y| +|Requires scalar index field setup to filter|Y|Y|Y|N||N (on by default for all)||N|N|N (can filter without index)| + ### Support for different mappers Mapping between data models and the storage models can also require custom logic depending on the type of data model and storage model involved. @@ -337,23 +356,23 @@ public record HotelInfo( Here is what the configuration objects would look like. 
```cs -abstract class Field(string fieldName); +abstract class MemoryRecordProperty(string propertyName); -sealed class KeyField(string fieldName): Field(fieldName) +sealed class MemoryRecordKeyProperty(string propertyName): MemoryRecordProperty(propertyName) { } -sealed class DataField(string fieldName): Field(fieldName) +sealed class MemoryRecordDataProperty(string propertyName): MemoryRecordProperty(propertyName) { bool HasEmbedding; string EmbeddingPropertyName; } -sealed class VectorField(string fieldName): Field(fieldName) +sealed class MemoryRecordVectorProperty(string propertyName): MemoryRecordProperty(propertyName) { } sealed class MemoryRecordDefinition { - IReadOnlyList Fields; + IReadOnlyList Properties; } ``` @@ -832,6 +851,114 @@ Here, the purpose is not to provide generic database access to all databases tha concern with using a term such as VectorDB is that it opens up the scope of the feature set to include anything that stores a vector, without constraining it to any specific purpose. +## Usage Examples + +Common Code across all examples + +```cs +class CacheEntryModel(string prompt, string result, ReadOnlyMemory promptEmbedding); + +class SemanticTextMemory(IMemoryRecordService recordService, IMemoryCollectionService collectionService, ITextEmbeddingGenerationService embeddingGenerator): ISemanticTextMemory; + +class CacheSetFunctionFilter(ISemanticTextMemory memory); // Saves results to cache. +class CacheGetPromptFilter(ISemanticTextMemory memory); // Check cache for entries. + +var builder = Kernel.CreateBuilder(); +``` + +### DI Framework: Named Instances + +Similar to HttpClient, register implementations using names, that can only be constructed again +using a specific factory implementation. + +```cs +builder + .AddAzureOpenAITextEmbeddingGeneration(textEmbeddingDeploymentName, azureAIEndpoint, apiKey) + + // Collection Registration: + // Variant 1: Register just create.
+ .AddNamedAzureAISearchCollectionCreate(name: "CacheCreate", azureAISearchEndpoint, apiKey, createConfiguration) // Config + .AddNamedAzureAISearchCollectionCreate(name: "CacheCreate", sp => new CacheCreate(...)); // Custom implementation + // Create combined collection management that references the previously registered create instance. + .AddNamedAzureAISearchCollectionService(name: "Cache", azureAISearchEndpoint, apiKey, createName: "CacheCreate") + + // Variant 2: Register collection service in one line with config or custom create implementation. + .AddNamedAzureAISearchCollectionService(name: "Cache", azureAISearchEndpoint, apiKey, createConfiguration) // Config + .AddNamedAzureAISearchCollectionService(name: "Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)) // Custom implementation + + // Record Registration with variants 1 and 2: + // Add record services. + .AddAzureAISearchRecordService(name: "Cache", azureAISearchEndpoint, apiKey) + + // Variant 3: Register collection and record service in one line with config or custom create implementation. + // Does all of the previous variants in one line. + .AddAzureAISearchStorageServices(name: "Cache", azureAISearchEndpoint, apiKey, createConfiguration) // Config + .AddAzureAISearchStorageServices(name: "Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)) // Custom implementation + + // Add semantic text memory referencing collection and record services. + // This would register ISemanticTextMemory in the services container. + .AddSemanticTextMemory(collectionServiceName: "Cache", recordServiceName: "Cache"); + +// Add filter to retrieve items from cache and one to add items to cache. +// Since these filters depend on ISemanticTextMemory and that is already registered, it should get matched automatically.
+builder.Services.AddTransient(); +builder.Services.AddTransient(); + +var kernel = builder + .Build(); + +var memoryFactory = kernel.Services.GetRequiredService(); +var cacheCollectionService = memoryFactory.CreateCollectionService(name: "Cache"); +var cacheRecordService = memoryFactory.CreateRecordService(name: "Cache"); +``` + +### DI Framework: Registration based on consumer type. + +Similar to `AddHttpClient`, this approach will register a specific implementation of +the storage implementations, for a provided consumer type. + +```cs +builder + .AddAzureOpenAITextEmbeddingGeneration(textEmbeddingDeploymentName, azureAIEndpoint, apiKey) + + // Collection and record registration with config or custom create implementation. + // This will register both IMemoryCollectionService and IMemoryRecordService and tie it to usage with SemanticTextMemory. + .AddAzureAISearchStorage>(azureAISearchEndpoint, apiKey, createConfiguration) // Config + .AddAzureAISearchStorage>(azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)); // Custom implementation + +// Add Semantic Cache Memory for the cache entry model. +builder.Services.AddTransient, SemanticTextMemory>(); + +// Add filter to retrieve items from cache and one to add items to cache. +// Since these filters depend on ISemanticTextMemory and that is already registered, it should get matched automatically. +builder.Services.AddTransient(); +builder.Services.AddTransient(); +``` + +### DI Framework: .NET 8 Keyed Services + +```cs +builder + .AddAzureOpenAITextEmbeddingGeneration(textEmbeddingDeploymentName, azureAIEndpoint, apiKey) + + // Collection and record registration with config or custom create implementation. + .AddAzureAISearchStorageKeyedTransient("Cache", azureAISearchEndpoint, apiKey, createConfiguration) + .AddAzureAISearchStorageKeyedTransient("Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)); + +// Add Semantic Cache Memory for the cache entry model.
+builder.Services.AddTransient>(sp => { + return new SemanticTextMemory( + sp.GetKeyedService>("Cache"), + sp.GetKeyedService("Cache"), + sp.GetRequiredService()); +}); + +// Add filter to retrieve items from cache and one to add items to cache. +// Since these filters depend on ISemanticTextMemory and that is already registered, it should get matched automatically. +builder.Services.AddTransient(); +builder.Services.AddTransient(); +``` + ## Roadmap ### Record Management diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordMapperType.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordMapperType.cs new file mode 100644 index 000000000000..9438375dcbdd --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordMapperType.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json.Nodes; + +namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; + +/// +/// The types of mapper supported by . +/// +public enum AzureAISearchMemoryRecordMapperType +{ + /// + /// Use the default mapper that is provided by the Azure AI Search client SDK. + /// + Default, + + /// + /// Use a custom mapper between and the data model. + /// + JsonObjectCustomMapper +} diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordService.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordService.cs new file mode 100644 index 000000000000..637128e8a891 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordService.cs @@ -0,0 +1,391 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Text.Json.Nodes; +using System.Threading; +using System.Threading.Tasks; +using Azure; +using Azure.Search.Documents; +using Azure.Search.Documents.Indexes; +using Azure.Search.Documents.Models; +using Microsoft.SemanticKernel.Memory; + +namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; + +/// +/// Service for storing and retrieving memory records, that uses Azure AI Search as the underlying storage. +/// +/// The data model to use for adding, updating and retrieving data from storage. +public sealed class AzureAISearchMemoryRecordService : IMemoryRecordService + where TDataModel : class +{ + /// A set of types that a key on the provided model may have. + private static readonly HashSet s_supportedKeyTypes = + [ + typeof(string) + ]; + + /// A set of types that vectors on the provided model may have. + /// + /// Azure AI Search is adding support for more types than just float32, but these are not available for use via the + /// SDK yet. We will update this list as the SDK is updated. + /// + /// + private static readonly HashSet s_supportedVectorTypes = + [ + typeof(ReadOnlyMemory), + typeof(ReadOnlyMemory?) + ]; + + /// Azure AI Search client that can be used to manage the list of indices in an Azure AI Search Service. + private readonly SearchIndexClient _searchIndexClient; + + /// The name of the key field for the collections that this class is used with. + private readonly string _keyPropertyName; + + /// Azure AI Search clients that can be used to manage data in an Azure AI Search Service index. + private readonly ConcurrentDictionary _searchClientsByIndex = new(); + + /// Optional configuration options for this class. + private readonly AzureAISearchMemoryRecordServiceOptions _options; + + /// The names of all non vector fields on the current model. 
+ private readonly List _nonVectorPropertyNames; + + /// + /// Initializes a new instance of the class. + /// + /// Azure AI Search client that can be used to manage the list of indices in an Azure AI Search Service. + /// Optional configuration options for this class. + /// Thrown when is null. + /// Thrown when options are misconfigured. + public AzureAISearchMemoryRecordService(SearchIndexClient searchIndexClient, AzureAISearchMemoryRecordServiceOptions? options = default) + { + // Verify. + Verify.NotNull(searchIndexClient); + + // Assign. + this._searchIndexClient = searchIndexClient; + this._options = options ?? new AzureAISearchMemoryRecordServiceOptions(); + + // Verify custom mapper. + if (this._options.MapperType == AzureAISearchMemoryRecordMapperType.JsonObjectCustomMapper && this._options.JsonObjectCustomMapper is null) + { + throw new ArgumentException($"The {nameof(AzureAISearchMemoryRecordServiceOptions.JsonObjectCustomMapper)} option needs to be set if a {nameof(AzureAISearchMemoryRecordServiceOptions.MapperType)} of {nameof(AzureAISearchMemoryRecordMapperType.JsonObjectCustomMapper)} has been chosen.", nameof(options)); + } + + // Enumerate public properties using configuration or attributes. + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; + if (this._options.MemoryRecordDefinition is not null) + { + properties = MemoryServiceModelPropertyReader.FindProperties(typeof(TDataModel), this._options.MemoryRecordDefinition, supportsMultipleVectors: true); + } + else + { + properties = MemoryServiceModelPropertyReader.FindProperties(typeof(TDataModel), supportsMultipleVectors: true); + } + + // Validate property types and store for later use. 
+ MemoryServiceModelPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); + MemoryServiceModelPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); + this._keyPropertyName = properties.keyProperty.Name; + + // Build the list of property names from the current model that are either key or data fields. + this._nonVectorPropertyNames = properties.dataProperties.Concat([properties.keyProperty]).Select(x => x.Name).ToList(); + } + + /// + public Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNullOrWhiteSpace(key); + + // Create Options. + var innerOptions = this.ConvertGetDocumentOptions(options); + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Get record. + var searchClient = this.GetSearchClient(collectionName); + return this.GetDocumentAndMapToDataModelAsync(searchClient, collectionName, key, innerOptions, cancellationToken); + } + + /// + public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, Memory.GetRecordOptions? options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + Verify.NotNull(keys); + + // Create Options + var innerOptions = this.ConvertGetDocumentOptions(options); + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Get records in parallel. + var searchClient = this.GetSearchClient(collectionName); + var tasks = keys.Select(key => this.GetDocumentAndMapToDataModelAsync(searchClient, collectionName, key, innerOptions, cancellationToken)); + var results = await Task.WhenAll(tasks).ConfigureAwait(false); + foreach (var result in results) { yield return result; } + } + + /// + public Task DeleteAsync(string key, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNullOrWhiteSpace(key); + + // Create options. 
+ var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Remove record. + var searchClient = this.GetSearchClient(collectionName); + return RunOperationAsync( + () => searchClient.DeleteDocumentsAsync(this._keyPropertyName, [key], new IndexDocumentsOptions(), cancellationToken), + collectionName, + "DeleteDocuments"); + } + + /// + public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(keys); + + // Create options. + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Remove records. + var searchClient = this.GetSearchClient(collectionName); + return RunOperationAsync( + () => searchClient.DeleteDocumentsAsync(this._keyPropertyName, keys, new IndexDocumentsOptions(), cancellationToken), + collectionName, + "DeleteDocuments"); + } + + /// + public async Task UpsertAsync(TDataModel record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(record); + + // Create options. + var collectionName = this.ChooseCollectionName(options?.CollectionName); + var innerOptions = new IndexDocumentsOptions { ThrowOnAnyError = true }; + + // Upsert record. + var searchClient = this.GetSearchClient(collectionName); + var results = await this.MapToStorageModelAndUploadDocumentAsync(searchClient, collectionName, [record], innerOptions, cancellationToken).ConfigureAwait(false); + return results.Value.Results[0].Key; + } + + /// + public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? 
options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + Verify.NotNull(records); + + // Create Options + var collectionName = this.ChooseCollectionName(options?.CollectionName); + var innerOptions = new IndexDocumentsOptions { ThrowOnAnyError = true }; + + // Upsert records + var searchClient = this.GetSearchClient(collectionName); + var results = await this.MapToStorageModelAndUploadDocumentAsync(searchClient, collectionName, records, innerOptions, cancellationToken).ConfigureAwait(false); + + // Get results + var resultKeys = results.Value.Results.Select(x => x.Key).ToList(); + foreach (var resultKey in resultKeys) { yield return resultKey; } + } + + /// + /// Get the document with the given key and map it to the data model using the configured mapper type. + /// + /// The search client to use when fetching the document. + /// The name of the collection to retrieve the record from. + /// The key of the record to get. + /// The azure ai search sdk options for getting a document. + /// The to monitor for cancellation requests. The default is . + /// The retrieved document, mapped to the consumer data model. + private async Task GetDocumentAndMapToDataModelAsync( + SearchClient searchClient, + string collectionName, + string key, + GetDocumentOptions innerOptions, + CancellationToken cancellationToken) + { + // Use the user provided mapper. + if (this._options.MapperType == AzureAISearchMemoryRecordMapperType.JsonObjectCustomMapper) + { + var jsonObject = await RunOperationAsync( + () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken), + collectionName, + "GetDocument").ConfigureAwait(false); + + return RunModelConversion( + () => this._options.JsonObjectCustomMapper!.MapFromStorageToDataModel(jsonObject), + collectionName, + "GetDocument"); + } + + // Use the built in Azure AI Search mapper. 
+ return await RunOperationAsync( + () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken), + collectionName, + "GetDocument").ConfigureAwait(false); + } + + /// + /// Map the data model to the storage model and upload the document. + /// + /// The search client to use when uploading the document. + /// The name of the collection to upsert the records to. + /// The records to upload. + /// The azure ai search sdk options for uploading a document. + /// The to monitor for cancellation requests. The default is . + /// The document upload result. + private Task> MapToStorageModelAndUploadDocumentAsync( + SearchClient searchClient, + string collectionName, + IEnumerable records, + IndexDocumentsOptions innerOptions, + CancellationToken cancellationToken) + { + // Use the user provided mapper. + if (this._options.MapperType == AzureAISearchMemoryRecordMapperType.JsonObjectCustomMapper) + { + var jsonObjects = RunModelConversion( + () => records.Select(this._options.JsonObjectCustomMapper!.MapFromDataToStorageModel), + collectionName, + "UploadDocuments"); + + return RunOperationAsync( + () => searchClient.UploadDocumentsAsync(jsonObjects, innerOptions, cancellationToken), + collectionName, + "UploadDocuments"); + } + + // Use the built in Azure AI Search mapper. + return RunOperationAsync( + () => searchClient.UploadDocumentsAsync(records, innerOptions, cancellationToken), + collectionName, + "UploadDocuments"); + } + + /// + /// Get a search client for the index specified. + /// Note: the index might not exist, but we avoid checking everytime and the extra latency. + /// + /// Index name + /// Search client ready to read/write + private SearchClient GetSearchClient(string indexName) + { + // Check the local cache first, if not found create a new one. + if (!this._searchClientsByIndex.TryGetValue(indexName, out SearchClient? 
client)) + { + client = this._searchIndexClient.GetSearchClient(indexName); + this._searchClientsByIndex[indexName] = client; + } + + return client; + } + + /// + /// Choose the right collection name to use for the operation by using the one provided + /// as part of the operation options, or the default one provided at construction time. + /// + /// The collection name provided on the operation options. + /// The collection name to use. + private string ChooseCollectionName(string? operationCollectionName) + { + var collectionName = operationCollectionName ?? this._options.DefaultCollectionName; + if (collectionName is null) + { +#pragma warning disable CA2208 // Instantiate argument exceptions correctly + throw new ArgumentException("Collection name must be provided in the operation options, since no default was provided at construction time.", "options"); +#pragma warning restore CA2208 // Instantiate argument exceptions correctly + } + + return collectionName; + } + + /// + /// Convert the public options model to the azure ai search options model. + /// + /// The public options model. + /// The azure ai search options model. + private GetDocumentOptions ConvertGetDocumentOptions(GetRecordOptions? options) + { + var innerOptions = new GetDocumentOptions(); + if (options?.IncludeVectors is false) + { + innerOptions.SelectedFields.AddRange(this._nonVectorPropertyNames); + } + + return innerOptions; + } + + /// + /// Run the given operation and wrap any with ."/> + /// + /// The response type of the operation. + /// The operation to run. + /// The name of the collection the operation is being run on. + /// The type of database operation being run. + /// The result of the operation. 
+ private static async Task RunOperationAsync(Func> operation, string collectionName, string operationName) + { + try + { + return await operation.Invoke().ConfigureAwait(false); + } + catch (AggregateException ex) when (ex.InnerException is RequestFailedException innerEx) + { + var wrapperException = new MemoryServiceCommandExecutionException("Call to memory service failed.", ex); + + // Using Open Telemetry standard for naming of these entries. + // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ + wrapperException.Data.Add("db.system", "AzureAISearch"); + wrapperException.Data.Add("db.collection.name", collectionName); + wrapperException.Data.Add("db.operation.name", operationName); + + throw wrapperException; + } + catch (RequestFailedException ex) + { + var wrapperException = new MemoryServiceCommandExecutionException("Call to memory service failed.", ex); + + // Using Open Telemetry standard for naming of these entries. + // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ + wrapperException.Data.Add("db.system", "AzureAISearch"); + wrapperException.Data.Add("db.collection.name", collectionName); + wrapperException.Data.Add("db.operation.name", operationName); + + throw wrapperException; + } + } + + /// + /// Run the given model conversion and wrap any exceptions with . + /// + /// The response type of the operation. + /// The operation to run. + /// The name of the collection the operation is being run on. + /// The type of database operation being run. + /// The result of the operation. + private static T RunModelConversion(Func operation, string collectionName, string operationName) + { + try + { + return operation.Invoke(); + } + catch (Exception ex) + { + var wrapperException = new MemoryDataModelMappingException("Failed to convert memory data model.", ex); + + // Using Open Telemetry standard for naming of these entries. 
+ // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ + wrapperException.Data.Add("db.system", "AzureAISearch"); + wrapperException.Data.Add("db.collection.name", collectionName); + wrapperException.Data.Add("db.operation.name", operationName); + + throw wrapperException; + } + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordServiceOptions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordServiceOptions.cs new file mode 100644 index 000000000000..f130f19ffc84 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordServiceOptions.cs @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json.Nodes; +using Microsoft.SemanticKernel.Memory; + +namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; + +/// +/// Options when creating a . +/// +public sealed class AzureAISearchMemoryRecordServiceOptions + where TDataModel : class +{ + /// + /// Gets or sets the default collection name to use. + /// If not provided here, the collection name will need to be provided for each operation or the operation will throw. + /// + public string? DefaultCollectionName { get; init; } = null; + + /// + /// Gets or sets the choice of mapper to use when converting between the data model and the azure ai search record. + /// + public AzureAISearchMemoryRecordMapperType MapperType { get; init; } = AzureAISearchMemoryRecordMapperType.Default; + + /// + /// Gets or sets an optional custom mapper to use when converting between the data model and the azure ai search record. + /// + /// + /// Set to to use this mapper."/> + /// + public IMemoryRecordMapper? JsonObjectCustomMapper { get; init; } = null; + + /// + /// Gets or sets an optional memory record definition that defines the schema of the memory record type. + /// + /// + /// If not provided, the schema will be inferred from the data model using reflection. 
+ /// In this case, the data model properties must be annotated with the appropriate attributes to indicate their usage. + /// See , and . + /// + public MemoryRecordDefinition? MemoryRecordDefinition { get; init; } = null; +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryCollectionFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryCollectionFixture.cs new file mode 100644 index 000000000000..d64a6ae51b95 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryCollectionFixture.cs @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Xunit; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; + +[CollectionDefinition("AzureAISearchMemoryCollection")] +public class AzureAISearchMemoryCollectionFixture : ICollectionFixture +{ +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryFixture.cs new file mode 100644 index 000000000000..723e88ef347a --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryFixture.cs @@ -0,0 +1,277 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using Azure; +using Azure.Search.Documents; +using Azure.Search.Documents.Indexes; +using Azure.Search.Documents.Indexes.Models; +using Azure.Search.Documents.Models; +using Microsoft.Extensions.Configuration; +using Microsoft.SemanticKernel.Memory; +using SemanticKernel.IntegrationTests.TestSettings.Memory; +using Xunit; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; + +/// +/// Helper class for setting up and tearing down Azure AI Search indexes for testing purposes. 
+/// +public class AzureAISearchMemoryFixture : IAsyncLifetime +{ + /// + /// Test index name which consists out of "hotels-" and the machine name with any non-alphanumeric characters removed. + /// +#pragma warning disable CA1308 // Normalize strings to uppercase + private readonly string _testIndexName = "hotels-" + new Regex("[^a-zA-Z0-9]").Replace(Environment.MachineName.ToLowerInvariant(), ""); +#pragma warning restore CA1308 // Normalize strings to uppercase + + /// + /// Test Configuration setup. + /// + private readonly IConfigurationRoot _configuration = new ConfigurationBuilder() + .AddJsonFile(path: "testsettings.json", optional: false, reloadOnChange: true) + .AddJsonFile(path: "testsettings.development.json", optional: true, reloadOnChange: true) + .AddEnvironmentVariables() + .AddUserSecrets() + .Build(); + + /// + /// Initializes a new instance of the class. + /// + public AzureAISearchMemoryFixture() + { + var config = this._configuration.GetRequiredSection("AzureAISearch").Get(); + Assert.NotNull(config); + this.Config = config; + this.SearchIndexClient = new SearchIndexClient(new Uri(config.ServiceUrl), new AzureKeyCredential(config.ApiKey)); + this.MemoryRecordDefinition = new MemoryRecordDefinition + { + Properties = new List + { + new MemoryRecordKeyProperty("HotelId"), + new MemoryRecordDataProperty("HotelName"), + new MemoryRecordDataProperty("Description"), + new MemoryRecordVectorProperty("DescriptionEmbedding"), + new MemoryRecordDataProperty("Tags"), + new MemoryRecordDataProperty("ParkingIncluded"), + new MemoryRecordDataProperty("LastRenovationDate"), + new MemoryRecordDataProperty("Rating"), + new MemoryRecordDataProperty("Address") + } + }; + } + + /// + /// Gets the Search Index Client to use for connecting to the Azure AI Search service. + /// + public SearchIndexClient SearchIndexClient { get; private set; } + + /// + /// Gets the name of the index that this fixture sets up and tears down. 
+    ///
+    public string TestIndexName => this._testIndexName;
+
+    /// <summary>
+    /// Gets the manually created memory record definition for our test model.
+    /// </summary>
+    public MemoryRecordDefinition MemoryRecordDefinition { get; private set; }
+
+    /// <summary>
+    /// Gets the configuration for the Azure AI Search service.
+    /// </summary>
+    public AzureAISearchConfiguration Config { get; private set; }
+
+    /// <summary>
+    /// Create / Recreate index and upload documents before test run.
+    /// </summary>
+    /// <returns>An async task.</returns>
+    public async Task InitializeAsync()
+    {
+        // Start from a clean slate: drop any index left over from a previous run before recreating it.
+        await AzureAISearchMemoryFixture.DeleteIndexIfExistsAsync(this._testIndexName, this.SearchIndexClient);
+        await AzureAISearchMemoryFixture.CreateIndexAsync(this._testIndexName, this.SearchIndexClient);
+        AzureAISearchMemoryFixture.UploadDocuments(this.SearchIndexClient.GetSearchClient(this._testIndexName));
+    }
+
+    /// <summary>
+    /// Delete the index after the test run.
+    /// </summary>
+    /// <returns>An async task.</returns>
+    public async Task DisposeAsync()
+    {
+        await AzureAISearchMemoryFixture.DeleteIndexIfExistsAsync(this._testIndexName, this.SearchIndexClient);
+    }
+
+    /// <summary>
+    /// Delete the index if it exists.
+    /// </summary>
+    /// <remarks>
+    /// The delete is issued directly and a "not found" (HTTP 404) response is treated as success,
+    /// so calling this for an index that does not exist is a no-op.
+    /// Any other failure is propagated to the caller.
+    /// </remarks>
+    /// <param name="indexName">The name of the index to delete.</param>
+    /// <param name="adminClient">The search index client to use for deleting the index.</param>
+    /// <returns>An async task.</returns>
+    public static async Task DeleteIndexIfExistsAsync(string indexName, SearchIndexClient adminClient)
+    {
+        try
+        {
+            await adminClient.DeleteIndexAsync(indexName);
+        }
+        catch (RequestFailedException ex) when (ex.Status == 404)
+        {
+            // The index is already absent, which is the state this method is asked to reach.
+        }
+    }
+
+    /// <summary>
+    /// Create an index with the given name.
+    /// </summary>
+    /// <param name="indexName">The name of the index to create.</param>
+    /// <param name="adminClient">The search index client to use for creating the index.</param>
+    /// <returns>An async task.</returns>
+    public static async Task CreateIndexAsync(string indexName, SearchIndexClient adminClient)
+    {
+        // Build the field list from the Hotel model.
+        var fields = new FieldBuilder().Build(typeof(Hotel));
+
+        // Replace the generated embedding field with an explicit vector field bound to the vector profile below.
+        var generatedEmbeddingField = fields.First(field => field.Name == "DescriptionEmbedding");
+        fields.Remove(generatedEmbeddingField);
+        fields.Add(new VectorSearchField("DescriptionEmbedding", 4, "my-vector-profile"));
+
+        // Vector search setup: one HNSW algorithm using cosine distance, exposed through a single profile.
+        var hnswConfiguration = new HnswAlgorithmConfiguration("my-hnsw-vector-config-1")
+        {
+            Parameters = new HnswParameters { Metric = VectorSearchAlgorithmMetric.Cosine }
+        };
+        var vectorSearch = new VectorSearch();
+        vectorSearch.Algorithms.Add(hnswConfiguration);
+        vectorSearch.Profiles.Add(new VectorSearchProfile("my-vector-profile", "my-hnsw-vector-config-1"));
+
+        var index = new SearchIndex(indexName, fields) { VectorSearch = vectorSearch };
+        index.Suggesters.Add(new SearchSuggester("sg", new[] { "HotelName", "Address/City" }));
+
+        await adminClient.CreateOrUpdateIndexAsync(index);
+    }
+
+    /// <summary>
+    /// Upload test documents to the index.
+    /// </summary>
+    /// <param name="searchClient">The client to use for uploading the documents.</param>
+ public static void UploadDocuments(SearchClient searchClient) + { + IndexDocumentsBatch batch = IndexDocumentsBatch.Create( + IndexDocumentsAction.Upload( + new Hotel() + { + HotelId = "BaseSet-1", + HotelName = "Hotel 1", + Description = "This is a great hotel", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + Tags = new[] { "pool", "air conditioning", "concierge" }, + ParkingIncluded = false, + LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), + Rating = 3.6, + Address = new Address() + { + City = "New York", + Country = "USA" + } + }), + IndexDocumentsAction.Upload( + new Hotel() + { + HotelId = "BaseSet-2", + HotelName = "Hotel 2", + Description = "This is a great hotel", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + Tags = new[] { "pool", "free wifi", "concierge" }, + ParkingIncluded = false, + LastRenovationDate = new DateTimeOffset(1979, 2, 18, 0, 0, 0, TimeSpan.Zero), + Rating = 3.60, + Address = new Address() + { + City = "Sarasota", + Country = "USA" + } + }), + IndexDocumentsAction.Upload( + new Hotel() + { + HotelId = "BaseSet-3", + HotelName = "Hotel 3", + Description = "This is a great hotel", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + Tags = new[] { "air conditioning", "bar", "continental breakfast" }, + ParkingIncluded = true, + LastRenovationDate = new DateTimeOffset(2015, 9, 20, 0, 0, 0, TimeSpan.Zero), + Rating = 4.80, + Address = new Address() + { + City = "Atlanta", + Country = "USA" + } + }), + IndexDocumentsAction.Upload( + new Hotel() + { + HotelId = "BaseSet-4", + HotelName = "Hotel 4", + Description = "This is a great hotel", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + Tags = new[] { "concierge", "view", "24-hour front desk service" }, + ParkingIncluded = true, + LastRenovationDate = new DateTimeOffset(1960, 2, 06, 0, 0, 0, TimeSpan.Zero), + Rating = 4.60, + Address = new Address() + { + City = "San Antonio", + Country = "USA" + } + }) + ); + + 
searchClient.IndexDocuments(batch); + } + +#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + public class Hotel + { + [SimpleField(IsKey = true, IsFilterable = true)] + [MemoryRecordKey] + public string HotelId { get; set; } + + [SearchableField(IsSortable = true)] + [MemoryRecordData] + public string HotelName { get; set; } + + [SearchableField(AnalyzerName = LexicalAnalyzerName.Values.EnLucene)] + [MemoryRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + public string Description { get; set; } + + [MemoryRecordVector] + public ReadOnlyMemory? DescriptionEmbedding { get; set; } + + [SearchableField(IsFilterable = true, IsFacetable = true)] + [MemoryRecordData] +#pragma warning disable CA1819 // Properties should not return arrays + public string[] Tags { get; set; } +#pragma warning restore CA1819 // Properties should not return arrays + + [SimpleField(IsFilterable = true, IsSortable = true, IsFacetable = true)] + [MemoryRecordData] + public bool? ParkingIncluded { get; set; } + + [SimpleField(IsFilterable = true, IsSortable = true, IsFacetable = true)] + [MemoryRecordData] + public DateTimeOffset? LastRenovationDate { get; set; } + + [SimpleField(IsFilterable = true, IsSortable = true, IsFacetable = true)] + [MemoryRecordData] + public double? Rating { get; set; } + + [SearchableField] + [MemoryRecordData] + public Address Address { get; set; } + } + + public record Address + { + [SearchableField(IsFilterable = true, IsSortable = true, IsFacetable = true)] + public string City { get; set; } + + [SearchableField(IsFilterable = true, IsSortable = true, IsFacetable = true)] + public string Country { get; set; } + } +#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. 
+} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryRecordServiceTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryRecordServiceTests.cs new file mode 100644 index 000000000000..8f7edf46e2d9 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryRecordServiceTests.cs @@ -0,0 +1,300 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Linq; +using System.Threading.Tasks; +using Azure.Search.Documents.Indexes; +using Azure; +using Microsoft.SemanticKernel.Connectors.AzureAISearch; +using Microsoft.SemanticKernel.Memory; +using Xunit; +using Xunit.Abstractions; +using static SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch.AzureAISearchMemoryFixture; +using System.Text.Json.Nodes; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; + +/// +/// Integration tests for class. +/// Tests work with Azure AI Search Instance. +/// +[Collection("AzureAISearchMemoryCollection")] +public sealed class AzureAISearchMemoryRecordServiceTests(ITestOutputHelper output, AzureAISearchMemoryFixture fixture) : IClassFixture +{ + // If null, all tests will be enabled + private const string SkipReason = null; //"Requires Azure AI Search Service instance up and running"; + + [Theory(Skip = SkipReason)] + [InlineData(true)] + [InlineData(false)] + public async Task ItCanUpsertDocumentToMemoryStoreAsync(bool useRecordDefinition) + { + // Arrange + var options = new AzureAISearchMemoryRecordServiceOptions + { + DefaultCollectionName = fixture.TestIndexName, + MemoryRecordDefinition = useRecordDefinition ? 
fixture.MemoryRecordDefinition : null + }; + var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + + // Act + var hotel = new Hotel() + { + HotelId = "Upsert-1", + HotelName = "MyHotel1", + Description = "My Hotel is great.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + Tags = new[] { "pool", "air conditioning", "concierge" }, + ParkingIncluded = true, + LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), + Rating = 3.6, + Address = new Address() + { + City = "New York", + Country = "USA" + } + }; + var upsertResult = await sut.UpsertAsync(hotel); + var getResult = await sut.GetAsync("Upsert-1"); + + // Assert + Assert.NotNull(upsertResult); + Assert.Equal("Upsert-1", upsertResult); + + Assert.NotNull(getResult); + Assert.Equal(hotel.HotelName, getResult.HotelName); + Assert.Equal(hotel.Description, getResult.Description); + Assert.NotNull(getResult.DescriptionEmbedding); + Assert.Equal(hotel.DescriptionEmbedding.Value, getResult.DescriptionEmbedding.Value); + Assert.Equal(hotel.Tags, getResult.Tags); + Assert.Equal(hotel.ParkingIncluded, getResult.ParkingIncluded); + Assert.Equal(hotel.LastRenovationDate, getResult.LastRenovationDate); + Assert.Equal(hotel.Rating, getResult.Rating); + Assert.Equal(hotel.Address.City, getResult.Address.City); + Assert.Equal(hotel.Address.Country, getResult.Address.Country); + + // Output + output.WriteLine(upsertResult); + output.WriteLine(getResult.ToString()); + } + + [Fact(Skip = SkipReason)] + public async Task ItCanUpsertManyDocumentsToMemoryStoreAsync() + { + // Arrange + var options = new AzureAISearchMemoryRecordServiceOptions + { + DefaultCollectionName = fixture.TestIndexName + }; + var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + + // Act + var results = sut.UpsertBatchAsync( + [ + CreateTestHotel("UpsertMany-1"), + CreateTestHotel("UpsertMany-2"), + CreateTestHotel("UpsertMany-3"), + ]); + + // Assert + 
Assert.NotNull(results); + var resultsList = await results.ToListAsync(); + + Assert.Equal(3, resultsList.Count); + Assert.Contains("UpsertMany-1", resultsList); + Assert.Contains("UpsertMany-2", resultsList); + Assert.Contains("UpsertMany-3", resultsList); + + // Output + foreach (var result in resultsList) + { + output.WriteLine(result); + } + } + + [Theory(Skip = SkipReason)] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task ItCanGetDocumentFromMemoryStoreAsync(bool includeVectors, bool useRecordDefinition) + { + // Arrange + var options = new AzureAISearchMemoryRecordServiceOptions + { + DefaultCollectionName = fixture.TestIndexName, + MemoryRecordDefinition = useRecordDefinition ? fixture.MemoryRecordDefinition : null + }; + var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + + // Act + var getResult = await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = includeVectors }); + + // Assert + Assert.NotNull(getResult); + + Assert.Equal("Hotel 1", getResult.HotelName); + Assert.Equal("This is a great hotel", getResult.Description); + Assert.Equal(includeVectors, getResult.DescriptionEmbedding != null); + if (includeVectors) + { + Assert.Equal(new[] { 30f, 31f, 32f, 33f }, getResult.DescriptionEmbedding!.Value.ToArray()); + } + Assert.Equal(new[] { "pool", "air conditioning", "concierge" }, getResult.Tags); + Assert.False(getResult.ParkingIncluded); + Assert.Equal(new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), getResult.LastRenovationDate); + Assert.Equal(3.6, getResult.Rating); + Assert.Equal("New York", getResult.Address.City); + Assert.Equal("USA", getResult.Address.Country); + + // Output + output.WriteLine(getResult.ToString()); + } + + [Fact(Skip = SkipReason)] + public async Task ItCanGetManyDocumentsFromMemoryStoreAsync() + { + // Arrange + var options = new AzureAISearchMemoryRecordServiceOptions + { + 
DefaultCollectionName = fixture.TestIndexName + }; + var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + + // Act + var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-2", "BaseSet-3", "BaseSet-4"], new GetRecordOptions { IncludeVectors = true }); + + // Assert + Assert.NotNull(hotels); + var hotelsList = await hotels.ToListAsync(); + Assert.Equal(4, hotelsList.Count); + + // Output + foreach (var hotel in hotelsList) + { + output.WriteLine(hotel.ToString()); + } + } + + [Fact] + public async Task ItThrowsForPartialGetBatchResultAsync() + { + // Arrange. + var options = new AzureAISearchMemoryRecordServiceOptions + { + DefaultCollectionName = fixture.TestIndexName + }; + var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + + // Act. + await Assert.ThrowsAsync(async () => await sut.GetBatchAsync(["BaseSet-1", "BaseSet-5", "BaseSet-2"]).ToListAsync()); + } + + [Theory(Skip = SkipReason)] + [InlineData(true)] + [InlineData(false)] + public async Task ItCanRemoveDocumentFromMemoryStoreAsync(bool useRecordDefinition) + { + // Arrange + var options = new AzureAISearchMemoryRecordServiceOptions + { + DefaultCollectionName = fixture.TestIndexName, + MemoryRecordDefinition = useRecordDefinition ? 
fixture.MemoryRecordDefinition : null + }; + var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + await sut.UpsertAsync(CreateTestHotel("Remove-1")); + + // Act + await sut.DeleteAsync("Remove-1"); + + // Assert + await Assert.ThrowsAsync(async () => await sut.GetAsync("Remove-1", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact(Skip = SkipReason)] + public async Task ItCanRemoveManyDocumentsFromMemoryStoreAsync() + { + // Arrange + var options = new AzureAISearchMemoryRecordServiceOptions + { + DefaultCollectionName = fixture.TestIndexName + }; + var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + await sut.UpsertAsync(CreateTestHotel("RemoveMany-1")); + await sut.UpsertAsync(CreateTestHotel("RemoveMany-2")); + await sut.UpsertAsync(CreateTestHotel("RemoveMany-3")); + + // Act + await sut.DeleteBatchAsync(["RemoveMany-1", "RemoveMany-2", "RemoveMany-3"]); + + // Assert + await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); + await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); + await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact(Skip = SkipReason)] + public async Task ItThrowsCommandExecutionExceptionForFailedConnectionAsync() + { + // Arrange + var options = new AzureAISearchMemoryRecordServiceOptions { DefaultCollectionName = fixture.TestIndexName }; + var searchIndexClient = new SearchIndexClient(new Uri("https://localhost:12345"), new AzureKeyCredential("12345")); + var sut = new AzureAISearchMemoryRecordService(searchIndexClient, options); + + // Act & Assert + await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact(Skip = SkipReason)] + public async Task 
ItThrowsCommandExecutionExceptionForFailedAuthenticationAsync() + { + // Arrange + var options = new AzureAISearchMemoryRecordServiceOptions { DefaultCollectionName = fixture.TestIndexName }; + var searchIndexClient = new SearchIndexClient(new Uri(fixture.Config.ServiceUrl), new AzureKeyCredential("12345")); + var sut = new AzureAISearchMemoryRecordService(searchIndexClient, options); + + // Act & Assert + await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact(Skip = SkipReason)] + public async Task ItThrowsMappingExceptionForFailedMapperAsync() + { + // Arrange + var options = new AzureAISearchMemoryRecordServiceOptions { DefaultCollectionName = fixture.TestIndexName, MapperType = AzureAISearchMemoryRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = new FailingMapper() }; + var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + + // Act & Assert + await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); + } + + private static Hotel CreateTestHotel(string hotelId) => new() + { + HotelId = hotelId, + HotelName = $"MyHotel {hotelId}", + Description = "My Hotel is great.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + Tags = new[] { "pool", "air conditioning", "concierge" }, + ParkingIncluded = true, + LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), + Rating = 3.6, + Address = new Address + { + City = "New York", + Country = "USA" + } + }; + + private class FailingMapper : IMemoryRecordMapper + { + public JsonObject MapFromDataToStorageModel(Hotel dataModel) + { + throw new NotImplementedException(); + } + + public Hotel MapFromStorageToDataModel(JsonObject storageModel, GetRecordOptions? 
options = null) + { + throw new NotImplementedException(); + } + } +} diff --git a/dotnet/src/IntegrationTests/IntegrationTests.csproj b/dotnet/src/IntegrationTests/IntegrationTests.csproj index df5afa473ce7..f80f75d770cf 100644 --- a/dotnet/src/IntegrationTests/IntegrationTests.csproj +++ b/dotnet/src/IntegrationTests/IntegrationTests.csproj @@ -66,6 +66,7 @@ + diff --git a/dotnet/src/IntegrationTests/TestSettings/Memory/AzureAISearchConfiguration.cs b/dotnet/src/IntegrationTests/TestSettings/Memory/AzureAISearchConfiguration.cs new file mode 100644 index 000000000000..fd4043ef9b83 --- /dev/null +++ b/dotnet/src/IntegrationTests/TestSettings/Memory/AzureAISearchConfiguration.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; + +namespace SemanticKernel.IntegrationTests.TestSettings.Memory; + +[SuppressMessage("Design", "CA1054:URI-like parameters should not be strings", Justification = "This is just for test configuration")] +public sealed class AzureAISearchConfiguration(string serviceUrl, string apiKey) +{ + [SuppressMessage("Design", "CA1056:URI-like properties should not be strings", Justification = "This is just for test configuration")] + public string ServiceUrl { get; set; } = serviceUrl; + + public string ApiKey { get; set; } = apiKey; +} diff --git a/dotnet/src/IntegrationTests/TestSettings/Memory/AzureAISearchSetup.psm1 b/dotnet/src/IntegrationTests/TestSettings/Memory/AzureAISearchSetup.psm1 new file mode 100644 index 000000000000..64563abdeeb0 --- /dev/null +++ b/dotnet/src/IntegrationTests/TestSettings/Memory/AzureAISearchSetup.psm1 @@ -0,0 +1,74 @@ +# Copyright (c) Microsoft. All rights reserved. + +# This module requires powershell 7 and the Az and Az.Search modules. You may need to import Az and install Az.Search. 
+# Import-Module -Name Az
+# Install-Module -Name Az.Search
+
+# Before running any of the functions you will need to connect to your azure account and pick the appropriate subscription.
+# Connect-AzAccount
+# Select-AzSubscription -SubscriptionName "My Dev Subscription"
+
+$resourceGroup = "sk-integration-test-infra"
+$aiSearchResourceName = "aisearch-integration-test-basic"
+
+<#
+.SYNOPSIS
+    Setup the infra required for Azure AI Search Integration tests,
+    retrieve the connection information for it, and update the secrets
+    store with these settings.
+
+.Parameter OverrideResourceGroup
+    Optional override resource group name if the default doesn't work.
+
+.Parameter OverrideAISearchResourceName
+    Optional override ai search resource name if the default doesn't work.
+#>
+function New-AzureAISearchIntegrationInfra($overrideResourceGroup = $resourceGroup, $overrideAISearchResourceName = $aiSearchResourceName) {
+    # Create the resource group if it doesn't exist.
+    Get-AzResourceGroup -Name $overrideResourceGroup -ErrorVariable notPresent -ErrorAction SilentlyContinue
+    if ($notPresent) {
+        Write-Host "Resource Group does not exist, creating '$overrideResourceGroup' ..."
+        New-AzResourceGroup -Name $overrideResourceGroup -Location "North Europe"
+    }
+
+    # Create the ai search service if it doesn't exist (look it up with the same names that are used to create it).
+    $service = Get-AzSearchService -ResourceGroupName $overrideResourceGroup -Name $overrideAISearchResourceName
+    if (-not $service) {
+        Write-Host "Service does not exist, creating '$overrideAISearchResourceName' ..."
+        New-AzSearchService -ResourceGroupName $overrideResourceGroup -Name $overrideAISearchResourceName -Sku "Basic" -Location "North Europe" -PartitionCount 1 -ReplicaCount 1 -HostingMode Default
+    }
+
+    # Set the required local secrets.
+    Set-AzureAISearchIntegrationInfraUserSecrets -OverrideResourceGroup $overrideResourceGroup -OverrideAISearchResourceName $overrideAISearchResourceName
+}
+
+<#
+.SYNOPSIS
+    Set the user secrets required to run the Azure AI Search integration tests.
+
+.Parameter OverrideResourceGroup
+    Optional override resource group name if the default doesn't work.
+
+.Parameter OverrideAISearchResourceName
+    Optional override ai search resource name if the default doesn't work.
+#>
+function Set-AzureAISearchIntegrationInfraUserSecrets($overrideResourceGroup = $resourceGroup, $overrideAISearchResourceName = $aiSearchResourceName) {
+    # Set the required local secrets.
+    $keys = Get-AzSearchAdminKeyPair -ResourceGroupName $overrideResourceGroup -ServiceName $overrideAISearchResourceName
+    dotnet user-secrets set "AzureAISearch:ServiceUrl" "https://$overrideAISearchResourceName.search.windows.net" --project ../../IntegrationTests.csproj
+    dotnet user-secrets set "AzureAISearch:ApiKey" $keys.Primary --project ../../IntegrationTests.csproj
+}
+
+<#
+.SYNOPSIS
+    Tear down the infra required for Azure AI Search Integration tests.
+
+.Parameter OverrideResourceGroup
+    Optional override resource group name if the default doesn't work.
+
+.Parameter OverrideAISearchResourceName
+    Optional override ai search resource name if the default doesn't work.
+#>
+function Remove-AzureAISearchIntegrationInfra($overrideResourceGroup = $resourceGroup, $overrideAISearchResourceName = $aiSearchResourceName) {
+    Remove-AzSearchService -ResourceGroupName $overrideResourceGroup -Name $overrideAISearchResourceName
+}
\ No newline at end of file
diff --git a/dotnet/src/InternalUtilities/src/Schema/VectorStoreModelPropertyReader.cs b/dotnet/src/InternalUtilities/src/Schema/VectorStoreModelPropertyReader.cs
new file mode 100644
index 000000000000..2df10d7b6aac
--- /dev/null
+++ b/dotnet/src/InternalUtilities/src/Schema/VectorStoreModelPropertyReader.cs
@@ -0,0 +1,214 @@
+// Copyright (c) Microsoft.
All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Reflection;
+using System.Text.Json;
+using System.Text.Json.Nodes;
+using System.Text.Json.Serialization;
+using System.Text.Json.Serialization.Metadata;
+using JsonSchemaMapper;
+using Microsoft.SemanticKernel.Memory;
+
+namespace Microsoft.SemanticKernel;
+
+/// <summary>
+/// Contains helpers for reading memory service model properties and their attributes.
+/// </summary>
+internal static class MemoryServiceModelPropertyReader
+{
+    /// <summary>
+    /// Cache of property enumerations so that we don't incur reflection costs with each invocation.
+    /// </summary>
+    /// <remarks>
+    /// The cache is static and therefore shared by every caller, so a concurrent dictionary is used to keep
+    /// simultaneous reads and writes safe. Entries are keyed by both the data model type and the
+    /// multiple-vector flag, because the validation outcome depends on both.
+    /// </remarks>
+    private static readonly System.Collections.Concurrent.ConcurrentDictionary<(Type type, bool supportsMultipleVectors), (PropertyInfo keyProperty, List<PropertyInfo> dataProperties, List<PropertyInfo> vectorProperties)> s_propertiesCache = new();
+
+    /// <summary>
+    /// Find the properties with <see cref="MemoryRecordKeyAttribute"/>, <see cref="MemoryRecordDataAttribute"/> and <see cref="MemoryRecordVectorAttribute"/> attributes
+    /// and verify that they exist and that we have the expected numbers of each type.
+    /// Return those properties in separate categories.
+    /// </summary>
+    /// <param name="type">The data model to find the properties on.</param>
+    /// <param name="supportsMultipleVectors">A value indicating whether multiple vector properties are supported instead of just one.</param>
+    /// <returns>The categorized properties.</returns>
+    /// <exception cref="ArgumentException">
+    /// Thrown if there is no key property or more than one, or, when only a single vector is supported,
+    /// if there is no vector property or more than one.
+    /// </exception>
+    public static (PropertyInfo keyProperty, List<PropertyInfo> dataProperties, List<PropertyInfo> vectorProperties) FindProperties(Type type, bool supportsMultipleVectors)
+    {
+        var cacheKey = (type, supportsMultipleVectors);
+
+        // First check the cache.
+        if (s_propertiesCache.TryGetValue(cacheKey, out var cachedProperties))
+        {
+            return cachedProperties;
+        }
+
+        PropertyInfo? keyProperty = null;
+        List<PropertyInfo> dataProperties = new();
+        List<PropertyInfo> vectorProperties = new();
+        bool singleVectorPropertyFound = false;
+
+        foreach (var property in type.GetProperties())
+        {
+            // Get Key property.
+            if (property.GetCustomAttribute<MemoryRecordKeyAttribute>() is not null)
+            {
+                if (keyProperty is not null)
+                {
+                    throw new ArgumentException($"Multiple key properties found on type {type.FullName}.");
+                }
+
+                keyProperty = property;
+            }
+
+            // Get data properties.
+            if (property.GetCustomAttribute<MemoryRecordDataAttribute>() is not null)
+            {
+                dataProperties.Add(property);
+            }
+
+            // Get Vector properties.
+            if (property.GetCustomAttribute<MemoryRecordVectorAttribute>() is not null)
+            {
+                // Add all vector properties if we support multiple vectors.
+                if (supportsMultipleVectors)
+                {
+                    vectorProperties.Add(property);
+                }
+                // Add only one vector property if we don't support multiple vectors.
+                else if (!singleVectorPropertyFound)
+                {
+                    vectorProperties.Add(property);
+                    singleVectorPropertyFound = true;
+                }
+                else
+                {
+                    throw new ArgumentException($"Multiple vector properties found on type {type.FullName} while only one is supported.");
+                }
+            }
+        }
+
+        // Check that we have a key property.
+        if (keyProperty is null)
+        {
+            throw new ArgumentException($"No key property found on type {type.FullName}.");
+        }
+
+        // Check that we have one vector property if we don't have named vectors.
+        if (!supportsMultipleVectors && !singleVectorPropertyFound)
+        {
+            throw new ArgumentException($"No vector property found on type {type.FullName}.");
+        }
+
+        // Update the cache. Only fully validated results are stored, so a failed lookup is re-validated next time.
+        s_propertiesCache[cacheKey] = (keyProperty, dataProperties, vectorProperties);
+
+        return (keyProperty, dataProperties, vectorProperties);
+    }
+
+    /// <summary>
+    /// Find the properties listed in the <paramref name="memoryRecordDefinition"/> on the <paramref name="type"/> and verify
+    /// that they exist and that we have the expected numbers of each type.
+    /// Return those properties in separate categories.
+    /// </summary>
+    /// <param name="type">The data model to find the properties on.</param>
+    /// <param name="memoryRecordDefinition">The property configuration.</param>
+    /// <param name="supportsMultipleVectors">A value indicating whether multiple vector properties are supported instead of just one.</param>
+    /// <returns>The categorized properties.</returns>
+    /// <exception cref="ArgumentException">
+    /// Thrown if a configured property does not exist on the type, if the definition contains an unknown property kind,
+    /// if there is no key property or more than one, or, when only a single vector is supported,
+    /// if there is no vector property or more than one.
+    /// </exception>
+    public static (PropertyInfo keyProperty, List<PropertyInfo> dataProperties, List<PropertyInfo> vectorProperties) FindProperties(Type type, MemoryRecordDefinition memoryRecordDefinition, bool supportsMultipleVectors)
+    {
+        PropertyInfo? keyProperty = null;
+        List<PropertyInfo> dataProperties = new();
+        List<PropertyInfo> vectorProperties = new();
+        bool singleVectorPropertyFound = false;
+
+        foreach (MemoryRecordProperty property in memoryRecordDefinition.Properties)
+        {
+            // Key.
+            if (property is MemoryRecordKeyProperty keyPropertyInfo)
+            {
+                if (keyProperty is not null)
+                {
+                    throw new ArgumentException($"Multiple key properties specified for type {type.FullName}.");
+                }
+
+                keyProperty = type.GetProperty(keyPropertyInfo.PropertyName);
+                if (keyProperty == null)
+                {
+                    throw new ArgumentException($"Key property '{keyPropertyInfo.PropertyName}' not found on type {type.FullName}.");
+                }
+            }
+            // Data.
+            else if (property is MemoryRecordDataProperty dataPropertyInfo)
+            {
+                var dataProperty = type.GetProperty(dataPropertyInfo.PropertyName);
+                if (dataProperty == null)
+                {
+                    throw new ArgumentException($"Data property '{dataPropertyInfo.PropertyName}' not found on type {type.FullName}.");
+                }
+
+                dataProperties.Add(dataProperty);
+            }
+            // Vector.
+            else if (property is MemoryRecordVectorProperty vectorPropertyInfo)
+            {
+                var vectorProperty = type.GetProperty(vectorPropertyInfo.PropertyName);
+                if (vectorProperty == null)
+                {
+                    throw new ArgumentException($"Vector property '{vectorPropertyInfo.PropertyName}' not found on type {type.FullName}.");
+                }
+
+                // Add all vector properties if we support multiple vectors.
+                if (supportsMultipleVectors)
+                {
+                    vectorProperties.Add(vectorProperty);
+                }
+                // Add only one vector property if we don't support multiple vectors.
+                else if (!singleVectorPropertyFound)
+                {
+                    vectorProperties.Add(vectorProperty);
+                    singleVectorPropertyFound = true;
+                }
+                else
+                {
+                    throw new ArgumentException($"Multiple vector properties configured for type {type.FullName} while only one is supported.");
+                }
+            }
+            else
+            {
+                throw new ArgumentException($"Unknown property type '{property.GetType().FullName}' in memory record definition.");
+            }
+        }
+
+        // Check that we have a key property, mirroring the attribute based overload.
+        // Without this, a definition that omits the key would hand a null key property back to the caller.
+        if (keyProperty is null)
+        {
+            throw new ArgumentException($"No key property configured for type {type.FullName}.");
+        }
+
+        // Check that we have one vector property if we don't have named vectors.
+        if (!supportsMultipleVectors && !singleVectorPropertyFound)
+        {
+            throw new ArgumentException($"No vector property configured for type {type.FullName}.");
+        }
+
+        return (keyProperty, dataProperties, vectorProperties);
+    }
+
+    /// <summary>
+    /// Verify that the given properties are of the supported types.
+    /// </summary>
+    /// <param name="properties">The properties to check.</param>
+    /// <param name="supportedTypes">A set of supported types that the provided properties may have.</param>
+    /// <param name="propertyCategoryDescription">A description of the category of properties being checked. Used for error messaging.</param>
+    /// <exception cref="ArgumentException">Thrown if any of the properties are not in the given set of types.</exception>
+    public static void VerifyPropertyTypes(List<PropertyInfo> properties, HashSet<Type> supportedTypes, string propertyCategoryDescription)
+    {
+        foreach (var property in properties)
+        {
+            if (!supportedTypes.Contains(property.PropertyType))
+            {
+                var supportedTypesString = string.Join(", ", supportedTypes.Select(t => t.FullName));
+                throw new ArgumentException($"{propertyCategoryDescription} properties must be one of the supported types: {supportedTypesString}. Type of {property.Name} is {property.PropertyType.FullName}.");
+            }
+        }
+    }
+
+    /// <summary>
+    /// Get the serialized name of a property by first checking the <see cref="JsonPropertyNameAttribute"/> and then falling back to the property name.
+    /// </summary>
+    /// <param name="property">The property to retrieve a serialized name for.</param>
+    /// <returns>The serialized name for the property.</returns>
+    public static string GetSerializedPropertyName(PropertyInfo property)
+    {
+        return property.GetCustomAttribute<JsonPropertyNameAttribute>()?.Name ?? property.Name;
+    }
+}
diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordMapper.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordMapper.cs
new file mode 100644
index 000000000000..8a4e4ce84d7a
--- /dev/null
+++ b/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordMapper.cs
@@ -0,0 +1,27 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+namespace Microsoft.SemanticKernel.Memory;
+
+///
+/// Interface for mapping between a storage model, and the consumer data model.
+/// +/// The consumer data model to map to or from. +/// The storage model to map to or from. +public interface IMemoryRecordMapper + where TConsumerDataModel : class +{ + /// + /// Map from the consumer data model to the storage model. + /// + /// The consumer data model record to map. + /// The mapped result. + TStorageModel MapFromDataToStorageModel(TConsumerDataModel dataModel); + + /// + /// Map from the storage model to the consumer data model. + /// + /// The storage data model record to map. + /// The of the operation that this mapping is needed for. + /// The mapped result. + TConsumerDataModel MapFromStorageToDataModel(TStorageModel storageModel, GetRecordOptions? options = default); +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs index 9419eef1e81c..3d3149c4ce7c 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs @@ -24,7 +24,9 @@ public interface IMemoryRecordService /// Optional options for retrieving the record. /// The to monitor for cancellation requests. The default is . /// The memory record if found, otherwise null. - Task GetAsync(TKey key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + /// Throw when the command fails to execute for any reason. + /// Throw when mapping between the storage model and data model fails. + Task GetAsync(TKey key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); /// /// Gets a batch of memory records from the data store. Does not guarantee that the collection exists. @@ -35,7 +37,9 @@ public interface IMemoryRecordService /// Optional options for retrieving the records. /// The to monitor for cancellation requests. The default is . 
/// The vecmemorytor records associated with the unique keys provided. - IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + /// Throw when the command fails to execute for any reason. + /// Throw when mapping between the storage model and data model fails. + IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, CancellationToken cancellationToken = default); /// /// Deletes a memory record from the data store. Does not guarantee that the collection exists. @@ -44,6 +48,7 @@ public interface IMemoryRecordService /// Optional options for removing the record. /// The to monitor for cancellation requests. The default is . /// The unique identifier for the memory record. + /// Throw when the command fails to execute for any reason. Task DeleteAsync(TKey key, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); /// @@ -53,6 +58,7 @@ public interface IMemoryRecordService /// The unique ids associated with the memory records to remove. /// Optional options for removing the records. /// The to monitor for cancellation requests. The default is . + /// Throw when the command fails to execute for any reason. Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); /// @@ -64,6 +70,8 @@ public interface IMemoryRecordService /// Optional options for upserting the record. /// The to monitor for cancellation requests. The default is . /// The unique identifier for the memory record. + /// Throw when the command fails to execute for any reason. + /// Throw when mapping between the storage model and data model fails. Task UpsertAsync(TDataModel record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default); /// @@ -76,5 +84,7 @@ public interface IMemoryRecordService /// Optional options for upserting the records. 
/// The to monitor for cancellation requests. The default is . /// The unique identifiers for the memory records. + /// Throw when the command fails to execute for any reason. + /// Throw when mapping between the storage model and data model fails. IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default); } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryDataModelMappingException.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryDataModelMappingException.cs new file mode 100644 index 000000000000..242ef9f18e00 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryDataModelMappingException.cs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Exception thrown when a failure occurs while trying to convert memory models for storage or retrieval. +/// +public class MemoryDataModelMappingException : KernelException +{ + /// + /// Initializes a new instance of the class. + /// + public MemoryDataModelMappingException() + { + } + + /// + /// Initializes a new instance of the class with a specified error message. + /// + /// The error message that explains the reason for the exception. + public MemoryDataModelMappingException(string? message) : base(message) + { + } + + /// + /// Initializes a new instance of the class with a specified error message and a reference to the inner exception that is the cause of this exception. + /// + /// The error message that explains the reason for the exception. + /// The exception that is the cause of the current exception, or a null reference if no inner exception is specified. + public MemoryDataModelMappingException(string? message, Exception? 
innerException) : base(message, innerException) + { + } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryServiceCommandExecutionException.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryServiceCommandExecutionException.cs new file mode 100644 index 000000000000..a78612f14dcd --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryServiceCommandExecutionException.cs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Exception thrown when a memory service command fails, such as upserting a record or deleting a collection. +/// +public class MemoryServiceCommandExecutionException : KernelException +{ + /// + /// Initializes a new instance of the class. + /// + public MemoryServiceCommandExecutionException() + { + } + + /// + /// Initializes a new instance of the class with a specified error message. + /// + /// The error message that explains the reason for the exception. + public MemoryServiceCommandExecutionException(string? message) : base(message) + { + } + + /// + /// Initializes a new instance of the class with a specified error message and a reference to the inner exception that is the cause of this exception. + /// + /// The error message that explains the reason for the exception. + /// The exception that is the cause of the current exception, or a null reference if no inner exception is specified. + public MemoryServiceCommandExecutionException(string? message, Exception? 
innerException) : base(message, innerException) + { + } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDataProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDataProperty.cs new file mode 100644 index 000000000000..383d33ba5a5a --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDataProperty.cs @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// A description of a data property for storage in a memory store. +/// +[Experimental("SKEXP0001")] +public sealed class MemoryRecordDataProperty : MemoryRecordProperty +{ + /// + /// Initializes a new instance of the class. + /// + /// The name of the property. + public MemoryRecordDataProperty(string propertyName) + : base(propertyName) + { + } + + /// + /// Initializes a new instance of the class by cloning the given source. + /// + /// The source to clone + public MemoryRecordDataProperty(MemoryRecordDataProperty source) + : base(source.PropertyName) + { + this.HasEmbedding = source.HasEmbedding; + this.EmbeddingPropertyName = source.EmbeddingPropertyName; + } + + /// + /// Gets or sets a value indicating whether this data property has an associated embedding property. + /// + /// Defaults to + public bool HasEmbedding { get; init; } + + /// + /// Gets or sets the name of the property that contains the embedding for this data property. + /// + public string? 
EmbeddingPropertyName { get; init; } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDefinition.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDefinition.cs new file mode 100644 index 000000000000..859aea9a51b7 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDefinition.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// A description of the properties of a record stored in a memory store, plus how the properties are used. +/// +[Experimental("SKEXP0001")] +public sealed class MemoryRecordDefinition +{ + /// Empty static list for initialization purposes. + private static readonly List s_emptyFields = new(); + + /// + /// The list of properties that are stored in the record. + /// + public IReadOnlyList Properties { get; init; } = s_emptyFields; +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordKeyProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordKeyProperty.cs new file mode 100644 index 000000000000..c545acdeb69d --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordKeyProperty.cs @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// A description of a key property for storage in a memory store. +/// +[Experimental("SKEXP0001")] +public sealed class MemoryRecordKeyProperty : MemoryRecordProperty +{ + /// + /// Initializes a new instance of the class. + /// + /// The name of the property. + public MemoryRecordKeyProperty(string propertyName) + : base(propertyName) + { + } + + /// + /// Initializes a new instance of the class by cloning the given source. 
+ /// + /// The source to clone + public MemoryRecordKeyProperty(MemoryRecordKeyProperty source) + : base(source.PropertyName) + { + } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordProperty.cs new file mode 100644 index 000000000000..0b0cf226bb5d --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordProperty.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// A description of a property for storage in a memory store. +/// +[Experimental("SKEXP0001")] +public abstract class MemoryRecordProperty +{ + /// + /// Initializes a new instance of the class. + /// + /// The name of the property. + private protected MemoryRecordProperty(string propertyName) + { + this.PropertyName = propertyName; + } + + /// + /// Gets or sets the name of the property. + /// + public string PropertyName { get; set; } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordVectorProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordVectorProperty.cs new file mode 100644 index 000000000000..94e627103d9a --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordVectorProperty.cs @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// A description of a vector property for storage in a memory store. +/// +[Experimental("SKEXP0001")] +public sealed class MemoryRecordVectorProperty : MemoryRecordProperty +{ + /// + /// Initializes a new instance of the class. + /// + /// The name of the property. 
+ public MemoryRecordVectorProperty(string propertyName) + : base(propertyName) + { + } + + /// + /// Initializes a new instance of the class by cloning the given source. + /// + /// The source to clone + public MemoryRecordVectorProperty(MemoryRecordVectorProperty source) + : base(source.PropertyName) + { + } +} From bfa096717bd9c73422bbca927b53ee15a9180d38 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 17 Jun 2024 14:37:33 +0100 Subject: [PATCH 03/48] .Net: Rename memory to vector. (#6701) ### Motivation and Context Renaming to the new vector store nomenclature as discussed. These are the main changes. All implementations follow the same pattern. IMemoryRecordService => IVectorRecordStore IMemoryRecordMapper => IVectorStoreRecordMapper MemoryServiceCommandExecutionException => VectorStoreOperationException MemoryDataModelMappingException => VectorStoreRecordMappingException MemoryRecordDataAttribute => VectorStoreRecordDataAttribute and same for other attributes MemoryRecordDataProperty => VectorStoreRecordDataProperty and same for other property definitions ### Description For more information see the included ADR document. 
Related Issue: #5887 ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- .../0045-updated-vector-store-design.md | 334 +++++++++--------- ...AzureAISearchMemoryRecordServiceOptions.cs | 42 --- ...pe.cs => AzureAISearchRecordMapperType.cs} | 4 +- ...e.cs => AzureAISearchVectorRecordStore.cs} | 58 +-- .../AzureAISearchVectorRecordStoreOptions.cs | 42 +++ .../AzureAISearchMemoryCollectionFixture.cs | 10 - ...=> AzureAISearchVectorRecordStoreTests.cs} | 84 ++--- ...ureAISearchVectorStoreCollectionFixture.cs | 10 + ....cs => AzureAISearchVectorStoreFixture.cs} | 60 ++-- ....cs => VectorStoreRecordPropertyReader.cs} | 47 ++- .../IMemoryRecordService{TKey,TDataModel}.cs | 90 ----- .../Memory/IVectorRecordStore.cs | 90 +++++ ...dMapper.cs => IVectorStoreRecordMapper.cs} | 18 +- .../Memory/MemoryDataModelMappingException.cs | 35 -- .../MemoryServiceCommandExecutionException.cs | 35 -- .../VectorStoreRecordDataAttribute.cs} | 4 +- .../VectorStoreRecordKeyAttribute.cs} | 4 +- .../VectorStoreRecordVectorAttribute.cs} | 4 +- .../MemoryRecordKeyProperty.cs | 30 -- .../MemoryRecordVectorProperty.cs | 30 -- ...ty.cs => VectorStoreRecordDataProperty.cs} | 12 +- ...tion.cs => VectorStoreRecordDefinition.cs} | 8 +- .../VectorStoreRecordKeyProperty.cs | 30 ++ ...operty.cs => VectorStoreRecordProperty.cs} | 8 +- .../VectorStoreRecordVectorProperty.cs | 30 ++ .../RecordOptions/DeleteRecordOptions.cs | 2 +- .../Memory/RecordOptions/GetRecordOptions.cs | 2 +- .../RecordOptions/UpsertRecordOptions.cs | 2 +- .../Memory/VectorStoreOperationException.cs | 35 ++ 
.../VectorStoreRecordMappingException.cs | 35 ++ 30 files changed, 606 insertions(+), 589 deletions(-) delete mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordServiceOptions.cs rename dotnet/src/Connectors/Connectors.Memory.AzureAISearch/{AzureAISearchMemoryRecordMapperType.cs => AzureAISearchRecordMapperType.cs} (75%) rename dotnet/src/Connectors/Connectors.Memory.AzureAISearch/{AzureAISearchMemoryRecordService.cs => AzureAISearchVectorRecordStore.cs} (82%) create mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs delete mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryCollectionFixture.cs rename dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/{AzureAISearchMemoryRecordServiceTests.cs => AzureAISearchVectorRecordStoreTests.cs} (64%) create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreCollectionFixture.cs rename dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/{AzureAISearchMemoryFixture.cs => AzureAISearchVectorStoreFixture.cs} (83%) rename dotnet/src/InternalUtilities/src/Schema/{VectorStoreModelPropertyReader.cs => VectorStoreRecordPropertyReader.cs} (79%) delete mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs rename dotnet/src/SemanticKernel.Abstractions/Memory/{IMemoryRecordMapper.cs => IVectorStoreRecordMapper.cs} (50%) delete mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/MemoryDataModelMappingException.cs delete mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/MemoryServiceCommandExecutionException.cs rename dotnet/src/SemanticKernel.Abstractions/Memory/{MemoryRecordAttributes/MemoryRecordDataAttribute.cs => RecordAttributes/VectorStoreRecordDataAttribute.cs} (82%) rename 
dotnet/src/SemanticKernel.Abstractions/Memory/{MemoryRecordAttributes/MemoryRecordKeyAttribute.cs => RecordAttributes/VectorStoreRecordKeyAttribute.cs} (62%) rename dotnet/src/SemanticKernel.Abstractions/Memory/{MemoryRecordAttributes/MemoryRecordVectorAttribute.cs => RecordAttributes/VectorStoreRecordVectorAttribute.cs} (67%) delete mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordKeyProperty.cs delete mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordVectorProperty.cs rename dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/{MemoryRecordDataProperty.cs => VectorStoreRecordDataProperty.cs} (67%) rename dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/{MemoryRecordDefinition.cs => VectorStoreRecordDefinition.cs} (54%) create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordKeyProperty.cs rename dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/{MemoryRecordProperty.cs => VectorStoreRecordProperty.cs} (63%) create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordVectorProperty.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreOperationException.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreRecordMappingException.cs diff --git a/docs/decisions/0045-updated-vector-store-design.md b/docs/decisions/0045-updated-vector-store-design.md index 876e0c7b8664..0e06d1a05b6d 100644 --- a/docs/decisions/0045-updated-vector-store-design.md +++ b/docs/decisions/0045-updated-vector-store-design.md @@ -93,24 +93,24 @@ The separation between collection/index management and record management. 
title: SK Collection/Index and record management --- classDiagram - note for IMemoryRecordService "Can manage records for any scenario" - note for IMemoryCollectionCreateService "Can create collections and\nindexes" - note for IMemoryCollectionUpdateService "Can retrieve/delete any collections and\nindexes" + note for IVectorRecordStore "Can manage records for any scenario" + note for IVectorCollectionCreate "Can create collections and\nindexes" + note for IVectorCollectionNonSchema "Can retrieve/delete any collections and\nindexes" namespace SKAbstractions{ - class IMemoryCollectionCreateService{ + class IVectorCollectionCreate{ <> +CreateCollection } - class IMemoryCollectionUpdateService{ + class IVectorCollectionNonSchema{ <> +GetCollectionNames +CollectionExists +DeleteCollection } - class IMemoryRecordService~TModel~{ + class IVectorRecordStore~TModel~{ <> +Upsert(TModel record) string +UpserBatch(TModel record) string @@ -122,34 +122,34 @@ classDiagram } namespace AzureAIMemory{ - class AzureAISearchMemoryCollectionCreateService{ + class AzureAISearchVectorCollectionCreate{ } - class AzureAISearchMemoryCollectionUpdateService{ + class AzureAISearchVectorCollectionNonSchema{ } - class AzureAISearchMemoryRecordService{ + class AzureAISearchVectorRecordStore{ } } namespace RedisMemory{ - class RedisMemoryCollectionCreateService{ + class RedisVectorCollectionCreate{ } - class RedisMemoryCollectionUpdateService{ + class RedisVectorCollectionNonSchema{ } - class RedisMemoryRecordService{ + class RedisVectorRecordStore{ } } - IMemoryCollectionCreateService <|-- AzureAISearchMemoryCollectionCreateService - IMemoryCollectionUpdateService <|-- AzureAISearchMemoryCollectionUpdateService - IMemoryRecordService <|-- AzureAISearchMemoryRecordService + IVectorCollectionCreate <|-- AzureAISearchVectorCollectionCreate + IVectorCollectionNonSchema <|-- AzureAISearchVectorCollectionNonSchema + IVectorRecordStore <|-- AzureAISearchVectorRecordStore - 
IMemoryCollectionCreateService <|-- RedisMemoryCollectionCreateService - IMemoryCollectionUpdateService <|-- RedisMemoryCollectionUpdateService - IMemoryRecordService <|-- RedisMemoryRecordService + IVectorCollectionCreate <|-- RedisVectorCollectionCreate + IVectorCollectionNonSchema <|-- RedisVectorCollectionNonSchema + IVectorRecordStore <|-- RedisVectorRecordStore ``` How to use your own schema with core sk functionality. @@ -159,26 +159,26 @@ How to use your own schema with core sk functionality. title: Chat History Break Glass --- classDiagram - note for IMemoryRecordService "Can manage records\nfor any scenario" - note for IMemoryCollectionCreateService "Can create collections\nan dindexes" - note for IMemoryCollectionUpdateService "Can retrieve/delete any\ncollections and indexes" - note for CustomerHistoryMemoryCollectionCreateService "Creates history collections and indices\nusing Customer requirements" - note for CustomerHistoryMemoryRecordService "Decorator class for IMemoryRecordService that maps\nbetween the customer model to our model" + note for IVectorRecordStore "Can manage records\nfor any scenario" + note for IVectorCollectionCreate "Can create collections\nand indexes" + note for IVectorCollectionNonSchema "Can retrieve/delete any\ncollections and indexes" + note for CustomerHistoryVectorCollectionCreate "Creates history collections and indices\nusing Customer requirements" + note for CustomerHistoryVectorRecordStore "Decorator class for IVectorRecordStore that maps\nbetween the customer model and our model" namespace SKAbstractions{ - class IMemoryCollectionCreateService{ + class IVectorCollectionCreate{ <> +CreateCollection } - class IMemoryCollectionUpdateService{ + class IVectorCollectionNonSchema{ <> +GetCollectionNames +CollectionExists +DeleteCollection } - class IMemoryRecordService~TModel~{ + class IVectorRecordStore~TModel~{ <> +Upsert(TModel record) string +Get(string key) TModel @@ -203,12 +203,12 @@ classDiagram +Dictionary~string, 
string~ properties } - class CustomerHistoryMemoryCollectionCreateService{ + class CustomerHistoryVectorCollectionCreate{ +CreateCollection } - class CustomerHistoryMemoryRecordService{ - -IMemoryRecordService~CustomerHistoryModel~ _store + class CustomerHistoryVectorRecordStore{ + -IVectorRecordStore~CustomerHistoryModel~ _store +Upsert(ChatHistoryModel record) string +Get(string key) ChatHistoryModel +Delete(string key) string @@ -217,7 +217,7 @@ classDiagram namespace SKCore{ class SemanticTextMemory{ - -IMemoryRecordService~ChatHistoryModel~ _MemoryRecordService + -IVectorRecordStore~ChatHistoryModel~ _VectorRecordStore -IMemoryCollectionService _collectionsService -ITextEmbeddingGenerationService _embeddingGenerationService } @@ -233,16 +233,16 @@ classDiagram } } - IMemoryCollectionCreateService <|-- CustomerHistoryMemoryCollectionCreateService + IVectorCollectionCreate <|-- CustomerHistoryVectorCollectionCreate - IMemoryRecordService <|-- CustomerHistoryMemoryRecordService - IMemoryRecordService <.. CustomerHistoryMemoryRecordService - CustomerHistoryModel <.. CustomerHistoryMemoryRecordService - ChatHistoryModel <.. CustomerHistoryMemoryRecordService + IVectorRecordStore <|-- CustomerHistoryVectorRecordStore + IVectorRecordStore <.. CustomerHistoryVectorRecordStore + CustomerHistoryModel <.. CustomerHistoryVectorRecordStore + ChatHistoryModel <.. CustomerHistoryVectorRecordStore ChatHistoryModel <.. SemanticTextMemory - IMemoryRecordService <.. SemanticTextMemory - IMemoryCollectionCreateService <.. SemanticTextMemory + IVectorRecordStore <.. SemanticTextMemory + IVectorCollectionCreate <.. SemanticTextMemory ISemanticTextMemory <.. ChatHistoryPlugin ``` @@ -310,7 +310,7 @@ Footnotes: Mapping between data models and the storage models can also require custom logic depending on the type of data model and storage model involved. -I'm therefore proposing that we allow mappers to be injectable for each `MemoryRecordService` instance. 
The interfaces for these would vary depending +I'm therefore proposing that we allow mappers to be injectable for each `VectorRecordStore` instance. The interfaces for these would vary depending on the storage models used by each vector store and any unique capabilities that each vector store may have, e.g. qdrant can operate in `single` or `multiple named vector` modes, which means the mapper needs to know whether to set a single vector or fill a vector map. @@ -334,45 +334,45 @@ should be filterable. Requiring users to switch to new attributes later will be Here is what the attributes would look like, plus a sample use case. ```cs -sealed class MemoryRecordKeyAttribute : Attribute +sealed class VectorStoreRecordKeyAttribute : Attribute { } -sealed class MemoryRecordDataAttribute : Attribute +sealed class VectorStoreRecordDataAttribute : Attribute { public bool HasEmbedding { get; set; } public string EmbeddingPropertyName { get; set; } } -sealed class MemoryRecordVectorAttribute : Attribute +sealed class VectorStoreRecordVectorAttribute : Attribute { } public record HotelInfo( - [property: MemoryRecordKey, JsonPropertyName("hotel-id")] string HotelId, - [property: MemoryRecordData, JsonPropertyName("hotel-name")] string HotelName, - [property: MemoryRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbeddings"), JsonPropertyName("description")] string Description, - [property: MemoryRecordVector, JsonPropertyName("description-embeddings")] ReadOnlyMemory? DescriptionEmbeddings); + [property: VectorStoreRecordKey, JsonPropertyName("hotel-id")] string HotelId, + [property: VectorStoreRecordData, JsonPropertyName("hotel-name")] string HotelName, + [property: VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbeddings"), JsonPropertyName("description")] string Description, + [property: VectorStoreRecordVector, JsonPropertyName("description-embeddings")] ReadOnlyMemory? 
DescriptionEmbeddings); ``` Here is what the configuration objects would look like. ```cs -abstract class MemoryRecordProperty(string propertyName); +abstract class VectorStoreRecordProperty(string propertyName); -sealed class MemoryRecordKeyProperty(string propertyName): Field(propertyName) +sealed class VectorStoreRecordKeyProperty(string propertyName): Field(propertyName) { } -sealed class MemoryRecordDataProperty(string propertyName): Field(propertyName) +sealed class VectorStoreRecordDataProperty(string propertyName): Field(propertyName) { bool HasEmbedding; string EmbeddingPropertyName; } -sealed class MemoryRecordVectorProperty(string propertyName): Field(propertyName) +sealed class VectorStoreRecordVectorProperty(string propertyName): Field(propertyName) { } -sealed class MemoryRecordDefinition +sealed class VectorStoreRecordDefinition { - IReadOnlyList Properties; + IReadOnlyList Properties; } ``` @@ -390,7 +390,7 @@ consistency and scalability. |Criteria|Current SK Implementation|Proposed SK Implementation|Spring AI|LlamaIndex|Langchain| |-|-|-|-|-|-| |Support for Custom Schemas|N|Y|N|N|N| -|Naming of store|MemoryStore|MemoryRecordService, MemoryCollectionCreateService, MemoryCollectionUpdateService|VectorStore|VectorStore|VectorStore| +|Naming of store|MemoryStore|VectorRecordStore, VectorCollectionCreate, VectorCollectionNonSchema, VectorCollectionStore, VectorStore|VectorStore|VectorStore|VectorStore| |MultiVector support|N|Y|N|N|N| |Support Multiple Collections via SDK params|Y|Y|N (via app config)|Y|Y| @@ -423,39 +423,39 @@ From GitHub Issue: #### Option 1 - Combined collection and record management ```cs -interface IMemoryRecordService +interface IVectorRecordStore { Task CreateCollectionAsync(CollectionCreateConfig collectionConfig, CancellationToken cancellationToken = default); IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default); Task CollectionExistsAsync(string name, CancellationToken cancellationToken = 
default); Task DeleteCollectionAsync(string name, CancellationToken cancellationToken = default); - Task UpsertAsync(TDataModel data, CancellationToken cancellationToken = default); - IAsyncEnumerable UpsertBatchAsync(IEnumerable dataSet, CancellationToken cancellationToken = default); - Task GetAsync(string key, bool withEmbedding = false, CancellationToken cancellationToken = default); - IAsyncEnumerable GetBatchAsync(IEnumerable keys, bool withVectors = false, CancellationToken cancellationToken = default); + Task UpsertAsync(TRecord data, CancellationToken cancellationToken = default); + IAsyncEnumerable UpsertBatchAsync(IEnumerable dataSet, CancellationToken cancellationToken = default); + Task GetAsync(string key, bool withEmbedding = false, CancellationToken cancellationToken = default); + IAsyncEnumerable GetBatchAsync(IEnumerable keys, bool withVectors = false, CancellationToken cancellationToken = default); Task DeleteAsync(string key, CancellationToken cancellationToken = default); Task DeleteBatchAsync(IEnumerable keys, CancellationToken cancellationToken = default); } -class AzureAISearchMemoryRecordService( +class AzureAISearchVectorRecordStore( Azure.Search.Documents.Indexes.SearchIndexClient client, - Schema schema): IMemoryRecordService; + Schema schema): IVectorRecordStore; -class WeaviateMemoryRecordService( +class WeaviateVectorRecordStore( WeaviateClient client, - Schema schema): IMemoryRecordService; + Schema schema): IVectorRecordStore; -class RedisMemoryRecordService( +class RedisVectorRecordStore( StackExchange.Redis.IDatabase database, - Schema schema): IMemoryRecordService; + Schema schema): IVectorRecordStore; ``` #### Option 2 - Separated collection and record management with opinionated create implementations ```cs -interface IMemoryCollectionService +interface IVectorCollectionStore { virtual Task CreateChatHistoryCollectionAsync(string name, CancellationToken cancellationToken = default); virtual Task 
CreateSemanticCacheCollectionAsync(string name, CancellationToken cancellationToken = default); @@ -465,24 +465,24 @@ interface IMemoryCollectionService Task DeleteCollectionAsync(string name, CancellationToken cancellationToken = default); } -class AzureAISearchMemoryCollectionService: IMemoryCollectionService; -class RedisMemoryCollectionService: IMemoryCollectionService; -class WeaviateMemoryCollectionService: IMemoryCollectionService; +class AzureAISearchVectorCollectionStore: IVectorCollectionStore; +class RedisVectorCollectionStore: IVectorCollectionStore; +class WeaviateVectorCollectionStore: IVectorCollectionStore; // Customers can inherit from our implementations and replace just the creation scenarios to match their schemas. -class CustomerCollectionsService: AzureAISearchMemoryCollectionService, IMemoryCollectionService; +class CustomerCollectionStore: AzureAISearchVectorCollectionStore, IVectorCollectionStore; // We can also create implementations that create indices based on an MLIndex specification. -class MLIndexAzureAISearchMemoryCollectionService(MLIndex mlIndexSpec): AzureAISearchMemoryCollectionService, IMemoryCollectionService; +class MLIndexAzureAISearchVectorCollectionStore(MLIndex mlIndexSpec): AzureAISearchVectorCollectionStore, IVectorCollectionStore; -interface IMemoryRecordService +interface IVectorRecordStore { - Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); Task DeleteAsync(string key, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); - Task UpsertAsync(TDataModel record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default); + Task UpsertAsync(TRecord record, UpsertRecordOptions? 
options = default, CancellationToken cancellationToken = default); } -class AzureAISearchMemoryRecordService(): IMemoryRecordService; +class AzureAISearchVectorRecordStore(): IVectorRecordStore; ``` #### Option 3 - Separated collection and record management with collection create separate from other operations. @@ -491,31 +491,31 @@ Vector store same as option 2 so not repeated for brevity. ```cs -interface IMemoryCollectionCreateService +interface IVectorCollectionCreate { virtual Task CreateCollectionAsync(string name, CancellationToken cancellationToken = default); } // Implement a generic version of create that takes a configuration that should work for 80% of cases. -class AzureAISearchConfiguredCollectionCreateService(CollectionCreateConfig collectionConfig): IMemoryCollectionCreateService; +class AzureAISearchConfiguredVectorCollectionCreate(CollectionCreateConfig collectionConfig): IVectorCollectionCreate; // Allow custom implementations of create for break glass scenarios for outside the 80% case. -class AzureAISearchChatHistoryCollectionCreateService: IMemoryCollectionCreateService; -class AzureAISearchSemanticCacheCollectionCreateService: IMemoryCollectionCreateService; +class AzureAISearchChatHistoryVectorCollectionCreate: IVectorCollectionCreate; +class AzureAISearchSemanticCacheVectorCollectionCreate: IVectorCollectionCreate; // Customers can create their own creation scenarios to match their schemas, but can continue to use our get, does exist and delete class. 
-class CustomerChatHistoryCollectionCreateService: IMemoryCollectionCreateService; +class CustomerChatHistoryVectorCollectionCreate: IVectorCollectionCreate; -interface IMemoryCollectionUpdateService +interface IVectorCollectionNonSchema { IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default); Task CollectionExistsAsync(string name, CancellationToken cancellationToken = default); Task DeleteCollectionAsync(string name, CancellationToken cancellationToken = default); } -class AzureAISearchMemoryCollectionUpdateService: IMemoryCollectionUpdateService; -class RedisMemoryCollectionUpdateService: IMemoryCollectionUpdateService; -class WeaviateMemoryCollectionUpdateService: IMemoryCollectionUpdateService; +class AzureAISearchVectorCollectionNonSchema: IVectorCollectionNonSchema; +class RedisVectorCollectionNonSchema: IVectorCollectionNonSchema; +class WeaviateVectorCollectionNonSchema: IVectorCollectionNonSchema; ``` @@ -525,41 +525,41 @@ Variation on option 3. 
```cs -interface IMemoryCollectionCreateService +interface IVectorCollectionCreate { virtual Task CreateCollectionAsync(string name, CancellationToken cancellationToken = default); } -interface IMemoryCollectionUpdateService +interface IVectorCollectionNonSchema { IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default); Task CollectionExistsAsync(string name, CancellationToken cancellationToken = default); Task DeleteCollectionAsync(string name, CancellationToken cancellationToken = default); } -// DB Specific Update implementations -class AzureAISearchMemoryCollectionUpdateService: IMemoryCollectionUpdateService; -class RedisMemoryCollectionUpdateService: IMemoryCollectionUpdateService; +// DB Specific NonSchema implementations +class AzureAISearchVectorCollectionNonSchema: IVectorCollectionNonSchema; +class RedisVectorCollectionNonSchema: IVectorCollectionNonSchema; -// Combined Create + Update Interface -interface IMemoryCollectionService: IMemoryCollectionCreateService, IMemoryCollectionUpdateService {} +// Combined Create + NonSchema Interface +interface IVectorCollectionStore: IVectorCollectionCreate, IVectorCollectionNonSchema {} -// Base abstract class that forwards non-create operations to provided service. -abstract class MemoryCollectionService(IMemoryCollectionUpdateService collectionsUpdateService): IMemoryCollectionService +// Base abstract class that forwards non-create operations to provided implementation. 
+abstract class VectorCollectionStore(IVectorCollectionNonSchema collectionNonSchema): IVectorCollectionStore { public abstract Task CreateCollectionAsync(string name, CancellationToken cancellationToken = default); - public IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default) { return collectionsUpdateService.ListCollectionNamesAsync(cancellationToken); } - public Task CollectionExistsAsync(string name, CancellationToken cancellationToken = default) { return collectionsUpdateService.CollectionExistsAsync(name, cancellationToken); } - public Task DeleteCollectionAsync(string name, CancellationToken cancellationToken = default) { return collectionsUpdateService.DeleteCollectionAsync(name, cancellationToken); } + public IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default) { return collectionNonSchema.ListCollectionNamesAsync(cancellationToken); } + public Task CollectionExistsAsync(string name, CancellationToken cancellationToken = default) { return collectionNonSchema.CollectionExistsAsync(name, cancellationToken); } + public Task DeleteCollectionAsync(string name, CancellationToken cancellationToken = default) { return collectionNonSchema.DeleteCollectionAsync(name, cancellationToken); } } -// Collections service implementations, that inherit from base class, and just adds the different creation implementations. -class AzureAISearchChatHistoryMemoryService(AzureAISearchMemoryCollectionUpdateService updateService): MemoryCollectionService(updateService); -class AzureAISearchSemanticCacheMemoryService(AzureAISearchMemoryCollectionUpdateService updateService): MemoryCollectionService(updateService); -class AzureAISearchMLIndexMemoryService(AzureAISearchMemoryCollectionUpdateService updateService): MemoryCollectionService(updateService); +// Collections store implementations, that inherit from base class, and just adds the different creation implementations. 
+class AzureAISearchChatHistoryVectorCollectionStore(AzureAISearchVectorCollectionNonSchema nonSchema): VectorCollectionStore(nonSchema); +class AzureAISearchSemanticCacheVectorCollectionStore(AzureAISearchVectorCollectionNonSchema nonSchema): VectorCollectionStore(nonSchema); +class AzureAISearchMLIndexVectorCollectionStore(AzureAISearchVectorCollectionNonSchema nonSchema): VectorCollectionStore(nonSchema); -// Customer collections service implementation, that uses the base Azure AI Search implementation for get, doesExist and delete, but adds it's own creation. -class ContosoProductsMemoryCollectionService(AzureAISearchMemoryCollectionUpdateService updateService): MemoryCollectionService(updateService); +// Customer collections store implementation, that uses the base Azure AI Search implementation for get, doesExist and delete, but adds its own creation. +class ContosoProductsVectorCollectionStore(AzureAISearchVectorCollectionNonSchema nonSchema): VectorCollectionStore(nonSchema); ``` @@ -569,12 +569,12 @@ Same as option 3 / 4, plus: ```cs -interface IMemoryService : IMemoryCollectionCreateService, IMemoryCollectionService, IMemoryRecordService +interface IVectorStore : IVectorCollectionStore, IVectorRecordStore { } // Create a static factory that produces one of these, so only the interface is public, not the class. -internal class CombinedMemoryService(IMemoryCollectionCreateService creation, IMemoryCollectionService collections, IMemoryRecordService records): IMemoryService +internal class VectorStore(IVectorCollectionCreate create, IVectorCollectionNonSchema nonSchema, IVectorRecordStore records): IVectorStore { } @@ -602,19 +602,19 @@ Chosen option: 4 + 5. - Collection create, configuration and supported options vary considerably across different schemas and database types. - Collection list, exists and delete is the same across different schemas, but varies by database type. 
- Vector storage, even with custom schemas can be supported using a single implementation per database type. -- We will need to support multiple collection create service implementations per store type, a single collection update service implementation per store type, and a single vector store implementation per store type. +- We will need to support multiple collection create implementations per store type, a single collection nonschema implementation per store type, and a single vector store implementation per store type. - At the same time we can layer interfaces on top that allow easy combined access to collection and record management. ### Question 2: Collection name and key value normalization in store, decorator or via injection. -#### Option 1 - Normalization in main record service +#### Option 1 - Normalization in main record store - Pros: Simple -- Cons: The normalization needs to vary separately from the record service, so this will not work +- Cons: The normalization needs to vary separately from the record store, so this will not work ```cs - public class AzureAISearchMemoryRecordService : IMemoryRecordService + public class AzureAISearchVectorRecordStore : IVectorRecordStore { ... @@ -633,28 +633,28 @@ Chosen option: 4 + 5. #### Option 2 - Normalization in decorator -- Pros: Allows normalization to vary separately from the record service. +- Pros: Allows normalization to vary separately from the record store. - Pros: No code executed when no normalization required. - Pros: Easy to package matching encoders/decoders together. - Pros: Easier to obsolete encoding/normalization as a concept. -- Cons: Not a major con, but need to implement the full MemoryRecordService interface, instead of e.g. just providing the two translation functions, if we go with option 3. +- Cons: Not a major con, but need to implement the full VectorRecordStore interface, instead of e.g. just providing the two translation functions, if we go with option 3. 
- Cons: Hard to have a generic implementation that can work with any model, without either changing the data in the provided object on upsert or doing cloning in an expensive way. ```cs - new KeyNormalizingAISearchMemoryRecordService( + new KeyNormalizingAISearchVectorRecordStore( "keyField", - new AzureAISearchMemoryRecordService(...)); + new AzureAISearchVectorRecordStore(...)); ``` -#### Option 3 - Normalization via optional function parameters to record service constructor +#### Option 3 - Normalization via optional function parameters to record store constructor -- Pros: Allows normalization to vary separately from the record service. -- Pros: No need to implement the full MemoryRecordService interface. +- Pros: Allows normalization to vary separately from the record store. +- Pros: No need to implement the full VectorRecordStore interface. - Pros: Can modify values on serialization without changing the incoming record, if supported by DB SDK. - Cons: Harder to package matching encoders/decoders together. ```cs -public class AzureAISearchMemoryRecordService(StoreOptions options); +public class AzureAISearchVectorRecordStore(StoreOptions options); public class StoreOptions { @@ -686,27 +686,27 @@ provide their own encoding / decoding behavior. #### Option 1 - Collection name as method param ```cs -public class MyMemoryStore() +public class MyVectorRecordStore() { - public async Task GetAsync(string collectionName, string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + public async Task GetAsync(string collectionName, string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); } ``` #### Option 2 - Collection name via constructor ```cs -public class MyMemoryStore(string defaultCollectionName) +public class MyVectorRecordStore(string defaultCollectionName) { - public async Task GetAsync(string key, GetRecordOptions? 
options = default, CancellationToken cancellationToken = default); + public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); } ``` #### Option 3 - Collection name via either ```cs -public class MyMemoryStore(string defaultCollectionName) +public class MyVectorRecordStore(string defaultCollectionName) { - public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); } public class GetRecordOptions @@ -724,7 +724,7 @@ Chosen option 3, to allow developers more choice. #### Option 1 - Take a string and convert to a type that was specified on the constructor ```cs -public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) +public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) { var convertedKey = this.keyType switch { @@ -742,7 +742,7 @@ public async Task GetAsync(string key, GetRecordOptions? options = #### Option 2 - Take an object and cast to a type that was specified on the constructor. ```cs -public async Task GetAsync(object key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) +public async Task GetAsync(object key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) { var convertedKey = this.keyType switch { @@ -766,7 +766,7 @@ public async Task GetAsync(object key, GetRecordOptions? options = #### Option 3 - Multiple overloads where we convert where possible, throw when not possible. ```cs -public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) +public async Task GetAsync(string key, GetRecordOptions? 
options = default, CancellationToken cancellationToken = default) { var convertedKey = this.keyType switch { @@ -775,7 +775,7 @@ public async Task GetAsync(string key, GetRecordOptions? options = KeyType.GUID => Guid.Parse(key) } } -public async Task GetAsync(int key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) +public async Task GetAsync(int key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) { var convertedKey = this.keyType switch { @@ -784,7 +784,7 @@ public async Task GetAsync(int key, GetRecordOptions? options = def KeyType.GUID => throw new InvalidOperationException($"The provided key must be convertible to a GUID.") } } -public async Task GetAsync(GUID key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) +public async Task GetAsync(GUID key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) { var convertedKey = this.keyType switch { @@ -801,16 +801,16 @@ public async Task GetAsync(GUID key, GetRecordOptions? options = de #### Option 4 - Add key type as generic to interface ```cs -interface IMemoryRecordService +interface IVectorRecordStore { - Task GetAsync(TKeyType key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + Task GetAsync(TKey key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); } -class AzureAISearchMemoryRecordService: IMemoryRecordService +class AzureAISearchVectorRecordStore: IVectorRecordStore { - public AzureAISearchMemoryRecordService() + public AzureAISearchVectorRecordStore() { - // Check if TKeyType matches the type of the field marked as a key on TDataModel and throw if they don't match. + // Check if TKey matches the type of the field marked as a key on TRecord and throw if they don't match. // Also check if keytype is one of the allowed types for Azure AI Search and throw if it isn't. 
} } @@ -830,26 +830,32 @@ each implementation to hardcode allowed key types if the vector db only supports #### Option 1 - VectorDB ```cs -IVectorDBRecordService -IVectorDBCollectionUpdateService -IVectorDBCollectionCreateService +interface IVectorDBRecordService {} +interface IVectorDBCollectionUpdateService {} +interface IVectorDBCollectionCreateService {} ``` #### Option 2 - Memory ```cs -IMemoryRecordService -IMemoryCollectionUpdateService -IMemoryCollectionCreateService +interface IMemoryRecordService {} +interface IMemoryCollectionUpdateService {} +interface IMemoryCollectionCreateService {} +``` + +#### Option 3 - VectorStore + +```cs +interface IVectorRecordStore {} +interface IVectorCollectionNonSchema {} +interface IVectorCollectionCreate {} +interface IVectorCollectionStore: IVectorCollectionCreate, IVectorCollectionNonSchema {} +interface IVectorStore: IVectorCollectionStore, IVectorRecordStore {} ``` #### Decision Outcome -Chosen option 2. Memory constrains the scope of these classes to be for memory storage and retrieval in the context of an AI system. Since almost all -databases are currently adding vector support, including relational, it's important to clarify the purpose of these abstractions compared to others. -Here, the purpose is not to provide generic database access to all databases that support vectors, but rather for memory storage and retrieval. The -concern with using a term such as VectorDB is that it opens up the scope of the feature set to include anything that stores a vector, without -constraining it to any specific purpose. +Chosen option 3. The word memory is broad enough to encompass any data, so using it seems arbitrary. All competitors are using the term vector store, so using something similar is good for recognition. 
## Usage Examples @@ -858,7 +864,7 @@ Common Code across all examples ```cs class CacheEntryModel(string prompt, string result, ReadOnlyMemory promptEmbedding); -class SemanticTextMemory(IMemoryRecordService recordService, IMemoryCollectionService collectionService, ITextEmbeddingGenerationService embeddingGenerator): ISemanticTextMemory; +class SemanticTextMemory(IVectorRecordStore recordStore, IVectorCollectionStore collectionStore, ITextEmbeddingGenerationService embeddingGenerator): ISemanticTextMemory; class CacheSetFunctionFilter(ISemanticTextMemory memory); // Saves results to cache. class CacheGetPromptFilter(ISemanticTextMemory memory); // Check cache for entries. @@ -880,24 +886,24 @@ builder .AddNamedAzureAISearchCollectionCreate(name: "CacheCreate", azureAISearchEndpoint, apiKey, createConfiguration) // Config .AddNamedAzureAISearchCollectionCreate(name: "CacheCreate", sp => new CacheCreate(...)); // Custom implementation // Create combined collection management that references the previously registered create instance. - .AddNamedAzureAISearchCollectionService(name: "Cache", azureAISearchEndpoint, apiKey, createName: "CacheCreate") + .AddNamedAzureAISearchCollectionStore(name: "Cache", azureAISearchEndpoint, apiKey, createName: "CacheCreate") - // Variant 2: Register collection service in one line with config or custom create implementation. - .AddNamedAzureAISearchCollectionService(name: "Cache", azureAISearchEndpoint, apiKey, createConfiguration) // Config - .AddNamedAzureAISearchCollectionService(name: "Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)) // Custom implementation + // Variant 2: Register collection store in one line with config or custom create implementation. 
+ .AddNamedAzureAISearchCollectionStore(name: "Cache", azureAISearchEndpoint, apiKey, createConfiguration) // Config + .AddNamedAzureAISearchCollectionStore(name: "Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)) // Custom implementation // Record Registration with variants 1 and 2: - // Add record services. - .AddAzureAISearchRecordService(name: "Cache", azureAISearchEndpoint, apiKey) + // Add record stores. + .AddAzureAISearchRecordStore(name: "Cache", azureAISearchEndpoint, apiKey) - // Variant 3: Register collection and record service in one line with config or custom create implementation. + // Variant 3: Register collection and record store in one line with config or custom create implementation. // Does all of the preious variants in one line. - .AddAzureAISearchStorageServices(name: "Cache", azureAISearchEndpoint, apiKey, createConfiguration) // Config - .AddAzureAISearchStorageServices(name: "Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)) // Custom implementation + .AddAzureAISearchVectorStore(name: "Cache", azureAISearchEndpoint, apiKey, createConfiguration) // Config + .AddAzureAISearchVectorStore(name: "Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)) // Custom implementation - // Add semantic text memory referencing collection and record services. + // Add semantic text memory referencing collection and record stores. // This would register ISemanticTextMemory in the services container. - .AddSemanticTextMemory(collectionServiceName: "Cache", recordServiceName: "Cache"); + .AddSemanticTextMemory(collectionStoreName: "Cache", recordStoreName: "Cache"); // Add filter to retrieve items from cache and one to add items to cache. // Since these filters depend on ISemanticTextMemory and that is already registered, it should get matched automatically. 
@@ -907,9 +913,9 @@ builder.Services.AddTransient var kernel = .Build(); -var memoryFactory = kernel.Services.GetRequiredService(); -var cacheCollectionService = memoryFactory.CreateCollectionService(name: "Cache"); -var cacheRecordService = memoryFactory.CreateRecordService(name: "Cache"); +var vectorStoreFactory = kernel.Services.GetRequiredService(); +var cacheCollectionStore = vectorStoreFactory.CreateCollectionStore(name: "Cache"); +var cacheRecordStore = vectorStoreFactory.CreateRecordStore(name: "Cache"); ``` ### DI Framework: Registration based on consumer type. @@ -922,7 +928,7 @@ builder .AddAzureOpenAITextEmbeddingGeneration(textEmbeddingDeploymentName, azureAIEndpoint, apiKey) // Collection and record registration with config or custom create implementation. - // This will register both IMemoryCollectionService and IMemoryRecordService and tie it to usage with SemanticTextMemory. + // This will register both IVectorCollectionStore and IVectorRecordStore and tie it to usage with SemanticTextMemory. .AddAzureAISearchStorage>(azureAISearchEndpoint, apiKey, createConfiguration) // Config .AddAzureAISearchStorage>(azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)); // Custom implementation @@ -942,14 +948,14 @@ builder .AddAzureOpenAITextEmbeddingGeneration(textEmbeddingDeploymentName, azureAIEndpoint, apiKey) // Collection and record registration with config or custom create implementation. - .AddAzureAISearchStorageKeyedTransient("Cache", azureAISearchEndpoint, apiKey, createConfiguration) - .AddAzureAISearchStorageKeyedTransient("Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)); + .AddAzureAISearchVectorStoreKeyedTransient("Cache", azureAISearchEndpoint, apiKey, createConfiguration) + .AddAzureAISearchVectorStoreKeyedTransient("Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)); // Add Semantic Cache Memory for the cache entry model. 
builder.Services.AddTransient>(sp => { return new SemanticTextMemory( - sp.GetKeyedService>("Cache"), - sp.GetKeyedService("Cache"), + sp.GetKeyedService>("Cache"), + sp.GetKeyedService("Cache"), sp.GetRequiredService()); }); @@ -963,9 +969,9 @@ builder.Services.AddTransient ### Record Management -1. Release RecordService public interface and implementations for Azure AI Search, Qdrant and Redis. -2. Add support for registering record services with SK container to allow automatic dependency injection. -3. Add RecordService implementations for remaining stores. +1. Release VectorRecordStore public interface and implementations for Azure AI Search, Qdrant and Redis. +2. Add support for registering record stores with SK container to allow automatic dependency injection. +3. Add VectorRecordStore implementations for remaining stores. ### Collection Management diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordServiceOptions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordServiceOptions.cs deleted file mode 100644 index f130f19ffc84..000000000000 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordServiceOptions.cs +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using System.Text.Json.Nodes; -using Microsoft.SemanticKernel.Memory; - -namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; - -/// -/// Options when creating a . -/// -public sealed class AzureAISearchMemoryRecordServiceOptions - where TDataModel : class -{ - /// - /// Gets or sets the default collection name to use. - /// If not provided here, the collection name will need to be provided for each operation or the operation will throw. - /// - public string? DefaultCollectionName { get; init; } = null; - - /// - /// Gets or sets the choice of mapper to use when converting between the data model and the azure ai search record. 
- /// - public AzureAISearchMemoryRecordMapperType MapperType { get; init; } = AzureAISearchMemoryRecordMapperType.Default; - - /// - /// Gets or sets an optional custom mapper to use when converting between the data model and the azure ai search record. - /// - /// - /// Set to to use this mapper."/> - /// - public IMemoryRecordMapper? JsonObjectCustomMapper { get; init; } = null; - - /// - /// Gets or sets an optional memory record definition that defines the schema of the memory record type. - /// - /// - /// If not provided, the schema will be inferred from the data model using reflection. - /// In this case, the data model properties must be annotated with the appropriate attributes to indicate their usage. - /// See , and . - /// - public MemoryRecordDefinition? MemoryRecordDefinition { get; init; } = null; -} diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordMapperType.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchRecordMapperType.cs similarity index 75% rename from dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordMapperType.cs rename to dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchRecordMapperType.cs index 9438375dcbdd..e9d99fc87bdb 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordMapperType.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchRecordMapperType.cs @@ -5,9 +5,9 @@ namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; /// -/// The types of mapper supported by . +/// The types of mapper supported by . /// -public enum AzureAISearchMemoryRecordMapperType +public enum AzureAISearchRecordMapperType { /// /// Use the default mapper that is provided by the Azure AI Search client SDK. 
diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordService.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs similarity index 82% rename from dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordService.cs rename to dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index 637128e8a891..5710ffdeaf2e 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchMemoryRecordService.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -18,11 +18,11 @@ namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; /// -/// Service for storing and retrieving memory records, that uses Azure AI Search as the underlying storage. +/// Service for storing and retrieving records, that uses Azure AI Search as the underlying storage. /// -/// The data model to use for adding, updating and retrieving data from storage. -public sealed class AzureAISearchMemoryRecordService : IMemoryRecordService - where TDataModel : class +/// The data model to use for adding, updating and retrieving data from storage. +public sealed class AzureAISearchVectorRecordStore : IVectorRecordStore + where TRecord : class { /// A set of types that a key on the provided model may have. private static readonly HashSet s_supportedKeyTypes = @@ -52,47 +52,47 @@ public sealed class AzureAISearchMemoryRecordService : IMemoryRecord private readonly ConcurrentDictionary _searchClientsByIndex = new(); /// Optional configuration options for this class. - private readonly AzureAISearchMemoryRecordServiceOptions _options; + private readonly AzureAISearchVectorRecordStoreOptions _options; /// The names of all non vector fields on the current model. private readonly List _nonVectorPropertyNames; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. 
/// /// Azure AI Search client that can be used to manage the list of indices in an Azure AI Search Service. /// Optional configuration options for this class. /// Thrown when is null. /// Thrown when options are misconfigured. - public AzureAISearchMemoryRecordService(SearchIndexClient searchIndexClient, AzureAISearchMemoryRecordServiceOptions? options = default) + public AzureAISearchVectorRecordStore(SearchIndexClient searchIndexClient, AzureAISearchVectorRecordStoreOptions? options = default) { // Verify. Verify.NotNull(searchIndexClient); // Assign. this._searchIndexClient = searchIndexClient; - this._options = options ?? new AzureAISearchMemoryRecordServiceOptions(); + this._options = options ?? new AzureAISearchVectorRecordStoreOptions(); // Verify custom mapper. - if (this._options.MapperType == AzureAISearchMemoryRecordMapperType.JsonObjectCustomMapper && this._options.JsonObjectCustomMapper is null) + if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper && this._options.JsonObjectCustomMapper is null) { - throw new ArgumentException($"The {nameof(AzureAISearchMemoryRecordServiceOptions.JsonObjectCustomMapper)} option needs to be set if a {nameof(AzureAISearchMemoryRecordServiceOptions.MapperType)} of {nameof(AzureAISearchMemoryRecordMapperType.JsonObjectCustomMapper)} has been chosen.", nameof(options)); + throw new ArgumentException($"The {nameof(AzureAISearchVectorRecordStoreOptions.JsonObjectCustomMapper)} option needs to be set if a {nameof(AzureAISearchVectorRecordStoreOptions.MapperType)} of {nameof(AzureAISearchRecordMapperType.JsonObjectCustomMapper)} has been chosen.", nameof(options)); } // Enumerate public properties using configuration or attributes. 
(PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; - if (this._options.MemoryRecordDefinition is not null) + if (this._options.VectorStoreRecordDefinition is not null) { - properties = MemoryServiceModelPropertyReader.FindProperties(typeof(TDataModel), this._options.MemoryRecordDefinition, supportsMultipleVectors: true); + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: true); } else { - properties = MemoryServiceModelPropertyReader.FindProperties(typeof(TDataModel), supportsMultipleVectors: true); + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: true); } // Validate property types and store for later use. - MemoryServiceModelPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); - MemoryServiceModelPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); this._keyPropertyName = properties.keyProperty.Name; // Build the list of property names from the current model that are either key or data fields. @@ -100,7 +100,7 @@ public AzureAISearchMemoryRecordService(SearchIndexClient searchIndexClient, Azu } /// - public Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) + public Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) { Verify.NotNullOrWhiteSpace(key); @@ -114,7 +114,7 @@ public Task GetAsync(string key, GetRecordOptions? options = default } /// - public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, Memory.GetRecordOptions? 
options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) + public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) { Verify.NotNull(keys); @@ -162,7 +162,7 @@ public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? opti } /// - public async Task UpsertAsync(TDataModel record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default) + public async Task UpsertAsync(TRecord record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default) { Verify.NotNull(record); @@ -177,7 +177,7 @@ public async Task UpsertAsync(TDataModel record, UpsertRecordOptions? op } /// - public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) + public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) { Verify.NotNull(records); @@ -203,7 +203,7 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable r /// The azure ai search sdk options for getting a document. /// The to monitor for cancellation requests. The default is . /// The retrieved document, mapped to the consumer data model. - private async Task GetDocumentAndMapToDataModelAsync( + private async Task GetDocumentAndMapToDataModelAsync( SearchClient searchClient, string collectionName, string key, @@ -211,7 +211,7 @@ private async Task GetDocumentAndMapToDataModelAsync( CancellationToken cancellationToken) { // Use the user provided mapper. 
- if (this._options.MapperType == AzureAISearchMemoryRecordMapperType.JsonObjectCustomMapper) + if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper) { var jsonObject = await RunOperationAsync( () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken), @@ -226,7 +226,7 @@ private async Task GetDocumentAndMapToDataModelAsync( // Use the built in Azure AI Search mapper. return await RunOperationAsync( - () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken), + () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken), collectionName, "GetDocument").ConfigureAwait(false); } @@ -243,12 +243,12 @@ private async Task GetDocumentAndMapToDataModelAsync( private Task> MapToStorageModelAndUploadDocumentAsync( SearchClient searchClient, string collectionName, - IEnumerable records, + IEnumerable records, IndexDocumentsOptions innerOptions, CancellationToken cancellationToken) { // Use the user provided mapper. - if (this._options.MapperType == AzureAISearchMemoryRecordMapperType.JsonObjectCustomMapper) + if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper) { var jsonObjects = RunModelConversion( () => records.Select(this._options.JsonObjectCustomMapper!.MapFromDataToStorageModel), @@ -263,7 +263,7 @@ private Task> MapToStorageModelAndUploadDocumentA // Use the built in Azure AI Search mapper. 
return RunOperationAsync( - () => searchClient.UploadDocumentsAsync(records, innerOptions, cancellationToken), + () => searchClient.UploadDocumentsAsync(records, innerOptions, cancellationToken), collectionName, "UploadDocuments"); } @@ -337,7 +337,7 @@ private static async Task RunOperationAsync(Func> operation, strin } catch (AggregateException ex) when (ex.InnerException is RequestFailedException innerEx) { - var wrapperException = new MemoryServiceCommandExecutionException("Call to memory service failed.", ex); + var wrapperException = new VectorStoreOperationException("Call to vector store failed.", ex); // Using Open Telemetry standard for naming of these entries. // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ @@ -349,7 +349,7 @@ private static async Task RunOperationAsync(Func> operation, strin } catch (RequestFailedException ex) { - var wrapperException = new MemoryServiceCommandExecutionException("Call to memory service failed.", ex); + var wrapperException = new VectorStoreOperationException("Call to vector store failed.", ex); // Using Open Telemetry standard for naming of these entries. // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ @@ -362,7 +362,7 @@ private static async Task RunOperationAsync(Func> operation, strin } /// - /// Run the given model conversion and wrap any exceptions with . + /// Run the given model conversion and wrap any exceptions with . /// /// The response type of the operation. /// The operation to run. @@ -377,7 +377,7 @@ private static T RunModelConversion(Func operation, string collectionName, } catch (Exception ex) { - var wrapperException = new MemoryDataModelMappingException("Failed to convert memory data model.", ex); + var wrapperException = new VectorStoreRecordMappingException("Failed to convert vector store record.", ex); // Using Open Telemetry standard for naming of these entries. 
// https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs new file mode 100644 index 000000000000..37bc65495dfc --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json.Nodes; +using Microsoft.SemanticKernel.Memory; + +namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; + +/// +/// Options when creating a <see cref="AzureAISearchVectorRecordStore{TRecord}"/>. +/// +public sealed class AzureAISearchVectorRecordStoreOptions + where TRecord : class +{ + /// + /// Gets or sets the default collection name to use. + /// If not provided here, the collection name will need to be provided for each operation or the operation will throw. + /// + public string? DefaultCollectionName { get; init; } = null; + + /// + /// Gets or sets the choice of mapper to use when converting between the data model and the azure ai search record. + /// + public AzureAISearchRecordMapperType MapperType { get; init; } = AzureAISearchRecordMapperType.Default; + + /// + /// Gets or sets an optional custom mapper to use when converting between the data model and the azure ai search record. + /// + /// + /// Set <see cref="MapperType"/> to <see cref="AzureAISearchRecordMapperType.JsonObjectCustomMapper"/> to use this mapper. + /// + public IVectorStoreRecordMapper? JsonObjectCustomMapper { get; init; } = null; + + /// + /// Gets or sets an optional record definition that defines the schema of the record type. + /// + /// + /// If not provided, the schema will be inferred from the record model class using reflection. + /// In this case, the record model properties must be annotated with the appropriate attributes to indicate their usage. + /// See <see cref="VectorStoreRecordKeyAttribute"/>, <see cref="VectorStoreRecordDataAttribute"/> and <see cref="VectorStoreRecordVectorAttribute"/>. + /// + public VectorStoreRecordDefinition? 
VectorStoreRecordDefinition { get; init; } = null; +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryCollectionFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryCollectionFixture.cs deleted file mode 100644 index d64a6ae51b95..000000000000 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryCollectionFixture.cs +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using Xunit; - -namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; - -[CollectionDefinition("AzureAISearchMemoryCollection")] -public class AzureAISearchMemoryCollectionFixture : ICollectionFixture -{ -} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryRecordServiceTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs similarity index 64% rename from dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryRecordServiceTests.cs rename to dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs index 8f7edf46e2d9..e260b83b14a5 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryRecordServiceTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs @@ -9,17 +9,17 @@ using Microsoft.SemanticKernel.Memory; using Xunit; using Xunit.Abstractions; -using static SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch.AzureAISearchMemoryFixture; +using static SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch.AzureAISearchVectorStoreFixture; using System.Text.Json.Nodes; namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; /// -/// Integration tests for class. +/// Integration tests for class. /// Tests work with Azure AI Search Instance. 
/// -[Collection("AzureAISearchMemoryCollection")] -public sealed class AzureAISearchMemoryRecordServiceTests(ITestOutputHelper output, AzureAISearchMemoryFixture fixture) : IClassFixture +[Collection("AzureAISearchVectorStoreCollection")] +public sealed class AzureAISearchVectorRecordStoreTests(ITestOutputHelper output, AzureAISearchVectorStoreFixture fixture) : IClassFixture { // If null, all tests will be enabled private const string SkipReason = null; //"Requires Azure AI Search Service instance up and running"; @@ -27,15 +27,15 @@ public sealed class AzureAISearchMemoryRecordServiceTests(ITestOutputHelper outp [Theory(Skip = SkipReason)] [InlineData(true)] [InlineData(false)] - public async Task ItCanUpsertDocumentToMemoryStoreAsync(bool useRecordDefinition) + public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition) { // Arrange - var options = new AzureAISearchMemoryRecordServiceOptions + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName, - MemoryRecordDefinition = useRecordDefinition ? fixture.MemoryRecordDefinition : null + VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null }; - var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); // Act var hotel = new Hotel() @@ -79,14 +79,14 @@ public async Task ItCanUpsertDocumentToMemoryStoreAsync(bool useRecordDefinition } [Fact(Skip = SkipReason)] - public async Task ItCanUpsertManyDocumentsToMemoryStoreAsync() + public async Task ItCanUpsertManyDocumentsToVectorStoreAsync() { // Arrange - var options = new AzureAISearchMemoryRecordServiceOptions + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; - var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); // Act var results = sut.UpsertBatchAsync( @@ -117,15 +117,15 @@ public async Task ItCanUpsertManyDocumentsToMemoryStoreAsync() [InlineData(true, false)] [InlineData(false, true)] [InlineData(false, false)] - public async Task ItCanGetDocumentFromMemoryStoreAsync(bool includeVectors, bool useRecordDefinition) + public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool useRecordDefinition) { // Arrange - var options = new AzureAISearchMemoryRecordServiceOptions + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName, - MemoryRecordDefinition = useRecordDefinition ? fixture.MemoryRecordDefinition : null + VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null }; - var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); // Act var getResult = await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = includeVectors }); @@ -152,14 +152,14 @@ public async Task ItCanGetDocumentFromMemoryStoreAsync(bool includeVectors, bool } [Fact(Skip = SkipReason)] - public async Task ItCanGetManyDocumentsFromMemoryStoreAsync() + public async Task ItCanGetManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new AzureAISearchMemoryRecordServiceOptions + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; - var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); // Act var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-2", "BaseSet-3", "BaseSet-4"], new GetRecordOptions { IncludeVectors = true }); @@ -180,46 +180,46 @@ public async Task ItCanGetManyDocumentsFromMemoryStoreAsync() public async Task ItThrowsForPartialGetBatchResultAsync() { // Arrange. - var options = new AzureAISearchMemoryRecordServiceOptions + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; - var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); // Act. 
- await Assert.ThrowsAsync(async () => await sut.GetBatchAsync(["BaseSet-1", "BaseSet-5", "BaseSet-2"]).ToListAsync()); + await Assert.ThrowsAsync(async () => await sut.GetBatchAsync(["BaseSet-1", "BaseSet-5", "BaseSet-2"]).ToListAsync()); } [Theory(Skip = SkipReason)] [InlineData(true)] [InlineData(false)] - public async Task ItCanRemoveDocumentFromMemoryStoreAsync(bool useRecordDefinition) + public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefinition) { // Arrange - var options = new AzureAISearchMemoryRecordServiceOptions + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName, - MemoryRecordDefinition = useRecordDefinition ? fixture.MemoryRecordDefinition : null + VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); await sut.UpsertAsync(CreateTestHotel("Remove-1")); // Act await sut.DeleteAsync("Remove-1"); // Assert - await Assert.ThrowsAsync(async () => await sut.GetAsync("Remove-1", new GetRecordOptions { IncludeVectors = true })); + await Assert.ThrowsAsync(async () => await sut.GetAsync("Remove-1", new GetRecordOptions { IncludeVectors = true })); } [Fact(Skip = SkipReason)] - public async Task ItCanRemoveManyDocumentsFromMemoryStoreAsync() + public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new AzureAISearchMemoryRecordServiceOptions + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; - var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); await sut.UpsertAsync(CreateTestHotel("RemoveMany-1")); await sut.UpsertAsync(CreateTestHotel("RemoveMany-2")); await 
sut.UpsertAsync(CreateTestHotel("RemoveMany-3")); @@ -228,44 +228,44 @@ public async Task ItCanRemoveManyDocumentsFromMemoryStoreAsync() await sut.DeleteBatchAsync(["RemoveMany-1", "RemoveMany-2", "RemoveMany-3"]); // Assert - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); + await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); + await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); } [Fact(Skip = SkipReason)] public async Task ItThrowsCommandExecutionExceptionForFailedConnectionAsync() { // Arrange - var options = new AzureAISearchMemoryRecordServiceOptions { DefaultCollectionName = fixture.TestIndexName }; + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; var searchIndexClient = new SearchIndexClient(new Uri("https://localhost:12345"), new AzureKeyCredential("12345")); - var sut = new AzureAISearchMemoryRecordService(searchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(searchIndexClient, options); // Act & Assert - await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); + await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); } [Fact(Skip = SkipReason)] public async Task ItThrowsCommandExecutionExceptionForFailedAuthenticationAsync() { // Arrange - var options = new 
AzureAISearchMemoryRecordServiceOptions { DefaultCollectionName = fixture.TestIndexName }; + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; var searchIndexClient = new SearchIndexClient(new Uri(fixture.Config.ServiceUrl), new AzureKeyCredential("12345")); - var sut = new AzureAISearchMemoryRecordService(searchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(searchIndexClient, options); // Act & Assert - await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); + await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); } [Fact(Skip = SkipReason)] public async Task ItThrowsMappingExceptionForFailedMapperAsync() { // Arrange - var options = new AzureAISearchMemoryRecordServiceOptions { DefaultCollectionName = fixture.TestIndexName, MapperType = AzureAISearchMemoryRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = new FailingMapper() }; - var sut = new AzureAISearchMemoryRecordService(fixture.SearchIndexClient, options); + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName, MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = new FailingMapper() }; + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); // Act & Assert - await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); + await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); } private static Hotel CreateTestHotel(string hotelId) => new() @@ -285,7 +285,7 @@ public async Task ItThrowsMappingExceptionForFailedMapperAsync() } }; - private class FailingMapper : IMemoryRecordMapper + private sealed class FailingMapper : IVectorStoreRecordMapper { public 
JsonObject MapFromDataToStorageModel(Hotel dataModel) { diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreCollectionFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreCollectionFixture.cs new file mode 100644 index 000000000000..6c9870cf0327 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreCollectionFixture.cs @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Xunit; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; + +[CollectionDefinition("AzureAISearchVectorStoreCollection")] +public class AzureAISearchVectorStoreCollectionFixture : ICollectionFixture +{ +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs similarity index 83% rename from dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryFixture.cs rename to dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs index 723e88ef347a..664143b60080 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchMemoryFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs @@ -20,7 +20,7 @@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; /// /// Helper class for setting up and tearing down Azure AI Search indexes for testing purposes. /// -public class AzureAISearchMemoryFixture : IAsyncLifetime +public class AzureAISearchVectorStoreFixture : IAsyncLifetime { /// /// Test index name which consists out of "hotels-" and the machine name with any non-alphanumeric characters removed. 
@@ -36,31 +36,31 @@ public class AzureAISearchMemoryFixture : IAsyncLifetime .AddJsonFile(path: "testsettings.json", optional: false, reloadOnChange: true) .AddJsonFile(path: "testsettings.development.json", optional: true, reloadOnChange: true) .AddEnvironmentVariables() - .AddUserSecrets() + .AddUserSecrets() .Build(); /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// - public AzureAISearchMemoryFixture() + public AzureAISearchVectorStoreFixture() { var config = this._configuration.GetRequiredSection("AzureAISearch").Get(); Assert.NotNull(config); this.Config = config; this.SearchIndexClient = new SearchIndexClient(new Uri(config.ServiceUrl), new AzureKeyCredential(config.ApiKey)); - this.MemoryRecordDefinition = new MemoryRecordDefinition + this.VectorStoreRecordDefinition = new VectorStoreRecordDefinition { - Properties = new List + Properties = new List { - new MemoryRecordKeyProperty("HotelId"), - new MemoryRecordDataProperty("HotelName"), - new MemoryRecordDataProperty("Description"), - new MemoryRecordVectorProperty("DescriptionEmbedding"), - new MemoryRecordDataProperty("Tags"), - new MemoryRecordDataProperty("ParkingIncluded"), - new MemoryRecordDataProperty("LastRenovationDate"), - new MemoryRecordDataProperty("Rating"), - new MemoryRecordDataProperty("Address") + new VectorStoreRecordKeyProperty("HotelId"), + new VectorStoreRecordDataProperty("HotelName"), + new VectorStoreRecordDataProperty("Description"), + new VectorStoreRecordVectorProperty("DescriptionEmbedding"), + new VectorStoreRecordDataProperty("Tags"), + new VectorStoreRecordDataProperty("ParkingIncluded"), + new VectorStoreRecordDataProperty("LastRenovationDate"), + new VectorStoreRecordDataProperty("Rating"), + new VectorStoreRecordDataProperty("Address") } }; } @@ -76,9 +76,9 @@ public AzureAISearchMemoryFixture() public string TestIndexName { get => this._testIndexName; } /// - /// Gets the manually created memory record definition for 
our test model. + /// Gets the manually created vector store record definition for our test model. /// - public MemoryRecordDefinition MemoryRecordDefinition { get; private set; } + public VectorStoreRecordDefinition VectorStoreRecordDefinition { get; private set; } /// /// Gets the configuration for the Azure AI Search service. @@ -91,9 +91,9 @@ public AzureAISearchMemoryFixture() /// An async task. public async Task InitializeAsync() { - await AzureAISearchMemoryFixture.DeleteIndexIfExistsAsync(this._testIndexName, this.SearchIndexClient); - await AzureAISearchMemoryFixture.CreateIndexAsync(this._testIndexName, this.SearchIndexClient); - AzureAISearchMemoryFixture.UploadDocuments(this.SearchIndexClient.GetSearchClient(this._testIndexName)); + await AzureAISearchVectorStoreFixture.DeleteIndexIfExistsAsync(this._testIndexName, this.SearchIndexClient); + await AzureAISearchVectorStoreFixture.CreateIndexAsync(this._testIndexName, this.SearchIndexClient); + AzureAISearchVectorStoreFixture.UploadDocuments(this.SearchIndexClient.GetSearchClient(this._testIndexName)); } /// @@ -102,7 +102,7 @@ public async Task InitializeAsync() /// An async task. 
public async Task DisposeAsync() { - await AzureAISearchMemoryFixture.DeleteIndexIfExistsAsync(this._testIndexName, this.SearchIndexClient); + await AzureAISearchVectorStoreFixture.DeleteIndexIfExistsAsync(this._testIndexName, this.SearchIndexClient); } /// @@ -228,40 +228,40 @@ public static void UploadDocuments(SearchClient searchClient) public class Hotel { [SimpleField(IsKey = true, IsFilterable = true)] - [MemoryRecordKey] + [VectorStoreRecordKey] public string HotelId { get; set; } [SearchableField(IsSortable = true)] - [MemoryRecordData] + [VectorStoreRecordData] public string HotelName { get; set; } [SearchableField(AnalyzerName = LexicalAnalyzerName.Values.EnLucene)] - [MemoryRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] public string Description { get; set; } - [MemoryRecordVector] + [VectorStoreRecordVector] public ReadOnlyMemory? DescriptionEmbedding { get; set; } [SearchableField(IsFilterable = true, IsFacetable = true)] - [MemoryRecordData] + [VectorStoreRecordData] #pragma warning disable CA1819 // Properties should not return arrays public string[] Tags { get; set; } #pragma warning restore CA1819 // Properties should not return arrays [SimpleField(IsFilterable = true, IsSortable = true, IsFacetable = true)] - [MemoryRecordData] + [VectorStoreRecordData] public bool? ParkingIncluded { get; set; } [SimpleField(IsFilterable = true, IsSortable = true, IsFacetable = true)] - [MemoryRecordData] + [VectorStoreRecordData] public DateTimeOffset? LastRenovationDate { get; set; } [SimpleField(IsFilterable = true, IsSortable = true, IsFacetable = true)] - [MemoryRecordData] + [VectorStoreRecordData] public double? 
Rating { get; set; } [SearchableField] - [MemoryRecordData] + [VectorStoreRecordData] public Address Address { get; set; } } diff --git a/dotnet/src/InternalUtilities/src/Schema/VectorStoreModelPropertyReader.cs b/dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs similarity index 79% rename from dotnet/src/InternalUtilities/src/Schema/VectorStoreModelPropertyReader.cs rename to dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs index 2df10d7b6aac..d61febe641cc 100644 --- a/dotnet/src/InternalUtilities/src/Schema/VectorStoreModelPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs @@ -14,15 +14,18 @@ namespace Microsoft.SemanticKernel; /// -/// Contains helpers for reading memory service model properties and their attributes. +/// Contains helpers for reading vector store model properties and their attributes. /// -internal static class MemoryServiceModelPropertyReader +internal static class VectorStoreRecordPropertyReader { /// Cache of property enumerations so that we don't incur reflection costs with each invocation. - private static readonly Dictionary dataProperties, List vectorProperties)> s_propertiesCache = new(); + private static readonly Dictionary dataProperties, List vectorProperties)> s_singleVectorPropertiesCache = new(); + + /// Cache of property enumerations so that we don't incur reflection costs with each invocation. + private static readonly Dictionary dataProperties, List vectorProperties)> s_multipleVectorsPropertiesCache = new(); /// - /// Find the properties with , and attributes + /// Find the properties with , and attributes /// and verify that they exist and that we have the expected numbers of each type. /// Return those properties in separate categories. /// @@ -31,8 +34,10 @@ internal static class MemoryServiceModelPropertyReader /// The categorized properties. 
public static (PropertyInfo keyProperty, List dataProperties, List vectorProperties) FindProperties(Type type, bool supportsMultipleVectors) { + var cache = supportsMultipleVectors ? s_multipleVectorsPropertiesCache : s_singleVectorPropertiesCache; + // First check the cache. - if (s_propertiesCache.TryGetValue(type, out var cachedProperties)) + if (cache.TryGetValue(type, out var cachedProperties)) { return cachedProperties; } @@ -45,7 +50,7 @@ public static (PropertyInfo keyProperty, List dataProperties, List foreach (var property in type.GetProperties()) { // Get Key property. - if (property.GetCustomAttribute() is not null) + if (property.GetCustomAttribute() is not null) { if (keyProperty is not null) { @@ -56,13 +61,13 @@ public static (PropertyInfo keyProperty, List dataProperties, List } // Get data properties. - if (property.GetCustomAttribute() is not null) + if (property.GetCustomAttribute() is not null) { dataProperties.Add(property); } // Get Vector properties. - if (property.GetCustomAttribute() is not null) + if (property.GetCustomAttribute() is not null) { // Add all vector properties if we support multiple vectors. if (supportsMultipleVectors) @@ -95,35 +100,35 @@ public static (PropertyInfo keyProperty, List dataProperties, List } // Update the cache. - s_propertiesCache[type] = (keyProperty, dataProperties, vectorProperties); + cache[type] = (keyProperty, dataProperties, vectorProperties); return (keyProperty, dataProperties, vectorProperties); } /// - /// Find the properties listed in the on the and verify + /// Find the properties listed in the on the and verify /// that they exist and that we have the expected numbers of each type. /// Return those properties in separate categories. /// /// The data model to find the properties on. - /// The property configuration. + /// The property configuration. /// A value indicating whether multiple vector properties are supported instead of just one. /// The categorized properties. 
- public static (PropertyInfo keyProperty, List dataProperties, List vectorProperties) FindProperties(Type type, MemoryRecordDefinition memoryRecordDefinition, bool supportsMultipleVectors) + public static (PropertyInfo keyProperty, List dataProperties, List vectorProperties) FindProperties(Type type, VectorStoreRecordDefinition vectorStoreRecordDefinition, bool supportsMultipleVectors) { PropertyInfo? keyProperty = null; List dataProperties = new(); List vectorProperties = new(); bool singleVectorPropertyFound = false; - foreach (MemoryRecordProperty property in memoryRecordDefinition.Properties) + foreach (VectorStoreRecordProperty property in vectorStoreRecordDefinition.Properties) { // Key. - if (property is MemoryRecordKeyProperty keyPropertyInfo) + if (property is VectorStoreRecordKeyProperty keyPropertyInfo) { if (keyProperty is not null) { - throw new ArgumentException($"Multiple key properties specified for type {type.FullName}."); + throw new ArgumentException($"Multiple key properties configured for type {type.FullName}."); } keyProperty = type.GetProperty(keyPropertyInfo.PropertyName); @@ -133,7 +138,7 @@ public static (PropertyInfo keyProperty, List dataProperties, List } } // Data. - else if (property is MemoryRecordDataProperty dataPropertyInfo) + else if (property is VectorStoreRecordDataProperty dataPropertyInfo) { var dataProperty = type.GetProperty(dataPropertyInfo.PropertyName); if (dataProperty == null) @@ -144,7 +149,7 @@ public static (PropertyInfo keyProperty, List dataProperties, List dataProperties.Add(dataProperty); } // Vector. 
- else if (property is MemoryRecordVectorProperty vectorPropertyInfo) + else if (property is VectorStoreRecordVectorProperty vectorPropertyInfo) { var vectorProperty = type.GetProperty(vectorPropertyInfo.PropertyName); if (vectorProperty == null) @@ -170,10 +175,16 @@ public static (PropertyInfo keyProperty, List dataProperties, List } else { - throw new ArgumentException($"Unknown property type '{property.GetType().FullName}' in memory record definition."); + throw new ArgumentException($"Unknown property type '{property.GetType().FullName}' in vector store record definition."); } } + // Check that we have a key property. + if (keyProperty is null) + { + throw new ArgumentException($"No key property configured for type {type.FullName}."); + } + // Check that we have one vector property if we don't have named vectors. if (!supportsMultipleVectors && !singleVectorPropertyFound) { diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs deleted file mode 100644 index 3d3149c4ce7c..000000000000 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordService{TKey,TDataModel}.cs +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using System.Collections.Generic; -using System.Diagnostics.CodeAnalysis; -using System.Threading; -using System.Threading.Tasks; - -namespace Microsoft.SemanticKernel.Memory; - -/// -/// An interface for adding, updating, deleting and retrieving records from a memory store. -/// -/// The data type of the record key. -/// The data model to use for adding, updating and retrieving data from storage. -[Experimental("SKEXP0001")] -public interface IMemoryRecordService - where TDataModel : class -{ - /// - /// Gets a memory record from the data store. Does not guarantee that the collection exists. - /// Throws if the record is not found. 
- /// - /// The unique id associated with the memory record to get. - /// Optional options for retrieving the record. - /// The to monitor for cancellation requests. The default is . - /// The memory record if found, otherwise null. - /// Throw when the command fails to execute for any reason. - /// Throw when mapping between the storage model and data model fails. - Task GetAsync(TKey key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); - - /// - /// Gets a batch of memory records from the data store. Does not guarantee that the collection exists. - /// Throws if any of the records are not found. - /// Gets will be made in a single request or in a single parallel batch depending on the available store functionality. - /// - /// The unique ids associated with the memory record to get. - /// Optional options for retrieving the records. - /// The to monitor for cancellation requests. The default is . - /// The vecmemorytor records associated with the unique keys provided. - /// Throw when the command fails to execute for any reason. - /// Throw when mapping between the storage model and data model fails. - IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, CancellationToken cancellationToken = default); - - /// - /// Deletes a memory record from the data store. Does not guarantee that the collection exists. - /// - /// The unique id associated with the memory record to remove. - /// Optional options for removing the record. - /// The to monitor for cancellation requests. The default is . - /// The unique identifier for the memory record. - /// Throw when the command fails to execute for any reason. - Task DeleteAsync(TKey key, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); - - /// - /// Deletes a batch of memory records from the data store. Does not guarantee that the collection exists. 
- /// Deletes will be made in a single request or in a single parallel batch depending on the available store functionality. - /// - /// The unique ids associated with the memory records to remove. - /// Optional options for removing the records. - /// The to monitor for cancellation requests. The default is . - /// Throw when the command fails to execute for any reason. - Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); - - /// - /// Upserts a memory record into the data store. Does not guarantee that the collection exists. - /// If the record already exists, it will be updated. - /// If the record does not exist, it will be created. - /// - /// The memory record to upsert. - /// Optional options for upserting the record. - /// The to monitor for cancellation requests. The default is . - /// The unique identifier for the memory record. - /// Throw when the command fails to execute for any reason. - /// Throw when mapping between the storage model and data model fails. - Task UpsertAsync(TDataModel record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default); - - /// - /// Upserts a group of memory records into the data store. Does not guarantee that the collection exists. - /// If the record already exists, it will be updated. - /// If the record does not exist, it will be created. - /// Upserts will be made in a single request or in a single parallel batch depending on the available store functionality. - /// - /// The memory records to upsert. - /// Optional options for upserting the records. - /// The to monitor for cancellation requests. The default is . - /// The unique identifiers for the memory records. - /// Throw when the command fails to execute for any reason. - /// Throw when mapping between the storage model and data model fails. - IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? 
options = default, CancellationToken cancellationToken = default); -} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs new file mode 100644 index 000000000000..79578bfa907d --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs @@ -0,0 +1,90 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// An interface for adding, updating, deleting and retrieving records from a vector store. +/// +/// The data type of the record key. +/// The record data model to use for adding, updating and retrieving data from the store. +[Experimental("SKEXP0001")] +public interface IVectorRecordStore + where TRecord : class +{ + /// + /// Gets a record from the vector store. Does not guarantee that the collection exists. + /// Throws if the record is not found. + /// + /// The unique id associated with the record to get. + /// Optional options for retrieving the record. + /// The to monitor for cancellation requests. The default is . + /// The record if found, otherwise null. + /// Throw when the command fails to execute for any reason. + /// Throw when mapping between the storage model and record data model fails. + Task GetAsync(TKey key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + + /// + /// Gets a batch of records from the vector store. Does not guarantee that the collection exists. + /// Throws if any of the records are not found. + /// Gets will be made in a single request or in a single parallel batch depending on the available store functionality. + /// + /// The unique ids associated with the record to get. + /// Optional options for retrieving the records. + /// The to monitor for cancellation requests. 
The default is . + /// The records associated with the unique keys provided. + /// Throw when the command fails to execute for any reason. + /// Throw when mapping between the storage model and record data model fails. + IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + + /// + /// Deletes a record from the vector store. Does not guarantee that the collection exists. + /// + /// The unique id associated with the record to remove. + /// Optional options for removing the record. + /// The to monitor for cancellation requests. The default is . + /// The unique identifier for the record. + /// Throw when the command fails to execute for any reason. + Task DeleteAsync(TKey key, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); + + /// + /// Deletes a batch of records from the vector store. Does not guarantee that the collection exists. + /// Deletes will be made in a single request or in a single parallel batch depending on the available store functionality. + /// + /// The unique ids associated with the records to remove. + /// Optional options for removing the records. + /// The to monitor for cancellation requests. The default is . + /// Throw when the command fails to execute for any reason. + Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); + + /// + /// Upserts a record into the vector store. Does not guarantee that the collection exists. + /// If the record already exists, it will be updated. + /// If the record does not exist, it will be created. + /// + /// The record to upsert. + /// Optional options for upserting the record. + /// The to monitor for cancellation requests. The default is . + /// The unique identifier for the record. + /// Throw when the command fails to execute for any reason. 
+ /// Throw when mapping between the storage model and record data model fails. + Task UpsertAsync(TRecord record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default); + + /// + /// Upserts a group of records into the vector store. Does not guarantee that the collection exists. + /// If the record already exists, it will be updated. + /// If the record does not exist, it will be created. + /// Upserts will be made in a single request or in a single parallel batch depending on the available store functionality. + /// + /// The records to upsert. + /// Optional options for upserting the records. + /// The to monitor for cancellation requests. The default is . + /// The unique identifiers for the records. + /// Throw when the command fails to execute for any reason. + /// Throw when mapping between the storage model and record data model fails. + IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default); +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordMapper.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorStoreRecordMapper.cs similarity index 50% rename from dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordMapper.cs rename to dotnet/src/SemanticKernel.Abstractions/Memory/IVectorStoreRecordMapper.cs index 8a4e4ce84d7a..e47fa9c017e2 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryRecordMapper.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorStoreRecordMapper.cs @@ -3,25 +3,25 @@ namespace Microsoft.SemanticKernel.Memory; /// -/// Interface for mapping between a storage model, and the consumer data model. +/// Interface for mapping between a storage model, and the consumer record data model. /// -/// The consumer data model to map to or from. +/// The consumer record data model to map to or from. /// The storage model to map to or from. 
-public interface IMemoryRecordMapper - where TConsumerDataModel : class +public interface IVectorStoreRecordMapper + where TRecordDataModel : class { /// - /// Map from the consumer data model to the storage model. + /// Map from the consumer record data model to the storage model. /// - /// The consumer data model record to map. + /// The consumer record data model record to map. /// The mapped result. - TStorageModel MapFromDataToStorageModel(TConsumerDataModel dataModel); + TStorageModel MapFromDataToStorageModel(TRecordDataModel dataModel); /// - /// Map from the storage model to the consumer data model. + /// Map from the storage model to the consumer record data model. /// /// The storage data model record to map. /// The of the operation that this mapping is needed for. /// The mapped result. - TConsumerDataModel MapFromStorageToDataModel(TStorageModel storageModel, GetRecordOptions? options = default); + TRecordDataModel MapFromStorageToDataModel(TStorageModel storageModel, GetRecordOptions? options = default); } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryDataModelMappingException.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryDataModelMappingException.cs deleted file mode 100644 index 242ef9f18e00..000000000000 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryDataModelMappingException.cs +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using System; - -namespace Microsoft.SemanticKernel.Memory; - -/// -/// Exception thrown when a failure occurs while trying to convert memory models for storage or retrieval. -/// -public class MemoryDataModelMappingException : KernelException -{ - /// - /// Initializes a new instance of the class. - /// - public MemoryDataModelMappingException() - { - } - - /// - /// Initializes a new instance of the class with a specified error message. - /// - /// The error message that explains the reason for the exception. 
- public MemoryDataModelMappingException(string? message) : base(message) - { - } - - /// - /// Initializes a new instance of the class with a specified error message and a reference to the inner exception that is the cause of this exception. - /// - /// The error message that explains the reason for the exception. - /// The exception that is the cause of the current exception, or a null reference if no inner exception is specified. - public MemoryDataModelMappingException(string? message, Exception? innerException) : base(message, innerException) - { - } -} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryServiceCommandExecutionException.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryServiceCommandExecutionException.cs deleted file mode 100644 index a78612f14dcd..000000000000 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryServiceCommandExecutionException.cs +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using System; - -namespace Microsoft.SemanticKernel.Memory; - -/// -/// Exception thrown when a memory service command fails, such as upserting a record or deleting a collection. -/// -public class MemoryServiceCommandExecutionException : KernelException -{ - /// - /// Initializes a new instance of the class. - /// - public MemoryServiceCommandExecutionException() - { - } - - /// - /// Initializes a new instance of the class with a specified error message. - /// - /// The error message that explains the reason for the exception. - public MemoryServiceCommandExecutionException(string? message) : base(message) - { - } - - /// - /// Initializes a new instance of the class with a specified error message and a reference to the inner exception that is the cause of this exception. - /// - /// The error message that explains the reason for the exception. - /// The exception that is the cause of the current exception, or a null reference if no inner exception is specified. 
- public MemoryServiceCommandExecutionException(string? message, Exception? innerException) : base(message, innerException) - { - } -} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordDataAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordDataAttribute.cs similarity index 82% rename from dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordDataAttribute.cs rename to dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordDataAttribute.cs index fbcfd7087722..8c054765196c 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordDataAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordDataAttribute.cs @@ -6,11 +6,11 @@ namespace Microsoft.SemanticKernel.Memory; /// -/// Attribute to mark a property on a vector model class as the data that is being indexed. +/// Attribute to mark a property on a record class as data. /// [Experimental("SKEXP0001")] [AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] -public sealed class MemoryRecordDataAttribute : Attribute +public sealed class VectorStoreRecordDataAttribute : Attribute { /// /// Gets or sets a value indicating whether this data field has an associated embedding field. 
diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordKeyAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordKeyAttribute.cs similarity index 62% rename from dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordKeyAttribute.cs rename to dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordKeyAttribute.cs index 72c1575acff4..c0a7cf5a75a1 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordKeyAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordKeyAttribute.cs @@ -6,10 +6,10 @@ namespace Microsoft.SemanticKernel.Memory; /// -/// Attribute to mark a property on a class as the key under which data is stored in a vector store. +/// Attribute to mark a property on a record class as the key under which data is stored in a vector store. /// [Experimental("SKEXP0001")] [AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] -public sealed class MemoryRecordKeyAttribute : Attribute +public sealed class VectorStoreRecordKeyAttribute : Attribute { } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordVectorAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordVectorAttribute.cs similarity index 67% rename from dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordVectorAttribute.cs rename to dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordVectorAttribute.cs index c09b47f5ddea..7f34448710f3 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/MemoryRecordAttributes/MemoryRecordVectorAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordVectorAttribute.cs @@ -6,10 +6,10 @@ namespace Microsoft.SemanticKernel.Memory; /// -/// Attribute to mark a property on a 
vector model class as the vector. +/// Attribute to mark a property on a record class as the vector. /// [Experimental("SKEXP0001")] [AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] -public sealed class MemoryRecordVectorAttribute : Attribute +public sealed class VectorStoreRecordVectorAttribute : Attribute { } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordKeyProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordKeyProperty.cs deleted file mode 100644 index c545acdeb69d..000000000000 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordKeyProperty.cs +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using System.Diagnostics.CodeAnalysis; - -namespace Microsoft.SemanticKernel.Memory; - -/// -/// A description of a key property for storage in a memory store. -/// -[Experimental("SKEXP0001")] -public sealed class MemoryRecordKeyProperty : MemoryRecordProperty -{ - /// - /// Initializes a new instance of the class. - /// - /// The name of the property. - public MemoryRecordKeyProperty(string propertyName) - : base(propertyName) - { - } - - /// - /// Initializes a new instance of the class by cloning the given source. - /// - /// The source to clone - public MemoryRecordKeyProperty(MemoryRecordKeyProperty source) - : base(source.PropertyName) - { - } -} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordVectorProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordVectorProperty.cs deleted file mode 100644 index 94e627103d9a..000000000000 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordVectorProperty.cs +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. 
- -using System.Diagnostics.CodeAnalysis; - -namespace Microsoft.SemanticKernel.Memory; - -/// -/// A description of a vector property for storage in a memory store. -/// -[Experimental("SKEXP0001")] -public sealed class MemoryRecordVectorProperty : MemoryRecordProperty -{ - /// - /// Initializes a new instance of the class. - /// - /// The name of the property. - public MemoryRecordVectorProperty(string propertyName) - : base(propertyName) - { - } - - /// - /// Initializes a new instance of the class by cloning the given source. - /// - /// The source to clone - public MemoryRecordVectorProperty(MemoryRecordVectorProperty source) - : base(source.PropertyName) - { - } -} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDataProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDataProperty.cs similarity index 67% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDataProperty.cs rename to dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDataProperty.cs index 383d33ba5a5a..eb216825a56e 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDataProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDataProperty.cs @@ -5,25 +5,25 @@ namespace Microsoft.SemanticKernel.Memory; /// -/// A description of a data property for storage in a memory store. +/// A description of a data property on a record for storage in a vector store. /// [Experimental("SKEXP0001")] -public sealed class MemoryRecordDataProperty : MemoryRecordProperty +public sealed class VectorStoreRecordDataProperty : VectorStoreRecordProperty { /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The name of the property. 
- public MemoryRecordDataProperty(string propertyName) + public VectorStoreRecordDataProperty(string propertyName) : base(propertyName) { } /// - /// Initializes a new instance of the class by cloning the given source. + /// Initializes a new instance of the class by cloning the given source. /// /// The source to clone - public MemoryRecordDataProperty(MemoryRecordDataProperty source) + public VectorStoreRecordDataProperty(VectorStoreRecordDataProperty source) : base(source.PropertyName) { this.HasEmbedding = source.HasEmbedding; diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDefinition.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDefinition.cs similarity index 54% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDefinition.cs rename to dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDefinition.cs index 859aea9a51b7..b475663a168a 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordDefinition.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDefinition.cs @@ -6,16 +6,16 @@ namespace Microsoft.SemanticKernel.Memory; /// -/// A description of the properties of a record stored in a memory store, plus how the properties are used. +/// A description of the properties of a record stored in a vector store, plus how the properties are used. /// [Experimental("SKEXP0001")] -public sealed class MemoryRecordDefinition +public sealed class VectorStoreRecordDefinition { /// Empty static list for initialization purposes. - private static readonly List s_emptyFields = new(); + private static readonly List s_emptyFields = new(); /// /// The list of properties that are stored in the record. 
/// - public IReadOnlyList Properties { get; init; } = s_emptyFields; + public IReadOnlyList Properties { get; init; } = s_emptyFields; } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordKeyProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordKeyProperty.cs new file mode 100644 index 000000000000..62b4f4d172c6 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordKeyProperty.cs @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// A description of a key property on a record for storage in a vector store. +/// +[Experimental("SKEXP0001")] +public sealed class VectorStoreRecordKeyProperty : VectorStoreRecordProperty +{ + /// + /// Initializes a new instance of the class. + /// + /// The name of the property. + public VectorStoreRecordKeyProperty(string propertyName) + : base(propertyName) + { + } + + /// + /// Initializes a new instance of the class by cloning the given source. 
+ /// + /// The source to clone + public VectorStoreRecordKeyProperty(VectorStoreRecordKeyProperty source) + : base(source.PropertyName) + { + } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordProperty.cs similarity index 63% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordProperty.cs rename to dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordProperty.cs index 0b0cf226bb5d..e4ca8661c786 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/MemoryRecordProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordProperty.cs @@ -5,16 +5,16 @@ namespace Microsoft.SemanticKernel.Memory; /// -/// A description of a property for storage in a memory store. +/// A description of a property on a record for storage in a vector store. /// [Experimental("SKEXP0001")] -public abstract class MemoryRecordProperty +public abstract class VectorStoreRecordProperty { /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The name of the property. - private protected MemoryRecordProperty(string propertyName) + private protected VectorStoreRecordProperty(string propertyName) { this.PropertyName = propertyName; } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordVectorProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordVectorProperty.cs new file mode 100644 index 000000000000..7f4a7ddd78e3 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordVectorProperty.cs @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// A description of a vector property on a record for storage in a vector store. +/// +[Experimental("SKEXP0001")] +public sealed class VectorStoreRecordVectorProperty : VectorStoreRecordProperty +{ + /// + /// Initializes a new instance of the class. + /// + /// The name of the property. + public VectorStoreRecordVectorProperty(string propertyName) + : base(propertyName) + { + } + + /// + /// Initializes a new instance of the class by cloning the given source. + /// + /// The source to clone + public VectorStoreRecordVectorProperty(VectorStoreRecordVectorProperty source) + : base(source.PropertyName) + { + } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/DeleteRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/DeleteRecordOptions.cs index d34f921975d5..357f344799b4 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/DeleteRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/DeleteRecordOptions.cs @@ -5,7 +5,7 @@ namespace Microsoft.SemanticKernel.Memory; /// -/// Optional options when calling . +/// Optional options when calling . /// [Experimental("SKEXP0001")] public class DeleteRecordOptions diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/GetRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/GetRecordOptions.cs index 6f284fb4328f..5a7daa8a6b42 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/GetRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/GetRecordOptions.cs @@ -5,7 +5,7 @@ namespace Microsoft.SemanticKernel.Memory; /// -/// Optional options when calling . +/// Optional options when calling . 
/// [Experimental("SKEXP0001")] public class GetRecordOptions diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/UpsertRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/UpsertRecordOptions.cs index 59bcdadbfa15..860630fba16c 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/UpsertRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/UpsertRecordOptions.cs @@ -5,7 +5,7 @@ namespace Microsoft.SemanticKernel.Memory; /// -/// Optional options when calling . +/// Optional options when calling . /// [Experimental("SKEXP0001")] public class UpsertRecordOptions diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreOperationException.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreOperationException.cs new file mode 100644 index 000000000000..1390f613324c --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreOperationException.cs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Exception thrown when a vector store command fails, such as upserting a record or deleting a collection. +/// +public class VectorStoreOperationException : KernelException +{ + /// + /// Initializes a new instance of the class. + /// + public VectorStoreOperationException() + { + } + + /// + /// Initializes a new instance of the class with a specified error message. + /// + /// The error message that explains the reason for the exception. + public VectorStoreOperationException(string? message) : base(message) + { + } + + /// + /// Initializes a new instance of the class with a specified error message and a reference to the inner exception that is the cause of this exception. + /// + /// The error message that explains the reason for the exception. 
+ /// The exception that is the cause of the current exception, or a null reference if no inner exception is specified. + public VectorStoreOperationException(string? message, Exception? innerException) : base(message, innerException) + { + } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreRecordMappingException.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreRecordMappingException.cs new file mode 100644 index 000000000000..8955175737e9 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreRecordMappingException.cs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Exception thrown when a failure occurs while trying to convert models for storage or retrieval. +/// +public class VectorStoreRecordMappingException : KernelException +{ + /// + /// Initializes a new instance of the class. + /// + public VectorStoreRecordMappingException() + { + } + + /// + /// Initializes a new instance of the class with a specified error message. + /// + /// The error message that explains the reason for the exception. + public VectorStoreRecordMappingException(string? message) : base(message) + { + } + + /// + /// Initializes a new instance of the class with a specified error message and a reference to the inner exception that is the cause of this exception. + /// + /// The error message that explains the reason for the exception. + /// The exception that is the cause of the current exception, or a null reference if no inner exception is specified. + public VectorStoreRecordMappingException(string? message, Exception? 
innerException) : base(message, innerException) + { + } +} From 0740d884520695c434b6537bbe96ec4f49efd5a3 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 17 Jun 2024 16:24:09 +0100 Subject: [PATCH 04/48] .Net: Adding unit tests for property enumerator and excluding from code coverage since it's in InternalUtilities. (#6736) ### Motivation and Context The property enumerator helps to read property information from a model used with any of the vector stores. This property information is used to map from the storage model to the user data model. Adding unit tests for this property enumerator, and marking it with the exclude from code coverage attribute, since the file is located in the Internal Utilities folder and therefore is included in all projects, but it will not be unit tested in each, just in the one centralized location. ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- .../Schema/VectorStoreRecordPropertyReader.cs | 2 + .../VectorStoreRecordPropertyReaderTests.cs | 259 ++++++++++++++++++ 2 files changed, 261 insertions(+) create mode 100644 dotnet/src/SemanticKernel.UnitTests/Utilities/VectorStoreRecordPropertyReaderTests.cs diff --git a/dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs index d61febe641cc..fc580c69bf9c 100644 --- a/dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs @@ -2,6 
+2,7 @@ using System; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; using System.Linq; using System.Reflection; using System.Text.Json; @@ -16,6 +17,7 @@ namespace Microsoft.SemanticKernel; /// /// Contains helpers for reading vector store model properties and their attributes. /// +[ExcludeFromCodeCoverage] internal static class VectorStoreRecordPropertyReader { /// Cache of property enumerations so that we don't incur reflection costs with each invocation. diff --git a/dotnet/src/SemanticKernel.UnitTests/Utilities/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Utilities/VectorStoreRecordPropertyReaderTests.cs new file mode 100644 index 000000000000..232758d77bbb --- /dev/null +++ b/dotnet/src/SemanticKernel.UnitTests/Utilities/VectorStoreRecordPropertyReaderTests.cs @@ -0,0 +1,259 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Memory; +using Xunit; + +namespace SemanticKernel.UnitTests.Utilities; + +public class VectorStoreRecordPropertyReaderTests +{ + [Theory] + [InlineData(true, false)] + [InlineData(false, false)] + [InlineData(true, true)] + [InlineData(false, true)] + public void FindPropertiesCanFindAllPropertiesOnSinglePropsModel(bool supportsMultipleVectors, bool useConfig) + { + // Act. + var properties = useConfig ? + VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), this._singlePropsDefinition, supportsMultipleVectors) : + VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), supportsMultipleVectors); + + // Assert. 
+ Assert.Equal("Key", properties.keyProperty.Name); + Assert.Single(properties.dataProperties); + Assert.Single(properties.vectorProperties); + Assert.Equal("Data", properties.dataProperties[0].Name); + Assert.Equal("Vector", properties.vectorProperties[0].Name); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void FindPropertiesCanFindAllPropertiesOnMultiPropsModel(bool useConfig) + { + // Act. + var properties = useConfig ? + VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), this._multiPropsDefinition, true) : + VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), true); + + // Assert. + Assert.Equal("Key", properties.keyProperty.Name); + Assert.Equal(2, properties.dataProperties.Count); + Assert.Equal(2, properties.vectorProperties.Count); + Assert.Equal("Data1", properties.dataProperties[0].Name); + Assert.Equal("Data2", properties.dataProperties[1].Name); + Assert.Equal("Vector1", properties.vectorProperties[0].Name); + Assert.Equal("Vector2", properties.vectorProperties[1].Name); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void FindPropertiesThrowsForMultipleVectorsWithSingleVectorSupport(bool useConfig) + { + // Act. + var ex = useConfig ? + Assert.Throws(() => VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), this._multiPropsDefinition, false)) : + Assert.Throws(() => VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), false)); + + // Assert. + var expectedMessage = useConfig ? + "Multiple vector properties configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiPropsModel while only one is supported." 
: + "Multiple vector properties found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiPropsModel while only one is supported."; + Assert.Equal(expectedMessage, ex.Message); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void FindPropertiesThrowsOnMultipleKeyProperties(bool useConfig) + { + // Act. + var ex = useConfig ? + Assert.Throws(() => VectorStoreRecordPropertyReader.FindProperties(typeof(MultiKeysModel), this._multiKeysDefinition, true)) : + Assert.Throws(() => VectorStoreRecordPropertyReader.FindProperties(typeof(MultiKeysModel), true)); + + // Assert. + var expectedMessage = useConfig ? + "Multiple key properties configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiKeysModel." : + "Multiple key properties found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiKeysModel."; + Assert.Equal(expectedMessage, ex.Message); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void FindPropertiesThrowsOnNoKeyProperty(bool useConfig) + { + // Act. + var ex = useConfig ? + Assert.Throws(() => VectorStoreRecordPropertyReader.FindProperties(typeof(NoKeyModel), this._noKeyDefinition, true)) : + Assert.Throws(() => VectorStoreRecordPropertyReader.FindProperties(typeof(NoKeyModel), true)); + + // Assert. + var expectedMessage = useConfig ? + "No key property configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoKeyModel." : + "No key property found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoKeyModel."; + Assert.Equal(expectedMessage, ex.Message); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void FindPropertiesThrowsOnNoVectorPropertyWithSingleVectorSupport(bool useConfig) + { + // Act. + var ex = useConfig ? 
+ Assert.Throws(() => VectorStoreRecordPropertyReader.FindProperties(typeof(NoVectorModel), this._noVectorDefinition, false)) : + Assert.Throws(() => VectorStoreRecordPropertyReader.FindProperties(typeof(NoVectorModel), false)); + + // Assert. + var expectedMessage = useConfig ? + "No vector property configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoVectorModel." : + "No vector property found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoVectorModel."; + Assert.Equal(expectedMessage, ex.Message); + } + + [Theory] + [InlineData("Key", "MissingKey")] + [InlineData("Data", "MissingData")] + [InlineData("Vector", "MissingVector")] + public void FindPropertiesUsingConfigThrowsForNotFoundProperties(string propertyType, string propertyName) + { + var missingKeyDefinition = new VectorStoreRecordDefinition { Properties = [new VectorStoreRecordKeyProperty(propertyName)] }; + var missingDataDefinition = new VectorStoreRecordDefinition { Properties = [new VectorStoreRecordDataProperty(propertyName)] }; + var missingVectorDefinition = new VectorStoreRecordDefinition { Properties = [new VectorStoreRecordVectorProperty(propertyName)] }; + + var definition = propertyType switch + { + "Key" => missingKeyDefinition, + "Data" => missingDataDefinition, + "Vector" => missingVectorDefinition, + _ => throw new ArgumentException("Invalid property type.") + }; + + Assert.Throws(() => VectorStoreRecordPropertyReader.FindProperties(typeof(NoKeyModel), definition, false)); + } + + [Fact] + public void VerifyPropertyTypesPassForAllowedTypes() + { + // Arrange. + var properties = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), true); + + // Act. + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(string)], "Data"); + } + + [Fact] + public void VerifyPropertyTypesFailsForDisallowedTypes() + { + // Arrange. 
+ var properties = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), true); + + // Act. + var ex = Assert.Throws(() => VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(int), typeof(float)], "Data")); + + // Assert. + Assert.Equal("Data properties must be one of the supported types: System.Int32, System.Single. Type of Data is System.String.", ex.Message); + } + +#pragma warning disable CA1812 // Invalid unused classes error, since I am using these for testing purposes above. + private sealed class NoKeyModel + { + } + + private readonly VectorStoreRecordDefinition _noKeyDefinition = new(); + + private sealed class NoVectorModel + { + [VectorStoreRecordKey] + public string Key { get; set; } = string.Empty; + } + + private readonly VectorStoreRecordDefinition _noVectorDefinition = new() + { + Properties = + [ + new VectorStoreRecordKeyProperty("Key") + ] + }; + + private sealed class MultiKeysModel + { + [VectorStoreRecordKey] + public string Key1 { get; set; } = string.Empty; + + [VectorStoreRecordKey] + public string Key2 { get; set; } = string.Empty; + } + + private readonly VectorStoreRecordDefinition _multiKeysDefinition = new() + { + Properties = + [ + new VectorStoreRecordKeyProperty("Key1"), + new VectorStoreRecordKeyProperty("Key2") + ] + }; + + private sealed class SinglePropsModel + { + [VectorStoreRecordKey] + public string Key { get; set; } = string.Empty; + + [VectorStoreRecordData] + public string Data { get; set; } = string.Empty; + + [VectorStoreRecordVector] + public ReadOnlyMemory Vector { get; set; } + + public string NotAnnotated { get; set; } = string.Empty; + } + + private readonly VectorStoreRecordDefinition _singlePropsDefinition = new() + { + Properties = + [ + new VectorStoreRecordKeyProperty("Key"), + new VectorStoreRecordDataProperty("Data"), + new VectorStoreRecordVectorProperty("Vector") + ] + }; + + private sealed class MultiPropsModel + { + [VectorStoreRecordKey] + public 
string Key { get; set; } = string.Empty; + + [VectorStoreRecordData] + public string Data1 { get; set; } = string.Empty; + + [VectorStoreRecordData] + public string Data2 { get; set; } = string.Empty; + + [VectorStoreRecordVector] + public ReadOnlyMemory Vector1 { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory Vector2 { get; set; } + + public string NotAnnotated { get; set; } = string.Empty; + } + + private readonly VectorStoreRecordDefinition _multiPropsDefinition = new() + { + Properties = + [ + new VectorStoreRecordKeyProperty("Key"), + new VectorStoreRecordDataProperty("Data1"), + new VectorStoreRecordDataProperty("Data2"), + new VectorStoreRecordVectorProperty("Vector1"), + new VectorStoreRecordVectorProperty("Vector2") + ] + }; +#pragma warning restore CA1812 // Invalid unused classes error, since I am using these for testing purposes above. +} From c43ef3e3133f1e8b424fe71bdf3e7eebf79fdb9d Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Fri, 21 Jun 2024 17:30:06 +0100 Subject: [PATCH 05/48] .Net: Adding generic redis IVectorRecordStore implementation (#6741) ### Motivation and Context As part of the evolution of memory connectors, we need to support custom data models and remove opinionated behaviors, so adding a new record store implementation for redis. ### Description Adding an implementation for IVectorRecordStore for redis with support for: Custom mappers Generic data models Annotating data models via attributes or via definition objects. Also improving some styling in the AzureAISearch implementation. 
See #6525 ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- .../AzureAISearchVectorRecordStore.cs | 44 +- .../RedisRecordMapperType.cs | 21 + .../RedisVectorRecordStore.cs | 392 ++++++++++++++++++ .../RedisVectorRecordStoreOptions.cs | 52 +++ .../RedisVectorStoreRecordMapper.cs | 78 ++++ .../AzureAISearchVectorRecordStoreTests.cs | 34 +- .../Redis/RedisVectorRecordStoreTests.cs | 287 +++++++++++++ .../RedisVectorStoreCollectionFixture.cs | 10 + .../Memory/Redis/RedisVectorStoreFixture.cs | 207 +++++++++ .../IntegrationTests/IntegrationTests.csproj | 1 + .../Memory/IVectorRecordStore.cs | 6 +- 11 files changed, 1085 insertions(+), 47 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreCollectionFixture.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index 5710ffdeaf2e..ad16bbd8761f 100644 --- 
a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -18,7 +18,7 @@ namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; /// -/// Service for storing and retrieving records, that uses Azure AI Search as the underlying storage. +/// Service for storing and retrieving vector records, that uses Azure AI Search as the underlying storage. /// /// The data model to use for adding, updating and retrieving data from storage. public sealed class AzureAISearchVectorRecordStore : IVectorRecordStore @@ -140,9 +140,9 @@ public Task DeleteAsync(string key, DeleteRecordOptions? options = default, Canc // Remove record. var searchClient = this.GetSearchClient(collectionName); return RunOperationAsync( - () => searchClient.DeleteDocumentsAsync(this._keyPropertyName, [key], new IndexDocumentsOptions(), cancellationToken), collectionName, - "DeleteDocuments"); + "DeleteDocuments", + () => searchClient.DeleteDocumentsAsync(this._keyPropertyName, [key], new IndexDocumentsOptions(), cancellationToken)); } /// @@ -156,9 +156,9 @@ public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? opti // Remove records. 
var searchClient = this.GetSearchClient(collectionName); return RunOperationAsync( - () => searchClient.DeleteDocumentsAsync(this._keyPropertyName, keys, new IndexDocumentsOptions(), cancellationToken), collectionName, - "DeleteDocuments"); + "DeleteDocuments", + () => searchClient.DeleteDocumentsAsync(this._keyPropertyName, keys, new IndexDocumentsOptions(), cancellationToken)); } /// @@ -214,21 +214,21 @@ private async Task GetDocumentAndMapToDataModelAsync( if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper) { var jsonObject = await RunOperationAsync( - () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken), collectionName, - "GetDocument").ConfigureAwait(false); + "GetDocument", + () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken)).ConfigureAwait(false); return RunModelConversion( - () => this._options.JsonObjectCustomMapper!.MapFromStorageToDataModel(jsonObject), collectionName, - "GetDocument"); + "GetDocument", + () => this._options.JsonObjectCustomMapper!.MapFromStorageToDataModel(jsonObject)); } // Use the built in Azure AI Search mapper. 
return await RunOperationAsync( - () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken), collectionName, - "GetDocument").ConfigureAwait(false); + "GetDocument", + () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken)).ConfigureAwait(false); } /// @@ -251,21 +251,21 @@ private Task> MapToStorageModelAndUploadDocumentA if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper) { var jsonObjects = RunModelConversion( - () => records.Select(this._options.JsonObjectCustomMapper!.MapFromDataToStorageModel), collectionName, - "UploadDocuments"); + "UploadDocuments", + () => records.Select(this._options.JsonObjectCustomMapper!.MapFromDataToStorageModel)); return RunOperationAsync( - () => searchClient.UploadDocumentsAsync(jsonObjects, innerOptions, cancellationToken), collectionName, - "UploadDocuments"); + "UploadDocuments", + () => searchClient.UploadDocumentsAsync(jsonObjects, innerOptions, cancellationToken)); } // Use the built in Azure AI Search mapper. return RunOperationAsync( - () => searchClient.UploadDocumentsAsync(records, innerOptions, cancellationToken), collectionName, - "UploadDocuments"); + "UploadDocuments", + () => searchClient.UploadDocumentsAsync(records, innerOptions, cancellationToken)); } /// @@ -322,14 +322,14 @@ private GetDocumentOptions ConvertGetDocumentOptions(GetRecordOptions? options) } /// - /// Run the given operation and wrap any with ."/> + /// Run the given operation and wrap any with ."/> /// /// The response type of the operation. - /// The operation to run. /// The name of the collection the operation is being run on. /// The type of database operation being run. + /// The operation to run. /// The result of the operation. 
- private static async Task RunOperationAsync(Func> operation, string collectionName, string operationName) + private static async Task RunOperationAsync(string collectionName, string operationName, Func> operation) { try { @@ -365,11 +365,11 @@ private static async Task RunOperationAsync(Func> operation, strin /// Run the given model conversion and wrap any exceptions with . /// /// The response type of the operation. - /// The operation to run. /// The name of the collection the operation is being run on. /// The type of database operation being run. + /// The operation to run. /// The result of the operation. - private static T RunModelConversion(Func operation, string collectionName, string operationName) + private static T RunModelConversion(string collectionName, string operationName, Func operation) { try { diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs new file mode 100644 index 000000000000..9518c2e228a4 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json.Nodes; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// The types of mapper supported by . +/// +public enum RedisRecordMapperType +{ + /// + /// Use the default semantic kernel mapper that uses property attributes to determine how to map fields. + /// + Default, + + /// + /// Use a custom mapper between and the data model. + /// + JsonNodeCustomMapper +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs new file mode 100644 index 000000000000..1198653c088a --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs @@ -0,0 +1,392 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Text.Json; +using System.Text.Json.Nodes; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Memory; +using NRedisStack.Json.DataTypes; +using NRedisStack.RedisStackCommands; +using StackExchange.Redis; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Service for storing and retrieving vector records, that uses Redis as the underlying storage. +/// +/// The data model to use for adding, updating and retrieving data from storage. +public sealed class RedisVectorRecordStore : IVectorRecordStore + where TRecord : class +{ + /// A set of types that a key on the provided model may have. + private static readonly HashSet s_supportedKeyTypes = + [ + typeof(string) + ]; + + /// A set of types that vectors on the provided model may have. + private static readonly HashSet s_supportedVectorTypes = + [ + typeof(ReadOnlyMemory), + typeof(ReadOnlyMemory), + typeof(ReadOnlyMemory?), + typeof(ReadOnlyMemory?) + ]; + + /// The redis database to read/write records from. + private readonly IDatabase _database; + + /// Optional configuration options for this class. + private readonly RedisVectorRecordStoreOptions _options; + + /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. + private readonly PropertyInfo _keyPropertyInfo; + + /// The name of the temporary json property that the key property will be serialized / parsed from. + private readonly string _keyJsonPropertyName; + + /// An array of the names of all the data properties that are part of the redis payload, i.e. all properties except the key and vector properties. + private readonly string[] _dataPropertyNames; + + /// The mapper to use when mapping between the consumer data model and the redis record. 
+ private readonly IVectorStoreRecordMapper _mapper; + + /// + /// Initializes a new instance of the <see cref="RedisVectorRecordStore{TRecord}"/> class. + /// + /// The redis database to read/write records from. + /// Optional configuration options for this class. + /// Thrown when parameters are invalid. + public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions? options) + { + // Verify. + Verify.NotNull(database); + + // Assign. + this._database = database; + this._options = options ?? new RedisVectorRecordStoreOptions(); + + // Enumerate public properties using configuration or attributes. + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; + if (this._options.VectorStoreRecordDefinition is not null) + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: true); + } + else + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: true); + } + + // Validate property types and store for later use. + VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); + + this._keyPropertyInfo = properties.keyProperty; + this._keyJsonPropertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(this._keyPropertyInfo); + + this._dataPropertyNames = properties + .dataProperties + .Select(VectorStoreRecordPropertyReader.GetSerializedPropertyName) + .ToArray(); + + // Assign Mapper. 
+ if (this._options.MapperType == RedisRecordMapperType.JsonNodeCustomMapper) + { + if (this._options.JsonNodeCustomMapper is null) + { + throw new ArgumentException($"The {nameof(RedisVectorRecordStoreOptions.JsonNodeCustomMapper)} option needs to be set if a {nameof(RedisVectorRecordStoreOptions.MapperType)} of {nameof(RedisRecordMapperType.JsonNodeCustomMapper)} has been chosen.", nameof(options)); + } + + this._mapper = this._options.JsonNodeCustomMapper; + } + else + { + this._mapper = new RedisVectorStoreRecordMapper(this._keyJsonPropertyName); + } + } + + /// + public async Task GetAsync(string key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + { + Verify.NotNullOrWhiteSpace(key); + + // Create Options + var collectionName = this.ChooseCollectionName(options?.CollectionName); + var maybePrefixedKey = this.PrefixKeyIfNeeded(key, collectionName); + + // Get the redis value. + var redisResult = await RunOperationAsync( + collectionName, + "GET", + () => options?.IncludeVectors is true ? + this._database + .JSON() + .GetAsync(maybePrefixedKey) : + this._database + .JSON() + .GetAsync(maybePrefixedKey, this._dataPropertyNames)).ConfigureAwait(false); + + // Check if the key was found before trying to parse the result. + if (redisResult.IsNull || redisResult is null) + { + throw new VectorStoreOperationException($"Could not find document with key '{key}'"); + } + + // Check if the value contained any json text before trying to parse the result. + var redisResultString = redisResult.ToString(); + if (redisResultString is null) + { + throw new VectorStoreRecordMappingException($"Document with key '{key}' does not contain any json."); + } + + // Convert to the caller's data model. 
+ return RunModelConversion( + collectionName, + "GET", + () => + { + var node = JsonSerializer.Deserialize(redisResultString)!; + return this._mapper.MapFromStorageToDataModel((key, node)); + }); + } + + /// + public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + Verify.NotNull(keys); + var keysList = keys.ToList(); + + // Create Options + var collectionName = this.ChooseCollectionName(options?.CollectionName); + var maybePrefixedKeys = keysList.Select(key => this.PrefixKeyIfNeeded(key, collectionName)); + var redisKeys = maybePrefixedKeys.Select(x => new RedisKey(x)).ToArray(); + + // Get the list of redis results. + var redisResults = await RunOperationAsync( + collectionName, + "MGET", + () => this._database + .JSON() + .MGetAsync(redisKeys, "$")).ConfigureAwait(false); + + // Loop through each key and result and convert to the caller's data model. + for (int i = 0; i < keysList.Count; i++) + { + var key = keysList[i]; + var redisResult = redisResults[i]; + + // Check if the key was found before trying to parse the result. + if (redisResult.IsNull || redisResult is null) + { + throw new VectorStoreOperationException($"Could not find document with key '{key}'"); + } + + // Check if the value contained any json text before trying to parse the result. + var redisResultString = redisResult.ToString(); + if (redisResultString is null) + { + throw new VectorStoreRecordMappingException($"Document with key '{key}' does not contain any json."); + } + + // Convert to the caller's data model. + yield return RunModelConversion( + collectionName, + "MGET", + () => + { + var node = JsonSerializer.Deserialize(redisResultString)!; + return this._mapper.MapFromStorageToDataModel((key, node)); + }); + } + } + + /// + public Task DeleteAsync(string key, DeleteRecordOptions? 
options = default, CancellationToken cancellationToken = default) + { + Verify.NotNullOrWhiteSpace(key); + + // Create Options + var collectionName = this.ChooseCollectionName(options?.CollectionName); + var maybePrefixedKey = this.PrefixKeyIfNeeded(key, collectionName); + + // Remove. + return RunOperationAsync( + collectionName, + "DEL", + () => this._database + .JSON() + .DelAsync(maybePrefixedKey)); + } + + /// + public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(keys); + + // Remove records in parallel. + var tasks = keys.Select(key => this.DeleteAsync(key, options, cancellationToken)); + return Task.WhenAll(tasks); + } + + /// + public async Task UpsertAsync(TRecord record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(record); + + // Create Options + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Map. + var redisJsonRecord = RunModelConversion( + collectionName, + "SET", + () => this._mapper.MapFromDataToStorageModel(record)); + + // Upsert. + var maybePrefixedKey = this.PrefixKeyIfNeeded(redisJsonRecord.Key, collectionName); + await RunOperationAsync( + collectionName, + "SET", + () => this._database + .JSON() + .SetAsync( + maybePrefixedKey, + "$", + redisJsonRecord.Node)).ConfigureAwait(false); + + return redisJsonRecord.Key; + } + + /// + public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + Verify.NotNull(records); + + // Create Options + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Map. 
+ var redisRecords = new List<(string maybePrefixedKey, string originalKey, JsonNode jsonNode)>(); + foreach (var record in records) + { + var redisJsonRecord = RunModelConversion( + collectionName, + "MSET", + () => this._mapper.MapFromDataToStorageModel(record)); + + var maybePrefixedKey = this.PrefixKeyIfNeeded(redisJsonRecord.Key, collectionName); + redisRecords.Add((maybePrefixedKey, redisJsonRecord.Key, redisJsonRecord.Node)); + } + + // Upsert. + var keyPathValues = redisRecords.Select(x => new KeyPathValue(x.maybePrefixedKey, "$", x.jsonNode)).ToArray(); + await RunOperationAsync( + collectionName, + "MSET", + () => this._database + .JSON() + .MSetAsync(keyPathValues)).ConfigureAwait(false); + + // Return keys of upserted records. + foreach (var record in redisRecords) + { + yield return record.originalKey; + } + } + + /// + /// Prefix the key with the collection name if the option is set. + /// + /// The key to prefix. + /// The collection name that was provided as part of an operation to override the default or the default if not. + /// The updated key if updating is required, otherwise the input key. + private string PrefixKeyIfNeeded(string key, string? collectionName) + { + if (this._options.PrefixCollectionNameToKeyNames) + { + return $"{collectionName}:{key}"; + } + + return key; + } + + /// + /// Choose the right collection name to use for the operation by using the one provided + /// as part of the operation options, or the default one provided at construction time. + /// + /// The collection name provided on the operation options. + /// The collection name to use. + private string ChooseCollectionName(string? operationCollectionName) + { + var collectionName = operationCollectionName ?? 
this._options.DefaultCollectionName; + if (collectionName is null) + { +#pragma warning disable CA2208 // Instantiate argument exceptions correctly + throw new ArgumentException("Collection name must be provided in the operation options, since no default was provided at construction time.", "options"); +#pragma warning restore CA2208 // Instantiate argument exceptions correctly + } + + return collectionName; + } + + /// + /// Run the given operation and wrap any redis connection exceptions with <see cref="VectorStoreOperationException"/>. + /// + /// The response type of the operation. + /// The name of the collection the operation is being run on. + /// The type of database operation being run. + /// The operation to run. + /// The result of the operation. + private static async Task RunOperationAsync(string collectionName, string operationName, Func> operation) + { + try + { + return await operation.Invoke().ConfigureAwait(false); + } + catch (RedisConnectionException ex) + { + var wrapperException = new VectorStoreOperationException("Call to vector store failed.", ex); + + // Using Open Telemetry standard for naming of these entries. + // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ + wrapperException.Data.Add("db.system", "Redis"); + wrapperException.Data.Add("db.collection.name", collectionName); + wrapperException.Data.Add("db.operation.name", operationName); + + throw wrapperException; + } + } + + /// + /// Run the given model conversion and wrap any exceptions with <see cref="VectorStoreRecordMappingException"/>. + /// + /// The response type of the operation. + /// The name of the collection the operation is being run on. + /// The type of database operation being run. + /// The operation to run. + /// The result of the operation. 
+ private static T RunModelConversion(string collectionName, string operationName, Func operation) + { + try + { + return operation.Invoke(); + } + catch (Exception ex) + { + var wrapperException = new VectorStoreRecordMappingException("Failed to convert vector store record.", ex); + + // Using Open Telemetry standard for naming of these entries. + // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ + wrapperException.Data.Add("db.system", "Redis"); + wrapperException.Data.Add("db.collection.name", collectionName); + wrapperException.Data.Add("db.operation.name", operationName); + + throw wrapperException; + } + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs new file mode 100644 index 000000000000..5cd7d5a33538 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json.Nodes; +using Microsoft.SemanticKernel.Memory; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Options when creating a . +/// +public sealed class RedisVectorRecordStoreOptions + where TRecord : class +{ + /// + /// Gets or sets the default collection name to use. + /// If not provided here, the collection name will need to be provided for each operation or the operation will throw. + /// + public string? DefaultCollectionName { get; init; } = null; + + /// + /// Gets or sets a value indicating whether the collection name should be prefixed to the + /// key names before reading or writing to the redis store. Default is false. + /// + /// + /// For a record to be indexed by a specific redis index, the key name must be prefixed with the matching prefix configured on the redis index. 
+ /// You can either pass in keys that are already prefixed, or set this option to true to have the collection name prefixed to the key names automatically. + /// + public bool PrefixCollectionNameToKeyNames { get; init; } = false; + + /// + /// Gets or sets the choice of mapper to use when converting between the data model and the redis record. + /// + public RedisRecordMapperType MapperType { get; init; } = RedisRecordMapperType.Default; + + /// + /// Gets or sets an optional custom mapper to use when converting between the data model and the redis record. + /// + /// + /// Set to to use this mapper."/> + /// + public IVectorStoreRecordMapper? JsonNodeCustomMapper { get; init; } = null; + + /// + /// Gets or sets an optional record definition that defines the schema of the record type. + /// + /// + /// If not provided, the schema will be inferred from the record model class using reflection. + /// In this case, the record model properties must be annotated with the appropriate attributes to indicate their usage. + /// See , and . + /// + public VectorStoreRecordDefinition? VectorStoreRecordDefinition { get; init; } = null; +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs new file mode 100644 index 000000000000..01c9ff69b96e --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs @@ -0,0 +1,78 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json; +using System.Text.Json.Nodes; +using Microsoft.SemanticKernel.Memory; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Class for mapping between a json node stored in redis, and the consumer data model. +/// +/// The consumer data model to map to or from. 
+internal sealed class RedisVectorStoreRecordMapper : IVectorStoreRecordMapper + where TConsumerDataModel : class +{ + /// The name of the temporary json property that the key field will be serialized / parsed from. + private readonly string _keyFieldJsonPropertyName; + + /// + /// Initializes a new instance of the class. + /// + /// The name of the key field on the model when serialized to json. + public RedisVectorStoreRecordMapper(string keyFieldJsonPropertyName) + { + Verify.NotNullOrWhiteSpace(keyFieldJsonPropertyName); + this._keyFieldJsonPropertyName = keyFieldJsonPropertyName; + } + + /// + public (string Key, JsonNode Node) MapFromDataToStorageModel(TConsumerDataModel dataModel) + { + // Convert the provided record into a JsonNode object and try to get the key field for it. + // Since we already checked that the key field is a string in the constructor, and that it exists on the model, + // the only edge case we have to be concerned about is if the key field is null. + var jsonNode = JsonSerializer.SerializeToNode(dataModel); + if (jsonNode!.AsObject().TryGetPropertyValue(this._keyFieldJsonPropertyName, out var keyField) && keyField is JsonValue jsonValue) + { + // Remove the key field from the JSON object since we don't want to store it in the redis payload. + var keyValue = jsonValue.ToString(); + jsonNode.AsObject().Remove(this._keyFieldJsonPropertyName); + + return (keyValue, jsonNode); + } + + throw new VectorStoreRecordMappingException($"Missing key field {this._keyFieldJsonPropertyName} on provided record of type {typeof(TConsumerDataModel).FullName}."); + } + + /// + public TConsumerDataModel MapFromStorageToDataModel((string Key, JsonNode Node) storageModel, GetRecordOptions? options = null) + { + JsonObject jsonObject; + + // The redis result can be either a single object or an array with a single object in the case where we are doing an MGET. 
+ if (storageModel.Node is JsonObject topLevelJsonObject) + { + jsonObject = topLevelJsonObject; + } + else if (storageModel.Node is JsonArray jsonArray && jsonArray.Count == 1 && jsonArray[0] is JsonObject arrayEntryJsonObject) + { + jsonObject = arrayEntryJsonObject; + } + else + { + throw new VectorStoreRecordMappingException($"Invalid data format for document with key '{storageModel.Key}'"); + } + + // Check that the key field is not already present in the redis value. + if (jsonObject.ContainsKey(this._keyFieldJsonPropertyName)) + { + throw new VectorStoreRecordMappingException($"Invalid data format for document with key '{storageModel.Key}'. Key property '{this._keyFieldJsonPropertyName}' is already present on retrieved object."); + } + + // Since the key is not stored in the redis value, add it back in before deserializing into the data model. + jsonObject.Add(this._keyFieldJsonPropertyName, storageModel.Key); + + return JsonSerializer.Deserialize(jsonObject)!; + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs index e260b83b14a5..a72859a4dcd2 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs @@ -2,15 +2,15 @@ using System; using System.Linq; +using System.Text.Json.Nodes; using System.Threading.Tasks; -using Azure.Search.Documents.Indexes; using Azure; +using Azure.Search.Documents.Indexes; using Microsoft.SemanticKernel.Connectors.AzureAISearch; using Microsoft.SemanticKernel.Memory; using Xunit; using Xunit.Abstractions; using static SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch.AzureAISearchVectorStoreFixture; -using System.Text.Json.Nodes; namespace 
SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; @@ -38,22 +38,7 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); // Act - var hotel = new Hotel() - { - HotelId = "Upsert-1", - HotelName = "MyHotel1", - Description = "My Hotel is great.", - DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, - Tags = new[] { "pool", "air conditioning", "concierge" }, - ParkingIncluded = true, - LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), - Rating = 3.6, - Address = new Address() - { - City = "New York", - Country = "USA" - } - }; + var hotel = CreateTestHotel("Upsert-1"); var upsertResult = await sut.UpsertAsync(hotel); var getResult = await sut.GetAsync("Upsert-1"); @@ -65,7 +50,7 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition Assert.Equal(hotel.HotelName, getResult.HotelName); Assert.Equal(hotel.Description, getResult.Description); Assert.NotNull(getResult.DescriptionEmbedding); - Assert.Equal(hotel.DescriptionEmbedding.Value, getResult.DescriptionEmbedding.Value); + Assert.Equal(hotel.DescriptionEmbedding?.ToArray(), getResult.DescriptionEmbedding?.ToArray()); Assert.Equal(hotel.Tags, getResult.Tags); Assert.Equal(hotel.ParkingIncluded, getResult.ParkingIncluded); Assert.Equal(hotel.LastRenovationDate, getResult.LastRenovationDate); @@ -206,6 +191,8 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti // Act await sut.DeleteAsync("Remove-1"); + // Also delete a non-existing key to test that the operation does not fail for these. 
+ await sut.DeleteAsync("Remove-2"); // Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("Remove-1", new GetRecordOptions { IncludeVectors = true })); @@ -225,7 +212,8 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() await sut.UpsertAsync(CreateTestHotel("RemoveMany-3")); // Act - await sut.DeleteBatchAsync(["RemoveMany-1", "RemoveMany-2", "RemoveMany-3"]); + // Also include a non-existing key to test that the operation does not fail for these. + await sut.DeleteBatchAsync(["RemoveMany-1", "RemoveMany-2", "RemoveMany-3", "RemoveMany-4"]); // Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); @@ -234,7 +222,7 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() } [Fact(Skip = SkipReason)] - public async Task ItThrowsCommandExecutionExceptionForFailedConnectionAsync() + public async Task ItThrowsOperationExceptionForFailedConnectionAsync() { // Arrange var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; @@ -246,7 +234,7 @@ public async Task ItThrowsCommandExecutionExceptionForFailedConnectionAsync() } [Fact(Skip = SkipReason)] - public async Task ItThrowsCommandExecutionExceptionForFailedAuthenticationAsync() + public async Task ItThrowsOperationExceptionForFailedAuthenticationAsync() { // Arrange var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; @@ -274,7 +262,7 @@ public async Task ItThrowsMappingExceptionForFailedMapperAsync() HotelName = $"MyHotel {hotelId}", Description = "My Hotel is great.", DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, - Tags = new[] { "pool", "air conditioning", "concierge" }, + Tags = ["pool", "air conditioning", "concierge"], ParkingIncluded = true, LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), Rating = 3.6, diff --git 
a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs new file mode 100644 index 000000000000..4d0a9f69473f --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs @@ -0,0 +1,287 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Linq; +using System.Text.Json.Nodes; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.Redis; +using Microsoft.SemanticKernel.Memory; +using Xunit; +using Xunit.Abstractions; +using static SemanticKernel.IntegrationTests.Connectors.Memory.Redis.RedisVectorStoreFixture; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Redis; + +/// +/// Contains tests for the class. +/// +/// Used for logging. +/// Redis setup and teardown. +[Collection("RedisVectorStoreCollection")] +public sealed class RedisVectorRecordStoreTests(ITestOutputHelper output, RedisVectorStoreFixture fixture) +{ + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition) + { + // Arrange. + var options = new RedisVectorRecordStoreOptions + { + DefaultCollectionName = "hotels", + PrefixCollectionNameToKeyNames = true, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null + }; + var sut = new RedisVectorRecordStore(fixture.Database, options); + Hotel record = CreateTestHotel("Upsert-1", 1); + + // Act. + var upsertResult = await sut.UpsertAsync(record); + + // Assert. 
+ var getResult = await sut.GetAsync("Upsert-1", new GetRecordOptions { IncludeVectors = true }); + Assert.Equal("Upsert-1", upsertResult); + Assert.Equal(record.HotelId, getResult?.HotelId); + Assert.Equal(record.HotelName, getResult?.HotelName); + Assert.Equal(record.HotelCode, getResult?.HotelCode); + Assert.Equal(record.Tags, getResult?.Tags); + Assert.Equal(record.ParkingIncluded, getResult?.ParkingIncluded); + Assert.Equal(record.LastRenovationDate, getResult?.LastRenovationDate); + Assert.Equal(record.Rating, getResult?.Rating); + Assert.Equal(record.Address.Country, getResult?.Address.Country); + Assert.Equal(record.Address.City, getResult?.Address.City); + Assert.Equal(record.Description, getResult?.Description); + Assert.Equal(record.DescriptionEmbedding?.ToArray(), getResult?.DescriptionEmbedding?.ToArray()); + + // Output. + output.WriteLine(upsertResult); + output.WriteLine(getResult?.ToString()); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task ItCanUpsertManyDocumentsToVectorStoreAsync(bool useRecordDefinition) + { + // Arrange. + var options = new RedisVectorRecordStoreOptions + { + DefaultCollectionName = "hotels", + PrefixCollectionNameToKeyNames = true, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null + }; + var sut = new RedisVectorRecordStore(fixture.Database, options); + + // Act. + var results = sut.UpsertBatchAsync( + [ + CreateTestHotel("UpsertMany-1", 1), + CreateTestHotel("UpsertMany-2", 2), + CreateTestHotel("UpsertMany-3", 3), + ]); + + // Assert. 
+ Assert.NotNull(results); + var resultsList = await results.ToListAsync(); + + Assert.Equal(3, resultsList.Count); + Assert.Contains("UpsertMany-1", resultsList); + Assert.Contains("UpsertMany-2", resultsList); + Assert.Contains("UpsertMany-3", resultsList); + + // Output + foreach (var result in resultsList) + { + output.WriteLine(result); + } + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool useRecordDefinition) + { + // Arrange. + var options = new RedisVectorRecordStoreOptions + { + DefaultCollectionName = "hotels", + PrefixCollectionNameToKeyNames = true, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null + }; + var sut = new RedisVectorRecordStore(fixture.Database, options); + + // Act. + var getResult = await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = includeVectors }); + + // Assert. + Assert.Equal("BaseSet-1", getResult?.HotelId); + Assert.Equal("My Hotel 1", getResult?.HotelName); + Assert.Equal(1, getResult?.HotelCode); + Assert.Equal(new[] { "pool", "air conditioning", "concierge" }, getResult?.Tags); + Assert.True(getResult?.ParkingIncluded); + Assert.Equal(new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), getResult?.LastRenovationDate); + Assert.Equal(3.6, getResult?.Rating); + Assert.Equal("Seattle", getResult?.Address.City); + Assert.Equal("This is a great hotel.", getResult?.Description); + if (includeVectors) + { + Assert.Equal(new[] { 30f, 31f, 32f, 33f }, getResult?.DescriptionEmbedding?.ToArray()); + } + else + { + Assert.Null(getResult?.DescriptionEmbedding); + } + + // Output. 
+ output.WriteLine(getResult?.ToString()); + } + + [Fact] + public async Task ItCanGetManyDocumentsFromVectorStoreAsync() + { + // Arrange + var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorRecordStore(fixture.Database, options); + + // Act + var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-2"], new GetRecordOptions { IncludeVectors = true }); + + // Assert + Assert.NotNull(hotels); + var hotelsList = await hotels.ToListAsync(); + Assert.Equal(2, hotelsList.Count); + + // Output + foreach (var hotel in hotelsList) + { + output.WriteLine(hotel?.ToString() ?? "Null"); + } + } + + [Fact] + public async Task ItFailsToGetDocumentsWithInvalidSchemaAsync() + { + // Arrange. + var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorRecordStore(fixture.Database, options); + + // Act & Assert. + await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-4-Invalid", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact] + public async Task ItThrowsForPartialGetBatchResultAsync() + { + // Arrange. + var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorRecordStore(fixture.Database, options); + + // Act & Assert. + await Assert.ThrowsAsync(async () => await sut.GetBatchAsync(["BaseSet-1", "nonexistent", "BaseSet-2"], new GetRecordOptions { IncludeVectors = true }).ToListAsync()); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefinition) + { + // Arrange. + var options = new RedisVectorRecordStoreOptions + { + DefaultCollectionName = "hotels", + PrefixCollectionNameToKeyNames = true, + VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null + }; + var sut = new RedisVectorRecordStore(fixture.Database, options); + var address = new HotelAddress { City = "Seattle", Country = "USA" }; + var record = new Hotel + { + HotelId = "Remove-1", + HotelName = "Remove Test Hotel", + HotelCode = 20, + Description = "This is a great hotel.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f } + }; + + await sut.UpsertAsync(record); + + // Act. + await sut.DeleteAsync("Remove-1"); + // Also delete a non-existing key to test that the operation does not fail for these. + await sut.DeleteAsync("Remove-2"); + + // Assert. + await Assert.ThrowsAsync(async () => await sut.GetAsync("Remove-1")); + } + + [Fact] + public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() + { + // Arrange + var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorRecordStore(fixture.Database, options); + await sut.UpsertAsync(CreateTestHotel("RemoveMany-1", 1)); + await sut.UpsertAsync(CreateTestHotel("RemoveMany-2", 2)); + await sut.UpsertAsync(CreateTestHotel("RemoveMany-3", 3)); + + // Act + // Also include a non-existing key to test that the operation does not fail for these. 
+ await sut.DeleteBatchAsync(["RemoveMany-1", "RemoveMany-2", "RemoveMany-3", "RemoveMany-4"]); + + // Assert + await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); + await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); + await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact] + public async Task ItThrowsMappingExceptionForFailedMapperAsync() + { + // Arrange + var options = new RedisVectorRecordStoreOptions + { + DefaultCollectionName = "hotels", + PrefixCollectionNameToKeyNames = true, + MapperType = RedisRecordMapperType.JsonNodeCustomMapper, + JsonNodeCustomMapper = new FailingMapper() + }; + var sut = new RedisVectorRecordStore(fixture.Database, options); + + // Act & Assert + await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); + } + + private static Hotel CreateTestHotel(string hotelId, int hotelCode) + { + var address = new HotelAddress { City = "Seattle", Country = "USA" }; + var record = new Hotel + { + HotelId = hotelId, + HotelName = $"My Hotel {hotelCode}", + HotelCode = hotelCode, + Tags = ["pool", "air conditioning", "concierge"], + ParkingIncluded = true, + LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), + Rating = 3.6, + Address = address, + Description = "This is a great hotel.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f } + }; + return record; + } + + private sealed class FailingMapper : IVectorStoreRecordMapper + { + public (string Key, JsonNode Node) MapFromDataToStorageModel(Hotel dataModel) + { + throw new NotImplementedException(); + } + + public Hotel MapFromStorageToDataModel((string Key, JsonNode Node) storageModel, GetRecordOptions?
options = null) + { + throw new NotImplementedException(); + } + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreCollectionFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreCollectionFixture.cs new file mode 100644 index 000000000000..1bebd51d8f5f --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreCollectionFixture.cs @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Xunit; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Redis; + +[CollectionDefinition("RedisVectorStoreCollection")] +public class RedisVectorStoreCollectionFixture : ICollectionFixture +{ +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs new file mode 100644 index 000000000000..186b20a4b703 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs @@ -0,0 +1,207 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Docker.DotNet; +using Docker.DotNet.Models; +using Microsoft.SemanticKernel.Memory; +using NRedisStack.RedisStackCommands; +using NRedisStack.Search; +using StackExchange.Redis; +using Xunit; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Redis; + +#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. +/// +/// Does setup and teardown of redis docker container and associated test data. +/// +public class RedisVectorStoreFixture : IAsyncLifetime +{ + /// The docker client we are using to create a redis container with. + private readonly DockerClient _client; + + /// The id of the redis container that we are testing with. + private string? 
_containerId = null; + + /// + /// Initializes a new instance of the class. + /// + public RedisVectorStoreFixture() + { + using var dockerClientConfiguration = new DockerClientConfiguration(); + this._client = dockerClientConfiguration.CreateClient(); + this.VectorStoreRecordDefinition = new VectorStoreRecordDefinition + { + Properties = new List + { + new VectorStoreRecordKeyProperty("HotelId"), + new VectorStoreRecordDataProperty("HotelName"), + new VectorStoreRecordDataProperty("HotelCode"), + new VectorStoreRecordDataProperty("Description"), + new VectorStoreRecordVectorProperty("DescriptionEmbedding"), + new VectorStoreRecordDataProperty("Tags"), + new VectorStoreRecordDataProperty("ParkingIncluded"), + new VectorStoreRecordDataProperty("LastRenovationDate"), + new VectorStoreRecordDataProperty("Rating"), + new VectorStoreRecordDataProperty("Address") + } + }; + } + + /// Gets the redis database connection to use for tests. + public IDatabase Database { get; private set; } + + /// Gets the manually created vector store record definition for our test model. + public VectorStoreRecordDefinition VectorStoreRecordDefinition { get; private set; } + + /// + /// Create / Recreate redis docker container, create an index and add test data. + /// + /// An async task. + public async Task InitializeAsync() + { + this._containerId = await SetupRedisContainerAsync(this._client); + + // Connect to redis. + ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost:6379"); + this.Database = redis.GetDatabase(); + + // Create a schema for the vector store. 
+ var schema = new Schema(); + schema.AddTextField("HotelName"); + schema.AddNumericField("hotelCode"); + schema.AddTextField("Description"); + schema.AddVectorField("DescriptionEmbedding", Schema.VectorField.VectorAlgo.HNSW, new Dictionary() + { + ["TYPE"] = "FLOAT32", + ["DIM"] = "4", + ["DISTANCE_METRIC"] = "L2" + }); + var createParams = new FTCreateParams(); + createParams.AddPrefix("hotels"); + await this.Database.FT().CreateAsync("hotels", createParams, schema); + + // Create some test data. + var address = new HotelAddress { City = "Seattle", Country = "USA" }; + var embedding = new[] { 30f, 31f, 32f, 33f }; + + await this.Database.JSON().SetAsync("hotels:BaseSet-1", "$", new + { + HotelName = "My Hotel 1", + HotelCode = 1, + Description = "This is a great hotel.", + DescriptionEmbedding = embedding, + Tags = new[] { "pool", "air conditioning", "concierge" }, + ParkingIncluded = true, + LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), + Rating = 3.6, + Address = address + }); + await this.Database.JSON().SetAsync("hotels:BaseSet-2", "$", new { HotelName = "My Hotel 2", HotelCode = 2, Description = "This is a great hotel.", DescriptionEmbedding = embedding, ParkingIncluded = false }); + await this.Database.JSON().SetAsync("hotels:BaseSet-3", "$", new { HotelName = "My Hotel 3", HotelCode = 3, Description = "This is a great hotel.", DescriptionEmbedding = embedding, ParkingIncluded = false }); + await this.Database.JSON().SetAsync("hotels:BaseSet-4-Invalid", "$", new { HotelId = "AnotherId", HotelName = "My Invalid Hotel", HotelCode = 4, Description = "This is an invalid hotel.", DescriptionEmbedding = embedding, ParkingIncluded = false }); + } + + /// + /// Delete the docker container after the test run. + /// + /// An async task. 
+ public async Task DisposeAsync() + { + if (this._containerId != null) + { + await this._client.Containers.StopContainerAsync(this._containerId, new ContainerStopParameters()); + await this._client.Containers.RemoveContainerAsync(this._containerId, new ContainerRemoveParameters()); + } + } + + /// + /// Setup the redis container by pulling the image and running it. + /// + /// The docker client to create the container with. + /// The id of the container. + private static async Task SetupRedisContainerAsync(DockerClient client) + { + await client.Images.CreateImageAsync( + new ImagesCreateParameters + { + FromImage = "redis/redis-stack", + Tag = "latest", + }, + null, + new Progress()); + + var container = await client.Containers.CreateContainerAsync(new CreateContainerParameters() + { + Image = "redis/redis-stack", + HostConfig = new HostConfig() + { + PortBindings = new Dictionary> + { + {"6379", new List {new() {HostPort = "6379"}}} + }, + PublishAllPorts = true + }, + ExposedPorts = new Dictionary + { + { "6379", default } + }, + }); + + await client.Containers.StartContainerAsync( + container.ID, + new ContainerStartParameters()); + + return container.ID; + } + + /// + /// A test model for the vector store. + /// + public class Hotel + { + [VectorStoreRecordKey] + public string HotelId { get; init; } + + [VectorStoreRecordData] + public string HotelName { get; init; } + + [VectorStoreRecordData] + public int HotelCode { get; init; } + + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + public string Description { get; init; } + + [VectorStoreRecordVector] + public ReadOnlyMemory? 
DescriptionEmbedding { get; init; } + +#pragma warning disable CA1819 // Properties should not return arrays + [VectorStoreRecordData] + public string[] Tags { get; init; } +#pragma warning restore CA1819 // Properties should not return arrays + + [VectorStoreRecordData] + public bool ParkingIncluded { get; init; } + + [VectorStoreRecordData] + public DateTimeOffset LastRenovationDate { get; init; } + + [VectorStoreRecordData] + public double Rating { get; init; } + + [VectorStoreRecordData] + public HotelAddress Address { get; init; } + } + + /// + /// A test model for the vector store to simulate a complex type. + /// + public class HotelAddress + { + public string City { get; init; } + public string Country { get; init; } + } +} +#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. diff --git a/dotnet/src/IntegrationTests/IntegrationTests.csproj b/dotnet/src/IntegrationTests/IntegrationTests.csproj index f80f75d770cf..3c3fa543d5d6 100644 --- a/dotnet/src/IntegrationTests/IntegrationTests.csproj +++ b/dotnet/src/IntegrationTests/IntegrationTests.csproj @@ -63,6 +63,7 @@ + diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs index 79578bfa907d..6852ca14b6bc 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs @@ -48,17 +48,19 @@ public interface IVectorRecordStore /// Optional options for removing the record. /// The to monitor for cancellation requests. The default is . /// The unique identifier for the record. - /// Throw when the command fails to execute for any reason. + /// Throw when the command fails to execute for any reason other than that the record does not exist. Task DeleteAsync(TKey key, DeleteRecordOptions?
options = default, CancellationToken cancellationToken = default); /// /// Deletes a batch of records from the vector store. Does not guarantee that the collection exists. /// Deletes will be made in a single request or in a single parallel batch depending on the available store functionality. + /// If a record is not found, it will be ignored and the batch will succeed. + /// If any record cannot be deleted for any other reason, the operation will throw. Some records may have already been deleted, while others may not, so the entire operation should be retried. /// /// The unique ids associated with the records to remove. /// Optional options for removing the records. /// The to monitor for cancellation requests. The default is . - /// Throw when the command fails to execute for any reason. + /// Throw when the command fails to execute for any reason other than that a record does not exist. Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); /// From 15c1e9bac9be4a7a330ac8632f792f517bc0c25d Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 24 Jun 2024 18:34:42 +0100 Subject: [PATCH 06/48] .Net: Add qdrant vector record store implementation (#6904) ### Motivation and Context As part of the evolution of memory connectors, we need to support custom data models and remove opinionated behaviors, so adding a new record store implementation for qdrant. ### Description Adding an implementation for IVectorRecordStore for qdrant with support for: Custom mappers Generic data models Annotating data models via attributes or via definition objects. Also improving some styling in the AzureAISearch implementation. 
See #6525 ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- dotnet/Directory.Packages.props | 1 + .../AzureAISearchVectorRecordStore.cs | 103 +++-- .../Connectors.Memory.Qdrant.csproj | 1 + .../QdrantRecordMapperType.cs | 21 + .../QdrantVectorRecordStore.cs | 369 ++++++++++++++++++ .../QdrantVectorRecordStoreOptions.cs | 48 +++ .../QdrantVectorStoreRecordMapper.cs | 301 ++++++++++++++ .../QdrantVectorStoreRecordMapperOptions.cs | 27 ++ .../RedisVectorRecordStore.cs | 51 +-- .../AzureAISearchVectorRecordStoreTests.cs | 36 +- .../Qdrant/QdrantVectorRecordStoreTests.cs | 307 +++++++++++++++ .../QdrantVectorStoreCollectionFixture.cs | 10 + .../Memory/Qdrant/QdrantVectorStoreFixture.cs | 325 +++++++++++++++ .../Redis/RedisVectorRecordStoreTests.cs | 33 +- .../IntegrationTests/IntegrationTests.csproj | 1 + .../src/Data/VectorStoreErrorHandler.cs | 45 +++ .../VectorStoreRecordPropertyReader.cs | 42 +- .../Memory/IVectorRecordStore.cs | 7 +- .../VectorStoreRecordPropertyReaderTests.cs | 49 ++- 19 files changed, 1645 insertions(+), 132 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs create mode 100644 
dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreCollectionFixture.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs create mode 100644 dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs rename dotnet/src/InternalUtilities/src/{Schema => Data}/VectorStoreRecordPropertyReader.cs (84%) rename dotnet/src/SemanticKernel.UnitTests/{Utilities => Data}/VectorStoreRecordPropertyReaderTests.cs (81%) diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props index 645c8a249d2a..152d4da183b8 100644 --- a/dotnet/Directory.Packages.props +++ b/dotnet/Directory.Packages.props @@ -92,6 +92,7 @@ + diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index ad16bbd8761f..f5cf330e5f3e 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -24,6 +24,9 @@ namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; public sealed class AzureAISearchVectorRecordStore : IVectorRecordStore where TRecord : class { + /// The name of this database for telemetry purposes. + private const string DatabaseName = "AzureAISearch"; + /// A set of types that a key on the provided model may have. private static readonly HashSet s_supportedKeyTypes = [ @@ -100,7 +103,7 @@ public AzureAISearchVectorRecordStore(SearchIndexClient searchIndexClient, Azure } /// - public Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) + public Task GetAsync(string key, GetRecordOptions? 
options = default, CancellationToken cancellationToken = default) { Verify.NotNullOrWhiteSpace(key); @@ -126,7 +129,13 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G var searchClient = this.GetSearchClient(collectionName); var tasks = keys.Select(key => this.GetDocumentAndMapToDataModelAsync(searchClient, collectionName, key, innerOptions, cancellationToken)); var results = await Task.WhenAll(tasks).ConfigureAwait(false); - foreach (var result in results) { yield return result; } + foreach (var result in results) + { + if (result is not null) + { + yield return result; + } + } } /// @@ -203,32 +212,40 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco /// The azure ai search sdk options for getting a document. /// The to monitor for cancellation requests. The default is . /// The retrieved document, mapped to the consumer data model. - private async Task GetDocumentAndMapToDataModelAsync( + private async Task GetDocumentAndMapToDataModelAsync( SearchClient searchClient, string collectionName, string key, GetDocumentOptions innerOptions, CancellationToken cancellationToken) { + const string OperationName = "GetDocument"; + // Use the user provided mapper. if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper) { var jsonObject = await RunOperationAsync( collectionName, - "GetDocument", - () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken)).ConfigureAwait(false); + OperationName, + () => GetDocumentWithNotFoundHandlingAsync(searchClient, key, innerOptions, cancellationToken)).ConfigureAwait(false); + + if (jsonObject is null) + { + return null; + } - return RunModelConversion( + return VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, - "GetDocument", + OperationName, () => this._options.JsonObjectCustomMapper!.MapFromStorageToDataModel(jsonObject)); } // Use the built in Azure AI Search mapper. 
return await RunOperationAsync( collectionName, - "GetDocument", - () => searchClient.GetDocumentAsync(key, innerOptions, cancellationToken)).ConfigureAwait(false); + OperationName, + () => GetDocumentWithNotFoundHandlingAsync(searchClient, key, innerOptions, cancellationToken)).ConfigureAwait(false); } /// @@ -247,24 +264,27 @@ private Task> MapToStorageModelAndUploadDocumentA IndexDocumentsOptions innerOptions, CancellationToken cancellationToken) { + const string OperationName = "UploadDocuments"; + // Use the user provided mapper. if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper) { - var jsonObjects = RunModelConversion( + var jsonObjects = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, - "UploadDocuments", + OperationName, () => records.Select(this._options.JsonObjectCustomMapper!.MapFromDataToStorageModel)); return RunOperationAsync( collectionName, - "UploadDocuments", + OperationName, () => searchClient.UploadDocumentsAsync(jsonObjects, innerOptions, cancellationToken)); } // Use the built in Azure AI Search mapper. return RunOperationAsync( collectionName, - "UploadDocuments", + OperationName, () => searchClient.UploadDocumentsAsync(records, innerOptions, cancellationToken)); } @@ -321,6 +341,31 @@ private GetDocumentOptions ConvertGetDocumentOptions(GetRecordOptions? options) return innerOptions; } + /// + /// Get a document with the given key, and return null if it is not found. + /// + /// The type to deserialize the document to. + /// The search client to use when fetching the document. + /// The key of the record to get. + /// The azure ai search sdk options for getting a document. + /// The to monitor for cancellation requests. The default is . + /// The retrieved document, mapped to the consumer data model, or null if not found. 
+ private static async Task GetDocumentWithNotFoundHandlingAsync( + SearchClient searchClient, + string key, + GetDocumentOptions innerOptions, + CancellationToken cancellationToken) + { + try + { + return await searchClient.GetDocumentAsync(key, innerOptions, cancellationToken).ConfigureAwait(false); + } + catch (RequestFailedException ex) when (ex.Status == 404) + { + return default; + } + } + /// /// Run the given operation and wrap any with ."/> /// @@ -341,7 +386,7 @@ private static async Task RunOperationAsync(string collectionName, string // Using Open Telemetry standard for naming of these entries. // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "AzureAISearch"); + wrapperException.Data.Add("db.system", DatabaseName); wrapperException.Data.Add("db.collection.name", collectionName); wrapperException.Data.Add("db.operation.name", operationName); @@ -353,35 +398,7 @@ private static async Task RunOperationAsync(string collectionName, string // Using Open Telemetry standard for naming of these entries. // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "AzureAISearch"); - wrapperException.Data.Add("db.collection.name", collectionName); - wrapperException.Data.Add("db.operation.name", operationName); - - throw wrapperException; - } - } - - /// - /// Run the given model conversion and wrap any exceptions with . - /// - /// The response type of the operation. - /// The name of the collection the operation is being run on. - /// The type of database operation being run. - /// The operation to run. - /// The result of the operation. 
- private static T RunModelConversion(string collectionName, string operationName, Func operation) - { - try - { - return operation.Invoke(); - } - catch (Exception ex) - { - var wrapperException = new VectorStoreRecordMappingException("Failed to convert vector store record.", ex); - - // Using Open Telemetry standard for naming of these entries. - // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "AzureAISearch"); + wrapperException.Data.Add("db.system", DatabaseName); wrapperException.Data.Add("db.collection.name", collectionName); wrapperException.Data.Add("db.operation.name", operationName); diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj b/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj index d9037605f6e5..7e8e40b3a783 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj @@ -20,6 +20,7 @@ + diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs new file mode 100644 index 000000000000..cb8f7bf8b14c --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Qdrant.Client.Grpc; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// The types of mapper supported by QdrantVectorRecordStore. +/// +public enum QdrantRecordMapperType +{ + /// + /// Use the default mapper that is provided by the semantic kernel SDK that uses json as an intermediary to allow automatic mapping to a wide variety of types. + /// + Default, + + /// + /// Use a custom mapper between PointStruct and the data model.
+ /// + QdrantPointStructCustomMapper +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs new file mode 100644 index 000000000000..b05766ff9b17 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs @@ -0,0 +1,369 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using Grpc.Core; +using Microsoft.SemanticKernel.Memory; +using Qdrant.Client; +using Qdrant.Client.Grpc; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Service for storing and retrieving vector records, that uses Qdrant as the underlying storage. +/// +/// The data model to use for adding, updating and retrieving data from storage. +public sealed class QdrantVectorRecordStore : IVectorRecordStore, IVectorRecordStore + where TRecord : class +{ + /// The name of this database for telemetry purposes. + private const string DatabaseName = "Qdrant"; + + /// The name of the upsert operation for telemetry purposes. + private const string UpsertName = "Upsert"; + + /// The name of the Delete operation for telemetry purposes. + private const string DeleteName = "Delete"; + + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + private readonly QdrantClient _qdrantClient; + + /// Optional configuration options for this class. + private readonly QdrantVectorRecordStoreOptions _options; + + /// A mapper to use for converting between qdrant point and consumer models. + private readonly IVectorStoreRecordMapper _mapper; + + /// + /// Initializes a new instance of the class. + /// + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + /// Optional configuration options for this class. 
+ /// + /// + public QdrantVectorRecordStore(QdrantClient qdrantClient, QdrantVectorRecordStoreOptions? options = null) + { + // Verify. + Verify.NotNull(qdrantClient); + + // Assign. + this._qdrantClient = qdrantClient; + this._options = options ?? new QdrantVectorRecordStoreOptions(); + + // Assign Mapper. + if (this._options.MapperType == QdrantRecordMapperType.QdrantPointStructCustomMapper) + { + // Custom Mapper. + if (this._options.PointStructCustomMapper is null) + { + throw new ArgumentException($"The {nameof(QdrantVectorRecordStoreOptions.PointStructCustomMapper)} option needs to be set if a {nameof(QdrantVectorRecordStoreOptions.MapperType)} of {nameof(QdrantRecordMapperType.QdrantPointStructCustomMapper)} has been chosen.", nameof(options)); + } + + this._mapper = this._options.PointStructCustomMapper; + } + else + { + // Default Mapper. + this._mapper = new QdrantVectorStoreRecordMapper(new QdrantVectorStoreRecordMapperOptions + { + HasNamedVectors = this._options.HasNamedVectors, + VectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition + }); + } + } + + /// + public async Task GetAsync(ulong key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + { + Verify.NotNull(key); + + var retrievedPoints = await this.GetBatchAsync([key], options, cancellationToken).ToListAsync(cancellationToken).ConfigureAwait(false); + return retrievedPoints.FirstOrDefault(); + } + + /// + public async Task GetAsync(Guid key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + { + Verify.NotNull(key); + + var retrievedPoints = await this.GetBatchAsync([key], options, cancellationToken).ToListAsync(cancellationToken).ConfigureAwait(false); + return retrievedPoints.FirstOrDefault(); + } + + /// + public IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? 
options = default, CancellationToken cancellationToken = default) + { + return this.GetBatchByPointIdAsync(keys, key => new PointId { Num = key }, options, cancellationToken); + } + + /// + public IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, CancellationToken cancellationToken = default) + { + return this.GetBatchByPointIdAsync(keys, key => new PointId { Uuid = key.ToString("D") }, options, cancellationToken); + } + + /// + public Task DeleteAsync(ulong key, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) + { + Verify.NotNull(key); + + var collectionName = this.ChooseCollectionName(options?.CollectionName); + return RunOperationAsync( + collectionName, + DeleteName, + () => this._qdrantClient.DeleteAsync( + collectionName, + key, + wait: true, + cancellationToken: cancellationToken)); + } + + /// + public Task DeleteAsync(Guid key, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) + { + Verify.NotNull(key); + + var collectionName = this.ChooseCollectionName(options?.CollectionName); + return RunOperationAsync( + collectionName, + DeleteName, + () => this._qdrantClient.DeleteAsync( + collectionName, + key, + wait: true, + cancellationToken: cancellationToken)); + } + + /// + public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(keys); + + var collectionName = this.ChooseCollectionName(options?.CollectionName); + return RunOperationAsync( + collectionName, + DeleteName, + () => this._qdrantClient.DeleteAsync( + collectionName, + keys.ToList(), + wait: true, + cancellationToken: cancellationToken)); + } + + /// + public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? 
options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(keys); + + var collectionName = this.ChooseCollectionName(options?.CollectionName); + return RunOperationAsync( + collectionName, + DeleteName, + () => this._qdrantClient.DeleteAsync( + collectionName, + keys.ToList(), + wait: true, + cancellationToken: cancellationToken)); + } + + /// + public async Task UpsertAsync(TRecord record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(record); + + // Create options. + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Create point from record. + var pointStruct = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, + collectionName, + UpsertName, + () => this._mapper.MapFromDataToStorageModel(record)); + + // Upsert. + await RunOperationAsync( + collectionName, + UpsertName, + () => this._qdrantClient.UpsertAsync(collectionName, [pointStruct], true, cancellationToken: cancellationToken)).ConfigureAwait(false); + return pointStruct.Id.Num; + } + + /// + async Task IVectorRecordStore.UpsertAsync(TRecord record, UpsertRecordOptions? options, CancellationToken cancellationToken) + { + Verify.NotNull(record); + + // Create options. + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Create point from record. + var pointStruct = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, + collectionName, + UpsertName, + () => this._mapper.MapFromDataToStorageModel(record)); + + // Upsert. + await RunOperationAsync( + collectionName, + UpsertName, + () => this._qdrantClient.UpsertAsync(collectionName, [pointStruct], true, cancellationToken: cancellationToken)).ConfigureAwait(false); + return Guid.Parse(pointStruct.Id.Uuid); + } + + /// + public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? 
options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + Verify.NotNull(records); + + // Create Options + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Create points from records. + var pointStructs = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, + collectionName, + UpsertName, + () => records.Select(this._mapper.MapFromDataToStorageModel).ToList()); + + // Upsert. + await RunOperationAsync( + collectionName, + UpsertName, + () => this._qdrantClient.UpsertAsync(collectionName, pointStructs, true, cancellationToken: cancellationToken)).ConfigureAwait(false); + + foreach (var pointStruct in pointStructs) + { + yield return pointStruct.Id.Num; + } + } + + /// + async IAsyncEnumerable IVectorRecordStore.UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options, [EnumeratorCancellation] CancellationToken cancellationToken) + { + Verify.NotNull(records); + + // Create Options + var collectionName = this.ChooseCollectionName(options?.CollectionName); + + // Create points from records. + var pointStructs = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, + collectionName, + UpsertName, + () => records.Select(this._mapper.MapFromDataToStorageModel).ToList()); + + // Upsert. + await RunOperationAsync( + collectionName, + UpsertName, + () => this._qdrantClient.UpsertAsync(collectionName, pointStructs, true, cancellationToken: cancellationToken)).ConfigureAwait(false); + + foreach (var pointStruct in pointStructs) + { + yield return Guid.Parse(pointStruct.Id.Uuid); + } + } + + /// + /// Get the requested records from the Qdrant store using the provided keys. + /// + /// The keys of the points to retrieve. + /// Function to convert the provided keys to point ids. + /// The retrieval options. + /// The to monitor for cancellation requests. The default is . + /// The retrieved points. 
+ private async IAsyncEnumerable GetBatchByPointIdAsync( + IEnumerable keys, + Func keyConverter, + GetRecordOptions? options, + [EnumeratorCancellation] CancellationToken cancellationToken) + { + const string OperationName = "Retrieve"; + Verify.NotNull(keys); + + // Create options. + var collectionName = this.ChooseCollectionName(options?.CollectionName); + var pointsIds = keys.Select(key => keyConverter(key)).ToArray(); + + // Retrieve data points. + var retrievedPoints = await RunOperationAsync( + collectionName, + OperationName, + () => this._qdrantClient.RetrieveAsync(collectionName, pointsIds, true, options?.IncludeVectors ?? false, cancellationToken: cancellationToken)).ConfigureAwait(false); + + // Convert the retrieved points to the target data model. + foreach (var retrievedPoint in retrievedPoints) + { + var pointStruct = new PointStruct + { + Id = retrievedPoint.Id, + Vectors = retrievedPoint.Vectors, + Payload = { } + }; + + foreach (KeyValuePair payloadEntry in retrievedPoint.Payload) + { + pointStruct.Payload.Add(payloadEntry.Key, payloadEntry.Value); + } + + yield return VectorStoreErrorHandler.RunModelConversion( + DatabaseName, + collectionName, + OperationName, + () => this._mapper.MapFromStorageToDataModel(pointStruct, options)); + } + } + + /// + /// Choose the right collection name to use for the operation by using the one provided + /// as part of the operation options, or the default one provided at construction time. + /// + /// The collection name provided on the operation options. + /// The collection name to use. + private string ChooseCollectionName(string? operationCollectionName) + { + var collectionName = operationCollectionName ?? 
this._options.DefaultCollectionName; + if (collectionName is null) + { +#pragma warning disable CA2208 // Instantiate argument exceptions correctly + throw new ArgumentException("Collection name must be provided in the operation options, since no default was provided at construction time.", "options"); +#pragma warning restore CA2208 // Instantiate argument exceptions correctly + } + + return collectionName; + } + + /// + /// Run the given operation and wrap any RpcException with VectorStoreOperationException. + /// + /// The response type of the operation. + /// The name of the collection the operation is being run on. + /// The type of database operation being run. + /// The operation to run. + /// The result of the operation. + private static async Task RunOperationAsync(string collectionName, string operationName, Func> operation) + { + try + { + return await operation.Invoke().ConfigureAwait(false); + } + catch (RpcException ex) + { + var wrapperException = new VectorStoreOperationException("Call to vector store failed.", ex); + + // Using Open Telemetry standard for naming of these entries. + // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ + wrapperException.Data.Add("db.system", DatabaseName); + wrapperException.Data.Add("db.collection.name", collectionName); + wrapperException.Data.Add("db.operation.name", operationName); + + throw wrapperException; + } + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs new file mode 100644 index 000000000000..d3e568057976 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs @@ -0,0 +1,48 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel.Memory; +using Qdrant.Client.Grpc; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Options when creating a QdrantVectorRecordStore.
+/// +public sealed class QdrantVectorRecordStoreOptions + where TRecord : class +{ + /// + /// Gets or sets the default collection name to use. + /// If not provided here, the collection name will need to be provided for each operation or the operation will throw. + /// + public string? DefaultCollectionName { get; init; } = null; + + /// + /// Gets or sets a value indicating whether the vectors in the store are named and multiple vectors are supported, or whether there is just a single unnamed vector per qdrant point. + /// Defaults to single vector per point. + /// + public bool HasNamedVectors { get; set; } = false; + + /// + /// Gets or sets the choice of mapper to use when converting between the data model and the qdrant point. + /// + public QdrantRecordMapperType MapperType { get; init; } = QdrantRecordMapperType.Default; + + /// + /// Gets or sets an optional custom mapper to use when converting between the data model and the qdrant point. + /// + /// + /// Set to to use this mapper."/> + /// + public IVectorStoreRecordMapper? PointStructCustomMapper { get; init; } = null; + + /// + /// Gets or sets an optional record definition that defines the schema of the record type. + /// + /// + /// If not provided, the schema will be inferred from the record model class using reflection. + /// In this case, the record model properties must be annotated with the appropriate attributes to indicate their usage. + /// See , and . + /// + public VectorStoreRecordDefinition? VectorStoreRecordDefinition { get; init; } = null; +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs new file mode 100644 index 000000000000..d7e122a2627a --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -0,0 +1,301 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Text.Json; +using System.Text.Json.Nodes; +using Microsoft.SemanticKernel.Memory; +using Qdrant.Client.Grpc; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Mapper between a Qdrant record and the consumer data model that uses json as an intermediary to allow supporting a wide range of models. +/// +/// The consumer data model to map to or from. +internal sealed class QdrantVectorStoreRecordMapper : IVectorStoreRecordMapper + where TRecord : class +{ + /// A set of types that a key on the provided model may have. + private static readonly HashSet s_supportedKeyTypes = + [ + typeof(ulong), + typeof(Guid) + ]; + + /// A set of types that data properties on the provided model may have. + private static readonly HashSet s_supportedDataTypes = + [ + typeof(List), + typeof(List), + typeof(List), + typeof(List), + typeof(List), + typeof(List), + typeof(string), + typeof(int), + typeof(long), + typeof(double), + typeof(float), + typeof(bool), + typeof(int?), + typeof(long?), + typeof(double?), + typeof(float?), + typeof(bool?) + ]; + + /// A set of types that vectors on the provided model may have. + /// + /// While qdrant supports float32 and uint64, the api only supports float64, therefore + /// any float32 vectors will be converted to float64 before being sent to qdrant. + /// + private static readonly HashSet s_supportedVectorTypes = + [ + typeof(ReadOnlyMemory), + typeof(ReadOnlyMemory?), + typeof(ReadOnlyMemory), + typeof(ReadOnlyMemory?) + ]; + + /// A list of property info objects that point at the payload properties in the current model, and allows easy reading and writing of these properties. + private readonly List _payloadPropertiesInfo = new(); + + /// A list of property info objects that point at the vector properties in the current model, and allows easy reading and writing of these properties. 
+ private readonly List _vectorPropertiesInfo = new(); + + /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. + private readonly PropertyInfo _keyPropertyInfo; + + /// Configuration options for this class. + private readonly QdrantVectorStoreRecordMapperOptions _options; + + /// + /// Initializes a new instance of the class. + /// + /// Options to use when doing the model conversion. + public QdrantVectorStoreRecordMapper(QdrantVectorStoreRecordMapperOptions options) + { + Verify.NotNull(options); + this._options = options; + + // Enumerate public properties using configuration or attributes. + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; + if (this._options.VectorStoreRecordDefinition is not null) + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: this._options.HasNamedVectors); + } + else + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: this._options.HasNamedVectors); + } + + // Validate property types and store for later use. + VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); + + this._keyPropertyInfo = properties.keyProperty; + this._payloadPropertiesInfo = properties.dataProperties; + this._vectorPropertiesInfo = properties.vectorProperties; + } + + /// + public PointStruct MapFromDataToStorageModel(TRecord dataModel) + { + PointId pointId; + if (this._keyPropertyInfo.PropertyType == typeof(ulong)) + { + var key = this._keyPropertyInfo.GetValue(dataModel) as ulong? ?? 
throw new VectorStoreRecordMappingException($"Missing key property {this._keyPropertyInfo.Name} on provided record of type {typeof(TRecord).FullName}."); + pointId = new PointId { Num = key }; + } + else if (this._keyPropertyInfo.PropertyType == typeof(Guid)) + { + var key = this._keyPropertyInfo.GetValue(dataModel) as Guid? ?? throw new VectorStoreRecordMappingException($"Missing key property {this._keyPropertyInfo.Name} on provided record of type {typeof(TRecord).FullName}."); + pointId = new PointId { Uuid = key.ToString("D") }; + } + else + { + throw new VectorStoreRecordMappingException($"Unsupported key type {this._keyPropertyInfo.PropertyType.FullName} for key property {this._keyPropertyInfo.Name} on provided record of type {typeof(TRecord).FullName}."); + } + + // Create point. + var pointStruct = new PointStruct + { + Id = pointId, + Vectors = new Vectors(), + Payload = { }, + }; + + // Add point payload. + foreach (var payloadPropertyInfo in this._payloadPropertiesInfo) + { + var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(payloadPropertyInfo); + var propertyValue = payloadPropertyInfo.GetValue(dataModel); + pointStruct.Payload.Add(propertyName, ConvertToGrpcFieldValue(propertyValue)); + } + + // Add vectors. + if (this._options.HasNamedVectors) + { + var namedVectors = new NamedVectors(); + foreach (var vectorPropertyInfo in this._vectorPropertiesInfo) + { + var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(vectorPropertyInfo); + var propertyValue = vectorPropertyInfo.GetValue(dataModel); + if (propertyValue is not null) + { + var castPropertyValue = (ReadOnlyMemory)propertyValue; + namedVectors.Vectors.Add(propertyName, castPropertyValue.ToArray()); + } + } + + pointStruct.Vectors.Vectors_ = namedVectors; + } + else + { + // We already verified in the constructor via FindProperties that there is exactly one vector property when not using named vectors. 
+ var vectorPropertyInfo = this._vectorPropertiesInfo.First(); + if (vectorPropertyInfo.GetValue(dataModel) is ReadOnlyMemory floatROM) + { + pointStruct.Vectors.Vector = floatROM.ToArray(); + } + else + { + throw new VectorStoreRecordMappingException($"Vector property {vectorPropertyInfo.Name} on provided record of type {typeof(TRecord).FullName} may not be null when not using named vectors."); + } + } + + return pointStruct; + } + + /// + public TRecord MapFromStorageToDataModel(PointStruct storageModel, GetRecordOptions? options = default) + { + // Get the key property name and value. + var keyPropertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(this._keyPropertyInfo); + var keyPropertyValue = storageModel.Id.HasNum ? storageModel.Id.Num as object : storageModel.Id.Uuid as object; + + // Create a json object to represent the point. + var outputJsonObject = new JsonObject + { + { keyPropertyName, JsonValue.Create(keyPropertyValue) }, + }; + + // Add each vector property if embeddings are included in the point. + if (options?.IncludeVectors is true) + { + foreach (var vectorProperty in this._vectorPropertiesInfo) + { + var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(vectorProperty); + + if (this._options.HasNamedVectors) + { + if (storageModel.Vectors.Vectors_.Vectors.TryGetValue(propertyName, out var vector)) + { + outputJsonObject.Add(propertyName, new JsonArray(vector.Data.Select(x => JsonValue.Create(x)).ToArray())); + } + } + else + { + outputJsonObject.Add(propertyName, new JsonArray(storageModel.Vectors.Vector.Data.Select(x => JsonValue.Create(x)).ToArray())); + } + } + } + + // Add each payload property. 
+ foreach (var payloadProperty in this._payloadPropertiesInfo) + { + var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(payloadProperty); + if (storageModel.Payload.TryGetValue(propertyName, out var value)) + { + outputJsonObject.Add(propertyName, ConvertFromGrpcFieldValueToJsonNode(value)); + } + } + + // Convert from json object to the target data model. + return JsonSerializer.Deserialize(outputJsonObject)!; + } + + /// + /// Convert the given to the correct native type based on its properties. + /// + /// The value to convert to a native type. + /// The converted native value. + /// Thrown when an unsupported type is encountered. + private static JsonNode? ConvertFromGrpcFieldValueToJsonNode(Value payloadValue) + { + return payloadValue.KindCase switch + { + Value.KindOneofCase.NullValue => null, + Value.KindOneofCase.IntegerValue => JsonValue.Create(payloadValue.IntegerValue), + Value.KindOneofCase.StringValue => JsonValue.Create(payloadValue.StringValue), + Value.KindOneofCase.DoubleValue => JsonValue.Create(payloadValue.DoubleValue), + Value.KindOneofCase.BoolValue => JsonValue.Create(payloadValue.BoolValue), + Value.KindOneofCase.ListValue => new JsonArray(payloadValue.ListValue.Values.Select(x => ConvertFromGrpcFieldValueToJsonNode(x)).ToArray()), + Value.KindOneofCase.StructValue => new JsonObject(payloadValue.StructValue.Fields.ToDictionary(x => x.Key, x => ConvertFromGrpcFieldValueToJsonNode(x.Value))), + _ => throw new VectorStoreRecordMappingException($"Unsupported grpc value kind {payloadValue.KindCase}."), + }; + } + + /// + /// Convert the given to a object that can be stored in Qdrant. + /// + /// The object to convert. + /// The converted Qdrant value. + /// Thrown when an unsupported type is encountered. + private static Value ConvertToGrpcFieldValue(object? 
sourceValue) + { + var value = new Value(); + if (sourceValue is null) + { + value.NullValue = NullValue.NullValue; + } + else if (sourceValue is int intValue) + { + value.IntegerValue = intValue; + } + else if (sourceValue is long longValue) + { + value.IntegerValue = longValue; + } + else if (sourceValue is string stringValue) + { + value.StringValue = stringValue; + } + else if (sourceValue is float floatValue) + { + value.DoubleValue = floatValue; + } + else if (sourceValue is double doubleValue) + { + value.DoubleValue = doubleValue; + } + else if (sourceValue is bool boolValue) + { + value.BoolValue = boolValue; + } + else if (sourceValue is IEnumerable || + sourceValue is IEnumerable || + sourceValue is IEnumerable || + sourceValue is IEnumerable || + sourceValue is IEnumerable || + sourceValue is IEnumerable) + { + var listValue = sourceValue as IEnumerable; + value.ListValue = new ListValue(); + foreach (var item in listValue!) + { + value.ListValue.Values.Add(ConvertToGrpcFieldValue(item)); + } + } + else + { + throw new VectorStoreRecordMappingException($"Unsupported source value type {sourceValue?.GetType().FullName}."); + } + + return value; + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs new file mode 100644 index 000000000000..c5a9ffa46865 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel.Memory; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Options when creating a . +/// +internal sealed class QdrantVectorStoreRecordMapperOptions +{ + /// + /// Gets or sets a value indicating whether the vectors in the store are named, or whether there is just a single vector per qdrant point. + /// Defaults to single vector per point. 
+ /// + public bool HasNamedVectors { get; set; } = false; + + /// + /// Gets or sets an optional record definition that defines the schema of the record type. + /// + /// + /// If not provided, the schema will be inferred from the record model class using reflection. + /// In this case, the record model properties must be annotated with the appropriate attributes to indicate their usage. + /// See VectorStoreRecordKeyAttribute, VectorStoreRecordDataAttribute and VectorStoreRecordVectorAttribute. + /// + public VectorStoreRecordDefinition? VectorStoreRecordDefinition { get; init; } = null; +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs index 1198653c088a..7192c2125dc2 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs @@ -23,6 +23,9 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; public sealed class RedisVectorRecordStore : IVectorRecordStore where TRecord : class { + /// The name of this database for telemetry purposes. + private const string DatabaseName = "Redis"; + /// A set of types that a key on the provided model may have. private static readonly HashSet s_supportedKeyTypes = [ @@ -111,7 +114,7 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< } /// - public async Task GetAsync(string key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + public async Task GetAsync(string key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) { Verify.NotNullOrWhiteSpace(key); @@ -134,7 +137,7 @@ public async Task GetAsync(string key, GetRecordOptions? options = null // Check if the key was found before trying to parse the result. 
if (redisResult.IsNull || redisResult is null) { - throw new VectorStoreOperationException($"Could not find document with key '{key}'"); + return null; } // Check if the value contained any json text before trying to parse the result. @@ -145,7 +148,8 @@ public async Task GetAsync(string key, GetRecordOptions? options = null } // Convert to the caller's data model. - return RunModelConversion( + return VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, "GET", () => @@ -183,7 +187,7 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G // Check if the key was found before trying to parse the result. if (redisResult.IsNull || redisResult is null) { - throw new VectorStoreOperationException($"Could not find document with key '{key}'"); + continue; } // Check if the value contained any json text before trying to parse the result. @@ -194,7 +198,8 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G } // Convert to the caller's data model. - yield return RunModelConversion( + yield return VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, "MGET", () => @@ -242,7 +247,8 @@ public async Task UpsertAsync(TRecord record, UpsertRecordOptions? optio var collectionName = this.ChooseCollectionName(options?.CollectionName); // Map. 
- var redisJsonRecord = RunModelConversion( + var redisJsonRecord = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, "SET", () => this._mapper.MapFromDataToStorageModel(record)); @@ -274,7 +280,8 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco var redisRecords = new List<(string maybePrefixedKey, string originalKey, JsonNode jsonNode)>(); foreach (var record in records) { - var redisJsonRecord = RunModelConversion( + var redisJsonRecord = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, collectionName, "MSET", () => this._mapper.MapFromDataToStorageModel(record)); @@ -354,35 +361,7 @@ private static async Task RunOperationAsync(string collectionName, string // Using Open Telemetry standard for naming of these entries. // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "Redis"); - wrapperException.Data.Add("db.collection.name", collectionName); - wrapperException.Data.Add("db.operation.name", operationName); - - throw wrapperException; - } - } - - /// - /// Run the given model conversion and wrap any exceptions with . - /// - /// The response type of the operation. - /// The name of the collection the operation is being run on. - /// The type of database operation being run. - /// The operation to run. - /// The result of the operation. - private static T RunModelConversion(string collectionName, string operationName, Func operation) - { - try - { - return operation.Invoke(); - } - catch (Exception ex) - { - var wrapperException = new VectorStoreRecordMappingException("Failed to convert vector store record.", ex); - - // Using Open Telemetry standard for naming of these entries. 
- // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", "Redis"); + wrapperException.Data.Add("db.system", DatabaseName); wrapperException.Data.Add("db.collection.name", collectionName); wrapperException.Data.Add("db.operation.name", operationName); diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs index a72859a4dcd2..747076ba9779 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs @@ -147,7 +147,8 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); // Act - var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-2", "BaseSet-3", "BaseSet-4"], new GetRecordOptions { IncludeVectors = true }); + // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. + var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-2", "BaseSet-3", "BaseSet-5", "BaseSet-4"], new GetRecordOptions { IncludeVectors = true }); // Assert Assert.NotNull(hotels); @@ -161,20 +162,6 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() } } - [Fact] - public async Task ItThrowsForPartialGetBatchResultAsync() - { - // Arrange. - var options = new AzureAISearchVectorRecordStoreOptions - { - DefaultCollectionName = fixture.TestIndexName - }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); - - // Act. 
- await Assert.ThrowsAsync(async () => await sut.GetBatchAsync(["BaseSet-1", "BaseSet-5", "BaseSet-2"]).ToListAsync()); - } - [Theory(Skip = SkipReason)] [InlineData(true)] [InlineData(false)] @@ -195,7 +182,7 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti await sut.DeleteAsync("Remove-2"); // Assert - await Assert.ThrowsAsync(async () => await sut.GetAsync("Remove-1", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("Remove-1", new GetRecordOptions { IncludeVectors = true })); } [Fact(Skip = SkipReason)] @@ -216,9 +203,20 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() await sut.DeleteBatchAsync(["RemoveMany-1", "RemoveMany-2", "RemoveMany-3", "RemoveMany-4"]); // Assert - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact(Skip = SkipReason)] + public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() + { + // Arrange + var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); + + // Act & Assert + Assert.Null(await sut.GetAsync("BaseSet-5", new GetRecordOptions { IncludeVectors = true })); } [Fact(Skip = SkipReason)] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs 
b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs new file mode 100644 index 000000000000..78b3b7dfeba0 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs @@ -0,0 +1,307 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Globalization; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.Qdrant; +using Microsoft.SemanticKernel.Memory; +using Qdrant.Client.Grpc; +using Xunit; +using Xunit.Abstractions; +using static SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant.QdrantVectorStoreFixture; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; + +/// +/// Contains tests for the class. +/// +/// Used for logging. +/// Qdrant setup and teardown. +[Collection("QdrantVectorStoreCollection")] +public sealed class QdrantVectorRecordStoreTests(ITestOutputHelper output, QdrantVectorStoreFixture fixture) +{ + [Theory] + [InlineData(true, "singleVectorHotels", false)] + [InlineData(false, "singleVectorHotels", false)] + [InlineData(true, "namedVectorsHotels", true)] + [InlineData(false, "namedVectorsHotels", true)] + public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition, string collectionName, bool hasNamedVectors) + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions + { + HasNamedVectors = hasNamedVectors, + DefaultCollectionName = collectionName, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null + }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + var record = this.CreateTestHotel(20); + + // Act. + var upsertResult = await sut.UpsertAsync(record); + + // Assert. 
+ var getResult = await sut.GetAsync(20, new GetRecordOptions { IncludeVectors = true }); + Assert.Equal(20ul, upsertResult); + Assert.Equal(record.HotelId, getResult?.HotelId); + Assert.Equal(record.HotelName, getResult?.HotelName); + Assert.Equal(record.HotelCode, getResult?.HotelCode); + Assert.Equal(record.HotelRating, getResult?.HotelRating); + Assert.Equal(record.ParkingIncluded, getResult?.ParkingIncluded); + Assert.Equal(record.Tags.ToArray(), getResult?.Tags.ToArray()); + Assert.Equal(record.Description, getResult?.Description); + + // TODO: figure out why original array is different from the one we get back. + //Assert.Equal(record.DescriptionEmbedding?.ToArray(), getResult?.DescriptionEmbedding?.ToArray()); + + // Output. + output.WriteLine(upsertResult.ToString(CultureInfo.InvariantCulture)); + output.WriteLine(getResult?.ToString()); + } + + [Fact] + public async Task ItCanUpsertAndRemoveDocumentWithGuidIdToVectorStoreAsync() + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = false, DefaultCollectionName = "singleVectorGuidIdHotels" }; + IVectorRecordStore sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + var record = new HotelInfoWithGuidId + { + HotelId = Guid.Parse("55555555-5555-5555-5555-555555555555"), + HotelName = "My Hotel 5", + Description = "This is a great hotel.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + }; + + // Act. + var upsertResult = await sut.UpsertAsync(record); + + // Assert. + var getResult = await sut.GetAsync(Guid.Parse("55555555-5555-5555-5555-555555555555"), new GetRecordOptions { IncludeVectors = true }); + Assert.Equal(Guid.Parse("55555555-5555-5555-5555-555555555555"), upsertResult); + Assert.Equal(record.HotelId, getResult?.HotelId); + Assert.Equal(record.HotelName, getResult?.HotelName); + Assert.Equal(record.Description, getResult?.Description); + + // Act. + await sut.DeleteAsync(Guid.Parse("55555555-5555-5555-5555-555555555555")); + + // Assert. 
+ Assert.Null(await sut.GetAsync(Guid.Parse("55555555-5555-5555-5555-555555555555"))); + + // Output. + output.WriteLine(upsertResult.ToString("D")); + output.WriteLine(getResult?.ToString()); + } + + [Theory] + [InlineData(true, true, "singleVectorHotels", false)] + [InlineData(true, false, "singleVectorHotels", false)] + [InlineData(false, true, "singleVectorHotels", false)] + [InlineData(false, false, "singleVectorHotels", false)] + [InlineData(true, true, "namedVectorsHotels", true)] + [InlineData(true, false, "namedVectorsHotels", true)] + [InlineData(false, true, "namedVectorsHotels", true)] + [InlineData(false, false, "namedVectorsHotels", true)] + public async Task ItCanGetDocumentFromVectorStoreAsync(bool useRecordDefinition, bool withEmbeddings, string collectionName, bool hasNamedVectors) + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions + { + HasNamedVectors = hasNamedVectors, + DefaultCollectionName = collectionName, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null + }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + // Act. + var getResult = await sut.GetAsync(11, new GetRecordOptions { IncludeVectors = withEmbeddings }); + + // Assert. + Assert.Equal(11ul, getResult?.HotelId); + Assert.Equal("My Hotel 11", getResult?.HotelName); + Assert.Equal(11, getResult?.HotelCode); + Assert.True(getResult?.ParkingIncluded); + Assert.Equal(4.5f, getResult?.HotelRating); + Assert.Equal(2, getResult?.Tags.Count); + Assert.Equal("t1", getResult?.Tags[0]); + Assert.Equal("t2", getResult?.Tags[1]); + Assert.Equal("This is a great hotel.", getResult?.Description); + if (withEmbeddings) + { + Assert.NotNull(getResult?.DescriptionEmbedding); + } + else + { + Assert.Null(getResult?.DescriptionEmbedding); + } + + // Output. 
+ output.WriteLine(getResult?.ToString()); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task ItCanGetDocumentWithGuidIdFromVectorStoreAsync(bool useRecordDefinition, bool withEmbeddings) + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions + { + HasNamedVectors = false, + DefaultCollectionName = "singleVectorGuidIdHotels", + VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelWithGuidIdVectorStoreRecordDefinition : null + }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + // Act. + var getResult = await sut.GetAsync(Guid.Parse("11111111-1111-1111-1111-111111111111"), new GetRecordOptions { IncludeVectors = withEmbeddings }); + + // Assert. + Assert.Equal(Guid.Parse("11111111-1111-1111-1111-111111111111"), getResult?.HotelId); + Assert.Equal("My Hotel 11", getResult?.HotelName); + Assert.Equal("This is a great hotel.", getResult?.Description); + if (withEmbeddings) + { + Assert.NotNull(getResult?.DescriptionEmbedding); + } + else + { + Assert.Null(getResult?.DescriptionEmbedding); + } + + // Output. + output.WriteLine(getResult?.ToString()); + } + + [Fact] + public async Task ItCanGetManyDocumentsFromVectorStoreAsync() + { + // Arrange + var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = true, DefaultCollectionName = "namedVectorsHotels" }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + // Act + // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. + var hotels = sut.GetBatchAsync([11, 15, 12], new GetRecordOptions { IncludeVectors = true }); + + // Assert + Assert.NotNull(hotels); + var hotelsList = await hotels.ToListAsync(); + Assert.Equal(2, hotelsList.Count); + + // Output + foreach (var hotel in hotelsList) + { + output.WriteLine(hotel?.ToString() ?? 
"Null"); + } + } + + [Theory] + [InlineData(true, "singleVectorHotels", false)] + [InlineData(false, "singleVectorHotels", false)] + [InlineData(true, "namedVectorsHotels", true)] + [InlineData(false, "namedVectorsHotels", true)] + public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefinition, string collectionName, bool hasNamedVectors) + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions + { + HasNamedVectors = hasNamedVectors, + DefaultCollectionName = collectionName, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null + }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + await sut.UpsertAsync(this.CreateTestHotel(20)); + + // Act. + await sut.DeleteAsync(20); + // Also delete a non-existing key to test that the operation does not fail for these. + await sut.DeleteAsync(21); + + // Assert. + Assert.Null(await sut.GetAsync(20)); + } + + [Theory] + [InlineData(true, "singleVectorHotels", false)] + [InlineData(false, "singleVectorHotels", false)] + [InlineData(true, "namedVectorsHotels", true)] + [InlineData(false, "namedVectorsHotels", true)] + public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync(bool useRecordDefinition, string collectionName, bool hasNamedVectors) + { + // Arrange. + var options = new QdrantVectorRecordStoreOptions + { + HasNamedVectors = hasNamedVectors, + DefaultCollectionName = collectionName, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null + }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + await sut.UpsertAsync(this.CreateTestHotel(20)); + + // Act. + // Also delete a non-existing key to test that the operation does not fail for these. + await sut.DeleteBatchAsync([20, 21]); + + // Assert. 
+ Assert.Null(await sut.GetAsync(20)); + } + + [Fact] + public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() + { + // Arrange + var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = false, DefaultCollectionName = "singleVectorHotels" }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + // Act & Assert + Assert.Null(await sut.GetAsync(15, new GetRecordOptions { IncludeVectors = true })); + } + + [Fact] + public async Task ItThrowsMappingExceptionForFailedMapperAsync() + { + // Arrange + var options = new QdrantVectorRecordStoreOptions { DefaultCollectionName = "singleVectorHotels", MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, PointStructCustomMapper = new FailingMapper() }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + + // Act & Assert + await Assert.ThrowsAsync(async () => await sut.GetAsync(11, new GetRecordOptions { IncludeVectors = true })); + } + + private HotelInfo CreateTestHotel(uint hotelId) + { + return new HotelInfo + { + HotelId = hotelId, + HotelName = $"My Hotel {hotelId}", + HotelCode = (int)hotelId, + HotelRating = 4.5f, + ParkingIncluded = true, + Tags = { "t1", "t2" }, + Description = "This is a great hotel.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + }; + } + + private sealed class FailingMapper : IVectorStoreRecordMapper + { + public PointStruct MapFromDataToStorageModel(HotelInfo dataModel) + { + throw new NotImplementedException(); + } + + public HotelInfo MapFromStorageToDataModel(PointStruct storageModel, GetRecordOptions? 
options = null) + { + throw new NotImplementedException(); + } + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreCollectionFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreCollectionFixture.cs new file mode 100644 index 000000000000..a7b565d71c2d --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreCollectionFixture.cs @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Xunit; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; + +[CollectionDefinition("QdrantVectorStoreCollection")] +public class QdrantVectorStoreCollectionFixture : ICollectionFixture +{ +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs new file mode 100644 index 000000000000..80316f1bd6fd --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs @@ -0,0 +1,325 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Docker.DotNet; +using Docker.DotNet.Models; +using Grpc.Core; +using Microsoft.SemanticKernel.Memory; +using Qdrant.Client; +using Qdrant.Client.Grpc; +using Xunit; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; + +public class QdrantVectorStoreFixture : IAsyncLifetime +{ + /// The docker client we are using to create a qdrant container with. + private readonly DockerClient _client; + + /// The id of the qdrant container that we are testing with. + private string? _containerId = null; + +#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + + /// + /// Initializes a new instance of the class. 
+ /// + public QdrantVectorStoreFixture() + { + using var dockerClientConfiguration = new DockerClientConfiguration(); + this._client = dockerClientConfiguration.CreateClient(); + this.HotelVectorStoreRecordDefinition = new VectorStoreRecordDefinition + { + Properties = new List + { + new VectorStoreRecordKeyProperty("HotelId"), + new VectorStoreRecordDataProperty("HotelName"), + new VectorStoreRecordDataProperty("HotelCode"), + new VectorStoreRecordDataProperty("ParkingIncluded"), + new VectorStoreRecordDataProperty("HotelRating"), + new VectorStoreRecordDataProperty("Tags"), + new VectorStoreRecordDataProperty("Description"), + new VectorStoreRecordVectorProperty("DescriptionEmbedding") + } + }; + this.HotelWithGuidIdVectorStoreRecordDefinition = new VectorStoreRecordDefinition + { + Properties = new List + { + new VectorStoreRecordKeyProperty("HotelId"), + new VectorStoreRecordDataProperty("HotelName"), + new VectorStoreRecordDataProperty("Description"), + new VectorStoreRecordVectorProperty("DescriptionEmbedding") + } + }; + } + +#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + + /// Gets the qdrant client connection to use for tests. + public QdrantClient QdrantClient { get; private set; } + + /// Gets the manually created vector store record definition for our test model. + public VectorStoreRecordDefinition HotelVectorStoreRecordDefinition { get; private set; } + + /// Gets the manually created vector store record definition for our test model. + public VectorStoreRecordDefinition HotelWithGuidIdVectorStoreRecordDefinition { get; private set; } + + /// + /// Create / Recreate qdrant docker container and run it. + /// + /// An async task. + public async Task InitializeAsync() + { + this._containerId = await SetupQdrantContainerAsync(this._client); + + // Connect to qdrant. 
+ this.QdrantClient = new QdrantClient("localhost"); + + // Create schemas for the vector store. + var vectorParamsMap = new VectorParamsMap(); + vectorParamsMap.Map.Add("DescriptionEmbedding", new VectorParams { Size = 4, Distance = Distance.Cosine }); + + // Wait for the qdrant container to be ready. + var retryCount = 0; + while (retryCount++ < 5) + { + try + { + await this.QdrantClient.ListCollectionsAsync(); + + // The container answered, so it is ready: stop polling instead of repeating the call. + break; + } + catch (RpcException e) + { + if (e.StatusCode != Grpc.Core.StatusCode.Unavailable) + { + throw; + } + + await Task.Delay(1000); + } + } + + await this.QdrantClient.CreateCollectionAsync( + "namedVectorsHotels", + vectorParamsMap); + + await this.QdrantClient.CreateCollectionAsync( + "singleVectorHotels", + new VectorParams { Size = 4, Distance = Distance.Cosine }); + + await this.QdrantClient.CreateCollectionAsync( + "singleVectorGuidIdHotels", + new VectorParams { Size = 4, Distance = Distance.Cosine }); + + // Create test data common to both named and unnamed vectors. + var tags = new ListValue(); + tags.Values.Add("t1"); + tags.Values.Add("t2"); + var tagsValue = new Value(); + tagsValue.ListValue = tags; + + // Create some test data using named vectors. + var embedding = new[] { 30f, 31f, 32f, 33f }; + + var namedVectors1 = new NamedVectors(); + var namedVectors2 = new NamedVectors(); + var namedVectors3 = new NamedVectors(); + + namedVectors1.Vectors.Add("DescriptionEmbedding", embedding); + namedVectors2.Vectors.Add("DescriptionEmbedding", embedding); + namedVectors3.Vectors.Add("DescriptionEmbedding", embedding); + + List namedVectorPoints = + [ + new PointStruct + { + Id = 11, + Vectors = new Vectors { Vectors_ = namedVectors1 }, + Payload = { ["HotelName"] = "My Hotel 11", ["HotelCode"] = 11, ["ParkingIncluded"] = true, ["Tags"] = tagsValue, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." 
} + }, + new PointStruct + { + Id = 12, + Vectors = new Vectors { Vectors_ = namedVectors2 }, + Payload = { ["HotelName"] = "My Hotel 12", ["HotelCode"] = 12, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + }, + new PointStruct + { + Id = 13, + Vectors = new Vectors { Vectors_ = namedVectors3 }, + Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + }, + ]; + + await this.QdrantClient.UpsertAsync("namedVectorsHotels", namedVectorPoints); + + // Create some test data using a single unnamed vector. + List unnamedVectorPoints = + [ + new PointStruct + { + Id = 11, + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 11", ["HotelCode"] = 11, ["ParkingIncluded"] = true, ["Tags"] = tagsValue, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." } + }, + new PointStruct + { + Id = 12, + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 12", ["HotelCode"] = 12, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + }, + new PointStruct + { + Id = 13, + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + }, + ]; + + await this.QdrantClient.UpsertAsync("singleVectorHotels", unnamedVectorPoints); + + // Create some test data using a single unnamed vector and a guid id. + List unnamedVectorGuidIdPoints = + [ + new PointStruct + { + Id = Guid.Parse("11111111-1111-1111-1111-111111111111"), + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 11", ["Description"] = "This is a great hotel." } + }, + new PointStruct + { + Id = Guid.Parse("22222222-2222-2222-2222-222222222222"), + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 12", ["Description"] = "This is a great hotel." 
} + }, + new PointStruct + { + Id = Guid.Parse("33333333-3333-3333-3333-333333333333"), + Vectors = embedding, + Payload = { ["HotelName"] = "My Hotel 13", ["Description"] = "This is a great hotel." } + }, + ]; + + await this.QdrantClient.UpsertAsync("singleVectorGuidIdHotels", unnamedVectorGuidIdPoints); + } + + /// + /// Delete the docker container after the test run. + /// + /// An async task. + public async Task DisposeAsync() + { + if (this._containerId != null) + { + await this._client.Containers.StopContainerAsync(this._containerId, new ContainerStopParameters()); + await this._client.Containers.RemoveContainerAsync(this._containerId, new ContainerRemoveParameters()); + } + } + + /// + /// Setup the qdrant container by pulling the image and running it. + /// + /// The docker client to create the container with. + /// The id of the container. + private static async Task SetupQdrantContainerAsync(DockerClient client) + { + await client.Images.CreateImageAsync( + new ImagesCreateParameters + { + FromImage = "qdrant/qdrant", + Tag = "latest", + }, + null, + new Progress()); + + var container = await client.Containers.CreateContainerAsync(new CreateContainerParameters() + { + Image = "qdrant/qdrant", + HostConfig = new HostConfig() + { + PortBindings = new Dictionary> + { + {"6333", new List {new() {HostPort = "6333" } }}, + {"6334", new List {new() {HostPort = "6334" } }} + }, + PublishAllPorts = true + }, + ExposedPorts = new Dictionary + { + { "6333", default }, + { "6334", default } + }, + }); + + await client.Containers.StartContainerAsync( + container.ID, + new ContainerStartParameters()); + + return container.ID; + } + + /// + /// A test model for the qdrant vector store. + /// +#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + public record HotelInfo() + { + /// The key of the record. 
+ [VectorStoreRecordKey] + public ulong HotelId { get; init; } + + /// A string metadata field. + [VectorStoreRecordData] + public string? HotelName { get; set; } + + /// An int metadata field. + [VectorStoreRecordData] + public int HotelCode { get; set; } + + /// A float metadata field. + [VectorStoreRecordData] + public float? HotelRating { get; set; } + + /// A bool metadata field. + [VectorStoreRecordData] + public bool ParkingIncluded { get; set; } + + [VectorStoreRecordData] + public List Tags { get; set; } = new List(); + + /// A data field. + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + public string Description { get; set; } + + /// A vector field. + [VectorStoreRecordVector] + public ReadOnlyMemory? DescriptionEmbedding { get; set; } + } + + /// + /// A test model for the qdrant vector store. + /// +#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + public record HotelInfoWithGuidId() + { + /// The key of the record. + [VectorStoreRecordKey] + public Guid HotelId { get; init; } + + /// A string metadata field. + [VectorStoreRecordData] + public string? HotelName { get; set; } + + /// A data field. + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + public string Description { get; set; } + + /// A vector field. + [VectorStoreRecordVector] + public ReadOnlyMemory? DescriptionEmbedding { get; set; } + } +} +#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. 
diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs index 4d0a9f69473f..02cd55df0a41 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs @@ -146,7 +146,8 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() var sut = new RedisVectorRecordStore(fixture.Database, options); // Act - var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-2"], new GetRecordOptions { IncludeVectors = true }); + // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. + var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-5", "BaseSet-2"], new GetRecordOptions { IncludeVectors = true }); // Assert Assert.NotNull(hotels); @@ -171,17 +172,6 @@ public async Task ItFailsToGetDocumentsWithInvalidSchemaAsync() await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-4-Invalid", new GetRecordOptions { IncludeVectors = true })); } - [Fact] - public async Task ItThrowsForPartialGetBatchResultAsync() - { - // Arrange. - var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorRecordStore(fixture.Database, options); - - // Act & Assert. - await Assert.ThrowsAsync(async () => await sut.GetBatchAsync(["BaseSet-1", "nonexistent", "BaseSet-2"], new GetRecordOptions { IncludeVectors = true }).ToListAsync()); - } - [Theory] [InlineData(true)] [InlineData(false)] @@ -213,7 +203,7 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti await sut.DeleteAsync("Remove-2"); // Assert. 
- await Assert.ThrowsAsync(async () => await sut.GetAsync("Remove-1")); + Assert.Null(await sut.GetAsync("Remove-1")); } [Fact] @@ -231,9 +221,20 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() await sut.DeleteBatchAsync(["RemoveMany-1", "RemoveMany-2", "RemoveMany-3", "RemoveMany-4"]); // Assert - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); - await Assert.ThrowsAsync(async () => await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact] + public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() + { + // Arrange + var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorRecordStore(fixture.Database, options); + + // Act & Assert + Assert.Null(await sut.GetAsync("BaseSet-5", new GetRecordOptions { IncludeVectors = true })); } [Fact] diff --git a/dotnet/src/IntegrationTests/IntegrationTests.csproj b/dotnet/src/IntegrationTests/IntegrationTests.csproj index 3c3fa543d5d6..87924fd854e4 100644 --- a/dotnet/src/IntegrationTests/IntegrationTests.csproj +++ b/dotnet/src/IntegrationTests/IntegrationTests.csproj @@ -64,6 +64,7 @@ + diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs new file mode 100644 index 000000000000..f2fc3f992de7 --- /dev/null +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs @@ -0,0 +1,45 
@@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using Microsoft.SemanticKernel.Memory; + +namespace Microsoft.SemanticKernel; + +/// +/// Contains helpers for running vector store operations and wrapping any exceptions they throw. +/// +[ExcludeFromCodeCoverage] +internal static class VectorStoreErrorHandler +{ + /// + /// Run the given model conversion and wrap any exceptions with VectorStoreRecordMappingException. + /// + /// The response type of the operation. + /// The name of the database system the operation is being run on. + /// The name of the collection the operation is being run on. + /// The type of database operation being run. + /// The operation to run. + /// The result of the operation. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static T RunModelConversion(string databaseSystemName, string collectionName, string operationName, Func operation) + { + try + { + return operation.Invoke(); + } + catch (Exception ex) + { + var wrapperException = new VectorStoreRecordMappingException("Failed to convert vector store record.", ex); + + // Using Open Telemetry standard for naming of these entries. 
+ // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ + wrapperException.Data.Add("db.system", databaseSystemName); + wrapperException.Data.Add("db.collection.name", collectionName); + wrapperException.Data.Add("db.operation.name", operationName); + + throw wrapperException; + } + } +} diff --git a/dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs similarity index 84% rename from dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs rename to dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index fc580c69bf9c..20318c8d385b 100644 --- a/dotnet/src/InternalUtilities/src/Schema/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -5,11 +5,7 @@ using System.Diagnostics.CodeAnalysis; using System.Linq; using System.Reflection; -using System.Text.Json; -using System.Text.Json.Nodes; using System.Text.Json.Serialization; -using System.Text.Json.Serialization.Metadata; -using JsonSchemaMapper; using Microsoft.SemanticKernel.Memory; namespace Microsoft.SemanticKernel; @@ -196,6 +192,44 @@ public static (PropertyInfo keyProperty, List dataProperties, List return (keyProperty!, dataProperties, vectorProperties); } + /// + /// Create a by reading the attributes on the properties of the given type. + /// + /// The type to create the definition for. + /// if the store supports multiple vectors, otherwise. + /// The based on the given type. 
+ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromType(Type type, bool supportsMultipleVectors) + { + var properties = FindProperties(type, supportsMultipleVectors); + var definitionProperties = new List(); + + definitionProperties.Add(new VectorStoreRecordKeyProperty(properties.keyProperty.Name)); + + foreach (var dataProperty in properties.dataProperties) + { + var dataAttribute = dataProperty.GetCustomAttribute(); + if (dataAttribute is not null) + { + definitionProperties.Add(new VectorStoreRecordDataProperty(dataProperty.Name) + { + HasEmbedding = dataAttribute.HasEmbedding, + EmbeddingPropertyName = dataAttribute.EmbeddingPropertyName, + }); + } + } + + foreach (var vectorProperty in properties.vectorProperties) + { + var vectorAttribute = vectorProperty.GetCustomAttribute(); + if (vectorAttribute is not null) + { + definitionProperties.Add(new VectorStoreRecordVectorProperty(vectorProperty.Name)); + } + } + + return new VectorStoreRecordDefinition { Properties = definitionProperties }; + } + /// /// Verify that the given properties are of the supported types. /// diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs index 6852ca14b6bc..1caa5fd59018 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs @@ -18,7 +18,7 @@ public interface IVectorRecordStore { /// /// Gets a record from the vector store. Does not guarantee that the collection exists. - /// Throws if the record is not found. + /// Returns null if the record is not found. /// /// The unique id associated with the record to get. /// Optional options for retrieving the record. @@ -26,12 +26,13 @@ public interface IVectorRecordStore /// The record if found, otherwise null. /// Throw when the command fails to execute for any reason. 
/// Throw when mapping between the storage model and record data model fails. - Task GetAsync(TKey key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + Task GetAsync(TKey key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); /// /// Gets a batch of records from the vector store. Does not guarantee that the collection exists. - /// Throws if any of the records are not found. /// Gets will be made in a single request or in a single parallel batch depending on the available store functionality. + /// Only found records will be returned, so the resultset may be smaller than the requested keys. + /// Throws for any issues other than records not being found. /// /// The unique ids associated with the record to get. /// Optional options for retrieving the records. diff --git a/dotnet/src/SemanticKernel.UnitTests/Utilities/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs similarity index 81% rename from dotnet/src/SemanticKernel.UnitTests/Utilities/VectorStoreRecordPropertyReaderTests.cs rename to dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index 232758d77bbb..b702f2b799a0 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Utilities/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -5,7 +5,7 @@ using Microsoft.SemanticKernel.Memory; using Xunit; -namespace SemanticKernel.UnitTests.Utilities; +namespace SemanticKernel.UnitTests.Data; public class VectorStoreRecordPropertyReaderTests { @@ -61,8 +61,8 @@ public void FindPropertiesThrowsForMultipleVectorsWithSingleVectorSupport(bool u // Assert. var expectedMessage = useConfig ? - "Multiple vector properties configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiPropsModel while only one is supported." 
: - "Multiple vector properties found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiPropsModel while only one is supported."; + "Multiple vector properties configured for type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+MultiPropsModel while only one is supported." : + "Multiple vector properties found on type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+MultiPropsModel while only one is supported."; Assert.Equal(expectedMessage, ex.Message); } @@ -78,8 +78,8 @@ public void FindPropertiesThrowsOnMultipleKeyProperties(bool useConfig) // Assert. var expectedMessage = useConfig ? - "Multiple key properties configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiKeysModel." : - "Multiple key properties found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+MultiKeysModel."; + "Multiple key properties configured for type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+MultiKeysModel." : + "Multiple key properties found on type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+MultiKeysModel."; Assert.Equal(expectedMessage, ex.Message); } @@ -95,8 +95,8 @@ public void FindPropertiesThrowsOnNoKeyProperty(bool useConfig) // Assert. var expectedMessage = useConfig ? - "No key property configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoKeyModel." : - "No key property found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoKeyModel."; + "No key property configured for type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+NoKeyModel." 
: + "No key property found on type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+NoKeyModel."; Assert.Equal(expectedMessage, ex.Message); } @@ -112,8 +112,8 @@ public void FindPropertiesThrowsOnNoVectorPropertyWithSingleVectorSupport(bool u // Assert. var expectedMessage = useConfig ? - "No vector property configured for type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoVectorModel." : - "No vector property found on type SemanticKernel.UnitTests.Utilities.VectorStoreRecordPropertyReaderTests+NoVectorModel."; + "No vector property configured for type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+NoVectorModel." : + "No vector property found on type SemanticKernel.UnitTests.Data.VectorStoreRecordPropertyReaderTests+NoVectorModel."; Assert.Equal(expectedMessage, ex.Message); } @@ -138,6 +138,33 @@ public void FindPropertiesUsingConfigThrowsForNotFoundProperties(string property Assert.Throws(() => VectorStoreRecordPropertyReader.FindProperties(typeof(NoKeyModel), definition, false)); } + [Fact] + public void CreateVectorStoreRecordDefinitionFromTypeConvertsAllProps() + { + // Act. + var definition = VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(MultiPropsModel), true); + + // Assert. 
+ Assert.Equal(5, definition.Properties.Count); + Assert.Equal("Key", definition.Properties[0].PropertyName); + Assert.Equal("Data1", definition.Properties[1].PropertyName); + Assert.Equal("Data2", definition.Properties[2].PropertyName); + Assert.Equal("Vector1", definition.Properties[3].PropertyName); + Assert.Equal("Vector2", definition.Properties[4].PropertyName); + + Assert.IsType(definition.Properties[0]); + Assert.IsType(definition.Properties[1]); + Assert.IsType(definition.Properties[2]); + Assert.IsType(definition.Properties[3]); + Assert.IsType(definition.Properties[4]); + + var data1 = (VectorStoreRecordDataProperty)definition.Properties[1]; + var data2 = (VectorStoreRecordDataProperty)definition.Properties[2]; + + Assert.True(data1.HasEmbedding); + Assert.False(data2.HasEmbedding); + } + [Fact] public void VerifyPropertyTypesPassForAllowedTypes() { @@ -229,7 +256,7 @@ private sealed class MultiPropsModel [VectorStoreRecordKey] public string Key { get; set; } = string.Empty; - [VectorStoreRecordData] + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "Vector1")] public string Data1 { get; set; } = string.Empty; [VectorStoreRecordData] @@ -249,7 +276,7 @@ private sealed class MultiPropsModel Properties = [ new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data1"), + new VectorStoreRecordDataProperty("Data1") { HasEmbedding = true, EmbeddingPropertyName = "Vector1" }, new VectorStoreRecordDataProperty("Data2"), new VectorStoreRecordVectorProperty("Vector1"), new VectorStoreRecordVectorProperty("Vector2") From 17d55e9e1a0587322947972785472ef99dbc197e Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Wed, 26 Jun 2024 16:33:06 +0100 Subject: [PATCH 07/48] .Net: VectorStore: Add unit tests for mappers and adding dedicated options for mapper. 
(#6948) ### Motivation and Context As part of adding new vector stores for redis and qdrant, these include mappers from data models to storage models and back, that require unit testing. The mapper interface also uses an options parameter which was reused from the IVectorRecordStore.GetAsync method, but really should have been a dedicated options object from the start. ### Description - Added unit tests for the redis and qdrant data model mappers. - Added a dedicated options model for the mapper. ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- .../AzureAISearchVectorRecordStore.cs | 10 +- .../QdrantVectorRecordStore.cs | 5 +- .../QdrantVectorStoreRecordMapper.cs | 5 +- .../RedisVectorRecordStore.cs | 6 +- .../RedisVectorStoreRecordMapper.cs | 2 +- .../QdrantVectorStoreRecordMapperTests.cs | 382 ++++++++++++++++++ .../RedisVectorStoreRecordMapperTests.cs | 97 +++++ .../AzureAISearchVectorRecordStoreTests.cs | 2 +- .../Qdrant/QdrantVectorRecordStoreTests.cs | 2 +- .../Redis/RedisVectorRecordStoreTests.cs | 2 +- .../Memory/IVectorStoreRecordMapper.cs | 4 +- .../Memory/StorageToDataModelMapperOptions.cs | 14 + 12 files changed, 516 insertions(+), 15 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs create mode 100644 dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordMapperTests.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/StorageToDataModelMapperOptions.cs diff --git 
a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index f5cf330e5f3e..612a93ba64c1 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -110,10 +110,11 @@ public AzureAISearchVectorRecordStore(SearchIndexClient searchIndexClient, Azure // Create Options. var innerOptions = this.ConvertGetDocumentOptions(options); var collectionName = this.ChooseCollectionName(options?.CollectionName); + var includeVectors = options?.IncludeVectors ?? false; // Get record. var searchClient = this.GetSearchClient(collectionName); - return this.GetDocumentAndMapToDataModelAsync(searchClient, collectionName, key, innerOptions, cancellationToken); + return this.GetDocumentAndMapToDataModelAsync(searchClient, collectionName, key, includeVectors, innerOptions, cancellationToken); } /// @@ -124,10 +125,11 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G // Create Options var innerOptions = this.ConvertGetDocumentOptions(options); var collectionName = this.ChooseCollectionName(options?.CollectionName); + var includeVectors = options?.IncludeVectors ?? false; // Get records in parallel. var searchClient = this.GetSearchClient(collectionName); - var tasks = keys.Select(key => this.GetDocumentAndMapToDataModelAsync(searchClient, collectionName, key, innerOptions, cancellationToken)); + var tasks = keys.Select(key => this.GetDocumentAndMapToDataModelAsync(searchClient, collectionName, key, includeVectors, innerOptions, cancellationToken)); var results = await Task.WhenAll(tasks).ConfigureAwait(false); foreach (var result in results) { @@ -209,6 +211,7 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco /// The search client to use when fetching the document. 
/// The name of the collection to retrieve the record from. /// The key of the record to get. + /// A value indicating whether to include vectors in the result or not. /// The azure ai search sdk options for getting a document. /// The to monitor for cancellation requests. The default is . /// The retrieved document, mapped to the consumer data model. @@ -216,6 +219,7 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco SearchClient searchClient, string collectionName, string key, + bool includeVectors, GetDocumentOptions innerOptions, CancellationToken cancellationToken) { @@ -238,7 +242,7 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco DatabaseName, collectionName, OperationName, - () => this._options.JsonObjectCustomMapper!.MapFromStorageToDataModel(jsonObject)); + () => this._options.JsonObjectCustomMapper!.MapFromStorageToDataModel(jsonObject, new() { IncludeVectors = includeVectors })); } // Use the built in Azure AI Search mapper. diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs index b05766ff9b17..d18754741817 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs @@ -290,12 +290,13 @@ private async IAsyncEnumerable GetBatchByPointIdAsync( // Create options. var collectionName = this.ChooseCollectionName(options?.CollectionName); var pointsIds = keys.Select(key => keyConverter(key)).ToArray(); + var includeVectors = options?.IncludeVectors ?? false; // Retrieve data points. var retrievedPoints = await RunOperationAsync( collectionName, OperationName, - () => this._qdrantClient.RetrieveAsync(collectionName, pointsIds, true, options?.IncludeVectors ?? 
false, cancellationToken: cancellationToken)).ConfigureAwait(false); + () => this._qdrantClient.RetrieveAsync(collectionName, pointsIds, true, includeVectors, cancellationToken: cancellationToken)).ConfigureAwait(false); // Convert the retrieved points to the target data model. foreach (var retrievedPoint in retrievedPoints) @@ -316,7 +317,7 @@ private async IAsyncEnumerable GetBatchByPointIdAsync( DatabaseName, collectionName, OperationName, - () => this._mapper.MapFromStorageToDataModel(pointStruct, options)); + () => this._mapper.MapFromStorageToDataModel(pointStruct, new() { IncludeVectors = includeVectors })); } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs index d7e122a2627a..7b439616db5c 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using System; +using System.Collections; using System.Collections.Generic; using System.Linq; using System.Reflection; @@ -172,7 +173,7 @@ public PointStruct MapFromDataToStorageModel(TRecord dataModel) } /// - public TRecord MapFromStorageToDataModel(PointStruct storageModel, GetRecordOptions? options = default) + public TRecord MapFromStorageToDataModel(PointStruct storageModel, StorageToDataModelMapperOptions options) { // Get the key property name and value. var keyPropertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(this._keyPropertyInfo); @@ -284,7 +285,7 @@ sourceValue is IEnumerable || sourceValue is IEnumerable || sourceValue is IEnumerable) { - var listValue = sourceValue as IEnumerable; + var listValue = sourceValue as IEnumerable; value.ListValue = new ListValue(); foreach (var item in listValue!) 
{ diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs index 7192c2125dc2..9a21339f9a8d 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs @@ -121,6 +121,7 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< // Create Options var collectionName = this.ChooseCollectionName(options?.CollectionName); var maybePrefixedKey = this.PrefixKeyIfNeeded(key, collectionName); + var includeVectors = options?.IncludeVectors ?? false; // Get the redis value. var redisResult = await RunOperationAsync( @@ -155,7 +156,7 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< () => { var node = JsonSerializer.Deserialize(redisResultString)!; - return this._mapper.MapFromStorageToDataModel((key, node)); + return this._mapper.MapFromStorageToDataModel((key, node), new() { IncludeVectors = includeVectors }); }); } @@ -169,6 +170,7 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G var collectionName = this.ChooseCollectionName(options?.CollectionName); var maybePrefixedKeys = keysList.Select(key => this.PrefixKeyIfNeeded(key, collectionName)); var redisKeys = maybePrefixedKeys.Select(x => new RedisKey(x)).ToArray(); + var includeVectors = options?.IncludeVectors ?? false; // Get the list of redis results. 
var redisResults = await RunOperationAsync( @@ -205,7 +207,7 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G () => { var node = JsonSerializer.Deserialize(redisResultString)!; - return this._mapper.MapFromStorageToDataModel((key, node)); + return this._mapper.MapFromStorageToDataModel((key, node), new() { IncludeVectors = includeVectors }); }); } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs index 01c9ff69b96e..61c90e5eda07 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs @@ -46,7 +46,7 @@ public RedisVectorStoreRecordMapper(string keyFieldJsonPropertyName) } /// - public TConsumerDataModel MapFromStorageToDataModel((string Key, JsonNode Node) storageModel, GetRecordOptions? options = null) + public TConsumerDataModel MapFromStorageToDataModel((string Key, JsonNode Node) storageModel, StorageToDataModelMapperOptions options) { JsonObject jsonObject; diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs new file mode 100644 index 000000000000..8623dbab9f2b --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs @@ -0,0 +1,382 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.SemanticKernel.Connectors.Qdrant; +using Microsoft.SemanticKernel.Memory; +using Qdrant.Client.Grpc; +using Xunit; + +namespace SemanticKernel.Connectors.Qdrant.UnitTests; + +/// +/// Contains tests for the class. 
+/// +public class QdrantVectorStoreRecordMapperTests +{ + [Theory] + [InlineData(true)] + [InlineData(false)] + public void MapsSinglePropsFromDataToStorageModelWithUlong(bool hasNamedVectors) + { + // Arrange. + var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = hasNamedVectors }); + + // Act. + var actual = sut.MapFromDataToStorageModel(CreateSinglePropsModel(5ul)); + + // Assert. + Assert.NotNull(actual); + Assert.Equal(5ul, actual.Id.Num); + Assert.Single(actual.Payload); + Assert.Equal("data", actual.Payload["Data"].StringValue); + + if (hasNamedVectors) + { + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vectors.Vectors_.Vectors["Vector"].Data.ToArray()); + } + else + { + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vectors.Vector.Data.ToArray()); + } + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void MapsSinglePropsFromDataToStorageModelWithGuid(bool hasNamedVectors) + { + // Arrange. + var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = hasNamedVectors }); + + // Act. + var actual = sut.MapFromDataToStorageModel(CreateSinglePropsModel(Guid.Parse("11111111-1111-1111-1111-111111111111"))); + + // Assert. + Assert.NotNull(actual); + Assert.Equal(Guid.Parse("11111111-1111-1111-1111-111111111111"), Guid.Parse(actual.Id.Uuid)); + Assert.Single(actual.Payload); + Assert.Equal("data", actual.Payload["Data"].StringValue); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public void MapsSinglePropsFromStorageToDataModelWithUlong(bool hasNamedVectors, bool includeVectors) + { + // Arrange. + var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = hasNamedVectors }); + + // Act. + var actual = sut.MapFromStorageToDataModel(CreateSinglePropsPointStruct(5, hasNamedVectors), new() { IncludeVectors = includeVectors }); + + // Assert. 
+ Assert.NotNull(actual); + Assert.Equal(5ul, actual.Key); + Assert.Equal("data", actual.Data); + + if (includeVectors) + { + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + } + else + { + Assert.Null(actual.Vector); + } + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public void MapsSinglePropsFromStorageToDataModelWithGuid(bool hasNamedVectors, bool includeVectors) + { + // Arrange. + var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = hasNamedVectors }); + + // Act. + var actual = sut.MapFromStorageToDataModel(CreateSinglePropsPointStruct(Guid.Parse("11111111-1111-1111-1111-111111111111"), hasNamedVectors), new() { IncludeVectors = includeVectors }); + + // Assert. + Assert.NotNull(actual); + Assert.Equal(Guid.Parse("11111111-1111-1111-1111-111111111111"), actual.Key); + Assert.Equal("data", actual.Data); + + if (includeVectors) + { + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + } + else + { + Assert.Null(actual.Vector); + } + } + + [Fact] + public void MapsMultiPropsFromDataToStorageModelWithUlong() + { + // Arrange. + var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = true }); + + // Act. + var actual = sut.MapFromDataToStorageModel(CreateMultiPropsModel(5ul)); + + // Assert. 
+ Assert.NotNull(actual); + Assert.Equal(5ul, actual.Id.Num); + Assert.Equal(7, actual.Payload.Count); + Assert.Equal("data 1", actual.Payload["DataString"].StringValue); + Assert.Equal(5, actual.Payload["DataInt"].IntegerValue); + Assert.Equal(5, actual.Payload["DataLong"].IntegerValue); + Assert.Equal(5.5f, actual.Payload["DataFloat"].DoubleValue); + Assert.Equal(5.5d, actual.Payload["DataDouble"].DoubleValue); + Assert.True(actual.Payload["DataBool"].BoolValue); + Assert.Equal(new int[] { 1, 2, 3, 4 }, actual.Payload["DataArrayInt"].ListValue.Values.Select(x => (int)x.IntegerValue).ToArray()); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vectors.Vectors_.Vectors["Vector1"].Data.ToArray()); + Assert.Equal(new float[] { 5, 6, 7, 8 }, actual.Vectors.Vectors_.Vectors["Vector2"].Data.ToArray()); + } + + [Fact] + public void MapsMultiPropsFromDataToStorageModelWithGuid() + { + // Arrange. + var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = true }); + + // Act. + var actual = sut.MapFromDataToStorageModel(CreateMultiPropsModel(Guid.Parse("11111111-1111-1111-1111-111111111111"))); + + // Assert. 
+ Assert.NotNull(actual); + Assert.Equal(Guid.Parse("11111111-1111-1111-1111-111111111111"), Guid.Parse(actual.Id.Uuid)); + Assert.Equal(7, actual.Payload.Count); + Assert.Equal("data 1", actual.Payload["DataString"].StringValue); + Assert.Equal(5, actual.Payload["DataInt"].IntegerValue); + Assert.Equal(5, actual.Payload["DataLong"].IntegerValue); + Assert.Equal(5.5f, actual.Payload["DataFloat"].DoubleValue); + Assert.Equal(5.5d, actual.Payload["DataDouble"].DoubleValue); + Assert.True(actual.Payload["DataBool"].BoolValue); + Assert.Equal(new int[] { 1, 2, 3, 4 }, actual.Payload["DataArrayInt"].ListValue.Values.Select(x => (int)x.IntegerValue).ToArray()); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vectors.Vectors_.Vectors["Vector1"].Data.ToArray()); + Assert.Equal(new float[] { 5, 6, 7, 8 }, actual.Vectors.Vectors_.Vectors["Vector2"].Data.ToArray()); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void MapsMultiPropsFromStorageToDataModelWithUlong(bool includeVectors) + { + // Arrange. + var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = true }); + + // Act. + var actual = sut.MapFromStorageToDataModel(CreateMultiPropsPointStruct(5), new() { IncludeVectors = includeVectors }); + + // Assert. 
+ Assert.NotNull(actual); + Assert.Equal(5ul, actual.Key); + Assert.Equal("data 1", actual.DataString); + Assert.Equal(5, actual.DataInt); + Assert.Equal(5L, actual.DataLong); + Assert.Equal(5.5f, actual.DataFloat); + Assert.Equal(5.5d, actual.DataDouble); + Assert.True(actual.DataBool); + Assert.Equal(new int[] { 1, 2, 3, 4 }, actual.DataArrayInt); + + if (includeVectors) + { + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector1!.Value.ToArray()); + Assert.Equal(new float[] { 5, 6, 7, 8 }, actual.Vector2!.Value.ToArray()); + } + else + { + Assert.Null(actual.Vector1); + Assert.Null(actual.Vector2); + } + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void MapsMultiPropsFromStorageToDataModelWithGuid(bool includeVectors) + { + // Arrange. + var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = true }); + + // Act. + var actual = sut.MapFromStorageToDataModel(CreateMultiPropsPointStruct(Guid.Parse("11111111-1111-1111-1111-111111111111")), new() { IncludeVectors = includeVectors }); + + // Assert. 
+ Assert.NotNull(actual); + Assert.Equal(Guid.Parse("11111111-1111-1111-1111-111111111111"), actual.Key); + Assert.Equal("data 1", actual.DataString); + Assert.Equal(5, actual.DataInt); + Assert.Equal(5L, actual.DataLong); + Assert.Equal(5.5f, actual.DataFloat); + Assert.Equal(5.5d, actual.DataDouble); + Assert.True(actual.DataBool); + Assert.Equal(new int[] { 1, 2, 3, 4 }, actual.DataArrayInt); + + if (includeVectors) + { + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector1!.Value.ToArray()); + Assert.Equal(new float[] { 5, 6, 7, 8 }, actual.Vector2!.Value.ToArray()); + } + else + { + Assert.Null(actual.Vector1); + Assert.Null(actual.Vector2); + } + } + + private static SinglePropsModel CreateSinglePropsModel(TKey key) + { + return new SinglePropsModel + { + Key = key, + Data = "data", + Vector = new float[] { 1, 2, 3, 4 }, + NotAnnotated = "notAnnotated", + }; + } + + private static MultiPropsModel CreateMultiPropsModel(TKey key) + { + return new MultiPropsModel + { + Key = key, + DataString = "data 1", + DataInt = 5, + DataLong = 5L, + DataFloat = 5.5f, + DataDouble = 5.5d, + DataBool = true, + DataArrayInt = new List { 1, 2, 3, 4 }, + Vector1 = new float[] { 1, 2, 3, 4 }, + Vector2 = new float[] { 5, 6, 7, 8 }, + NotAnnotated = "notAnnotated", + }; + } + + private static PointStruct CreateSinglePropsPointStruct(ulong id, bool hasNamedVectors) + { + var pointStruct = new PointStruct(); + pointStruct.Id = new PointId() { Num = id }; + AddDataToSinglePropsPointStruct(pointStruct, hasNamedVectors); + return pointStruct; + } + + private static PointStruct CreateSinglePropsPointStruct(Guid id, bool hasNamedVectors) + { + var pointStruct = new PointStruct(); + pointStruct.Id = new PointId() { Uuid = id.ToString() }; + AddDataToSinglePropsPointStruct(pointStruct, hasNamedVectors); + return pointStruct; + } + + private static void AddDataToSinglePropsPointStruct(PointStruct pointStruct, bool hasNamedVectors) + { + pointStruct.Payload.Add("Data", "data"); + + if 
(hasNamedVectors) + { + var namedVectors = new NamedVectors(); + namedVectors.Vectors.Add("Vector", new[] { 1f, 2f, 3f, 4f }); + pointStruct.Vectors = new Vectors() { Vectors_ = namedVectors }; + } + else + { + pointStruct.Vectors = new[] { 1f, 2f, 3f, 4f }; + } + } + + private static PointStruct CreateMultiPropsPointStruct(ulong id) + { + var pointStruct = new PointStruct(); + pointStruct.Id = new PointId() { Num = id }; + AddDataToMultiPropsPointStruct(pointStruct); + return pointStruct; + } + + private static PointStruct CreateMultiPropsPointStruct(Guid id) + { + var pointStruct = new PointStruct(); + pointStruct.Id = new PointId() { Uuid = id.ToString() }; + AddDataToMultiPropsPointStruct(pointStruct); + return pointStruct; + } + + private static void AddDataToMultiPropsPointStruct(PointStruct pointStruct) + { + pointStruct.Payload.Add("DataString", "data 1"); + pointStruct.Payload.Add("DataInt", 5); + pointStruct.Payload.Add("DataLong", 5L); + pointStruct.Payload.Add("DataFloat", 5.5f); + pointStruct.Payload.Add("DataDouble", 5.5d); + pointStruct.Payload.Add("DataBool", true); + + var dataIntArray = new ListValue(); + dataIntArray.Values.Add(1); + dataIntArray.Values.Add(2); + dataIntArray.Values.Add(3); + dataIntArray.Values.Add(4); + pointStruct.Payload.Add("DataArrayInt", new Value { ListValue = dataIntArray }); + + var namedVectors = new NamedVectors(); + namedVectors.Vectors.Add("Vector1", new[] { 1f, 2f, 3f, 4f }); + namedVectors.Vectors.Add("Vector2", new[] { 5f, 6f, 7f, 8f }); + pointStruct.Vectors = new Vectors() { Vectors_ = namedVectors }; + } + + private sealed class SinglePropsModel + { + [VectorStoreRecordKey] + public TKey? Key { get; set; } = default; + + [VectorStoreRecordData] + public string Data { get; set; } = string.Empty; + + [VectorStoreRecordVector] + public ReadOnlyMemory? 
Vector { get; set; } + + public string NotAnnotated { get; set; } = string.Empty; + } + + private sealed class MultiPropsModel + { + [VectorStoreRecordKey] + public TKey? Key { get; set; } = default; + + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "Vector1")] + public string DataString { get; set; } = string.Empty; + + [VectorStoreRecordData] + public int DataInt { get; set; } = 0; + + [VectorStoreRecordData] + public long DataLong { get; set; } = 0; + + [VectorStoreRecordData] + public float DataFloat { get; set; } = 0; + + [VectorStoreRecordData] + public double DataDouble { get; set; } = 0; + + [VectorStoreRecordData] + public bool DataBool { get; set; } = false; + + [VectorStoreRecordData] + public List? DataArrayInt { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory? Vector1 { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory? Vector2 { get; set; } + + public string NotAnnotated { get; set; } = string.Empty; + } +} diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordMapperTests.cs new file mode 100644 index 000000000000..b8d320f62876 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordMapperTests.cs @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Linq; +using System.Text.Json.Nodes; +using Microsoft.SemanticKernel.Connectors.Redis; +using Microsoft.SemanticKernel.Memory; +using Xunit; + +namespace SemanticKernel.Connectors.Redis.UnitTests; + +/// +/// Contains tests for the class. +/// +public sealed class RedisVectorStoreRecordMapperTests +{ + [Fact] + public void MapsAllFieldsFromDataToStorageModel() + { + // Arrange. + var sut = new RedisVectorStoreRecordMapper("Key"); + + // Act. + var actual = sut.MapFromDataToStorageModel(CreateModel("test key")); + + // Assert. 
+ Assert.NotNull(actual.Node); + Assert.Equal("test key", actual.Key); + var jsonObject = actual.Node.AsObject(); + Assert.Equal("data 1", jsonObject?["Data1"]?.ToString()); + Assert.Equal("data 2", jsonObject?["Data2"]?.ToString()); + Assert.Equal(new float[] { 1, 2, 3, 4 }, jsonObject?["Vector1"]?.AsArray().GetValues().ToArray()); + Assert.Equal(new float[] { 5, 6, 7, 8 }, jsonObject?["Vector2"]?.AsArray().GetValues().ToArray()); + } + + [Fact] + public void MapsAllFieldsFromStorageToDataModel() + { + // Arrange. + var sut = new RedisVectorStoreRecordMapper("Key"); + + // Act. + var actual = sut.MapFromStorageToDataModel(("test key", CreateJsonNode()), new()); + + // Assert. + Assert.NotNull(actual); + Assert.Equal("test key", actual.Key); + Assert.Equal("data 1", actual.Data1); + Assert.Equal("data 2", actual.Data2); + + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector1!.Value.ToArray()); + Assert.Equal(new float[] { 5, 6, 7, 8 }, actual.Vector2!.Value.ToArray()); + } + + private static MultiPropsModel CreateModel(string key) + { + return new MultiPropsModel + { + Key = key, + Data1 = "data 1", + Data2 = "data 2", + Vector1 = new float[] { 1, 2, 3, 4 }, + Vector2 = new float[] { 5, 6, 7, 8 }, + NotAnnotated = "notAnnotated", + }; + } + + private static JsonObject CreateJsonNode() + { + var jsonObject = new JsonObject(); + jsonObject.Add("Data1", "data 1"); + jsonObject.Add("Data2", "data 2"); + jsonObject.Add("Vector1", new JsonArray(new[] { 1, 2, 3, 4 }.Select(x => JsonValue.Create(x)).ToArray())); + jsonObject.Add("Vector2", new JsonArray(new[] { 5, 6, 7, 8 }.Select(x => JsonValue.Create(x)).ToArray())); + return jsonObject; + } + + private sealed class MultiPropsModel + { + [VectorStoreRecordKey] + public string Key { get; set; } = string.Empty; + + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "Vector1")] + public string Data1 { get; set; } = string.Empty; + + [VectorStoreRecordData] + public string Data2 { get; set; } = 
string.Empty; + + [VectorStoreRecordVector] + public ReadOnlyMemory? Vector1 { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory? Vector2 { get; set; } + + public string NotAnnotated { get; set; } = string.Empty; + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs index 747076ba9779..948dec0a796a 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs @@ -278,7 +278,7 @@ public JsonObject MapFromDataToStorageModel(Hotel dataModel) throw new NotImplementedException(); } - public Hotel MapFromStorageToDataModel(JsonObject storageModel, GetRecordOptions? options = null) + public Hotel MapFromStorageToDataModel(JsonObject storageModel, StorageToDataModelMapperOptions options) { throw new NotImplementedException(); } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs index 78b3b7dfeba0..3864b9a9082e 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs @@ -299,7 +299,7 @@ public PointStruct MapFromDataToStorageModel(HotelInfo dataModel) throw new NotImplementedException(); } - public HotelInfo MapFromStorageToDataModel(PointStruct storageModel, GetRecordOptions? 
options = null) + public HotelInfo MapFromStorageToDataModel(PointStruct storageModel, StorageToDataModelMapperOptions options) { throw new NotImplementedException(); } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs index 02cd55df0a41..143a4b41c447 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs @@ -280,7 +280,7 @@ private sealed class FailingMapper : IVectorStoreRecordMapper /// Map from the storage model to the consumer record data model. /// /// The storage data model record to map. - /// The of the operation that this mapping is needed for. + /// Options to control the mapping behavior. /// The mapped result. - TRecordDataModel MapFromStorageToDataModel(TStorageModel storageModel, GetRecordOptions? options = default); + TRecordDataModel MapFromStorageToDataModel(TStorageModel storageModel, StorageToDataModelMapperOptions options); } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/StorageToDataModelMapperOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/StorageToDataModelMapperOptions.cs new file mode 100644 index 000000000000..c350751c153d --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/StorageToDataModelMapperOptions.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Options to use with the method. +/// +public class StorageToDataModelMapperOptions +{ + /// + /// Gets or sets a value indicating whether to include vectors in the retrieval result.
+ /// + public bool IncludeVectors { get; init; } = false; +} From ef5bef530190a3680f71433f2fbf01a6ec264803 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 27 Jun 2024 18:15:21 +0100 Subject: [PATCH 08/48] .Net: Add unit tests for AzureAISearch and Redis Vector Record Stores. (#6968) ### Motivation and Context As part of updating the design of the memory connectors to allow custom schemas, adding unit tests for AzureAISearch and Redis Vector Record Stores. ### Description Add unit tests for AzureAISearch and Redis Vector Record Stores. ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- .../AzureAISearchVectorRecordStoreTests.cs | 464 ++++++++++++++++++ .../RedisVectorRecordStoreTests.cs | 441 +++++++++++++++++ .../Data/VectorStoreRecordPropertyReader.cs | 5 +- 3 files changed, 908 insertions(+), 2 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs create mode 100644 dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs new file mode 100644 index 000000000000..9d1bc5d0f244 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs @@ -0,0 +1,464 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.Json; +using System.Text.Json.Nodes; +using System.Threading; +using System.Threading.Tasks; +using Azure; +using Azure.Search.Documents; +using Azure.Search.Documents.Indexes; +using Azure.Search.Documents.Models; +using Microsoft.SemanticKernel.Connectors.AzureAISearch; +using Microsoft.SemanticKernel.Memory; +using Moq; +using Xunit; + +namespace SemanticKernel.Connectors.AzureAISearch.UnitTests; + +/// +/// Contains tests for the class. +/// +public class AzureAISearchVectorRecordStoreTests +{ + private const string TestCollectionName = "testcollection"; + private const string TestRecordKey1 = "testid1"; + private const string TestRecordKey2 = "testid2"; + + private readonly Mock _searchIndexClientMock; + private readonly Mock _searchClientMock; + + private readonly CancellationToken _testCancellationToken = new(false); + + public AzureAISearchVectorRecordStoreTests() + { + this._searchClientMock = new Mock(MockBehavior.Strict); + this._searchIndexClientMock = new Mock(MockBehavior.Strict); + this._searchIndexClientMock.Setup(x => x.GetSearchClient(TestCollectionName)).Returns(this._searchClientMock.Object); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange. + this._searchClientMock.Setup( + x => x.GetDocumentAsync( + TestRecordKey1, + It.Is(x => !x.SelectedFields.Any()), + this._testCancellationToken)) + .ReturnsAsync(Response.FromValue(CreateModel(TestRecordKey1, true), Mock.Of())); + + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act. + var actual = await sut.GetAsync( + TestRecordKey1, + new() + { + IncludeVectors = true, + CollectionName = passCollectionToMethod ? 
TestCollectionName : null + }, + this._testCancellationToken); + + // Assert. + Assert.NotNull(actual); + Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.Data); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange. + var storageObject = JsonSerializer.SerializeToNode(CreateModel(TestRecordKey1, false))!.AsObject(); + + this._searchClientMock.Setup( + x => x.GetDocumentAsync( + TestRecordKey1, + It.Is(x => x.SelectedFields.Contains("Key") && x.SelectedFields.Contains("Data")), + this._testCancellationToken)) + .ReturnsAsync(Response.FromValue(CreateModel(TestRecordKey1, true), Mock.Of())); + + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act. + var actual = await sut.GetAsync( + TestRecordKey1, + new() + { + IncludeVectors = false, + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken); + + // Assert. + Assert.NotNull(actual); + Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.Data); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange. + this._searchClientMock.Setup( + x => x.GetDocumentAsync( + It.IsAny(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync((string id, GetDocumentOptions options, CancellationToken cancellationToken) => + { + return Response.FromValue(CreateModel(id, true), Mock.Of()); + }); + + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act. 
+ var actual = await sut.GetBatchAsync( + [TestRecordKey1, TestRecordKey2], + new() + { + IncludeVectors = true, + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken).ToListAsync(); + + // Assert. + Assert.NotNull(actual); + Assert.Equal(2, actual.Count); + Assert.Equal(TestRecordKey1, actual[0].Key); + Assert.Equal(TestRecordKey2, actual[1].Key); + } + + [Fact] + public async Task CanGetRecordWithCustomMapperAsync() + { + // Arrange. + var storageObject = JsonSerializer.SerializeToNode(CreateModel(TestRecordKey1, true))!.AsObject(); + + // Arrange GetDocumentAsync mock returning JsonObject. + this._searchClientMock.Setup( + x => x.GetDocumentAsync( + TestRecordKey1, + It.Is(x => !x.SelectedFields.Any()), + this._testCancellationToken)) + .ReturnsAsync(Response.FromValue(storageObject, Mock.Of())); + + // Arrange mapper mock from JsonObject to data model. + var mapperMock = new Mock>(MockBehavior.Strict); + mapperMock.Setup( + x => x.MapFromStorageToDataModel( + storageObject, + It.Is(x => x.IncludeVectors))) + .Returns(CreateModel(TestRecordKey1, true)); + + // Arrange target with custom mapper. + var sut = new AzureAISearchVectorRecordStore( + this._searchIndexClientMock.Object, + new() + { + DefaultCollectionName = TestCollectionName, + MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, + JsonObjectCustomMapper = mapperMock.Object + }); + + // Act. + var actual = await sut.GetAsync(TestRecordKey1, new() { IncludeVectors = true }, this._testCancellationToken); + + // Assert. + Assert.NotNull(actual); + Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.Data); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanDeleteRecordAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange. 
+#pragma warning disable Moq1002 // Moq: No matching constructor + var indexDocumentsResultMock = new Mock(MockBehavior.Strict, new List()); +#pragma warning restore Moq1002 // Moq: No matching constructor + + this._searchClientMock.Setup( + x => x.DeleteDocumentsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); + + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act. + await sut.DeleteAsync( + TestRecordKey1, + new() { CollectionName = passCollectionToMethod ? TestCollectionName : null }, + this._testCancellationToken); + + // Assert. + this._searchClientMock.Verify( + x => x.DeleteDocumentsAsync( + "Key", + It.Is>(x => x.Count() == 1 && x.Contains(TestRecordKey1)), + It.IsAny(), + this._testCancellationToken), + Times.Once); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange. +#pragma warning disable Moq1002 // Moq: No matching constructor + var indexDocumentsResultMock = new Mock(MockBehavior.Strict, new List()); +#pragma warning restore Moq1002 // Moq: No matching constructor + + this._searchClientMock.Setup( + x => x.DeleteDocumentsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); + + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act. + await sut.DeleteBatchAsync( + [TestRecordKey1, TestRecordKey2], + new() { CollectionName = passCollectionToMethod ? TestCollectionName : null }, + this._testCancellationToken); + + // Assert. 
+ this._searchClientMock.Verify( + x => x.DeleteDocumentsAsync( + "Key", + It.Is>(x => x.Count() == 2 && x.Contains(TestRecordKey1) && x.Contains(TestRecordKey2)), + It.IsAny(), + this._testCancellationToken), + Times.Once); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanUpsertRecordAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange upload result object. +#pragma warning disable Moq1002 // Moq: No matching constructor + var indexingResult = new Mock(MockBehavior.Strict, TestRecordKey1, true, 200); + var indexingResults = new List(); + indexingResults.Add(indexingResult.Object); + var indexDocumentsResultMock = new Mock(MockBehavior.Strict, indexingResults); +#pragma warning restore Moq1002 // Moq: No matching constructor + + // Arrange upload. + this._searchClientMock.Setup( + x => x.UploadDocumentsAsync( + It.IsAny>(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); + + // Arrange sut. + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + var model = CreateModel(TestRecordKey1, true); + + // Act. + var actual = await sut.UpsertAsync( + model, + new() { CollectionName = passCollectionToMethod ? TestCollectionName : null }, + this._testCancellationToken); + + // Assert. + Assert.NotNull(actual); + Assert.Equal(TestRecordKey1, actual); + this._searchClientMock.Verify( + x => x.UploadDocumentsAsync( + It.Is>(x => x.Count() == 1 && x.First().Key == TestRecordKey1), + It.Is(x => x.ThrowOnAnyError == true), + this._testCancellationToken), + Times.Once); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange upload result object. 
+#pragma warning disable Moq1002 // Moq: No matching constructor + var indexingResult1 = new Mock(MockBehavior.Strict, TestRecordKey1, true, 200); + var indexingResult2 = new Mock(MockBehavior.Strict, TestRecordKey2, true, 200); + + var indexingResults = new List(); + indexingResults.Add(indexingResult1.Object); + indexingResults.Add(indexingResult2.Object); + var indexDocumentsResultMock = new Mock(MockBehavior.Strict, indexingResults); +#pragma warning restore Moq1002 // Moq: No matching constructor + + // Arrange upload. + this._searchClientMock.Setup( + x => x.UploadDocumentsAsync( + It.IsAny>(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); + + // Arrange sut. + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + var model1 = CreateModel(TestRecordKey1, true); + var model2 = CreateModel(TestRecordKey2, true); + + // Act. + var actual = await sut.UpsertBatchAsync( + [model1, model2], + new() { CollectionName = passCollectionToMethod ? TestCollectionName : null }, + this._testCancellationToken).ToListAsync(); + + // Assert. + Assert.NotNull(actual); + Assert.Equal(2, actual.Count); + Assert.Equal(TestRecordKey1, actual[0]); + Assert.Equal(TestRecordKey2, actual[1]); + + this._searchClientMock.Verify( + x => x.UploadDocumentsAsync( + It.Is>(x => x.Count() == 2 && x.First().Key == TestRecordKey1 && x.ElementAt(1).Key == TestRecordKey2), + It.Is(x => x.ThrowOnAnyError == true), + this._testCancellationToken), + Times.Once); + } + + [Fact] + public async Task CanUpsertRecordWithCustomMapperAsync() + { + // Arrange. 
+#pragma warning disable Moq1002 // Moq: No matching constructor + var indexingResult = new Mock(MockBehavior.Strict, TestRecordKey1, true, 200); + var indexingResults = new List(); + indexingResults.Add(indexingResult.Object); + var indexDocumentsResultMock = new Mock(MockBehavior.Strict, indexingResults); +#pragma warning restore Moq1002 // Moq: No matching constructor + + var model = CreateModel(TestRecordKey1, true); + var storageObject = JsonSerializer.SerializeToNode(model)!.AsObject(); + + // Arrange UploadDocumentsAsync mock returning upsert result. + this._searchClientMock.Setup( + x => x.UploadDocumentsAsync( + It.IsAny>(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync((IEnumerable documents, IndexDocumentsOptions options, CancellationToken cancellationToken) => + { + // Need to force a materialization of the documents enumerable here, otherwise the mapper (and therefore its mock) doesn't get invoked. + var materializedDocuments = documents.ToList(); + return Response.FromValue(indexDocumentsResultMock.Object, Mock.Of()); + }); + + // Arrange mapper mock from data model to JsonObject. + var mapperMock = new Mock>(MockBehavior.Strict); + mapperMock + .Setup(x => x.MapFromDataToStorageModel(It.IsAny())) + .Returns(storageObject); + + // Arrange target with custom mapper. + var sut = new AzureAISearchVectorRecordStore( + this._searchIndexClientMock.Object, + new() + { + DefaultCollectionName = TestCollectionName, + MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, + JsonObjectCustomMapper = mapperMock.Object + }); + + // Act. + await sut.UpsertAsync( + model, + null, + this._testCancellationToken); + + // Assert. 
+ mapperMock + .Verify( + x => x.MapFromDataToStorageModel(It.Is(x => x.Key == TestRecordKey1)), + Times.Once); + } + + private AzureAISearchVectorRecordStore CreateVectorRecordStore(bool useDefinition, bool passCollectionToMethod) + { + return new AzureAISearchVectorRecordStore( + this._searchIndexClientMock.Object, + new() + { + DefaultCollectionName = passCollectionToMethod ? null : TestCollectionName, + VectorStoreRecordDefinition = useDefinition ? this._singlePropsDefinition : null + }); + } + + private static SinglePropsModel CreateModel(string key, bool withVectors) + { + return new SinglePropsModel + { + Key = key, + Data = "data 1", + Vector = withVectors ? new float[] { 1, 2, 3, 4 } : null, + NotAnnotated = null, + }; + } + + private readonly VectorStoreRecordDefinition _singlePropsDefinition = new() + { + Properties = + [ + new VectorStoreRecordKeyProperty("Key"), + new VectorStoreRecordDataProperty("Data"), + new VectorStoreRecordVectorProperty("Vector") + ] + }; + + public sealed class SinglePropsModel + { + [VectorStoreRecordKey] + public string Key { get; set; } = string.Empty; + + [VectorStoreRecordData] + public string Data { get; set; } = string.Empty; + + [VectorStoreRecordVector] + public ReadOnlyMemory? Vector { get; set; } + + public string? NotAnnotated { get; set; } + } +} diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs new file mode 100644 index 000000000000..b57bdbec02b1 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs @@ -0,0 +1,441 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.Json.Nodes; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Memory; +using Moq; +using NRedisStack; +using StackExchange.Redis; +using Xunit; + +namespace Microsoft.SemanticKernel.Connectors.Redis.UnitTests; + +/// +/// Contains tests for the class. +/// +public class RedisVectorRecordStoreTests +{ + private const string TestCollectionName = "testcollection"; + private const string TestRecordKey1 = "testid1"; + private const string TestRecordKey2 = "testid2"; + + private readonly Mock _redisDatabaseMock; + + private readonly CancellationToken _testCancellationToken = new(false); + + public RedisVectorRecordStoreTests() + { + this._redisDatabaseMock = new Mock(MockBehavior.Strict); + + var batchMock = new Mock(); + this._redisDatabaseMock.Setup(x => x.CreateBatch(It.IsAny())).Returns(batchMock.Object); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + var redisResultString = """{ "Data": "data 1", "Vector": [1, 2, 3, 4] }"""; + SetupExecuteMock(this._redisDatabaseMock, redisResultString); + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act + var actual = await sut.GetAsync( + TestRecordKey1, + new() + { + IncludeVectors = true, + CollectionName = passCollectionToMethod ? 
TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + var expectedArgs = new object[] { TestRecordKey1 }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "JSON.GET", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + + Assert.NotNull(actual); + Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.Data); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + var redisResultString = """{ "Data": "data 1" }"""; + SetupExecuteMock(this._redisDatabaseMock, redisResultString); + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act + var actual = await sut.GetAsync( + TestRecordKey1, + new() + { + IncludeVectors = false, + CollectionName = passCollectionToMethod ? 
TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + var expectedArgs = new object[] { TestRecordKey1, "Data" }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "JSON.GET", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + + Assert.NotNull(actual); + Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.Data); + Assert.False(actual.Vector.HasValue); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + var redisResultString1 = """{ "Data": "data 1", "Vector": [1, 2, 3, 4] }"""; + var redisResultString2 = """{ "Data": "data 2", "Vector": [5, 6, 7, 8] }"""; + SetupExecuteMock(this._redisDatabaseMock, [redisResultString1, redisResultString2]); + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act + var actual = await sut.GetBatchAsync( + [TestRecordKey1, TestRecordKey2], + new() + { + IncludeVectors = true, + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken).ToListAsync(); + + // Assert + var expectedArgs = new object[] { TestRecordKey1, TestRecordKey2, "$" }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "JSON.MGET", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + + Assert.NotNull(actual); + Assert.Equal(2, actual.Count); + Assert.Equal(TestRecordKey1, actual[0].Key); + Assert.Equal("data 1", actual[0].Data); + Assert.Equal(TestRecordKey2, actual[1].Key); + Assert.Equal("data 2", actual[1].Data); + } + + [Fact] + public async Task CanGetRecordWithCustomMapperAsync() + { + // Arrange. + var redisResultString = """{ "Data": "data 1", "Vector": [1, 2, 3, 4] }"""; + SetupExecuteMock(this._redisDatabaseMock, redisResultString); + + // Arrange mapper mock from JsonNode to data model. 
+ var mapperMock = new Mock>(MockBehavior.Strict); + mapperMock.Setup( + x => x.MapFromStorageToDataModel( + It.IsAny<(string key, JsonNode node)>(), + It.IsAny())) + .Returns(CreateModel(TestRecordKey1, true)); + + // Arrange target with custom mapper. + var sut = new RedisVectorRecordStore( + this._redisDatabaseMock.Object, + new() + { + DefaultCollectionName = TestCollectionName, + MapperType = RedisRecordMapperType.JsonNodeCustomMapper, + JsonNodeCustomMapper = mapperMock.Object + }); + + // Act + var actual = await sut.GetAsync( + TestRecordKey1, + new() { IncludeVectors = true }, + this._testCancellationToken); + + // Assert + Assert.NotNull(actual); + Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.Data); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + + mapperMock + .Verify( + x => x.MapFromStorageToDataModel( + It.Is<(string key, JsonNode node)>(x => x.key == TestRecordKey1), + It.Is(x => x.IncludeVectors)), + Times.Once); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanDeleteRecordAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + SetupExecuteMock(this._redisDatabaseMock, "200"); + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act + await sut.DeleteAsync( + TestRecordKey1, + new() + { + CollectionName = passCollectionToMethod ? 
TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + var expectedArgs = new object[] { TestRecordKey1 }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "JSON.DEL", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + SetupExecuteMock(this._redisDatabaseMock, "200"); + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act + await sut.DeleteBatchAsync( + [TestRecordKey1, TestRecordKey2], + new() + { + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + var expectedArgs1 = new object[] { TestRecordKey1 }; + var expectedArgs2 = new object[] { TestRecordKey2 }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "JSON.DEL", + It.Is(x => x.SequenceEqual(expectedArgs1))), + Times.Once); + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "JSON.DEL", + It.Is(x => x.SequenceEqual(expectedArgs2))), + Times.Once); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanUpsertRecordAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + SetupExecuteMock(this._redisDatabaseMock, "OK"); + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var model = CreateModel(TestRecordKey1, true); + + // Act + await sut.UpsertAsync( + model, + new() + { + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + // TODO: Fix issue where NotAnnotated is being included in the JSON. 
+ var expectedArgs = new object[] { TestRecordKey1, "$", """{"Data":"data 1","Vector":[1,2,3,4],"NotAnnotated":null}""" }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "JSON.SET", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + SetupExecuteMock(this._redisDatabaseMock, "OK"); + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + var model1 = CreateModel(TestRecordKey1, true); + var model2 = CreateModel(TestRecordKey2, true); + + // Act + var actual = await sut.UpsertBatchAsync( + [model1, model2], + new() + { + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken).ToListAsync(); + + // Assert + Assert.NotNull(actual); + Assert.Equal(2, actual.Count); + Assert.Equal(TestRecordKey1, actual[0]); + Assert.Equal(TestRecordKey2, actual[1]); + + // TODO: Fix issue where NotAnnotated is being included in the JSON. + var expectedArgs = new object[] { TestRecordKey1, "$", """{"Data":"data 1","Vector":[1,2,3,4],"NotAnnotated":null}""", TestRecordKey2, "$", """{"Data":"data 1","Vector":[1,2,3,4],"NotAnnotated":null}""" }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "JSON.MSET", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + } + + [Fact] + public async Task CanUpsertRecordWithCustomMapperAsync() + { + // Arrange. + SetupExecuteMock(this._redisDatabaseMock, "OK"); + + // Arrange mapper mock from data model to JsonNode. 
+ var mapperMock = new Mock>(MockBehavior.Strict); + var jsonNode = """{"Data":"data 1","Vector":[1,2,3,4],"NotAnnotated":null}"""; + mapperMock + .Setup(x => x.MapFromDataToStorageModel(It.IsAny())) + .Returns((TestRecordKey1, JsonNode.Parse(jsonNode)!)); + + // Arrange target with custom mapper. + var sut = new RedisVectorRecordStore( + this._redisDatabaseMock.Object, + new() + { + DefaultCollectionName = TestCollectionName, + MapperType = RedisRecordMapperType.JsonNodeCustomMapper, + JsonNodeCustomMapper = mapperMock.Object + }); + + var model = CreateModel(TestRecordKey1, true); + + // Act + await sut.UpsertAsync( + model, + null, + this._testCancellationToken); + + // Assert + mapperMock + .Verify( + x => x.MapFromDataToStorageModel(It.Is(x => x == model)), + Times.Once); + } + + private RedisVectorRecordStore CreateVectorRecordStore(bool useDefinition, bool passCollectionToMethod) + { + return new RedisVectorRecordStore( + this._redisDatabaseMock.Object, + new() + { + DefaultCollectionName = passCollectionToMethod ? null : TestCollectionName, + VectorStoreRecordDefinition = useDefinition ? this._singlePropsDefinition : null + }); + } + + private static void SetupExecuteMock(Mock redisDatabaseMock, IEnumerable redisResultStrings) + { + var results = redisResultStrings + .Select(x => RedisResult.Create(new RedisValue(x))) + .ToArray(); + redisDatabaseMock + .Setup( + x => x.ExecuteAsync( + It.IsAny(), + It.IsAny())) + .ReturnsAsync(RedisResult.Create(results)); + } + + private static void SetupExecuteMock(Mock redisDatabaseMock, string redisResultString) + { + redisDatabaseMock + .Setup( + x => x.ExecuteAsync( + It.IsAny(), + It.IsAny())) + .ReturnsAsync(RedisResult.Create(new RedisValue(redisResultString))); + } + + private static SinglePropsModel CreateModel(string key, bool withVectors) + { + return new SinglePropsModel + { + Key = key, + Data = "data 1", + Vector = withVectors ? 
new float[] { 1, 2, 3, 4 } : null, + NotAnnotated = null, + }; + } + + private readonly VectorStoreRecordDefinition _singlePropsDefinition = new() + { + Properties = + [ + new VectorStoreRecordKeyProperty("Key"), + new VectorStoreRecordDataProperty("Data"), + new VectorStoreRecordVectorProperty("Vector") + ] + }; + + public sealed class SinglePropsModel + { + [VectorStoreRecordKey] + public string Key { get; set; } = string.Empty; + + [VectorStoreRecordData] + public string Data { get; set; } = string.Empty; + + [VectorStoreRecordVector] + public ReadOnlyMemory? Vector { get; set; } + + public string? NotAnnotated { get; set; } + } +} diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index 20318c8d385b..0d347673b370 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Linq; @@ -17,10 +18,10 @@ namespace Microsoft.SemanticKernel; internal static class VectorStoreRecordPropertyReader { /// Cache of property enumerations so that we don't incur reflection costs with each invocation. - private static readonly Dictionary dataProperties, List vectorProperties)> s_singleVectorPropertiesCache = new(); + private static readonly ConcurrentDictionary dataProperties, List vectorProperties)> s_singleVectorPropertiesCache = new(); /// Cache of property enumerations so that we don't incur reflection costs with each invocation. 
- private static readonly Dictionary dataProperties, List vectorProperties)> s_multipleVectorsPropertiesCache = new(); + private static readonly ConcurrentDictionary dataProperties, List vectorProperties)> s_multipleVectorsPropertiesCache = new(); /// /// Find the properties with , and attributes From 251218cd9948902922c04e8bec25d0900986b77a Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Fri, 28 Jun 2024 10:31:39 +0100 Subject: [PATCH 09/48] .Net: VectorStore: Adding unit tests for the Qdrant vector record store. (#6992) ### Motivation and Context As part of updating the design of the memory connectors to allow custom schemas, adding unit tests for Qdrant Vector Record Store. ### Description Add unit tests for Qdrant Vector Record Store. ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- .../Connectors.Memory.Qdrant.csproj | 1 + .../MockableQdrantClient.cs | 160 +++++ .../QdrantVectorRecordStore.cs | 18 +- .../RedisVectorRecordStore.cs | 2 +- .../QdrantVectorRecordStoreTests.cs | 659 ++++++++++++++++++ 5 files changed, 836 insertions(+), 4 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs create mode 100644 dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj b/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj index 7e8e40b3a783..322a58d22400 100644 --- 
a/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/Connectors.Memory.Qdrant.csproj @@ -25,6 +25,7 @@ + diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs new file mode 100644 index 000000000000..c12d51bcf028 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs @@ -0,0 +1,160 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; +using Qdrant.Client; +using Qdrant.Client.Grpc; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Decorator class for that exposes the required methods as virtual allowing for mocking in unit tests. +/// +internal class MockableQdrantClient +{ + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + private readonly QdrantClient _qdrantClient; + + /// + /// Initializes a new instance of the class. + /// + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + public MockableQdrantClient(QdrantClient qdrantClient) + { + Verify.NotNull(qdrantClient); + this._qdrantClient = qdrantClient; + } + +#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + /// + /// Constructor for mocking purposes only. + /// + internal MockableQdrantClient() + { + } +#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + + /// + /// Delete a point. + /// + /// The name of the collection. + /// The ID to delete. + /// Whether to wait until the changes have been applied. Defaults to true. + /// Write ordering guarantees. Defaults to Weak. 
+ /// Option for custom sharding to specify used shard keys. + /// + /// The token to monitor for cancellation requests. The default value is . + /// + public virtual Task DeleteAsync( + string collectionName, + ulong id, + bool wait = true, + WriteOrderingType? ordering = null, + ShardKeySelector? shardKeySelector = null, + CancellationToken cancellationToken = default) + => this._qdrantClient.DeleteAsync(collectionName, id, wait, ordering, shardKeySelector, cancellationToken: cancellationToken); + + /// + /// Delete a point. + /// + /// The name of the collection. + /// The ID to delete. + /// Whether to wait until the changes have been applied. Defaults to true. + /// Write ordering guarantees. Defaults to Weak. + /// Option for custom sharding to specify used shard keys. + /// + /// The token to monitor for cancellation requests. The default value is . + /// + public virtual Task DeleteAsync( + string collectionName, + Guid id, + bool wait = true, + WriteOrderingType? ordering = null, + ShardKeySelector? shardKeySelector = null, + CancellationToken cancellationToken = default) + => this._qdrantClient.DeleteAsync(collectionName, id, wait, ordering, shardKeySelector, cancellationToken: cancellationToken); + + /// + /// Delete multiple points. + /// + /// The name of the collection. + /// The IDs to delete. + /// Whether to wait until the changes have been applied. Defaults to true. + /// Write ordering guarantees. Defaults to Weak. + /// Option for custom sharding to specify used shard keys. + /// + /// The token to monitor for cancellation requests. The default value is . + /// + public virtual Task DeleteAsync( + string collectionName, + IReadOnlyList ids, + bool wait = true, + WriteOrderingType? ordering = null, + ShardKeySelector? shardKeySelector = null, + CancellationToken cancellationToken = default) + => this._qdrantClient.DeleteAsync(collectionName, ids, wait, ordering, shardKeySelector, cancellationToken: cancellationToken); + + /// + /// Delete multiple points.
+ /// + /// The name of the collection. + /// The IDs to delete. + /// Whether to wait until the changes have been applied. Defaults to true. + /// Write ordering guarantees. Defaults to Weak. + /// Option for custom sharding to specify used shard keys. + /// + /// The token to monitor for cancellation requests. The default value is . + /// + public virtual Task DeleteAsync( + string collectionName, + IReadOnlyList ids, + bool wait = true, + WriteOrderingType? ordering = null, + ShardKeySelector? shardKeySelector = null, + CancellationToken cancellationToken = default) + => this._qdrantClient.DeleteAsync(collectionName, ids, wait, ordering, shardKeySelector, cancellationToken: cancellationToken); + + /// + /// Perform insert and updates on points. If a point with a given ID already exists, it will be overwritten. + /// + /// The name of the collection. + /// The points to be upserted. + /// Whether to wait until the changes have been applied. Defaults to true. + /// Write ordering guarantees. + /// Option for custom sharding to specify used shard keys. + /// + /// The token to monitor for cancellation requests. The default value is . + /// + public virtual Task UpsertAsync( + string collectionName, + IReadOnlyList points, + bool wait = true, + WriteOrderingType? ordering = null, + ShardKeySelector? shardKeySelector = null, + CancellationToken cancellationToken = default) + => this._qdrantClient.UpsertAsync(collectionName, points, wait, ordering, shardKeySelector, cancellationToken); + + /// + /// Retrieve points. + /// + /// The name of the collection. + /// List of points to retrieve. + /// Whether to include the payload or not. + /// Whether to include the vectors or not. + /// Options for specifying read consistency guarantees. + /// Option for custom sharding to specify used shard keys. + /// + /// The token to monitor for cancellation requests. The default value is . 
+ /// + public virtual Task> RetrieveAsync( + string collectionName, + IReadOnlyList ids, + bool withPayload = true, + bool withVectors = false, + ReadConsistency? readConsistency = null, + ShardKeySelector? shardKeySelector = null, + CancellationToken cancellationToken = default) + => this._qdrantClient.RetrieveAsync(collectionName, ids, withPayload, withVectors, readConsistency, shardKeySelector, cancellationToken); +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs index d18754741817..6e15df2d46f3 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs @@ -30,7 +30,7 @@ public sealed class QdrantVectorRecordStore : IVectorRecordStoreQdrant client that can be used to manage the collections and points in a Qdrant store. - private readonly QdrantClient _qdrantClient; + private readonly MockableQdrantClient _qdrantClient; /// Optional configuration options for this class. private readonly QdrantVectorRecordStoreOptions _options; @@ -43,9 +43,21 @@ public sealed class QdrantVectorRecordStore : IVectorRecordStore /// Qdrant client that can be used to manage the collections and points in a Qdrant store. /// Optional configuration options for this class. - /// - /// + /// Thrown if the is null. + /// Thrown for any misconfigured options. public QdrantVectorRecordStore(QdrantClient qdrantClient, QdrantVectorRecordStoreOptions? options = null) + : this(new MockableQdrantClient(qdrantClient), options) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + /// Optional configuration options for this class. + /// Thrown if the is null. + /// Thrown for any misconfigured options. 
+ internal QdrantVectorRecordStore(MockableQdrantClient qdrantClient, QdrantVectorRecordStoreOptions? options = null) { // Verify. Verify.NotNull(qdrantClient); diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs index 9a21339f9a8d..26b0cd69ac48 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs @@ -65,7 +65,7 @@ public sealed class RedisVectorRecordStore : IVectorRecordStoreThe redis database to read/write records from. /// Optional configuration options for this class. /// Throw when parameters are invalid. - public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions? options) + public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions? options = null) { // Verify. Verify.NotNull(database); diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs new file mode 100644 index 000000000000..456684daa51c --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs @@ -0,0 +1,659 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Memory; +using Moq; +using Qdrant.Client.Grpc; +using Xunit; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant.UnitTests; + +/// +/// Contains tests for the class. 
+/// +public class QdrantVectorRecordStoreTests +{ + private const string TestCollectionName = "testcollection"; + private const ulong UlongTestRecordKey1 = 1; + private const ulong UlongTestRecordKey2 = 2; + private static readonly Guid s_guidTestRecordKey1 = Guid.Parse("11111111-1111-1111-1111-111111111111"); + private static readonly Guid s_guidTestRecordKey2 = Guid.Parse("22222222-2222-2222-2222-222222222222"); + + private readonly Mock _qdrantClientMock; + + private readonly CancellationToken _testCancellationToken = new(false); + + public QdrantVectorRecordStoreTests() + { + this._qdrantClientMock = new Mock(MockBehavior.Strict); + } + + [Theory] + [MemberData(nameof(TestOptions))] + public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors, TKey testRecordKey) + { + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + + // Arrange. + var retrievedPoint = CreateRetrievedPoint(hasNamedVectors, testRecordKey); + this.SetupRetrieveMock([retrievedPoint]); + + // Act. + var actual = await sut.GetAsync( + testRecordKey, + new() + { + IncludeVectors = true, + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken); + + // Assert. + this._qdrantClientMock + .Verify( + x => x.RetrieveAsync( + TestCollectionName, + It.Is>(x => x.Count == 1 && (testRecordKey!.GetType() == typeof(ulong) && x[0].Num == (testRecordKey as ulong?) 
|| testRecordKey!.GetType() == typeof(Guid) && x[0].Uuid == (testRecordKey as Guid?).ToString())), + true, + true, + null, + null, + this._testCancellationToken), + Times.Once); + + Assert.NotNull(actual); + Assert.Equal(testRecordKey, actual.Key); + Assert.Equal("data 1", actual.Data); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + } + + [Theory] + [MemberData(nameof(TestOptions))] + public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors, TKey testRecordKey) + { + // Arrange. + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + var retrievedPoint = CreateRetrievedPoint(hasNamedVectors, testRecordKey); + this.SetupRetrieveMock([retrievedPoint]); + + // Act. + var actual = await sut.GetAsync( + testRecordKey, + new() + { + IncludeVectors = false, + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken); + + // Assert. + this._qdrantClientMock + .Verify( + x => x.RetrieveAsync( + TestCollectionName, + It.Is>(x => x.Count == 1 && (testRecordKey!.GetType() == typeof(ulong) && x[0].Num == (testRecordKey as ulong?) || testRecordKey!.GetType() == typeof(Guid) && x[0].Uuid == (testRecordKey as Guid?).ToString())), + true, + false, + null, + null, + this._testCancellationToken), + Times.Once); + + Assert.NotNull(actual); + Assert.Equal(testRecordKey, actual.Key); + Assert.Equal("data 1", actual.Data); + Assert.Null(actual.Vector); + } + + [Theory] + [MemberData(nameof(MultiRecordTestOptions))] + public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors, TKey[] testRecordKeys) + { + // Arrange. 
+ var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + var retrievedPoint1 = CreateRetrievedPoint(hasNamedVectors, UlongTestRecordKey1); + var retrievedPoint2 = CreateRetrievedPoint(hasNamedVectors, UlongTestRecordKey2); + this.SetupRetrieveMock(testRecordKeys.Select(x => CreateRetrievedPoint(hasNamedVectors, x)).ToList()); + + // Act. + var actual = await sut.GetBatchAsync( + testRecordKeys, + new() + { + IncludeVectors = true, + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken).ToListAsync(); + + // Assert. + this._qdrantClientMock + .Verify( + x => x.RetrieveAsync( + TestCollectionName, + It.Is>(x => + x.Count == 2 && + (testRecordKeys[0]!.GetType() == typeof(ulong) && x[0].Num == (testRecordKeys[0] as ulong?) || testRecordKeys[0]!.GetType() == typeof(Guid) && x[0].Uuid == (testRecordKeys[0] as Guid?).ToString()) && + (testRecordKeys[1]!.GetType() == typeof(ulong) && x[1].Num == (testRecordKeys[1] as ulong?) || testRecordKeys[1]!.GetType() == typeof(Guid) && x[1].Uuid == (testRecordKeys[1] as Guid?).ToString())), + true, + true, + null, + null, + this._testCancellationToken), + Times.Once); + + Assert.NotNull(actual); + Assert.Equal(2, actual.Count); + Assert.Equal(testRecordKeys[0], actual[0].Key); + Assert.Equal(testRecordKeys[1], actual[1].Key); + } + + [Fact] + public async Task CanGetRecordWithCustomMapperAsync() + { + // Arrange. + var retrievedPoint = CreateRetrievedPoint(true, UlongTestRecordKey1); + this.SetupRetrieveMock([retrievedPoint]); + + // Arrange mapper mock from PointStruct to data model. + var mapperMock = new Mock, PointStruct>>(MockBehavior.Strict); + mapperMock.Setup( + x => x.MapFromStorageToDataModel( + It.IsAny(), + It.IsAny())) + .Returns(CreateModel(UlongTestRecordKey1, true)); + + // Arrange target with custom mapper. 
+ var sut = new QdrantVectorRecordStore>( + this._qdrantClientMock.Object, + new() + { + DefaultCollectionName = TestCollectionName, + HasNamedVectors = true, + MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, + PointStructCustomMapper = mapperMock.Object + }); + + // Act + var actual = await sut.GetAsync( + UlongTestRecordKey1, + new() { IncludeVectors = true }, + this._testCancellationToken); + + // Assert + Assert.NotNull(actual); + Assert.Equal(UlongTestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.Data); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + + mapperMock + .Verify( + x => x.MapFromStorageToDataModel( + It.Is(x => x.Id.Num == UlongTestRecordKey1), + It.Is(x => x.IncludeVectors)), + Times.Once); + } + + [Theory] + [InlineData(true, true, true)] + [InlineData(true, true, false)] + [InlineData(true, false, true)] + [InlineData(true, false, false)] + [InlineData(false, true, true)] + [InlineData(false, true, false)] + [InlineData(false, false, true)] + [InlineData(false, false, false)] + public async Task CanDeleteUlongRecordAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors) + { + // Arrange + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + this.SetupDeleteMocks(); + + // Act + await sut.DeleteAsync( + UlongTestRecordKey1, + new() + { + CollectionName = passCollectionToMethod ? 
TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + this._qdrantClientMock + .Verify( + x => x.DeleteAsync( + TestCollectionName, + It.Is(x => x == UlongTestRecordKey1), + true, + null, + null, + this._testCancellationToken), + Times.Once); + } + + [Theory] + [InlineData(true, true, true)] + [InlineData(true, true, false)] + [InlineData(true, false, true)] + [InlineData(true, false, false)] + [InlineData(false, true, true)] + [InlineData(false, true, false)] + [InlineData(false, false, true)] + [InlineData(false, false, false)] + public async Task CanDeleteGuidRecordAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors) + { + // Arrange + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + this.SetupDeleteMocks(); + + // Act + await sut.DeleteAsync( + s_guidTestRecordKey1, + new() + { + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + this._qdrantClientMock + .Verify( + x => x.DeleteAsync( + TestCollectionName, + It.Is(x => x == s_guidTestRecordKey1), + true, + null, + null, + this._testCancellationToken), + Times.Once); + } + + [Theory] + [InlineData(true, true, true)] + [InlineData(true, true, false)] + [InlineData(true, false, true)] + [InlineData(true, false, false)] + [InlineData(false, true, true)] + [InlineData(false, true, false)] + [InlineData(false, false, true)] + [InlineData(false, false, false)] + public async Task CanDeleteManyUlongRecordsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors) + { + // Arrange + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + this.SetupDeleteMocks(); + + // Act + await sut.DeleteBatchAsync( + [UlongTestRecordKey1, UlongTestRecordKey2], + new() + { + CollectionName = passCollectionToMethod ? 
TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + this._qdrantClientMock + .Verify( + x => x.DeleteAsync( + TestCollectionName, + It.Is>(x => x.Count == 2 && x.Contains(UlongTestRecordKey1) && x.Contains(UlongTestRecordKey2)), + true, + null, + null, + this._testCancellationToken), + Times.Once); + } + + [Theory] + [InlineData(true, true, true)] + [InlineData(true, true, false)] + [InlineData(true, false, true)] + [InlineData(true, false, false)] + [InlineData(false, true, true)] + [InlineData(false, true, false)] + [InlineData(false, false, true)] + [InlineData(false, false, false)] + public async Task CanDeleteManyGuidRecordsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors) + { + // Arrange + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + this.SetupDeleteMocks(); + + // Act + await sut.DeleteBatchAsync( + [s_guidTestRecordKey1, s_guidTestRecordKey2], + new() + { + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + this._qdrantClientMock + .Verify( + x => x.DeleteAsync( + TestCollectionName, + It.Is>(x => x.Count == 2 && x.Contains(s_guidTestRecordKey1) && x.Contains(s_guidTestRecordKey2)), + true, + null, + null, + this._testCancellationToken), + Times.Once); + } + + [Theory] + [MemberData(nameof(TestOptions))] + public async Task CanUpsertRecordAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors, TKey testRecordKey) + { + // Arrange + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + this.SetupUpsertMock(); + + // Act + await sut.UpsertAsync( + CreateModel(testRecordKey, true), + new() + { + CollectionName = passCollectionToMethod ? 
TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + this._qdrantClientMock + .Verify( + x => x.UpsertAsync( + TestCollectionName, + It.Is>(x => x.Count == 1 && (testRecordKey!.GetType() == typeof(ulong) && x[0].Id.Num == (testRecordKey as ulong?) || testRecordKey!.GetType() == typeof(Guid) && x[0].Id.Uuid == (testRecordKey as Guid?).ToString())), + true, + null, + null, + this._testCancellationToken), + Times.Once); + } + + [Theory] + [MemberData(nameof(MultiRecordTestOptions))] + public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors, TKey[] testRecordKeys) + { + // Arrange + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + this.SetupUpsertMock(); + + var models = testRecordKeys.Select(x => CreateModel(x, true)); + + // Act + var actual = await sut.UpsertBatchAsync( + models, + new() + { + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken).ToListAsync(); + + // Assert + Assert.NotNull(actual); + Assert.Equal(2, actual.Count); + Assert.Equal(testRecordKeys[0], actual[0]); + Assert.Equal(testRecordKeys[1], actual[1]); + + this._qdrantClientMock + .Verify( + x => x.UpsertAsync( + TestCollectionName, + It.Is>(x => + x.Count == 2 && + (testRecordKeys[0]!.GetType() == typeof(ulong) && x[0].Id.Num == (testRecordKeys[0] as ulong?) || testRecordKeys[0]!.GetType() == typeof(Guid) && x[0].Id.Uuid == (testRecordKeys[0] as Guid?).ToString()) && + (testRecordKeys[1]!.GetType() == typeof(ulong) && x[1].Id.Num == (testRecordKeys[1] as ulong?) || testRecordKeys[1]!.GetType() == typeof(Guid) && x[1].Id.Uuid == (testRecordKeys[1] as Guid?).ToString())), + true, + null, + null, + this._testCancellationToken), + Times.Once); + } + + [Fact] + public async Task CanUpsertRecordWithCustomMapperAsync() + { + // Arrange. 
+ this.SetupUpsertMock(); + var pointStruct = new PointStruct + { + Id = new() { Num = UlongTestRecordKey1 }, + Payload = { ["Data"] = "data 1" }, + Vectors = new[] { 1f, 2f, 3f, 4f } + }; + + // Arrange mapper mock from data model to PointStruct. + var mapperMock = new Mock, PointStruct>>(MockBehavior.Strict); + mapperMock + .Setup(x => x.MapFromDataToStorageModel(It.IsAny>())) + .Returns(pointStruct); + + // Arrange target with custom mapper. + var sut = new QdrantVectorRecordStore>( + this._qdrantClientMock.Object, + new() + { + DefaultCollectionName = TestCollectionName, + HasNamedVectors = false, + MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, + PointStructCustomMapper = mapperMock.Object + }); + + var model = CreateModel(UlongTestRecordKey1, true); + + // Act + await sut.UpsertAsync( + model, + null, + this._testCancellationToken); + + // Assert + mapperMock + .Verify( + x => x.MapFromDataToStorageModel(It.Is>(x => x == model)), + Times.Once); + } + + private void SetupRetrieveMock(List retrievedPoints) + { + this._qdrantClientMock + .Setup(x => x.RetrieveAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), // With Payload + It.IsAny(), // With Vectors + It.IsAny(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(retrievedPoints); + } + + private void SetupDeleteMocks() + { + this._qdrantClientMock + .Setup(x => x.DeleteAsync( + It.IsAny(), + It.IsAny(), + It.IsAny(), // wait + It.IsAny(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(new UpdateResult()); + + this._qdrantClientMock + .Setup(x => x.DeleteAsync( + It.IsAny(), + It.IsAny(), + It.IsAny(), // wait + It.IsAny(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(new UpdateResult()); + + this._qdrantClientMock + .Setup(x => x.DeleteAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), // wait + It.IsAny(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(new UpdateResult()); + + this._qdrantClientMock + .Setup(x => x.DeleteAsync( + 
It.IsAny(), + It.IsAny>(), + It.IsAny(), // wait + It.IsAny(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(new UpdateResult()); + } + + private void SetupUpsertMock() + { + this._qdrantClientMock + .Setup(x => x.UpsertAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), // wait + It.IsAny(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(new UpdateResult()); + } + + private static RetrievedPoint CreateRetrievedPoint(bool hasNamedVectors, TKey recordKey) + { + RetrievedPoint point; + if (hasNamedVectors) + { + var namedVectors = new NamedVectors(); + namedVectors.Vectors.Add("Vector", new[] { 1f, 2f, 3f, 4f }); + point = new RetrievedPoint() + { + Payload = { ["Data"] = "data 1" }, + Vectors = new Vectors { Vectors_ = namedVectors } + }; + } + else + { + point = new RetrievedPoint() + { + Payload = { ["Data"] = "data 1" }, + Vectors = new[] { 1f, 2f, 3f, 4f } + }; + } + + if (recordKey is ulong ulongKey) + { + point.Id = ulongKey; + } + + if (recordKey is Guid guidKey) + { + point.Id = guidKey; + } + + return point; + } + + private IVectorRecordStore> CreateVectorRecordStore(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors) + { + var store = new QdrantVectorRecordStore>( + this._qdrantClientMock.Object, + new() + { + DefaultCollectionName = passCollectionToMethod ? null : TestCollectionName, + VectorStoreRecordDefinition = useDefinition ? this._singlePropsDefinition : null, + HasNamedVectors = hasNamedVectors + }) as IVectorRecordStore>; + return store!; + } + + private static SinglePropsModel CreateModel(T key, bool withVectors) + { + return new SinglePropsModel + { + Key = key, + Data = "data 1", + Vector = withVectors ? 
new float[] { 1, 2, 3, 4 } : null, + NotAnnotated = null, + }; + } + + private readonly VectorStoreRecordDefinition _singlePropsDefinition = new() + { + Properties = + [ + new VectorStoreRecordKeyProperty("Key"), + new VectorStoreRecordDataProperty("Data"), + new VectorStoreRecordVectorProperty("Vector") + ] + }; + + public sealed class SinglePropsModel + { + [VectorStoreRecordKey] + public required T Key { get; set; } + + [VectorStoreRecordData] + public string Data { get; set; } = string.Empty; + + [VectorStoreRecordVector] + public ReadOnlyMemory? Vector { get; set; } + + public string? NotAnnotated { get; set; } + } + + public static IEnumerable TestOptions + => GenerateAllCombinations(new object[][] { + new object[] { true, false }, + new object[] { true, false }, + new object[] { true, false }, + new object[] { UlongTestRecordKey1, s_guidTestRecordKey1 } + }); + + public static IEnumerable MultiRecordTestOptions + => GenerateAllCombinations(new object[][] { + new object[] { true, false }, + new object[] { true, false }, + new object[] { true, false }, + new object[] { new ulong[] { UlongTestRecordKey1, UlongTestRecordKey2 }, new Guid[] { s_guidTestRecordKey1, s_guidTestRecordKey2 } } + }); + + private static object[][] GenerateAllCombinations(object[][] input) + { + var counterArray = Enumerable.Range(0, input.Length).Select(x => 0).ToArray(); + + // Add each item from the first option set as a separate row. + object[][] currentCombinations = input[0].Select(x => new object[1] { x }).ToArray(); + + // Loop through each additional option set. + for (int currentOptionSetIndex = 1; currentOptionSetIndex < input.Length; currentOptionSetIndex++) + { + var iterationCombinations = new List(); + var currentOptionSet = input[currentOptionSetIndex]; + + // Loop through each row we have already. + foreach (var currentCombination in currentCombinations) + { + // Add each of the values from the new options set to the current row to generate a new row. 
+ for (var currentColumnRow = 0; currentColumnRow < currentOptionSet.Length; currentColumnRow++) + { + iterationCombinations.Add(currentCombination.Append(currentOptionSet[currentColumnRow]).ToArray()); + } + } + + currentCombinations = iterationCombinations.ToArray(); + } + + return currentCombinations; + } +} From 2bfb66a64c2b2ca5bfd004182754c57235e4e36a Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:31:23 +0100 Subject: [PATCH 10/48] .Net: VectorStore: Add strongly typed properties for vector store exception metadata. (#7002) ### Motivation and Context As part of the new vector store implementation, we need to add additional metadata to exceptions to share information about the operation that failed. ### Description Changing the way in which the metadata is stored to strongly typed properties. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../AzureAISearchVectorRecordStore.cs | 30 +++++------ .../QdrantVectorRecordStore.cs | 15 +++--- .../RedisVectorRecordStore.cs | 15 +++--- .../src/Data/VectorStoreErrorHandler.cs | 15 +++--- .../Memory/VectorStoreException.cs | 52 +++++++++++++++++++ .../Memory/VectorStoreOperationException.cs | 4 +- .../VectorStoreRecordMappingException.cs | 4 +- 7 files changed, 88 insertions(+), 47 deletions(-) create mode 100644 dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreException.cs diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs 
b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index 612a93ba64c1..7704613f3ef9 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -386,27 +386,21 @@ private static async Task RunOperationAsync(string collectionName, string } catch (AggregateException ex) when (ex.InnerException is RequestFailedException innerEx) { - var wrapperException = new VectorStoreOperationException("Call to vector store failed.", ex); - - // Using Open Telemetry standard for naming of these entries. - // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", DatabaseName); - wrapperException.Data.Add("db.collection.name", collectionName); - wrapperException.Data.Add("db.operation.name", operationName); - - throw wrapperException; + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + CollectionName = collectionName, + OperationName = operationName + }; } catch (RequestFailedException ex) { - var wrapperException = new VectorStoreOperationException("Call to vector store failed.", ex); - - // Using Open Telemetry standard for naming of these entries. 
- // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", DatabaseName); - wrapperException.Data.Add("db.collection.name", collectionName); - wrapperException.Data.Add("db.operation.name", operationName); - - throw wrapperException; + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + CollectionName = collectionName, + OperationName = operationName + }; } } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs index 6e15df2d46f3..e89492dbc78e 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs @@ -368,15 +368,12 @@ private static async Task RunOperationAsync(string collectionName, string } catch (RpcException ex) { - var wrapperException = new VectorStoreOperationException("Call to vector store failed.", ex); - - // Using Open Telemetry standard for naming of these entries. 
- // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", DatabaseName); - wrapperException.Data.Add("db.collection.name", collectionName); - wrapperException.Data.Add("db.operation.name", operationName); - - throw wrapperException; + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + CollectionName = collectionName, + OperationName = operationName + }; } } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs index 26b0cd69ac48..4552339b9977 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs @@ -359,15 +359,12 @@ private static async Task RunOperationAsync(string collectionName, string } catch (RedisConnectionException ex) { - var wrapperException = new VectorStoreOperationException("Call to vector store failed.", ex); - - // Using Open Telemetry standard for naming of these entries. 
- // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", DatabaseName); - wrapperException.Data.Add("db.collection.name", collectionName); - wrapperException.Data.Add("db.operation.name", operationName); - - throw wrapperException; + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + CollectionName = collectionName, + OperationName = operationName + }; } } } diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs index f2fc3f992de7..aaec06207ef1 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs @@ -31,15 +31,12 @@ public static T RunModelConversion(string databaseSystemName, string collecti } catch (Exception ex) { - var wrapperException = new VectorStoreRecordMappingException("Failed to convert vector store record.", ex); - - // Using Open Telemetry standard for naming of these entries. - // https://opentelemetry.io/docs/specs/semconv/attributes-registry/db/ - wrapperException.Data.Add("db.system", databaseSystemName); - wrapperException.Data.Add("db.collection.name", collectionName); - wrapperException.Data.Add("db.operation.name", operationName); - - throw wrapperException; + throw new VectorStoreRecordMappingException("Failed to convert vector store record.", ex) + { + VectorStoreType = databaseSystemName, + CollectionName = collectionName, + OperationName = operationName + }; } } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreException.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreException.cs new file mode 100644 index 000000000000..9c481b5b8d55 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreException.cs @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Memory; + +/// +/// Base exception type thrown for any type of failure when using vector stores. +/// +[Experimental("SKEXP0001")] +public abstract class VectorStoreException : KernelException +{ + /// + /// Initializes a new instance of the class. + /// + protected VectorStoreException() + { + } + + /// + /// Initializes a new instance of the class with a specified error message. + /// + /// The error message that explains the reason for the exception. + protected VectorStoreException(string? message) : base(message) + { + } + + /// + /// Initializes a new instance of the class with a specified error message and a reference to the inner exception that is the cause of this exception. + /// + /// The error message that explains the reason for the exception. + /// The exception that is the cause of the current exception, or a null reference if no inner exception is specified. + protected VectorStoreException(string? message, Exception? innerException) : base(message, innerException) + { + } + + /// + /// Gets or sets the type of vector store that the failing operation was performed on. + /// + public string? VectorStoreType { get; init; } + + /// + /// Gets or sets the name of the vector store collection that the failing operation was performed on. + /// + public string? CollectionName { get; init; } + + /// + /// Gets or sets the name of the vector store operation that failed. + /// + public string? OperationName { get; init; } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreOperationException.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreOperationException.cs index 1390f613324c..9533982a96ae 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreOperationException.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreOperationException.cs @@ -1,13 +1,15 @@ // Copyright (c) Microsoft. All rights reserved. 
using System; +using System.Diagnostics.CodeAnalysis; namespace Microsoft.SemanticKernel.Memory; /// /// Exception thrown when a vector store command fails, such as upserting a record or deleting a collection. /// -public class VectorStoreOperationException : KernelException +[Experimental("SKEXP0001")] +public class VectorStoreOperationException : VectorStoreException { /// /// Initializes a new instance of the class. diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreRecordMappingException.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreRecordMappingException.cs index 8955175737e9..6683f3412a17 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreRecordMappingException.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreRecordMappingException.cs @@ -1,13 +1,15 @@ // Copyright (c) Microsoft. All rights reserved. using System; +using System.Diagnostics.CodeAnalysis; namespace Microsoft.SemanticKernel.Memory; /// /// Exception thrown when a failure occurs while trying to convert models for storage or retrieval. /// -public class VectorStoreRecordMappingException : KernelException +[Experimental("SKEXP0001")] +public class VectorStoreRecordMappingException : VectorStoreException { /// /// Initializes a new instance of the class. From 4d8dcfd77e0271f7eb716b88839d0cb5dbb2e48a Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:20:48 +0100 Subject: [PATCH 11/48] .Net: VectorStore: Add custom serialization options / naming support. (#7028) ### Motivation and Context As part of the new vector store implementation, we need to allow developers to provide their own names or json serialization options for storage properties. ### Description Adding support for this to AzureAISearch, Redis and Qdrant implementations in the following way: AzureAISearch: via JsonPropertyName attributes or JsonSerializerOptions. 
Redis: via JsonPropertyName attributes or JsonSerializerOptions. Qdrant: via StoragePropertyName on VectorRecord definitions. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../AzureAISearchVectorRecordStore.cs | 16 +-- .../AzureAISearchVectorRecordStoreOptions.cs | 12 ++- .../QdrantVectorStoreRecordMapper.cs | 23 ++-- .../RedisVectorRecordStore.cs | 54 ++++++---- .../RedisVectorRecordStoreOptions.cs | 14 ++- .../AzureAISearchVectorStoreFixture.cs | 2 + .../Memory/Qdrant/QdrantVectorStoreFixture.cs | 16 +-- .../Memory/Redis/RedisVectorStoreFixture.cs | 10 +- .../Data/VectorStoreRecordPropertyReader.cs | 100 ++++++++++++++++-- .../VectorStoreRecordDataAttribute.cs | 6 ++ .../VectorStoreRecordKeyAttribute.cs | 5 + .../VectorStoreRecordVectorAttribute.cs | 5 + .../VectorStoreRecordProperty.cs | 6 ++ .../VectorStoreRecordPropertyReaderTests.cs | 59 ++++++++++- 14 files changed, 266 insertions(+), 62 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index 7704613f3ef9..a6dabed753c9 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -6,6 +6,7 @@ using System.Linq; using System.Reflection; using System.Runtime.CompilerServices; +using System.Text.Json; using System.Text.Json.Nodes; using System.Threading; using System.Threading.Tasks; @@ -94,12 +95,13 @@ 
public AzureAISearchVectorRecordStore(SearchIndexClient searchIndexClient, Azure } // Validate property types and store for later use. + var jsonSerializerOptions = this._options.JsonSerializerOptions ?? JsonSerializerOptions.Default; VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); - this._keyPropertyName = properties.keyProperty.Name; + this._keyPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, properties.keyProperty); // Build the list of property names from the current model that are either key or data fields. - this._nonVectorPropertyNames = properties.dataProperties.Concat([properties.keyProperty]).Select(x => x.Name).ToList(); + this._nonVectorPropertyNames = properties.dataProperties.Concat([properties.keyProperty]).Select(x => VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, x)).ToList(); } /// @@ -212,7 +214,7 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco /// The name of the collection to retrieve the record from. /// The key of the record to get. /// A value indicating whether to include vectors in the result or not. - /// The azure ai search sdk options for getting a document. + /// The Azure AI Search sdk options for getting a document. /// The to monitor for cancellation requests. The default is . /// The retrieved document, mapped to the consumer data model. private async Task GetDocumentAndMapToDataModelAsync( @@ -258,7 +260,7 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco /// The search client to use when uploading the document. /// The name of the collection to upsert the records to. /// The records to upload. - /// The azure ai search sdk options for uploading a document. + /// The Azure AI Search sdk options for uploading a document. 
/// The to monitor for cancellation requests. The default is . /// The document upload result. private Task> MapToStorageModelAndUploadDocumentAsync( @@ -330,10 +332,10 @@ private string ChooseCollectionName(string? operationCollectionName) } /// - /// Convert the public options model to the azure ai search options model. + /// Convert the public options model to the Azure AI Search options model. /// /// The public options model. - /// The azure ai search options model. + /// The Azure AI Search options model. private GetDocumentOptions ConvertGetDocumentOptions(GetRecordOptions? options) { var innerOptions = new GetDocumentOptions(); @@ -351,7 +353,7 @@ private GetDocumentOptions ConvertGetDocumentOptions(GetRecordOptions? options) /// The type to deserialize the document to. /// The search client to use when fetching the document. /// The key of the record to get. - /// The azure ai search sdk options for getting a document. + /// The Azure AI Search sdk options for getting a document. /// The to monitor for cancellation requests. The default is . /// The retrieved document, mapped to the consumer data model, or null if not found. private static async Task GetDocumentWithNotFoundHandlingAsync( diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs index 37bc65495dfc..6f9e0441fb98 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs @@ -1,6 +1,8 @@ // Copyright (c) Microsoft. All rights reserved. 
+using System.Text.Json; using System.Text.Json.Nodes; +using Azure.Search.Documents.Indexes; using Microsoft.SemanticKernel.Memory; namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; @@ -18,12 +20,12 @@ public sealed class AzureAISearchVectorRecordStoreOptions public string? DefaultCollectionName { get; init; } = null; /// - /// Gets or sets the choice of mapper to use when converting between the data model and the azure ai search record. + /// Gets or sets the choice of mapper to use when converting between the data model and the Azure AI Search record. /// public AzureAISearchRecordMapperType MapperType { get; init; } = AzureAISearchRecordMapperType.Default; /// - /// Gets or sets an optional custom mapper to use when converting between the data model and the azure ai search record. + /// Gets or sets an optional custom mapper to use when converting between the data model and the Azure AI Search record. /// /// /// Set to to use this mapper."/> @@ -39,4 +41,10 @@ public sealed class AzureAISearchVectorRecordStoreOptions /// See , and . /// public VectorStoreRecordDefinition? VectorStoreRecordDefinition { get; init; } = null; + + /// + /// Gets or sets the JSON serializer options to use when converting between the data model and the Azure AI Search record. + /// Note that when using the default mapper, you will need to provide the same set of both here and when constructing the . + /// + public JsonSerializerOptions? 
JsonSerializerOptions { get; init; } = null; } diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs index 7b439616db5c..c88b7c7c3a29 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -70,6 +70,9 @@ internal sealed class QdrantVectorStoreRecordMapper : IVectorStoreRecor /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. private readonly PropertyInfo _keyPropertyInfo; + /// A dictionary that maps from a property name to the configured name that should be used when storing it. + private readonly Dictionary _storagePropertyNames = new(); + /// Configuration options for this class. private readonly QdrantVectorStoreRecordMapperOptions _options; @@ -101,6 +104,8 @@ public QdrantVectorStoreRecordMapper(QdrantVectorStoreRecordMapperOptions option this._keyPropertyInfo = properties.keyProperty; this._payloadPropertiesInfo = properties.dataProperties; this._vectorPropertiesInfo = properties.vectorProperties; + + this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties, this._options.VectorStoreRecordDefinition); } /// @@ -133,7 +138,7 @@ public PointStruct MapFromDataToStorageModel(TRecord dataModel) // Add point payload. 
foreach (var payloadPropertyInfo in this._payloadPropertiesInfo) { - var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(payloadPropertyInfo); + var propertyName = this._storagePropertyNames[payloadPropertyInfo.Name]; var propertyValue = payloadPropertyInfo.GetValue(dataModel); pointStruct.Payload.Add(propertyName, ConvertToGrpcFieldValue(propertyValue)); } @@ -144,7 +149,7 @@ public PointStruct MapFromDataToStorageModel(TRecord dataModel) var namedVectors = new NamedVectors(); foreach (var vectorPropertyInfo in this._vectorPropertiesInfo) { - var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(vectorPropertyInfo); + var propertyName = this._storagePropertyNames[vectorPropertyInfo.Name]; var propertyValue = vectorPropertyInfo.GetValue(dataModel); if (propertyValue is not null) { @@ -176,13 +181,13 @@ public PointStruct MapFromDataToStorageModel(TRecord dataModel) public TRecord MapFromStorageToDataModel(PointStruct storageModel, StorageToDataModelMapperOptions options) { // Get the key property name and value. - var keyPropertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(this._keyPropertyInfo); + var keyPropertyName = this._storagePropertyNames[this._keyPropertyInfo.Name]; var keyPropertyValue = storageModel.Id.HasNum ? storageModel.Id.Num as object : storageModel.Id.Uuid as object; // Create a json object to represent the point. var outputJsonObject = new JsonObject { - { keyPropertyName, JsonValue.Create(keyPropertyValue) }, + { this._keyPropertyInfo.Name, JsonValue.Create(keyPropertyValue) }, }; // Add each vector property if embeddings are included in the point. 
@@ -190,18 +195,18 @@ public TRecord MapFromStorageToDataModel(PointStruct storageModel, StorageToData { foreach (var vectorProperty in this._vectorPropertiesInfo) { - var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(vectorProperty); + var propertyName = this._storagePropertyNames[vectorProperty.Name]; if (this._options.HasNamedVectors) { if (storageModel.Vectors.Vectors_.Vectors.TryGetValue(propertyName, out var vector)) { - outputJsonObject.Add(propertyName, new JsonArray(vector.Data.Select(x => JsonValue.Create(x)).ToArray())); + outputJsonObject.Add(vectorProperty.Name, new JsonArray(vector.Data.Select(x => JsonValue.Create(x)).ToArray())); } } else { - outputJsonObject.Add(propertyName, new JsonArray(storageModel.Vectors.Vector.Data.Select(x => JsonValue.Create(x)).ToArray())); + outputJsonObject.Add(vectorProperty.Name, new JsonArray(storageModel.Vectors.Vector.Data.Select(x => JsonValue.Create(x)).ToArray())); } } } @@ -209,10 +214,10 @@ public TRecord MapFromStorageToDataModel(PointStruct storageModel, StorageToData // Add each payload property. 
foreach (var payloadProperty in this._payloadPropertiesInfo) { - var propertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(payloadProperty); + var propertyName = this._storagePropertyNames[payloadProperty.Name]; if (storageModel.Payload.TryGetValue(propertyName, out var value)) { - outputJsonObject.Add(propertyName, ConvertFromGrpcFieldValueToJsonNode(value)); + outputJsonObject.Add(payloadProperty.Name, ConvertFromGrpcFieldValueToJsonNode(value)); } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs index 4552339b9977..c80dd3a2ea0d 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs @@ -41,7 +41,7 @@ public sealed class RedisVectorRecordStore : IVectorRecordStore?) ]; - /// The redis database to read/write records from. + /// The Redis database to read/write records from. private readonly IDatabase _database; /// Optional configuration options for this class. @@ -50,19 +50,22 @@ public sealed class RedisVectorRecordStore : IVectorRecordStoreA property info object that points at the key property for the current model, allowing easy reading and writing of this property. private readonly PropertyInfo _keyPropertyInfo; - /// The name of the temporary json property that the key property will be serialized / parsed from. + /// The name of the temporary JSON property that the key property will be serialized / parsed from. private readonly string _keyJsonPropertyName; - /// An array of the names of all the data properties that are part of the redis payload, i.e. all properties except the key and vector properties. + /// An array of the names of all the data properties that are part of the Redis payload, i.e. all properties except the key and vector properties. 
private readonly string[] _dataPropertyNames; - /// The mapper to use when mapping between the consumer data model and the redis record. + /// The mapper to use when mapping between the consumer data model and the Redis record. private readonly IVectorStoreRecordMapper _mapper; + /// The JSON serializer options to use when converting between the data model and the Redis record. + private readonly JsonSerializerOptions _jsonSerializerOptions; + /// /// Initializes a new instance of the class. /// - /// The redis database to read/write records from. + /// The Redis database to read/write records from. /// Optional configuration options for this class. /// Throw when parameters are invalid. public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions? options = null) @@ -73,6 +76,7 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< // Assign. this._database = database; this._options = options ?? new RedisVectorRecordStoreOptions(); + this._jsonSerializerOptions = this._options.jsonSerializerOptions ?? JsonSerializerOptions.Default; // Enumerate public properties using configuration or attributes. 
(PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; @@ -90,11 +94,11 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); this._keyPropertyInfo = properties.keyProperty; - this._keyJsonPropertyName = VectorStoreRecordPropertyReader.GetSerializedPropertyName(this._keyPropertyInfo); + this._keyJsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(this._jsonSerializerOptions, this._keyPropertyInfo); this._dataPropertyNames = properties .dataProperties - .Select(VectorStoreRecordPropertyReader.GetSerializedPropertyName) + .Select(x => VectorStoreRecordPropertyReader.GetJsonPropertyName(this._jsonSerializerOptions, x)) .ToArray(); // Assign Mapper. @@ -123,7 +127,7 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< var maybePrefixedKey = this.PrefixKeyIfNeeded(key, collectionName); var includeVectors = options?.IncludeVectors ?? false; - // Get the redis value. + // Get the Redis value. var redisResult = await RunOperationAsync( collectionName, "GET", @@ -141,7 +145,7 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< return null; } - // Check if the value contained any json text before trying to parse the result. + // Check if the value contained any JSON text before trying to parse the result. 
var redisResultString = redisResult.ToString(); if (redisResultString is null) { @@ -155,7 +159,7 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< "GET", () => { - var node = JsonSerializer.Deserialize(redisResultString)!; + var node = JsonSerializer.Deserialize(redisResultString, this._jsonSerializerOptions)!; return this._mapper.MapFromStorageToDataModel((key, node), new() { IncludeVectors = includeVectors }); }); } @@ -172,7 +176,7 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G var redisKeys = maybePrefixedKeys.Select(x => new RedisKey(x)).ToArray(); var includeVectors = options?.IncludeVectors ?? false; - // Get the list of redis results. + // Get the list of Redis results. var redisResults = await RunOperationAsync( collectionName, "MGET", @@ -192,7 +196,7 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G continue; } - // Check if the value contained any json text before trying to parse the result. + // Check if the value contained any JSON text before trying to parse the result. var redisResultString = redisResult.ToString(); if (redisResultString is null) { @@ -206,7 +210,7 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G "MGET", () => { - var node = JsonSerializer.Deserialize(redisResultString)!; + var node = JsonSerializer.Deserialize(redisResultString, this._jsonSerializerOptions)!; return this._mapper.MapFromStorageToDataModel((key, node), new() { IncludeVectors = includeVectors }); }); } @@ -253,7 +257,12 @@ public async Task UpsertAsync(TRecord record, UpsertRecordOptions? optio DatabaseName, collectionName, "SET", - () => this._mapper.MapFromDataToStorageModel(record)); + () => + { + var mapResult = this._mapper.MapFromDataToStorageModel(record); + var serializedRecord = JsonSerializer.Serialize(mapResult.Node, this._jsonSerializerOptions); + return new { Key = mapResult.Key, SerializedRecord = serializedRecord }; + }); // Upsert. 
var maybePrefixedKey = this.PrefixKeyIfNeeded(redisJsonRecord.Key, collectionName); @@ -265,7 +274,7 @@ await RunOperationAsync( .SetAsync( maybePrefixedKey, "$", - redisJsonRecord.Node)).ConfigureAwait(false); + redisJsonRecord.SerializedRecord)).ConfigureAwait(false); return redisJsonRecord.Key; } @@ -279,21 +288,26 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco var collectionName = this.ChooseCollectionName(options?.CollectionName); // Map. - var redisRecords = new List<(string maybePrefixedKey, string originalKey, JsonNode jsonNode)>(); + var redisRecords = new List<(string maybePrefixedKey, string originalKey, string serializedRecord)>(); foreach (var record in records) { var redisJsonRecord = VectorStoreErrorHandler.RunModelConversion( DatabaseName, collectionName, "MSET", - () => this._mapper.MapFromDataToStorageModel(record)); + () => + { + var mapResult = this._mapper.MapFromDataToStorageModel(record); + var serializedRecord = JsonSerializer.Serialize(mapResult.Node, this._jsonSerializerOptions); + return new { Key = mapResult.Key, SerializedRecord = serializedRecord }; + }); var maybePrefixedKey = this.PrefixKeyIfNeeded(redisJsonRecord.Key, collectionName); - redisRecords.Add((maybePrefixedKey, redisJsonRecord.Key, redisJsonRecord.Node)); + redisRecords.Add((maybePrefixedKey, redisJsonRecord.Key, redisJsonRecord.SerializedRecord)); } // Upsert. - var keyPathValues = redisRecords.Select(x => new KeyPathValue(x.maybePrefixedKey, "$", x.jsonNode)).ToArray(); + var keyPathValues = redisRecords.Select(x => new KeyPathValue(x.maybePrefixedKey, "$", x.serializedRecord)).ToArray(); await RunOperationAsync( collectionName, "MSET", @@ -344,7 +358,7 @@ private string ChooseCollectionName(string? operationCollectionName) } /// - /// Run the given operation and wrap any redis exceptions with ."/> + /// Run the given operation and wrap any Redis exceptions with ."/> /// /// The response type of the operation. 
/// The name of the collection the operation is being run on. diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs index 5cd7d5a33538..f56d22d247d8 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using System.Text.Json; using System.Text.Json.Nodes; using Microsoft.SemanticKernel.Memory; @@ -19,21 +20,21 @@ public sealed class RedisVectorRecordStoreOptions /// /// Gets or sets a value indicating whether the collection name should be prefixed to the - /// key names before reading or writing to the redis store. Default is false. + /// key names before reading or writing to the Redis store. Default is false. /// /// - /// For a record to be indexed by a specific redis index, the key name must be prefixed with the matching prefix configured on the redis index. + /// For a record to be indexed by a specific Redis index, the key name must be prefixed with the matching prefix configured on the Redis index. /// You can either pass in keys that are already prefixed, or set this option to true to have the collection name prefixed to the key names automatically. /// public bool PrefixCollectionNameToKeyNames { get; init; } = false; /// - /// Gets or sets the choice of mapper to use when converting between the data model and the redis record. + /// Gets or sets the choice of mapper to use when converting between the data model and the Redis record. /// public RedisRecordMapperType MapperType { get; init; } = RedisRecordMapperType.Default; /// - /// Gets or sets an optional custom mapper to use when converting between the data model and the redis record. + /// Gets or sets an optional custom mapper to use when converting between the data model and the Redis record. 
/// /// /// Set to to use this mapper."/> @@ -49,4 +50,9 @@ public sealed class RedisVectorRecordStoreOptions /// See , and . /// public VectorStoreRecordDefinition? VectorStoreRecordDefinition { get; init; } = null; + + /// + /// Gets or sets the JSON serializer options to use when converting between the data model and the Redis record. + /// + public JsonSerializerOptions? jsonSerializerOptions { get; init; } = null; } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs index 664143b60080..0b9fc0c23f17 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Text.Json.Serialization; using System.Text.RegularExpressions; using System.Threading.Tasks; using Azure; @@ -248,6 +249,7 @@ public class Hotel public string[] Tags { get; set; } #pragma warning restore CA1819 // Properties should not return arrays + [JsonPropertyName("parking_is_included")] [SimpleField(IsFilterable = true, IsSortable = true, IsFacetable = true)] [VectorStoreRecordData] public bool? 
ParkingIncluded { get; set; } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs index 80316f1bd6fd..83109699c127 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs @@ -37,7 +37,7 @@ public QdrantVectorStoreFixture() new VectorStoreRecordKeyProperty("HotelId"), new VectorStoreRecordDataProperty("HotelName"), new VectorStoreRecordDataProperty("HotelCode"), - new VectorStoreRecordDataProperty("ParkingIncluded"), + new VectorStoreRecordDataProperty("ParkingIncluded") { StoragePropertyName = "parking_is_included" }, new VectorStoreRecordDataProperty("HotelRating"), new VectorStoreRecordDataProperty("Tags"), new VectorStoreRecordDataProperty("Description"), @@ -137,19 +137,19 @@ await this.QdrantClient.CreateCollectionAsync( { Id = 11, Vectors = new Vectors { Vectors_ = namedVectors1 }, - Payload = { ["HotelName"] = "My Hotel 11", ["HotelCode"] = 11, ["ParkingIncluded"] = true, ["Tags"] = tagsValue, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." } + Payload = { ["HotelName"] = "My Hotel 11", ["HotelCode"] = 11, ["parking_is_included"] = true, ["Tags"] = tagsValue, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." } }, new PointStruct { Id = 12, Vectors = new Vectors { Vectors_ = namedVectors2 }, - Payload = { ["HotelName"] = "My Hotel 12", ["HotelCode"] = 12, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + Payload = { ["HotelName"] = "My Hotel 12", ["HotelCode"] = 12, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." 
} }, new PointStruct { Id = 13, Vectors = new Vectors { Vectors_ = namedVectors3 }, - Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." } }, ]; @@ -162,19 +162,19 @@ await this.QdrantClient.CreateCollectionAsync( { Id = 11, Vectors = embedding, - Payload = { ["HotelName"] = "My Hotel 11", ["HotelCode"] = 11, ["ParkingIncluded"] = true, ["Tags"] = tagsValue, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." } + Payload = { ["HotelName"] = "My Hotel 11", ["HotelCode"] = 11, ["parking_is_included"] = true, ["Tags"] = tagsValue, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." } }, new PointStruct { Id = 12, Vectors = embedding, - Payload = { ["HotelName"] = "My Hotel 12", ["HotelCode"] = 12, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + Payload = { ["HotelName"] = "My Hotel 12", ["HotelCode"] = 12, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." } }, new PointStruct { Id = 13, Vectors = embedding, - Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["ParkingIncluded"] = false, ["Description"] = "This is a great hotel." } + Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." } }, ]; @@ -284,7 +284,7 @@ public record HotelInfo() public float? HotelRating { get; set; } /// A bool metadata field. 
- [VectorStoreRecordData] + [VectorStoreRecordData(StoragePropertyName = "parking_is_included")] public bool ParkingIncluded { get; set; } [VectorStoreRecordData] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs index 186b20a4b703..11c0ecb23317 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; +using System.Text.Json.Serialization; using System.Threading.Tasks; using Docker.DotNet; using Docker.DotNet.Models; @@ -94,14 +95,14 @@ public async Task InitializeAsync() Description = "This is a great hotel.", DescriptionEmbedding = embedding, Tags = new[] { "pool", "air conditioning", "concierge" }, - ParkingIncluded = true, + parking_is_included = true, LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), Rating = 3.6, Address = address }); - await this.Database.JSON().SetAsync("hotels:BaseSet-2", "$", new { HotelName = "My Hotel 2", HotelCode = 2, Description = "This is a great hotel.", DescriptionEmbedding = embedding, ParkingIncluded = false }); - await this.Database.JSON().SetAsync("hotels:BaseSet-3", "$", new { HotelName = "My Hotel 3", HotelCode = 3, Description = "This is a great hotel.", DescriptionEmbedding = embedding, ParkingIncluded = false }); - await this.Database.JSON().SetAsync("hotels:BaseSet-4-Invalid", "$", new { HotelId = "AnotherId", HotelName = "My Invalid Hotel", HotelCode = 4, Description = "This is an invalid hotel.", DescriptionEmbedding = embedding, ParkingIncluded = false }); + await this.Database.JSON().SetAsync("hotels:BaseSet-2", "$", new { HotelName = "My Hotel 2", HotelCode = 2, Description = "This is a great hotel.", DescriptionEmbedding = embedding, parking_is_included = false }); + await 
this.Database.JSON().SetAsync("hotels:BaseSet-3", "$", new { HotelName = "My Hotel 3", HotelCode = 3, Description = "This is a great hotel.", DescriptionEmbedding = embedding, parking_is_included = false }); + await this.Database.JSON().SetAsync("hotels:BaseSet-4-Invalid", "$", new { HotelId = "AnotherId", HotelName = "My Invalid Hotel", HotelCode = 4, Description = "This is an invalid hotel.", DescriptionEmbedding = embedding, parking_is_included = false }); } /// @@ -182,6 +183,7 @@ public class Hotel public string[] Tags { get; init; } #pragma warning restore CA1819 // Properties should not return arrays + [JsonPropertyName("parking_is_included")] [VectorStoreRecordData] public bool ParkingIncluded { get; init; } diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index 0d347673b370..4034ea959f22 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -6,6 +6,7 @@ using System.Diagnostics.CodeAnalysis; using System.Linq; using System.Reflection; +using System.Text.Json; using System.Text.Json.Serialization; using Microsoft.SemanticKernel.Memory; @@ -204,8 +205,11 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT var properties = FindProperties(type, supportsMultipleVectors); var definitionProperties = new List(); - definitionProperties.Add(new VectorStoreRecordKeyProperty(properties.keyProperty.Name)); + // Key property. + var keyAttribute = properties.keyProperty.GetCustomAttribute(); + definitionProperties.Add(new VectorStoreRecordKeyProperty(properties.keyProperty.Name) { StoragePropertyName = keyAttribute!.StoragePropertyName }); + // Data properties. 
foreach (var dataProperty in properties.dataProperties) { var dataAttribute = dataProperty.GetCustomAttribute(); @@ -215,16 +219,21 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT { HasEmbedding = dataAttribute.HasEmbedding, EmbeddingPropertyName = dataAttribute.EmbeddingPropertyName, + StoragePropertyName = dataAttribute.StoragePropertyName }); } } + // Vector properties. foreach (var vectorProperty in properties.vectorProperties) { var vectorAttribute = vectorProperty.GetCustomAttribute(); if (vectorAttribute is not null) { - definitionProperties.Add(new VectorStoreRecordVectorProperty(vectorProperty.Name)); + definitionProperties.Add(new VectorStoreRecordVectorProperty(vectorProperty.Name) + { + StoragePropertyName = vectorAttribute.StoragePropertyName + }); } } @@ -251,12 +260,89 @@ public static void VerifyPropertyTypes(List properties, HashSet - /// Get the serialized name of a property by first checking the and then falling back to the property name. + /// Get the JSON property name of a property by using the if available, otherwise + /// using the if available, otherwise falling back to the property name. + /// + /// The options used for JSON serialization. + /// The property to retrieve a storage name for. + /// The JSON storage property name. + public static string GetJsonPropertyName(JsonSerializerOptions options, PropertyInfo property) + { + var jsonPropertyNameAttribute = property.GetCustomAttribute(); + if (jsonPropertyNameAttribute is not null) + { + return jsonPropertyNameAttribute.Name; + } + + if (options.PropertyNamingPolicy is not null) + { + return options.PropertyNamingPolicy.ConvertName(property.Name); + } + + return property.Name; + } + + /// + /// Get the storage name of a property by first checking the , if one is available, + /// otherwise falling back to the attributes on the property and finally, the property name. /// - /// The property to retrieve a serialized name for. 
- /// The serialized name for the property. - public static string GetSerializedPropertyName(PropertyInfo property) + /// The property to retrieve a storage name for. + /// The property configuration, if available. + /// The storage name for the property. + public static string GetStoragePropertyName(PropertyInfo property, VectorStoreRecordDefinition? vectorStoreRecordDefinition) { - return property.GetCustomAttribute()?.Name ?? property.Name; + if (vectorStoreRecordDefinition is not null) + { + // First check to see if the developer configured a storage property name on the record definition. + if (vectorStoreRecordDefinition.Properties.FirstOrDefault(p => p.PropertyName == property.Name) is VectorStoreRecordProperty recordProperty && recordProperty.StoragePropertyName is not null) + { + return recordProperty.StoragePropertyName; + } + + // Otherwise, return just the property name. + return property.Name; + } + // If no definition was supplied, check the attributes. + else if (property.GetCustomAttribute() is VectorStoreRecordDataAttribute dataAttribute) + { + return dataAttribute.StoragePropertyName ?? property.Name; + } + else if (property.GetCustomAttribute() is VectorStoreRecordVectorAttribute vectorAttribute) + { + return vectorAttribute.StoragePropertyName ?? property.Name; + } + else if (property.GetCustomAttribute() is VectorStoreRecordKeyAttribute keyAttribute) + { + return keyAttribute.StoragePropertyName ?? property.Name; + } + + // Otherwise, return just the property name. + return property.Name; + } + + /// + /// Build a map of property names to the names under which they should be saved in storage, for the given properties. + /// + /// The properties to build the map for. + /// The property configuration, if available. + /// The map from property names to the names under which they should be saved in storage. 
+ public static Dictionary BuildPropertyNameToStorageNameMap( + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties, + VectorStoreRecordDefinition? vectorStoreRecordDefinition) + { + var storagePropertyNameMap = new Dictionary(); + storagePropertyNameMap.Add(properties.keyProperty.Name, GetStoragePropertyName(properties.keyProperty, vectorStoreRecordDefinition)); + + foreach (var dataProperty in properties.dataProperties) + { + storagePropertyNameMap.Add(dataProperty.Name, GetStoragePropertyName(dataProperty, vectorStoreRecordDefinition)); + } + + foreach (var vectorProperty in properties.vectorProperties) + { + storagePropertyNameMap.Add(vectorProperty.Name, GetStoragePropertyName(vectorProperty, vectorStoreRecordDefinition)); + } + + return storagePropertyNameMap; } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordDataAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordDataAttribute.cs index 8c054765196c..8d72ea1d817c 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordDataAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordDataAttribute.cs @@ -22,4 +22,10 @@ public sealed class VectorStoreRecordDataAttribute : Attribute /// Gets or sets the name of the property that contains the embedding for this data field. /// public string? EmbeddingPropertyName { get; init; } + + /// + /// Gets or sets an optional name to use for the property in storage, if different from the property name. + /// E.g. the property name might be "MyProperty" but the storage name might be "my_property". + /// + public string? 
StoragePropertyName { get; set; } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordKeyAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordKeyAttribute.cs index c0a7cf5a75a1..fd56d992a1f6 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordKeyAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordKeyAttribute.cs @@ -12,4 +12,9 @@ namespace Microsoft.SemanticKernel.Memory; [AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] public sealed class VectorStoreRecordKeyAttribute : Attribute { + /// + /// Gets or sets an optional name to use for the property in storage, if different from the property name. + /// E.g. the property name might be "MyProperty" but the storage name might be "my_property". + /// + public string? StoragePropertyName { get; set; } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordVectorAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordVectorAttribute.cs index 7f34448710f3..d5c0f59da103 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordVectorAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordVectorAttribute.cs @@ -12,4 +12,9 @@ namespace Microsoft.SemanticKernel.Memory; [AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] public sealed class VectorStoreRecordVectorAttribute : Attribute { + /// + /// Gets or sets an optional name to use for the property in storage, if different from the property name. + /// E.g. the property name might be "MyProperty" but the storage name might be "my_property". + /// + public string? 
StoragePropertyName { get; set; } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordProperty.cs index e4ca8661c786..01056a8ef89a 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordProperty.cs @@ -23,4 +23,10 @@ private protected VectorStoreRecordProperty(string propertyName) /// Gets or sets the name of the property. /// public string PropertyName { get; set; } + + /// + /// Gets or sets an optional name to use for the property in storage, if different from the property name. + /// E.g. the property name might be "MyProperty" but the storage name might be "my_property". + /// + public string? StoragePropertyName { get; set; } } diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index b702f2b799a0..4faa74cf15fe 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -1,6 +1,10 @@ // Copyright (c) Microsoft. All rights reserved. using System; +using System.Linq; +using System.Reflection; +using System.Text.Json; +using System.Text.Json.Serialization; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.Memory; using Xunit; @@ -163,6 +167,8 @@ public void CreateVectorStoreRecordDefinitionFromTypeConvertsAllProps() Assert.True(data1.HasEmbedding); Assert.False(data2.HasEmbedding); + + Assert.Equal("Vector1", data1.EmbeddingPropertyName); } [Fact] @@ -188,6 +194,56 @@ public void VerifyPropertyTypesFailsForDisallowedTypes() Assert.Equal("Data properties must be one of the supported types: System.Int32, System.Single. 
Type of Data is System.String.", ex.Message); } + [Fact] + public void VerifyStoragePropertyNameMapChecksAttributeAndFallsBackToPropertyName() + { + // Arrange. + var properties = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), true); + + // Act. + var storageNameMap = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties, this._multiPropsDefinition); + + // Assert. + Assert.Equal(5, storageNameMap.Count); + + // From Property Names. + Assert.Equal("Key", storageNameMap["Key"]); + Assert.Equal("Data1", storageNameMap["Data1"]); + Assert.Equal("Vector1", storageNameMap["Vector1"]); + Assert.Equal("Vector2", storageNameMap["Vector2"]); + + // From storage property name on vector store record property attribute. + Assert.Equal("data_2", storageNameMap["Data2"]); + } + + [Fact] + public void VerifyGetJsonPropertyNameChecksJsonOptionsAndJsonAttributesAndFallsBackToPropertyName() + { + // Arrange. + var options = new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseUpper }; + var properties = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), true); + var allProperties = (new PropertyInfo[] { properties.keyProperty }) + .Concat(properties.dataProperties) + .Concat(properties.vectorProperties); + + // Act. + var jsonNameMap = allProperties + .Select(p => new { PropertyName = p.Name, JsonName = VectorStoreRecordPropertyReader.GetJsonPropertyName(options, p) }) + .ToDictionary(p => p.PropertyName, p => p.JsonName); + + // Assert. + Assert.Equal(5, jsonNameMap.Count); + + // From JsonNamingPolicy. + Assert.Equal("KEY", jsonNameMap["Key"]); + Assert.Equal("DATA1", jsonNameMap["Data1"]); + Assert.Equal("DATA2", jsonNameMap["Data2"]); + Assert.Equal("VECTOR1", jsonNameMap["Vector1"]); + + // From JsonPropertyName attribute. 
+ Assert.Equal("vector-2", jsonNameMap["Vector2"]); + } + #pragma warning disable CA1812 // Invalid unused classes error, since I am using these for testing purposes above. private sealed class NoKeyModel { @@ -266,6 +322,7 @@ private sealed class MultiPropsModel public ReadOnlyMemory Vector1 { get; set; } [VectorStoreRecordVector] + [JsonPropertyName("vector-2")] public ReadOnlyMemory Vector2 { get; set; } public string NotAnnotated { get; set; } = string.Empty; @@ -277,7 +334,7 @@ private sealed class MultiPropsModel [ new VectorStoreRecordKeyProperty("Key"), new VectorStoreRecordDataProperty("Data1") { HasEmbedding = true, EmbeddingPropertyName = "Vector1" }, - new VectorStoreRecordDataProperty("Data2"), + new VectorStoreRecordDataProperty("Data2") { StoragePropertyName = "data_2" }, new VectorStoreRecordVectorProperty("Vector1"), new VectorStoreRecordVectorProperty("Vector2") ] From 210d188854145b9c2b5475b7fb512c5d3f346b1b Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Wed, 3 Jul 2024 17:23:33 +0100 Subject: [PATCH 12/48] .Net: VectorStore: Moving all vector store interfaces to data folder and namespace. (#7081) ### Motivation and Context We agreed to put all vector store core code in the Microsoft.SemanticKernel.Data namespace, so moving all abstractions into this folder structure and namespace.
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../AzureAISearchVectorRecordStoreTests.cs | 2 +- .../AzureAISearchVectorRecordStore.cs | 2 +- .../AzureAISearchVectorRecordStoreOptions.cs | 2 +- .../Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs | 2 +- .../QdrantVectorRecordStoreOptions.cs | 2 +- .../QdrantVectorStoreRecordMapper.cs | 2 +- .../QdrantVectorStoreRecordMapperOptions.cs | 2 +- .../Connectors.Memory.Redis/RedisVectorRecordStore.cs | 4 ++-- .../Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs | 4 ++-- .../Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs | 2 +- .../QdrantVectorRecordStoreTests.cs | 2 +- .../QdrantVectorStoreRecordMapperTests.cs | 2 +- .../RedisVectorRecordStoreTests.cs | 2 +- .../RedisVectorStoreRecordMapperTests.cs | 2 +- .../AzureAISearch/AzureAISearchVectorRecordStoreTests.cs | 2 +- .../Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs | 2 +- .../Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs | 2 +- .../Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs | 2 +- .../Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs | 2 +- .../Connectors/Memory/Redis/RedisVectorStoreFixture.cs | 2 +- .../InternalUtilities/src/Data/VectorStoreErrorHandler.cs | 3 +-- .../src/Data/VectorStoreRecordPropertyReader.cs | 3 +-- .../{Memory => Data}/IVectorRecordStore.cs | 2 +- .../{Memory => Data}/IVectorStoreRecordMapper.cs | 5 ++++- .../RecordAttributes/VectorStoreRecordDataAttribute.cs | 2 +- .../RecordAttributes/VectorStoreRecordKeyAttribute.cs | 2 +- 
.../RecordAttributes/VectorStoreRecordVectorAttribute.cs | 2 +- .../RecordDefinition/VectorStoreRecordDataProperty.cs | 2 +- .../RecordDefinition/VectorStoreRecordDefinition.cs | 2 +- .../RecordDefinition/VectorStoreRecordKeyProperty.cs | 2 +- .../RecordDefinition/VectorStoreRecordProperty.cs | 2 +- .../RecordDefinition/VectorStoreRecordVectorProperty.cs | 2 +- .../{Memory => Data}/RecordOptions/DeleteRecordOptions.cs | 2 +- .../{Memory => Data}/RecordOptions/GetRecordOptions.cs | 2 +- .../{Memory => Data}/RecordOptions/UpsertRecordOptions.cs | 2 +- .../{Memory => Data}/StorageToDataModelMapperOptions.cs | 5 ++++- .../{Memory => Data}/VectorStoreException.cs | 2 +- .../{Memory => Data}/VectorStoreOperationException.cs | 2 +- .../{Memory => Data}/VectorStoreRecordMappingException.cs | 2 +- .../Data/VectorStoreRecordPropertyReaderTests.cs | 3 +-- 40 files changed, 48 insertions(+), 45 deletions(-) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/IVectorRecordStore.cs (99%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/IVectorStoreRecordMapper.cs (91%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/RecordAttributes/VectorStoreRecordDataAttribute.cs (96%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/RecordAttributes/VectorStoreRecordKeyAttribute.cs (94%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/RecordAttributes/VectorStoreRecordVectorAttribute.cs (93%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/RecordDefinition/VectorStoreRecordDataProperty.cs (97%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/RecordDefinition/VectorStoreRecordDefinition.cs (94%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/RecordDefinition/VectorStoreRecordKeyProperty.cs (95%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/RecordDefinition/VectorStoreRecordProperty.cs (95%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => 
Data}/RecordDefinition/VectorStoreRecordVectorProperty.cs (95%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/RecordOptions/DeleteRecordOptions.cs (95%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/RecordOptions/GetRecordOptions.cs (96%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/RecordOptions/UpsertRecordOptions.cs (95%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/StorageToDataModelMapperOptions.cs (80%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/VectorStoreException.cs (97%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/VectorStoreOperationException.cs (97%) rename dotnet/src/SemanticKernel.Abstractions/{Memory => Data}/VectorStoreRecordMappingException.cs (97%) diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs index 9d1bc5d0f244..f14868b498aa 100644 --- a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs @@ -12,7 +12,7 @@ using Azure.Search.Documents.Indexes; using Azure.Search.Documents.Models; using Microsoft.SemanticKernel.Connectors.AzureAISearch; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Moq; using Xunit; diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index a6dabed753c9..39c5166bc1ca 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -14,7 +14,7 @@ using Azure.Search.Documents; using Azure.Search.Documents.Indexes; using 
Azure.Search.Documents.Models; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs index 6f9e0441fb98..cf73ecbf1784 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs @@ -3,7 +3,7 @@ using System.Text.Json; using System.Text.Json.Nodes; using Azure.Search.Documents.Indexes; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs index e89492dbc78e..d9cf89944e3a 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs @@ -7,7 +7,7 @@ using System.Threading; using System.Threading.Tasks; using Grpc.Core; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Qdrant.Client; using Qdrant.Client.Grpc; diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs index d3e568057976..d16fad7c33b7 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs @@ -1,6 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. 
-using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Qdrant.Client.Grpc; namespace Microsoft.SemanticKernel.Connectors.Qdrant; diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs index c88b7c7c3a29..1881e38293c4 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -7,7 +7,7 @@ using System.Reflection; using System.Text.Json; using System.Text.Json.Nodes; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Qdrant.Client.Grpc; namespace Microsoft.SemanticKernel.Connectors.Qdrant; diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs index c5a9ffa46865..bb6d5d837d05 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs @@ -1,6 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. 
-using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; namespace Microsoft.SemanticKernel.Connectors.Qdrant; diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs index c80dd3a2ea0d..480230f6f65c 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs @@ -9,7 +9,7 @@ using System.Text.Json.Nodes; using System.Threading; using System.Threading.Tasks; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using NRedisStack.Json.DataTypes; using NRedisStack.RedisStackCommands; using StackExchange.Redis; @@ -76,7 +76,7 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< // Assign. this._database = database; this._options = options ?? new RedisVectorRecordStoreOptions(); - this._jsonSerializerOptions = this._options.jsonSerializerOptions ?? JsonSerializerOptions.Default; + this._jsonSerializerOptions = this._options.JsonSerializerOptions ?? JsonSerializerOptions.Default; // Enumerate public properties using configuration or attributes. 
(PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs index f56d22d247d8..3786070ef298 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs @@ -2,7 +2,7 @@ using System.Text.Json; using System.Text.Json.Nodes; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; namespace Microsoft.SemanticKernel.Connectors.Redis; @@ -54,5 +54,5 @@ public sealed class RedisVectorRecordStoreOptions /// /// Gets or sets the JSON serializer options to use when converting between the data model and the Redis record. /// - public JsonSerializerOptions? jsonSerializerOptions { get; init; } = null; + public JsonSerializerOptions? JsonSerializerOptions { get; init; } = null; } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs index 61c90e5eda07..bf9c65bdccfa 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs @@ -2,7 +2,7 @@ using System.Text.Json; using System.Text.Json.Nodes; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; namespace Microsoft.SemanticKernel.Connectors.Redis; diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs index 456684daa51c..e39085e959e6 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs @@ -5,7 +5,7 
@@ using System.Linq; using System.Threading; using System.Threading.Tasks; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Moq; using Qdrant.Client.Grpc; using Xunit; diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs index 8623dbab9f2b..e3ce7c19cbfd 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs @@ -4,7 +4,7 @@ using System.Collections.Generic; using System.Linq; using Microsoft.SemanticKernel.Connectors.Qdrant; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Qdrant.Client.Grpc; using Xunit; diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs index b57bdbec02b1..21438b3726e2 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs @@ -6,7 +6,7 @@ using System.Text.Json.Nodes; using System.Threading; using System.Threading.Tasks; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Moq; using NRedisStack; using StackExchange.Redis; diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordMapperTests.cs index b8d320f62876..4c8e9c5cc792 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordMapperTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordMapperTests.cs @@ -4,7 +4,7 @@ using System.Linq; using System.Text.Json.Nodes; using 
Microsoft.SemanticKernel.Connectors.Redis; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Xunit; namespace SemanticKernel.Connectors.Redis.UnitTests; diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs index 948dec0a796a..7e97c97d33ae 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs @@ -7,7 +7,7 @@ using Azure; using Azure.Search.Documents.Indexes; using Microsoft.SemanticKernel.Connectors.AzureAISearch; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Xunit; using Xunit.Abstractions; using static SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch.AzureAISearchVectorStoreFixture; diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs index 0b9fc0c23f17..3d94b67571bf 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs @@ -12,7 +12,7 @@ using Azure.Search.Documents.Indexes.Models; using Azure.Search.Documents.Models; using Microsoft.Extensions.Configuration; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using SemanticKernel.IntegrationTests.TestSettings.Memory; using Xunit; diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs index 3864b9a9082e..13ffe798cef0 100644 --- 
a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs @@ -5,7 +5,7 @@ using System.Linq; using System.Threading.Tasks; using Microsoft.SemanticKernel.Connectors.Qdrant; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Qdrant.Client.Grpc; using Xunit; using Xunit.Abstractions; diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs index 83109699c127..26c82c311228 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs @@ -6,7 +6,7 @@ using Docker.DotNet; using Docker.DotNet.Models; using Grpc.Core; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Qdrant.Client; using Qdrant.Client.Grpc; using Xunit; diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs index 143a4b41c447..af1c089fb870 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs @@ -5,7 +5,7 @@ using System.Text.Json.Nodes; using System.Threading.Tasks; using Microsoft.SemanticKernel.Connectors.Redis; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Xunit; using Xunit.Abstractions; using static SemanticKernel.IntegrationTests.Connectors.Memory.Redis.RedisVectorStoreFixture; diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs index 11c0ecb23317..478ad52b9f56 100644 --- 
a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs @@ -6,7 +6,7 @@ using System.Threading.Tasks; using Docker.DotNet; using Docker.DotNet.Models; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using NRedisStack.RedisStackCommands; using NRedisStack.Search; using StackExchange.Redis; diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs index aaec06207ef1..1aa2e6f479ad 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreErrorHandler.cs @@ -3,9 +3,8 @@ using System; using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; -using Microsoft.SemanticKernel.Memory; -namespace Microsoft.SemanticKernel; +namespace Microsoft.SemanticKernel.Data; /// /// Contains helpers for reading vector store model properties and their attributes. diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index 4034ea959f22..4cbfe15622bc 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -8,9 +8,8 @@ using System.Reflection; using System.Text.Json; using System.Text.Json.Serialization; -using Microsoft.SemanticKernel.Memory; -namespace Microsoft.SemanticKernel; +namespace Microsoft.SemanticKernel.Data; /// /// Contains helpers for reading vector store model properties and their attributes. 
diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorRecordStore.cs similarity index 99% rename from dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/IVectorRecordStore.cs index 1caa5fd59018..c88821ccb106 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorRecordStore.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorRecordStore.cs @@ -5,7 +5,7 @@ using System.Threading; using System.Threading.Tasks; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// An interface for adding, updating, deleting and retrieving records from a vector store. diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorStoreRecordMapper.cs b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordMapper.cs similarity index 91% rename from dotnet/src/SemanticKernel.Abstractions/Memory/IVectorStoreRecordMapper.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordMapper.cs index 92d367fc12e8..4125c4a1b3ad 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/IVectorStoreRecordMapper.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordMapper.cs @@ -1,12 +1,15 @@ // Copyright (c) Microsoft. All rights reserved. -namespace Microsoft.SemanticKernel.Memory; +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Data; /// /// Interface for mapping between a storage model, and the consumer record data model. /// /// The consumer record data model to map to or from. /// The storage model to map to or from. 
+[Experimental("SKEXP0001")] public interface IVectorStoreRecordMapper where TRecordDataModel : class { diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordDataAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs similarity index 96% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordDataAttribute.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs index 8d72ea1d817c..9df2365c78e0 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordDataAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs @@ -3,7 +3,7 @@ using System; using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// Attribute to mark a property on a record class as data. diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordKeyAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordKeyAttribute.cs similarity index 94% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordKeyAttribute.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordKeyAttribute.cs index fd56d992a1f6..d85a3dff6da2 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordKeyAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordKeyAttribute.cs @@ -3,7 +3,7 @@ using System; using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// Attribute to mark a property on a record class as the key under which data is stored in a vector store. 
diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordVectorAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs similarity index 93% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordVectorAttribute.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs index d5c0f59da103..b7f059173c20 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordAttributes/VectorStoreRecordVectorAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs @@ -3,7 +3,7 @@ using System; using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// Attribute to mark a property on a record class as the vector. diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDataProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs similarity index 97% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDataProperty.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs index eb216825a56e..d7ac00b5ad4c 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDataProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs @@ -2,7 +2,7 @@ using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// A description of a data property on a record for storage in a vector store. 
diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDefinition.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDefinition.cs similarity index 94% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDefinition.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDefinition.cs index b475663a168a..558bfc77b953 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordDefinition.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDefinition.cs @@ -3,7 +3,7 @@ using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// A description of the properties of a record stored in a vector store, plus how the properties are used. diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordKeyProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs similarity index 95% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordKeyProperty.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs index 62b4f4d172c6..51fcf5a5af3d 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordKeyProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs @@ -2,7 +2,7 @@ using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// A description of a key property on a record for storage in a vector store. 
diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs similarity index 95% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordProperty.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs index 01056a8ef89a..fc500b1a2936 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs @@ -2,7 +2,7 @@ using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// A description of a property on a record for storage in a vector store. diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordVectorProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs similarity index 95% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordVectorProperty.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs index 7f4a7ddd78e3..f4c90ef319cc 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordDefinition/VectorStoreRecordVectorProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs @@ -2,7 +2,7 @@ using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// A description of a vector property on a record for storage in a vector store. 
diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/DeleteRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs similarity index 95% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/DeleteRecordOptions.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs index 357f344799b4..2169c6a5051b 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/DeleteRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs @@ -2,7 +2,7 @@ using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// Optional options when calling . diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/GetRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs similarity index 96% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/GetRecordOptions.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs index 5a7daa8a6b42..83ea8ee5b359 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/GetRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs @@ -2,7 +2,7 @@ using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// Optional options when calling . 
diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/UpsertRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs similarity index 95% rename from dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/UpsertRecordOptions.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs index 860630fba16c..8447b2fc82eb 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/RecordOptions/UpsertRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs @@ -2,7 +2,7 @@ using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// Optional options when calling . diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/StorageToDataModelMapperOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/StorageToDataModelMapperOptions.cs similarity index 80% rename from dotnet/src/SemanticKernel.Abstractions/Memory/StorageToDataModelMapperOptions.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/StorageToDataModelMapperOptions.cs index c350751c153d..bdee284b0f14 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/StorageToDataModelMapperOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/StorageToDataModelMapperOptions.cs @@ -1,10 +1,13 @@ // Copyright (c) Microsoft. All rights reserved. -namespace Microsoft.SemanticKernel.Memory; +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Data; /// /// Options to use with the method. 
/// +[Experimental("SKEXP0001")] public class StorageToDataModelMapperOptions { /// diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreException.cs b/dotnet/src/SemanticKernel.Abstractions/Data/VectorStoreException.cs similarity index 97% rename from dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreException.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/VectorStoreException.cs index 9c481b5b8d55..5a0183e85d83 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreException.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/VectorStoreException.cs @@ -3,7 +3,7 @@ using System; using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// Base exception type thrown for any type of failure when using vector stores. diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreOperationException.cs b/dotnet/src/SemanticKernel.Abstractions/Data/VectorStoreOperationException.cs similarity index 97% rename from dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreOperationException.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/VectorStoreOperationException.cs index 9533982a96ae..2830c1b22646 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreOperationException.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/VectorStoreOperationException.cs @@ -3,7 +3,7 @@ using System; using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// Exception thrown when a vector store command fails, such as upserting a record or deleting a collection. 
diff --git a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreRecordMappingException.cs b/dotnet/src/SemanticKernel.Abstractions/Data/VectorStoreRecordMappingException.cs similarity index 97% rename from dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreRecordMappingException.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/VectorStoreRecordMappingException.cs index 6683f3412a17..6b912b233ceb 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Memory/VectorStoreRecordMappingException.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/VectorStoreRecordMappingException.cs @@ -3,7 +3,7 @@ using System; using System.Diagnostics.CodeAnalysis; -namespace Microsoft.SemanticKernel.Memory; +namespace Microsoft.SemanticKernel.Data; /// /// Exception thrown when a failure occurs while trying to convert models for storage or retrieval. diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index 4faa74cf15fe..0fe82c113d35 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -5,8 +5,7 @@ using System.Reflection; using System.Text.Json; using System.Text.Json.Serialization; -using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Data; using Xunit; namespace SemanticKernel.UnitTests.Data; From 3776975f361c44483ba300aca0f43bbb950a6f44 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 8 Jul 2024 09:54:06 +0100 Subject: [PATCH 13/48] .Net: Adding volatile vector record store implementation. (#7109) ### Motivation and Context We need an in memory implementation of new vector store design, so allow simple usage of the pattern without needing to have an external database. 
### Description This adds an In Memory / Volatile VectorRecordStore implementation and unit tests. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../Data/VolatileVectorRecordStore.cs | 170 +++++++++++ .../Data/VolatileVectorRecordStoreOptions.cs | 28 ++ .../Data/VolatileVectorRecordStoreTests.cs | 272 ++++++++++++++++++ 3 files changed, 470 insertions(+) create mode 100644 dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStore.cs create mode 100644 dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStoreOptions.cs create mode 100644 dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorRecordStoreTests.cs diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStore.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStore.cs new file mode 100644 index 000000000000..dadaa6e76937 --- /dev/null +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStore.cs @@ -0,0 +1,170 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.SemanticKernel.Data; + +/// +/// Service for storing and retrieving vector records, that uses an in memory dictionary as the underlying storage. +/// +/// The data model to use for adding, updating and retrieving data from storage. 
+[Experimental("SKEXP0001")] +public sealed class VolatileVectorRecordStore : IVectorRecordStore + where TRecord : class +{ + /// Internal storage for the record store. + private readonly ConcurrentDictionary> _internalCollection; + + /// Optional configuration options for this class. + private readonly VolatileVectorRecordStoreOptions _options; + + /// A set of types that a key on the provided model may have. + private static readonly HashSet s_supportedKeyTypes = + [ + typeof(string) + ]; + + /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. + private readonly PropertyInfo _keyPropertyInfo; + + /// + /// Initializes a new instance of the class. + /// + /// Optional configuration options for this class. + public VolatileVectorRecordStore(VolatileVectorRecordStoreOptions? options = default) + { + // Assign. + this._internalCollection = new(); + this._options = options ?? new VolatileVectorRecordStoreOptions(); + + // Enumerate public properties using configuration or attributes. + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; + if (this._options.VectorStoreRecordDefinition is not null) + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: true); + } + else + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: true); + } + + // Validate property types and store for later use. + VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); + this._keyPropertyInfo = properties.keyProperty; + } + + /// + /// Initializes a new instance of the class. + /// + /// Allows passing in the dictionary used for storage, for testing purposes. + /// Optional configuration options for this class. 
+ internal VolatileVectorRecordStore(ConcurrentDictionary> internalCollection, VolatileVectorRecordStoreOptions? options = default) + : this(options) + { + this._internalCollection = internalCollection; + } + + /// + public Task GetAsync(string key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + { + var collectionDictionary = this.GetCollectionDictionary(options?.CollectionName); + + if (collectionDictionary.TryGetValue(key, out var record)) + { + return Task.FromResult(record); + } + + return Task.FromResult(null); + } + + /// + public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + foreach (var key in keys) + { + var record = await this.GetAsync(key, options, cancellationToken).ConfigureAwait(false); + + if (record is not null) + { + yield return record; + } + } + } + + /// + public Task DeleteAsync(string key, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) + { + var collectionDictionary = this.GetCollectionDictionary(options?.CollectionName); + + collectionDictionary.TryRemove(key, out _); + return Task.CompletedTask; + } + + /// + public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) + { + var collectionDictionary = this.GetCollectionDictionary(options?.CollectionName); + + foreach (var key in keys) + { + collectionDictionary.TryRemove(key, out _); + } + + return Task.CompletedTask; + } + + /// + public Task UpsertAsync(TRecord record, UpsertRecordOptions? 
options = null, CancellationToken cancellationToken = default) + { + var collectionDictionary = this.GetCollectionDictionary(options?.CollectionName); + + var key = this._keyPropertyInfo.GetValue(record) as string; + collectionDictionary.AddOrUpdate(key!, record, (key, currentValue) => record); + + return Task.FromResult(key!); + } + + /// + public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + foreach (var record in records) + { + yield return await this.UpsertAsync(record, options, cancellationToken).ConfigureAwait(false); + } + } + + /// + /// Get a collection dictionary from the internal storage, creating it if it does not exist. + /// Use the provided collection name if not null, and fall back to the default collection name otherwise. + /// + /// The collection name passed to the operation. + /// The retrieved collection dictionary. + private ConcurrentDictionary GetCollectionDictionary(string? collectionName) + { + string? 
chosenCollectionName = null; + + if (collectionName is not null) + { + chosenCollectionName = collectionName; + } + else if (this._options.DefaultCollectionName is not null) + { + chosenCollectionName = this._options.DefaultCollectionName; + } + else + { +#pragma warning disable CA2208 // Instantiate argument exceptions correctly + throw new ArgumentException("Collection name must be provided in the operation options, since no default was provided at construction time.", "options"); +#pragma warning restore CA2208 // Instantiate argument exceptions correctly + } + + return this._internalCollection.GetOrAdd(chosenCollectionName, _ => new()); + } +} diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStoreOptions.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStoreOptions.cs new file mode 100644 index 000000000000..80506d02ede8 --- /dev/null +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStoreOptions.cs @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.SemanticKernel.Data; + +/// +/// Options when creating a . +/// +[Experimental("SKEXP0001")] +public sealed class VolatileVectorRecordStoreOptions +{ + /// + /// Gets or sets the default collection name to use. + /// If not provided here, the collection name will need to be provided for each operation or the operation will throw. + /// + public string? DefaultCollectionName { get; init; } = null; + + /// + /// Gets or sets an optional record definition that defines the schema of the record type. + /// + /// + /// If not provided, the schema will be inferred from the record model class using reflection. + /// In this case, the record model properties must be annotated with the appropriate attributes to indicate their usage. + /// See , and . + /// + public VectorStoreRecordDefinition? 
VectorStoreRecordDefinition { get; init; } = null; +} diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorRecordStoreTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorRecordStoreTests.cs new file mode 100644 index 000000000000..e6dc633bed02 --- /dev/null +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorRecordStoreTests.cs @@ -0,0 +1,272 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Concurrent; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Data; +using Xunit; + +namespace SemanticKernel.UnitTests.Data; + +/// +/// Contains tests for the class. +/// +public class VolatileVectorRecordStoreTests +{ + private const string TestCollectionName = "testcollection"; + private const string TestRecordKey1 = "testid1"; + private const string TestRecordKey2 = "testid2"; + + private readonly CancellationToken _testCancellationToken = new(false); + + private readonly ConcurrentDictionary> _collectionStore; + + public VolatileVectorRecordStoreTests() + { + this._collectionStore = new(); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + var record = CreateModel(TestRecordKey1, withVectors: true); + var collection = new ConcurrentDictionary(); + collection.TryAdd(TestRecordKey1, record); + this._collectionStore.TryAdd(TestCollectionName, collection); + + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act + var actual = await sut.GetAsync( + TestRecordKey1, + new() + { + IncludeVectors = true, + CollectionName = passCollectionToMethod ? 
TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + var expectedArgs = new object[] { TestRecordKey1 }; + + Assert.NotNull(actual); + Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data testid1", actual.Data); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + var record1 = CreateModel(TestRecordKey1, withVectors: true); + var record2 = CreateModel(TestRecordKey2, withVectors: true); + var collection = new ConcurrentDictionary(); + collection.TryAdd(TestRecordKey1, record1); + collection.TryAdd(TestRecordKey2, record2); + this._collectionStore.TryAdd(TestCollectionName, collection); + + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act + var actual = await sut.GetBatchAsync( + [TestRecordKey1, TestRecordKey2], + new() + { + IncludeVectors = true, + CollectionName = passCollectionToMethod ? 
TestCollectionName : null + }, + this._testCancellationToken).ToListAsync(); + + // Assert + Assert.NotNull(actual); + Assert.Equal(2, actual.Count); + Assert.Equal(TestRecordKey1, actual[0].Key); + Assert.Equal("data testid1", actual[0].Data); + Assert.Equal(TestRecordKey2, actual[1].Key); + Assert.Equal("data testid2", actual[1].Data); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanDeleteRecordAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + var record1 = CreateModel(TestRecordKey1, withVectors: true); + var record2 = CreateModel(TestRecordKey2, withVectors: true); + var collection = new ConcurrentDictionary(); + collection.TryAdd(TestRecordKey1, record1); + collection.TryAdd(TestRecordKey2, record2); + this._collectionStore.TryAdd(TestCollectionName, collection); + + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act + await sut.DeleteAsync( + TestRecordKey1, + new() + { + CollectionName = passCollectionToMethod ? 
TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + Assert.False(collection.ContainsKey(TestRecordKey1)); + Assert.True(collection.ContainsKey(TestRecordKey2)); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + var record1 = CreateModel(TestRecordKey1, withVectors: true); + var record2 = CreateModel(TestRecordKey2, withVectors: true); + var collection = new ConcurrentDictionary(); + collection.TryAdd(TestRecordKey1, record1); + collection.TryAdd(TestRecordKey2, record2); + this._collectionStore.TryAdd(TestCollectionName, collection); + + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act + await sut.DeleteBatchAsync( + [TestRecordKey1, TestRecordKey2], + new() + { + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + Assert.False(collection.ContainsKey(TestRecordKey1)); + Assert.False(collection.ContainsKey(TestRecordKey2)); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanUpsertRecordAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + var record1 = CreateModel(TestRecordKey1, withVectors: true); + var collection = new ConcurrentDictionary(); + this._collectionStore.TryAdd(TestCollectionName, collection); + + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act + var upsertResult = await sut.UpsertAsync( + record1, + new() + { + CollectionName = passCollectionToMethod ? 
TestCollectionName : null + }, + this._testCancellationToken); + + // Assert + Assert.Equal(TestRecordKey1, upsertResult); + Assert.True(collection.ContainsKey(TestRecordKey1)); + Assert.Equal("data testid1", collection[TestRecordKey1].Data); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passCollectionToMethod) + { + // Arrange + var record1 = CreateModel(TestRecordKey1, withVectors: true); + var record2 = CreateModel(TestRecordKey2, withVectors: true); + + var collection = new ConcurrentDictionary(); + this._collectionStore.TryAdd(TestCollectionName, collection); + + var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + + // Act + var actual = await sut.UpsertBatchAsync( + [record1, record2], + new() + { + CollectionName = passCollectionToMethod ? TestCollectionName : null + }, + this._testCancellationToken).ToListAsync(); + + // Assert + Assert.NotNull(actual); + Assert.Equal(2, actual.Count); + Assert.Equal(TestRecordKey1, actual[0]); + Assert.Equal(TestRecordKey2, actual[1]); + + Assert.True(collection.ContainsKey(TestRecordKey1)); + Assert.Equal("data testid1", collection[TestRecordKey1].Data); + } + + private static SinglePropsModel CreateModel(string key, bool withVectors) + { + return new SinglePropsModel + { + Key = key, + Data = "data " + key, + Vector = withVectors ? new float[] { 1, 2, 3, 4 } : null, + NotAnnotated = null, + }; + } + + private VolatileVectorRecordStore CreateVectorRecordStore(bool useDefinition, bool passCollectionToMethod) + { + return new VolatileVectorRecordStore( + this._collectionStore, + new() + { + DefaultCollectionName = passCollectionToMethod ? null : TestCollectionName, + VectorStoreRecordDefinition = useDefinition ? 
this._singlePropsDefinition : null + }); + } + + private readonly VectorStoreRecordDefinition _singlePropsDefinition = new() + { + Properties = + [ + new VectorStoreRecordKeyProperty("Key"), + new VectorStoreRecordDataProperty("Data"), + new VectorStoreRecordVectorProperty("Vector") + ] + }; + + public sealed class SinglePropsModel + { + [VectorStoreRecordKey] + public string Key { get; set; } = string.Empty; + + [VectorStoreRecordData] + public string Data { get; set; } = string.Empty; + + [VectorStoreRecordVector] + public ReadOnlyMemory? Vector { get; set; } + + public string? NotAnnotated { get; set; } + } +} From 21c94a7ff32430fd9c805284a34af2b28cd40a83 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Wed, 10 Jul 2024 16:26:00 +0100 Subject: [PATCH 14/48] .Net: Switching all vector record store instances to be tied to a single collection (#7178) ### Motivation and Context As part of the new vector store design, we have decided to follow a pattern where each vector record store instance is tied to a single collection and we will have a separate factory interface for getting instances by collection name. ### Description - Removing optional collection names from all options classes. - Adding mandatory collection name to vector record store constructors. 
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../AzureAISearchVectorRecordStoreTests.cs | 108 +++++-------- .../AzureAISearchVectorRecordStore.cs | 128 ++++----------- .../AzureAISearchVectorRecordStoreOptions.cs | 6 - .../QdrantVectorRecordStore.cs | 109 +++++-------- .../QdrantVectorRecordStoreOptions.cs | 6 - .../RedisVectorRecordStore.cs | 79 +++------- .../RedisVectorRecordStoreOptions.cs | 6 - .../QdrantVectorRecordStoreTests.cs | 148 ++++++------------ .../RedisVectorRecordStoreTests.cs | 142 +++++------------ .../AzureAISearchVectorRecordStoreTests.cs | 40 ++--- .../Qdrant/QdrantVectorRecordStoreTests.cs | 31 ++-- .../Redis/RedisVectorRecordStoreTests.cs | 31 ++-- .../Data/RecordOptions/DeleteRecordOptions.cs | 7 +- .../Data/RecordOptions/GetRecordOptions.cs | 6 - .../Data/RecordOptions/UpsertRecordOptions.cs | 7 +- .../Data/VolatileVectorRecordStore.cs | 47 +++--- .../Data/VolatileVectorRecordStoreOptions.cs | 6 - .../Data/VolatileVectorRecordStoreTests.cs | 94 ++++------- 18 files changed, 312 insertions(+), 689 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs index f14868b498aa..8994a8271e81 100644 --- a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs @@ -40,11 +40,9 @@ public AzureAISearchVectorRecordStoreTests() } 
[Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanGetRecordWithVectorsAsync(bool useDefinition) { // Arrange. this._searchClientMock.Setup( @@ -54,16 +52,12 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passColl this._testCancellationToken)) .ReturnsAsync(Response.FromValue(CreateModel(TestRecordKey1, true), Mock.Of())); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act. var actual = await sut.GetAsync( TestRecordKey1, - new() - { - IncludeVectors = true, - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, + new() { IncludeVectors = true }, this._testCancellationToken); // Assert. @@ -74,11 +68,9 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passColl } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) { // Arrange. var storageObject = JsonSerializer.SerializeToNode(CreateModel(TestRecordKey1, false))!.AsObject(); @@ -90,16 +82,12 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool passC this._testCancellationToken)) .ReturnsAsync(Response.FromValue(CreateModel(TestRecordKey1, true), Mock.Of())); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act. 
var actual = await sut.GetAsync( TestRecordKey1, - new() - { - IncludeVectors = false, - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, + new() { IncludeVectors = false }, this._testCancellationToken); // Assert. @@ -109,11 +97,9 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool passC } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) { // Arrange. this._searchClientMock.Setup( @@ -126,16 +112,12 @@ public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool pas return Response.FromValue(CreateModel(id, true), Mock.Of()); }); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act. var actual = await sut.GetBatchAsync( [TestRecordKey1, TestRecordKey2], - new() - { - IncludeVectors = true, - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, + new() { IncludeVectors = true }, this._testCancellationToken).ToListAsync(); // Assert. @@ -170,9 +152,9 @@ public async Task CanGetRecordWithCustomMapperAsync() // Arrange target with custom mapper. 
var sut = new AzureAISearchVectorRecordStore( this._searchIndexClientMock.Object, + TestCollectionName, new() { - DefaultCollectionName = TestCollectionName, MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = mapperMock.Object }); @@ -188,11 +170,9 @@ public async Task CanGetRecordWithCustomMapperAsync() } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanDeleteRecordAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanDeleteRecordAsync(bool useDefinition) { // Arrange. #pragma warning disable Moq1002 // Moq: No matching constructor @@ -207,13 +187,12 @@ public async Task CanDeleteRecordAsync(bool useDefinition, bool passCollectionTo this._testCancellationToken)) .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act. await sut.DeleteAsync( TestRecordKey1, - new() { CollectionName = passCollectionToMethod ? TestCollectionName : null }, - this._testCancellationToken); + cancellationToken: this._testCancellationToken); // Assert. this._searchClientMock.Verify( @@ -226,11 +205,9 @@ await sut.DeleteAsync( } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition) { // Arrange. 
#pragma warning disable Moq1002 // Moq: No matching constructor @@ -245,13 +222,12 @@ public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition, bool this._testCancellationToken)) .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act. await sut.DeleteBatchAsync( [TestRecordKey1, TestRecordKey2], - new() { CollectionName = passCollectionToMethod ? TestCollectionName : null }, - this._testCancellationToken); + cancellationToken: this._testCancellationToken); // Assert. this._searchClientMock.Verify( @@ -264,11 +240,9 @@ await sut.DeleteBatchAsync( } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanUpsertRecordAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanUpsertRecordAsync(bool useDefinition) { // Arrange upload result object. #pragma warning disable Moq1002 // Moq: No matching constructor @@ -287,15 +261,14 @@ public async Task CanUpsertRecordAsync(bool useDefinition, bool passCollectionTo .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); // Arrange sut. - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); var model = CreateModel(TestRecordKey1, true); // Act. var actual = await sut.UpsertAsync( model, - new() { CollectionName = passCollectionToMethod ? TestCollectionName : null }, - this._testCancellationToken); + cancellationToken: this._testCancellationToken); // Assert. 
Assert.NotNull(actual); @@ -309,11 +282,9 @@ public async Task CanUpsertRecordAsync(bool useDefinition, bool passCollectionTo } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanUpsertManyRecordsAsync(bool useDefinition) { // Arrange upload result object. #pragma warning disable Moq1002 // Moq: No matching constructor @@ -335,7 +306,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passCollect .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); // Arrange sut. - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); var model1 = CreateModel(TestRecordKey1, true); var model2 = CreateModel(TestRecordKey2, true); @@ -343,8 +314,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passCollect // Act. var actual = await sut.UpsertBatchAsync( [model1, model2], - new() { CollectionName = passCollectionToMethod ? TestCollectionName : null }, - this._testCancellationToken).ToListAsync(); + cancellationToken: this._testCancellationToken).ToListAsync(); // Assert. Assert.NotNull(actual); @@ -396,9 +366,9 @@ public async Task CanUpsertRecordWithCustomMapperAsync() // Arrange target with custom mapper. 
var sut = new AzureAISearchVectorRecordStore( this._searchIndexClientMock.Object, + TestCollectionName, new() { - DefaultCollectionName = TestCollectionName, MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = mapperMock.Object }); @@ -416,13 +386,13 @@ await sut.UpsertAsync( Times.Once); } - private AzureAISearchVectorRecordStore CreateVectorRecordStore(bool useDefinition, bool passCollectionToMethod) + private AzureAISearchVectorRecordStore CreateVectorRecordStore(bool useDefinition) { return new AzureAISearchVectorRecordStore( this._searchIndexClientMock.Object, + TestCollectionName, new() { - DefaultCollectionName = passCollectionToMethod ? null : TestCollectionName, VectorStoreRecordDefinition = useDefinition ? this._singlePropsDefinition : null }); } diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index 39c5166bc1ca..785d18fa6f7a 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -1,7 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. using System; -using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; using System.Reflection; @@ -49,15 +48,18 @@ public sealed class AzureAISearchVectorRecordStore : IVectorRecordStore /// Azure AI Search client that can be used to manage the list of indices in an Azure AI Search Service. private readonly SearchIndexClient _searchIndexClient; - /// The name of the key field for the collections that this class is used with. - private readonly string _keyPropertyName; + /// Azure AI Search client that can be used to manage data in an Azure AI Search Service index. 
+ private readonly SearchClient _searchClient; - /// Azure AI Search clients that can be used to manage data in an Azure AI Search Service index. - private readonly ConcurrentDictionary _searchClientsByIndex = new(); + /// The name of the collection that this will access. + private readonly string _collectionName; /// Optional configuration options for this class. private readonly AzureAISearchVectorRecordStoreOptions _options; + /// The name of the key field for the collections that this class is used with. + private readonly string _keyPropertyName; + /// The names of all non vector fields on the current model. private readonly List _nonVectorPropertyNames; @@ -65,17 +67,21 @@ public sealed class AzureAISearchVectorRecordStore : IVectorRecordStore /// Initializes a new instance of the class. /// /// Azure AI Search client that can be used to manage the list of indices in an Azure AI Search Service. + /// The name of the collection that this will access. /// Optional configuration options for this class. /// Thrown when is null. /// Thrown when options are misconfigured. - public AzureAISearchVectorRecordStore(SearchIndexClient searchIndexClient, AzureAISearchVectorRecordStoreOptions? options = default) + public AzureAISearchVectorRecordStore(SearchIndexClient searchIndexClient, string collectionName, AzureAISearchVectorRecordStoreOptions? options = default) { // Verify. Verify.NotNull(searchIndexClient); + Verify.NotNullOrWhiteSpace(collectionName); // Assign. this._searchIndexClient = searchIndexClient; + this._collectionName = collectionName; this._options = options ?? new AzureAISearchVectorRecordStoreOptions(); + this._searchClient = this._searchIndexClient.GetSearchClient(collectionName); // Verify custom mapper. 
if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper && this._options.JsonObjectCustomMapper is null) @@ -111,12 +117,10 @@ public AzureAISearchVectorRecordStore(SearchIndexClient searchIndexClient, Azure // Create Options. var innerOptions = this.ConvertGetDocumentOptions(options); - var collectionName = this.ChooseCollectionName(options?.CollectionName); var includeVectors = options?.IncludeVectors ?? false; // Get record. - var searchClient = this.GetSearchClient(collectionName); - return this.GetDocumentAndMapToDataModelAsync(searchClient, collectionName, key, includeVectors, innerOptions, cancellationToken); + return this.GetDocumentAndMapToDataModelAsync(key, includeVectors, innerOptions, cancellationToken); } /// @@ -126,12 +130,10 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G // Create Options var innerOptions = this.ConvertGetDocumentOptions(options); - var collectionName = this.ChooseCollectionName(options?.CollectionName); var includeVectors = options?.IncludeVectors ?? false; // Get records in parallel. - var searchClient = this.GetSearchClient(collectionName); - var tasks = keys.Select(key => this.GetDocumentAndMapToDataModelAsync(searchClient, collectionName, key, includeVectors, innerOptions, cancellationToken)); + var tasks = keys.Select(key => this.GetDocumentAndMapToDataModelAsync(key, includeVectors, innerOptions, cancellationToken)); var results = await Task.WhenAll(tasks).ConfigureAwait(false); foreach (var result in results) { @@ -147,15 +149,10 @@ public Task DeleteAsync(string key, DeleteRecordOptions? options = default, Canc { Verify.NotNullOrWhiteSpace(key); - // Create options. - var collectionName = this.ChooseCollectionName(options?.CollectionName); - // Remove record. 
- var searchClient = this.GetSearchClient(collectionName); - return RunOperationAsync( - collectionName, + return this.RunOperationAsync( "DeleteDocuments", - () => searchClient.DeleteDocumentsAsync(this._keyPropertyName, [key], new IndexDocumentsOptions(), cancellationToken)); + () => this._searchClient.DeleteDocumentsAsync(this._keyPropertyName, [key], new IndexDocumentsOptions(), cancellationToken)); } /// @@ -163,15 +160,10 @@ public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? opti { Verify.NotNull(keys); - // Create options. - var collectionName = this.ChooseCollectionName(options?.CollectionName); - // Remove records. - var searchClient = this.GetSearchClient(collectionName); - return RunOperationAsync( - collectionName, + return this.RunOperationAsync( "DeleteDocuments", - () => searchClient.DeleteDocumentsAsync(this._keyPropertyName, keys, new IndexDocumentsOptions(), cancellationToken)); + () => this._searchClient.DeleteDocumentsAsync(this._keyPropertyName, keys, new IndexDocumentsOptions(), cancellationToken)); } /// @@ -180,12 +172,10 @@ public async Task UpsertAsync(TRecord record, UpsertRecordOptions? optio Verify.NotNull(record); // Create options. - var collectionName = this.ChooseCollectionName(options?.CollectionName); var innerOptions = new IndexDocumentsOptions { ThrowOnAnyError = true }; // Upsert record. 
- var searchClient = this.GetSearchClient(collectionName); - var results = await this.MapToStorageModelAndUploadDocumentAsync(searchClient, collectionName, [record], innerOptions, cancellationToken).ConfigureAwait(false); + var results = await this.MapToStorageModelAndUploadDocumentAsync([record], innerOptions, cancellationToken).ConfigureAwait(false); return results.Value.Results[0].Key; } @@ -195,12 +185,10 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco Verify.NotNull(records); // Create Options - var collectionName = this.ChooseCollectionName(options?.CollectionName); var innerOptions = new IndexDocumentsOptions { ThrowOnAnyError = true }; // Upsert records - var searchClient = this.GetSearchClient(collectionName); - var results = await this.MapToStorageModelAndUploadDocumentAsync(searchClient, collectionName, records, innerOptions, cancellationToken).ConfigureAwait(false); + var results = await this.MapToStorageModelAndUploadDocumentAsync(records, innerOptions, cancellationToken).ConfigureAwait(false); // Get results var resultKeys = results.Value.Results.Select(x => x.Key).ToList(); @@ -210,16 +198,12 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco /// /// Get the document with the given key and map it to the data model using the configured mapper type. /// - /// The search client to use when fetching the document. - /// The name of the collection to retrieve the record from. /// The key of the record to get. /// A value indicating whether to include vectors in the result or not. /// The Azure AI Search sdk options for getting a document. /// The to monitor for cancellation requests. The default is . /// The retrieved document, mapped to the consumer data model. 
private async Task GetDocumentAndMapToDataModelAsync( - SearchClient searchClient, - string collectionName, string key, bool includeVectors, GetDocumentOptions innerOptions, @@ -230,10 +214,9 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco // Use the user provided mapper. if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper) { - var jsonObject = await RunOperationAsync( - collectionName, + var jsonObject = await this.RunOperationAsync( OperationName, - () => GetDocumentWithNotFoundHandlingAsync(searchClient, key, innerOptions, cancellationToken)).ConfigureAwait(false); + () => GetDocumentWithNotFoundHandlingAsync(this._searchClient, key, innerOptions, cancellationToken)).ConfigureAwait(false); if (jsonObject is null) { @@ -242,30 +225,25 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco return VectorStoreErrorHandler.RunModelConversion( DatabaseName, - collectionName, + this._collectionName, OperationName, () => this._options.JsonObjectCustomMapper!.MapFromStorageToDataModel(jsonObject, new() { IncludeVectors = includeVectors })); } // Use the built in Azure AI Search mapper. - return await RunOperationAsync( - collectionName, + return await this.RunOperationAsync( OperationName, - () => GetDocumentWithNotFoundHandlingAsync(searchClient, key, innerOptions, cancellationToken)).ConfigureAwait(false); + () => GetDocumentWithNotFoundHandlingAsync(this._searchClient, key, innerOptions, cancellationToken)).ConfigureAwait(false); } /// /// Map the data model to the storage model and upload the document. /// - /// The search client to use when uploading the document. - /// The name of the collection to upsert the records to. /// The records to upload. /// The Azure AI Search sdk options for uploading a document. /// The to monitor for cancellation requests. The default is . /// The document upload result. 
private Task> MapToStorageModelAndUploadDocumentAsync( - SearchClient searchClient, - string collectionName, IEnumerable records, IndexDocumentsOptions innerOptions, CancellationToken cancellationToken) @@ -277,58 +255,19 @@ private Task> MapToStorageModelAndUploadDocumentA { var jsonObjects = VectorStoreErrorHandler.RunModelConversion( DatabaseName, - collectionName, + this._collectionName, OperationName, () => records.Select(this._options.JsonObjectCustomMapper!.MapFromDataToStorageModel)); - return RunOperationAsync( - collectionName, + return this.RunOperationAsync( OperationName, - () => searchClient.UploadDocumentsAsync(jsonObjects, innerOptions, cancellationToken)); + () => this._searchClient.UploadDocumentsAsync(jsonObjects, innerOptions, cancellationToken)); } // Use the built in Azure AI Search mapper. - return RunOperationAsync( - collectionName, + return this.RunOperationAsync( OperationName, - () => searchClient.UploadDocumentsAsync(records, innerOptions, cancellationToken)); - } - - /// - /// Get a search client for the index specified. - /// Note: the index might not exist, but we avoid checking everytime and the extra latency. - /// - /// Index name - /// Search client ready to read/write - private SearchClient GetSearchClient(string indexName) - { - // Check the local cache first, if not found create a new one. - if (!this._searchClientsByIndex.TryGetValue(indexName, out SearchClient? client)) - { - client = this._searchIndexClient.GetSearchClient(indexName); - this._searchClientsByIndex[indexName] = client; - } - - return client; - } - - /// - /// Choose the right collection name to use for the operation by using the one provided - /// as part of the operation options, or the default one provided at construction time. - /// - /// The collection name provided on the operation options. - /// The collection name to use. - private string ChooseCollectionName(string? operationCollectionName) - { - var collectionName = operationCollectionName ?? 
this._options.DefaultCollectionName; - if (collectionName is null) - { -#pragma warning disable CA2208 // Instantiate argument exceptions correctly - throw new ArgumentException("Collection name must be provided in the operation options, since no default was provided at construction time.", "options"); -#pragma warning restore CA2208 // Instantiate argument exceptions correctly - } - - return collectionName; + () => this._searchClient.UploadDocumentsAsync(records, innerOptions, cancellationToken)); } /// @@ -376,11 +315,10 @@ private GetDocumentOptions ConvertGetDocumentOptions(GetRecordOptions? options) /// Run the given operation and wrap any with ."/> /// /// The response type of the operation. - /// The name of the collection the operation is being run on. /// The type of database operation being run. /// The operation to run. /// The result of the operation. - private static async Task RunOperationAsync(string collectionName, string operationName, Func> operation) + private async Task RunOperationAsync(string operationName, Func> operation) { try { @@ -391,7 +329,7 @@ private static async Task RunOperationAsync(string collectionName, string throw new VectorStoreOperationException("Call to vector store failed.", ex) { VectorStoreType = DatabaseName, - CollectionName = collectionName, + CollectionName = this._collectionName, OperationName = operationName }; } @@ -400,7 +338,7 @@ private static async Task RunOperationAsync(string collectionName, string throw new VectorStoreOperationException("Call to vector store failed.", ex) { VectorStoreType = DatabaseName, - CollectionName = collectionName, + CollectionName = this._collectionName, OperationName = operationName }; } diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs index cf73ecbf1784..2f9ca0257132 100644 --- 
a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs @@ -13,12 +13,6 @@ namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; public sealed class AzureAISearchVectorRecordStoreOptions where TRecord : class { - /// - /// Gets or sets the default collection name to use. - /// If not provided here, the collection name will need to be provided for each operation or the operation will throw. - /// - public string? DefaultCollectionName { get; init; } = null; - /// /// Gets or sets the choice of mapper to use when converting between the data model and the Azure AI Search record. /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs index d9cf89944e3a..63d38d3f6e9c 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs @@ -32,6 +32,9 @@ public sealed class QdrantVectorRecordStore : IVectorRecordStoreQdrant client that can be used to manage the collections and points in a Qdrant store. private readonly MockableQdrantClient _qdrantClient; + /// The name of the collection that this will access. + private readonly string _collectionName; + /// Optional configuration options for this class. private readonly QdrantVectorRecordStoreOptions _options; @@ -42,11 +45,12 @@ public sealed class QdrantVectorRecordStore : IVectorRecordStore class. /// /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + /// The name of the collection that this will access. /// Optional configuration options for this class. /// Thrown if the is null. /// Thrown for any misconfigured options. - public QdrantVectorRecordStore(QdrantClient qdrantClient, QdrantVectorRecordStoreOptions? 
options = null) - : this(new MockableQdrantClient(qdrantClient), options) + public QdrantVectorRecordStore(QdrantClient qdrantClient, string collectionName, QdrantVectorRecordStoreOptions? options = null) + : this(new MockableQdrantClient(qdrantClient), collectionName, options) { } @@ -54,16 +58,19 @@ public QdrantVectorRecordStore(QdrantClient qdrantClient, QdrantVectorRecordStor /// Initializes a new instance of the class. /// /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + /// The name of the collection that this will access. /// Optional configuration options for this class. /// Thrown if the is null. /// Thrown for any misconfigured options. - internal QdrantVectorRecordStore(MockableQdrantClient qdrantClient, QdrantVectorRecordStoreOptions? options = null) + internal QdrantVectorRecordStore(MockableQdrantClient qdrantClient, string collectionName, QdrantVectorRecordStoreOptions? options = null) { // Verify. Verify.NotNull(qdrantClient); + Verify.NotNullOrWhiteSpace(collectionName); // Assign. this._qdrantClient = qdrantClient; + this._collectionName = collectionName; this._options = options ?? new QdrantVectorRecordStoreOptions(); // Assign Mapper. @@ -123,12 +130,10 @@ public Task DeleteAsync(ulong key, DeleteRecordOptions? options = null, Cancella { Verify.NotNull(key); - var collectionName = this.ChooseCollectionName(options?.CollectionName); - return RunOperationAsync( - collectionName, + return this.RunOperationAsync( DeleteName, () => this._qdrantClient.DeleteAsync( - collectionName, + this._collectionName, key, wait: true, cancellationToken: cancellationToken)); @@ -139,12 +144,10 @@ public Task DeleteAsync(Guid key, DeleteRecordOptions? 
options = null, Cancellat { Verify.NotNull(key); - var collectionName = this.ChooseCollectionName(options?.CollectionName); - return RunOperationAsync( - collectionName, + return this.RunOperationAsync( DeleteName, () => this._qdrantClient.DeleteAsync( - collectionName, + this._collectionName, key, wait: true, cancellationToken: cancellationToken)); @@ -155,12 +158,10 @@ public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? optio { Verify.NotNull(keys); - var collectionName = this.ChooseCollectionName(options?.CollectionName); - return RunOperationAsync( - collectionName, + return this.RunOperationAsync( DeleteName, () => this._qdrantClient.DeleteAsync( - collectionName, + this._collectionName, keys.ToList(), wait: true, cancellationToken: cancellationToken)); @@ -171,12 +172,10 @@ public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? option { Verify.NotNull(keys); - var collectionName = this.ChooseCollectionName(options?.CollectionName); - return RunOperationAsync( - collectionName, + return this.RunOperationAsync( DeleteName, () => this._qdrantClient.DeleteAsync( - collectionName, + this._collectionName, keys.ToList(), wait: true, cancellationToken: cancellationToken)); @@ -187,21 +186,17 @@ public async Task UpsertAsync(TRecord record, UpsertRecordOptions? option { Verify.NotNull(record); - // Create options. - var collectionName = this.ChooseCollectionName(options?.CollectionName); - // Create point from record. var pointStruct = VectorStoreErrorHandler.RunModelConversion( DatabaseName, - collectionName, + this._collectionName, UpsertName, () => this._mapper.MapFromDataToStorageModel(record)); // Upsert. 
- await RunOperationAsync( - collectionName, + await this.RunOperationAsync( UpsertName, - () => this._qdrantClient.UpsertAsync(collectionName, [pointStruct], true, cancellationToken: cancellationToken)).ConfigureAwait(false); + () => this._qdrantClient.UpsertAsync(this._collectionName, [pointStruct], true, cancellationToken: cancellationToken)).ConfigureAwait(false); return pointStruct.Id.Num; } @@ -210,21 +205,17 @@ async Task IVectorRecordStore.UpsertAsync(TRecord record, U { Verify.NotNull(record); - // Create options. - var collectionName = this.ChooseCollectionName(options?.CollectionName); - // Create point from record. var pointStruct = VectorStoreErrorHandler.RunModelConversion( DatabaseName, - collectionName, + this._collectionName, UpsertName, () => this._mapper.MapFromDataToStorageModel(record)); // Upsert. - await RunOperationAsync( - collectionName, + await this.RunOperationAsync( UpsertName, - () => this._qdrantClient.UpsertAsync(collectionName, [pointStruct], true, cancellationToken: cancellationToken)).ConfigureAwait(false); + () => this._qdrantClient.UpsertAsync(this._collectionName, [pointStruct], true, cancellationToken: cancellationToken)).ConfigureAwait(false); return Guid.Parse(pointStruct.Id.Uuid); } @@ -233,21 +224,17 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable recor { Verify.NotNull(records); - // Create Options - var collectionName = this.ChooseCollectionName(options?.CollectionName); - // Create points from records. var pointStructs = VectorStoreErrorHandler.RunModelConversion( DatabaseName, - collectionName, + this._collectionName, UpsertName, () => records.Select(this._mapper.MapFromDataToStorageModel).ToList()); // Upsert. 
- await RunOperationAsync( - collectionName, + await this.RunOperationAsync( UpsertName, - () => this._qdrantClient.UpsertAsync(collectionName, pointStructs, true, cancellationToken: cancellationToken)).ConfigureAwait(false); + () => this._qdrantClient.UpsertAsync(this._collectionName, pointStructs, true, cancellationToken: cancellationToken)).ConfigureAwait(false); foreach (var pointStruct in pointStructs) { @@ -260,21 +247,17 @@ async IAsyncEnumerable IVectorRecordStore.UpsertBatchAsync( { Verify.NotNull(records); - // Create Options - var collectionName = this.ChooseCollectionName(options?.CollectionName); - // Create points from records. var pointStructs = VectorStoreErrorHandler.RunModelConversion( DatabaseName, - collectionName, + this._collectionName, UpsertName, () => records.Select(this._mapper.MapFromDataToStorageModel).ToList()); // Upsert. - await RunOperationAsync( - collectionName, + await this.RunOperationAsync( UpsertName, - () => this._qdrantClient.UpsertAsync(collectionName, pointStructs, true, cancellationToken: cancellationToken)).ConfigureAwait(false); + () => this._qdrantClient.UpsertAsync(this._collectionName, pointStructs, true, cancellationToken: cancellationToken)).ConfigureAwait(false); foreach (var pointStruct in pointStructs) { @@ -300,15 +283,13 @@ private async IAsyncEnumerable GetBatchByPointIdAsync( Verify.NotNull(keys); // Create options. - var collectionName = this.ChooseCollectionName(options?.CollectionName); var pointsIds = keys.Select(key => keyConverter(key)).ToArray(); var includeVectors = options?.IncludeVectors ?? false; // Retrieve data points. 
- var retrievedPoints = await RunOperationAsync( - collectionName, + var retrievedPoints = await this.RunOperationAsync( OperationName, - () => this._qdrantClient.RetrieveAsync(collectionName, pointsIds, true, includeVectors, cancellationToken: cancellationToken)).ConfigureAwait(false); + () => this._qdrantClient.RetrieveAsync(this._collectionName, pointsIds, true, includeVectors, cancellationToken: cancellationToken)).ConfigureAwait(false); // Convert the retrieved points to the target data model. foreach (var retrievedPoint in retrievedPoints) @@ -327,40 +308,20 @@ private async IAsyncEnumerable GetBatchByPointIdAsync( yield return VectorStoreErrorHandler.RunModelConversion( DatabaseName, - collectionName, + this._collectionName, OperationName, () => this._mapper.MapFromStorageToDataModel(pointStruct, new() { IncludeVectors = includeVectors })); } } - /// - /// Choose the right collection name to use for the operation by using the one provided - /// as part of the operation options, or the default one provided at construction time. - /// - /// The collection name provided on the operation options. - /// The collection name to use. - private string ChooseCollectionName(string? operationCollectionName) - { - var collectionName = operationCollectionName ?? this._options.DefaultCollectionName; - if (collectionName is null) - { -#pragma warning disable CA2208 // Instantiate argument exceptions correctly - throw new ArgumentException("Collection name must be provided in the operation options, since no default was provided at construction time.", "options"); -#pragma warning restore CA2208 // Instantiate argument exceptions correctly - } - - return collectionName; - } - /// /// Run the given operation and wrap any with ."/> /// /// The response type of the operation. - /// The name of the collection the operation is being run on. /// The type of database operation being run. /// The operation to run. /// The result of the operation. 
- private static async Task RunOperationAsync(string collectionName, string operationName, Func> operation) + private async Task RunOperationAsync(string operationName, Func> operation) { try { @@ -371,7 +332,7 @@ private static async Task RunOperationAsync(string collectionName, string throw new VectorStoreOperationException("Call to vector store failed.", ex) { VectorStoreType = DatabaseName, - CollectionName = collectionName, + CollectionName = this._collectionName, OperationName = operationName }; } diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs index d16fad7c33b7..2eb6f6c3f53c 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs @@ -11,12 +11,6 @@ namespace Microsoft.SemanticKernel.Connectors.Qdrant; public sealed class QdrantVectorRecordStoreOptions where TRecord : class { - /// - /// Gets or sets the default collection name to use. - /// If not provided here, the collection name will need to be provided for each operation or the operation will throw. - /// - public string? DefaultCollectionName { get; init; } = null; - /// /// Gets or sets a value indicating whether the vectors in the store are named and multiple vectors are supported, or whether there is just a single unnamed vector per qdrant point. /// Defaults to single vector per point. 
diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs index 480230f6f65c..c82ec9e8d296 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs @@ -44,6 +44,9 @@ public sealed class RedisVectorRecordStore : IVectorRecordStoreThe Redis database to read/write records from. private readonly IDatabase _database; + /// The name of the collection that this will access. + private readonly string _collectionName; + /// Optional configuration options for this class. private readonly RedisVectorRecordStoreOptions _options; @@ -66,15 +69,18 @@ public sealed class RedisVectorRecordStore : IVectorRecordStore class. /// /// The Redis database to read/write records from. + /// The name of the collection that this will access. /// Optional configuration options for this class. /// Throw when parameters are invalid. - public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions? options = null) + public RedisVectorRecordStore(IDatabase database, string collectionName, RedisVectorRecordStoreOptions? options = null) { // Verify. Verify.NotNull(database); + Verify.NotNullOrWhiteSpace(collectionName); // Assign. this._database = database; + this._collectionName = collectionName; this._options = options ?? new RedisVectorRecordStoreOptions(); this._jsonSerializerOptions = this._options.JsonSerializerOptions ?? JsonSerializerOptions.Default; @@ -123,13 +129,11 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< Verify.NotNullOrWhiteSpace(key); // Create Options - var collectionName = this.ChooseCollectionName(options?.CollectionName); - var maybePrefixedKey = this.PrefixKeyIfNeeded(key, collectionName); + var maybePrefixedKey = this.PrefixKeyIfNeeded(key); var includeVectors = options?.IncludeVectors ?? 
false; // Get the Redis value. - var redisResult = await RunOperationAsync( - collectionName, + var redisResult = await this.RunOperationAsync( "GET", () => options?.IncludeVectors is true ? this._database @@ -155,7 +159,7 @@ public RedisVectorRecordStore(IDatabase database, RedisVectorRecordStoreOptions< // Convert to the caller's data model. return VectorStoreErrorHandler.RunModelConversion( DatabaseName, - collectionName, + this._collectionName, "GET", () => { @@ -171,14 +175,12 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G var keysList = keys.ToList(); // Create Options - var collectionName = this.ChooseCollectionName(options?.CollectionName); - var maybePrefixedKeys = keysList.Select(key => this.PrefixKeyIfNeeded(key, collectionName)); + var maybePrefixedKeys = keysList.Select(key => this.PrefixKeyIfNeeded(key)); var redisKeys = maybePrefixedKeys.Select(x => new RedisKey(x)).ToArray(); var includeVectors = options?.IncludeVectors ?? false; // Get the list of Redis results. - var redisResults = await RunOperationAsync( - collectionName, + var redisResults = await this.RunOperationAsync( "MGET", () => this._database .JSON() @@ -206,7 +208,7 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G // Convert to the caller's data model. yield return VectorStoreErrorHandler.RunModelConversion( DatabaseName, - collectionName, + this._collectionName, "MGET", () => { @@ -222,12 +224,10 @@ public Task DeleteAsync(string key, DeleteRecordOptions? options = default, Canc Verify.NotNullOrWhiteSpace(key); // Create Options - var collectionName = this.ChooseCollectionName(options?.CollectionName); - var maybePrefixedKey = this.PrefixKeyIfNeeded(key, collectionName); + var maybePrefixedKey = this.PrefixKeyIfNeeded(key); // Remove. - return RunOperationAsync( - collectionName, + return this.RunOperationAsync( "DEL", () => this._database .JSON() @@ -249,13 +249,10 @@ public async Task UpsertAsync(TRecord record, UpsertRecordOptions? 
optio { Verify.NotNull(record); - // Create Options - var collectionName = this.ChooseCollectionName(options?.CollectionName); - // Map. var redisJsonRecord = VectorStoreErrorHandler.RunModelConversion( DatabaseName, - collectionName, + this._collectionName, "SET", () => { @@ -265,9 +262,8 @@ public async Task UpsertAsync(TRecord record, UpsertRecordOptions? optio }); // Upsert. - var maybePrefixedKey = this.PrefixKeyIfNeeded(redisJsonRecord.Key, collectionName); - await RunOperationAsync( - collectionName, + var maybePrefixedKey = this.PrefixKeyIfNeeded(redisJsonRecord.Key); + await this.RunOperationAsync( "SET", () => this._database .JSON() @@ -284,16 +280,13 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco { Verify.NotNull(records); - // Create Options - var collectionName = this.ChooseCollectionName(options?.CollectionName); - // Map. var redisRecords = new List<(string maybePrefixedKey, string originalKey, string serializedRecord)>(); foreach (var record in records) { var redisJsonRecord = VectorStoreErrorHandler.RunModelConversion( DatabaseName, - collectionName, + this._collectionName, "MSET", () => { @@ -302,14 +295,13 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco return new { Key = mapResult.Key, SerializedRecord = serializedRecord }; }); - var maybePrefixedKey = this.PrefixKeyIfNeeded(redisJsonRecord.Key, collectionName); + var maybePrefixedKey = this.PrefixKeyIfNeeded(redisJsonRecord.Key); redisRecords.Add((maybePrefixedKey, redisJsonRecord.Key, redisJsonRecord.SerializedRecord)); } // Upsert. var keyPathValues = redisRecords.Select(x => new KeyPathValue(x.maybePrefixedKey, "$", x.serializedRecord)).ToArray(); - await RunOperationAsync( - collectionName, + await this.RunOperationAsync( "MSET", () => this._database .JSON() @@ -326,46 +318,25 @@ await RunOperationAsync( /// Prefix the key with the collection name if the option is set. /// /// The key to prefix. 
- /// The collection name that was provided as part of an operation to override the default or the default if not. /// The updated key if updating is required, otherwise the input key. - private string PrefixKeyIfNeeded(string key, string? collectionName) + private string PrefixKeyIfNeeded(string key) { if (this._options.PrefixCollectionNameToKeyNames) { - return $"{collectionName}:{key}"; + return $"{this._collectionName}:{key}"; } return key; } - /// - /// Choose the right collection name to use for the operation by using the one provided - /// as part of the operation options, or the default one provided at construction time. - /// - /// The collection name provided on the operation options. - /// The collection name to use. - private string ChooseCollectionName(string? operationCollectionName) - { - var collectionName = operationCollectionName ?? this._options.DefaultCollectionName; - if (collectionName is null) - { -#pragma warning disable CA2208 // Instantiate argument exceptions correctly - throw new ArgumentException("Collection name must be provided in the operation options, since no default was provided at construction time.", "options"); -#pragma warning restore CA2208 // Instantiate argument exceptions correctly - } - - return collectionName; - } - /// /// Run the given operation and wrap any Redis exceptions with ."/> /// /// The response type of the operation. - /// The name of the collection the operation is being run on. /// The type of database operation being run. /// The operation to run. /// The result of the operation. 
- private static async Task RunOperationAsync(string collectionName, string operationName, Func> operation) + private async Task RunOperationAsync(string operationName, Func> operation) { try { @@ -376,7 +347,7 @@ private static async Task RunOperationAsync(string collectionName, string throw new VectorStoreOperationException("Call to vector store failed.", ex) { VectorStoreType = DatabaseName, - CollectionName = collectionName, + CollectionName = this._collectionName, OperationName = operationName }; } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs index 3786070ef298..5cdf6496e628 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs @@ -12,12 +12,6 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; public sealed class RedisVectorRecordStoreOptions where TRecord : class { - /// - /// Gets or sets the default collection name to use. - /// If not provided here, the collection name will need to be provided for each operation or the operation will throw. - /// - public string? DefaultCollectionName { get; init; } = null; - /// /// Gets or sets a value indicating whether the collection name should be prefixed to the /// key names before reading or writing to the Redis store. Default is false. 
diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs index e39085e959e6..cd17cdf48e04 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs @@ -34,9 +34,9 @@ public QdrantVectorRecordStoreTests() [Theory] [MemberData(nameof(TestOptions))] - public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors, TKey testRecordKey) + public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool hasNamedVectors, TKey testRecordKey) { - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); // Arrange. var retrievedPoint = CreateRetrievedPoint(hasNamedVectors, testRecordKey); @@ -45,11 +45,7 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool pa // Act. var actual = await sut.GetAsync( testRecordKey, - new() - { - IncludeVectors = true, - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, + new() { IncludeVectors = true }, this._testCancellationToken); // Assert. @@ -73,21 +69,17 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool pa [Theory] [MemberData(nameof(TestOptions))] - public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors, TKey testRecordKey) + public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool hasNamedVectors, TKey testRecordKey) { // Arrange. 
- var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); var retrievedPoint = CreateRetrievedPoint(hasNamedVectors, testRecordKey); this.SetupRetrieveMock([retrievedPoint]); // Act. var actual = await sut.GetAsync( testRecordKey, - new() - { - IncludeVectors = false, - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, + new() { IncludeVectors = false }, this._testCancellationToken); // Assert. @@ -111,10 +103,10 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool [Theory] [MemberData(nameof(MultiRecordTestOptions))] - public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors, TKey[] testRecordKeys) + public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool hasNamedVectors, TKey[] testRecordKeys) { // Arrange. - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); var retrievedPoint1 = CreateRetrievedPoint(hasNamedVectors, UlongTestRecordKey1); var retrievedPoint2 = CreateRetrievedPoint(hasNamedVectors, UlongTestRecordKey2); this.SetupRetrieveMock(testRecordKeys.Select(x => CreateRetrievedPoint(hasNamedVectors, x)).ToList()); @@ -122,11 +114,7 @@ public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bo // Act. var actual = await sut.GetBatchAsync( testRecordKeys, - new() - { - IncludeVectors = true, - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, + new() { IncludeVectors = true }, this._testCancellationToken).ToListAsync(); // Assert. @@ -169,9 +157,9 @@ public async Task CanGetRecordWithCustomMapperAsync() // Arrange target with custom mapper. 
var sut = new QdrantVectorRecordStore>( this._qdrantClientMock.Object, + TestCollectionName, new() { - DefaultCollectionName = TestCollectionName, HasNamedVectors = true, MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, PointStructCustomMapper = mapperMock.Object @@ -198,28 +186,20 @@ public async Task CanGetRecordWithCustomMapperAsync() } [Theory] - [InlineData(true, true, true)] - [InlineData(true, true, false)] - [InlineData(true, false, true)] - [InlineData(true, false, false)] - [InlineData(false, true, true)] - [InlineData(false, true, false)] - [InlineData(false, false, true)] - [InlineData(false, false, false)] - public async Task CanDeleteUlongRecordAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors) + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanDeleteUlongRecordAsync(bool useDefinition, bool hasNamedVectors) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); this.SetupDeleteMocks(); // Act await sut.DeleteAsync( UlongTestRecordKey1, - new() - { - CollectionName = passCollectionToMethod ? 
TestCollectionName : null - }, - this._testCancellationToken); + cancellationToken: this._testCancellationToken); // Assert this._qdrantClientMock @@ -235,28 +215,20 @@ await sut.DeleteAsync( } [Theory] - [InlineData(true, true, true)] - [InlineData(true, true, false)] - [InlineData(true, false, true)] - [InlineData(true, false, false)] - [InlineData(false, true, true)] - [InlineData(false, true, false)] - [InlineData(false, false, true)] - [InlineData(false, false, false)] - public async Task CanDeleteGuidRecordAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors) + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanDeleteGuidRecordAsync(bool useDefinition, bool hasNamedVectors) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); this.SetupDeleteMocks(); // Act await sut.DeleteAsync( s_guidTestRecordKey1, - new() - { - CollectionName = passCollectionToMethod ? 
TestCollectionName : null - }, - this._testCancellationToken); + cancellationToken: this._testCancellationToken); // Assert this._qdrantClientMock @@ -272,28 +244,20 @@ await sut.DeleteAsync( } [Theory] - [InlineData(true, true, true)] - [InlineData(true, true, false)] - [InlineData(true, false, true)] - [InlineData(true, false, false)] - [InlineData(false, true, true)] - [InlineData(false, true, false)] - [InlineData(false, false, true)] - [InlineData(false, false, false)] - public async Task CanDeleteManyUlongRecordsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors) + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanDeleteManyUlongRecordsAsync(bool useDefinition, bool hasNamedVectors) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); this.SetupDeleteMocks(); // Act await sut.DeleteBatchAsync( [UlongTestRecordKey1, UlongTestRecordKey2], - new() - { - CollectionName = passCollectionToMethod ? 
TestCollectionName : null - }, - this._testCancellationToken); + cancellationToken: this._testCancellationToken); // Assert this._qdrantClientMock @@ -309,28 +273,20 @@ await sut.DeleteBatchAsync( } [Theory] - [InlineData(true, true, true)] - [InlineData(true, true, false)] - [InlineData(true, false, true)] - [InlineData(true, false, false)] - [InlineData(false, true, true)] - [InlineData(false, true, false)] - [InlineData(false, false, true)] - [InlineData(false, false, false)] - public async Task CanDeleteManyGuidRecordsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors) + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanDeleteManyGuidRecordsAsync(bool useDefinition, bool hasNamedVectors) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); this.SetupDeleteMocks(); // Act await sut.DeleteBatchAsync( [s_guidTestRecordKey1, s_guidTestRecordKey2], - new() - { - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, - this._testCancellationToken); + cancellationToken: this._testCancellationToken); // Assert this._qdrantClientMock @@ -347,20 +303,16 @@ await sut.DeleteBatchAsync( [Theory] [MemberData(nameof(TestOptions))] - public async Task CanUpsertRecordAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors, TKey testRecordKey) + public async Task CanUpsertRecordAsync(bool useDefinition, bool hasNamedVectors, TKey testRecordKey) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); this.SetupUpsertMock(); // Act await sut.UpsertAsync( CreateModel(testRecordKey, true), - new() - { - CollectionName = passCollectionToMethod ? 
TestCollectionName : null - }, - this._testCancellationToken); + cancellationToken: this._testCancellationToken); // Assert this._qdrantClientMock @@ -377,10 +329,10 @@ await sut.UpsertAsync( [Theory] [MemberData(nameof(MultiRecordTestOptions))] - public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors, TKey[] testRecordKeys) + public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool hasNamedVectors, TKey[] testRecordKeys) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod, hasNamedVectors); + var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); this.SetupUpsertMock(); var models = testRecordKeys.Select(x => CreateModel(x, true)); @@ -388,11 +340,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passC // Act var actual = await sut.UpsertBatchAsync( models, - new() - { - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, - this._testCancellationToken).ToListAsync(); + cancellationToken: this._testCancellationToken).ToListAsync(); // Assert Assert.NotNull(actual); @@ -436,9 +384,9 @@ public async Task CanUpsertRecordWithCustomMapperAsync() // Arrange target with custom mapper. 
var sut = new QdrantVectorRecordStore>( this._qdrantClientMock.Object, + TestCollectionName, new() { - DefaultCollectionName = TestCollectionName, HasNamedVectors = false, MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, PointStructCustomMapper = mapperMock.Object @@ -564,13 +512,13 @@ private static RetrievedPoint CreateRetrievedPoint(bool hasNamedVectors, T return point; } - private IVectorRecordStore> CreateVectorRecordStore(bool useDefinition, bool passCollectionToMethod, bool hasNamedVectors) + private IVectorRecordStore> CreateVectorRecordStore(bool useDefinition, bool hasNamedVectors) { var store = new QdrantVectorRecordStore>( this._qdrantClientMock.Object, + TestCollectionName, new() { - DefaultCollectionName = passCollectionToMethod ? null : TestCollectionName, VectorStoreRecordDefinition = useDefinition ? this._singlePropsDefinition : null, HasNamedVectors = hasNamedVectors }) as IVectorRecordStore>; @@ -614,7 +562,6 @@ public sealed class SinglePropsModel public static IEnumerable TestOptions => GenerateAllCombinations(new object[][] { - new object[] { true, false }, new object[] { true, false }, new object[] { true, false }, new object[] { UlongTestRecordKey1, s_guidTestRecordKey1 } @@ -622,7 +569,6 @@ public static IEnumerable TestOptions public static IEnumerable MultiRecordTestOptions => GenerateAllCombinations(new object[][] { - new object[] { true, false }, new object[] { true, false }, new object[] { true, false }, new object[] { new ulong[] { UlongTestRecordKey1, UlongTestRecordKey2 }, new Guid[] { s_guidTestRecordKey1, s_guidTestRecordKey2 } } diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs index 21438b3726e2..07b79ab7790d 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs @@ 
-4,7 +4,6 @@ using System.Collections.Generic; using System.Linq; using System.Text.Json.Nodes; -using System.Threading; using System.Threading.Tasks; using Microsoft.SemanticKernel.Data; using Moq; @@ -25,8 +24,6 @@ public class RedisVectorRecordStoreTests private readonly Mock _redisDatabaseMock; - private readonly CancellationToken _testCancellationToken = new(false); - public RedisVectorRecordStoreTests() { this._redisDatabaseMock = new Mock(MockBehavior.Strict); @@ -36,26 +33,19 @@ public RedisVectorRecordStoreTests() } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanGetRecordWithVectorsAsync(bool useDefinition) { // Arrange var redisResultString = """{ "Data": "data 1", "Vector": [1, 2, 3, 4] }"""; SetupExecuteMock(this._redisDatabaseMock, redisResultString); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act var actual = await sut.GetAsync( TestRecordKey1, - new() - { - IncludeVectors = true, - CollectionName = passCollectionToMethod ? 
TestCollectionName : null - }, - this._testCancellationToken); + new() { IncludeVectors = true }); // Assert var expectedArgs = new object[] { TestRecordKey1 }; @@ -73,26 +63,19 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passColl } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) { // Arrange var redisResultString = """{ "Data": "data 1" }"""; SetupExecuteMock(this._redisDatabaseMock, redisResultString); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act var actual = await sut.GetAsync( TestRecordKey1, - new() - { - IncludeVectors = false, - CollectionName = passCollectionToMethod ? 
TestCollectionName : null - }, - this._testCancellationToken); + new() { IncludeVectors = false }); // Assert var expectedArgs = new object[] { TestRecordKey1, "Data" }; @@ -110,27 +93,20 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool passC } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) { // Arrange var redisResultString1 = """{ "Data": "data 1", "Vector": [1, 2, 3, 4] }"""; var redisResultString2 = """{ "Data": "data 2", "Vector": [5, 6, 7, 8] }"""; SetupExecuteMock(this._redisDatabaseMock, [redisResultString1, redisResultString2]); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act var actual = await sut.GetBatchAsync( [TestRecordKey1, TestRecordKey2], - new() - { - IncludeVectors = true, - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, - this._testCancellationToken).ToListAsync(); + new() { IncludeVectors = true }).ToListAsync(); // Assert var expectedArgs = new object[] { TestRecordKey1, TestRecordKey2, "$" }; @@ -167,9 +143,9 @@ public async Task CanGetRecordWithCustomMapperAsync() // Arrange target with custom mapper. 
var sut = new RedisVectorRecordStore( this._redisDatabaseMock.Object, + TestCollectionName, new() { - DefaultCollectionName = TestCollectionName, MapperType = RedisRecordMapperType.JsonNodeCustomMapper, JsonNodeCustomMapper = mapperMock.Object }); @@ -177,8 +153,7 @@ public async Task CanGetRecordWithCustomMapperAsync() // Act var actual = await sut.GetAsync( TestRecordKey1, - new() { IncludeVectors = true }, - this._testCancellationToken); + new() { IncludeVectors = true }); // Assert Assert.NotNull(actual); @@ -195,24 +170,16 @@ public async Task CanGetRecordWithCustomMapperAsync() } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanDeleteRecordAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanDeleteRecordAsync(bool useDefinition) { // Arrange SetupExecuteMock(this._redisDatabaseMock, "200"); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act - await sut.DeleteAsync( - TestRecordKey1, - new() - { - CollectionName = passCollectionToMethod ? 
TestCollectionName : null - }, - this._testCancellationToken); + await sut.DeleteAsync(TestRecordKey1); // Assert var expectedArgs = new object[] { TestRecordKey1 }; @@ -225,24 +192,16 @@ await sut.DeleteAsync( } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition) { // Arrange SetupExecuteMock(this._redisDatabaseMock, "200"); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act - await sut.DeleteBatchAsync( - [TestRecordKey1, TestRecordKey2], - new() - { - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, - this._testCancellationToken); + await sut.DeleteBatchAsync([TestRecordKey1, TestRecordKey2]); // Assert var expectedArgs1 = new object[] { TestRecordKey1 }; @@ -262,25 +221,17 @@ await sut.DeleteBatchAsync( } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanUpsertRecordAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanUpsertRecordAsync(bool useDefinition) { // Arrange SetupExecuteMock(this._redisDatabaseMock, "OK"); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); var model = CreateModel(TestRecordKey1, true); // Act - await sut.UpsertAsync( - model, - new() - { - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, - this._testCancellationToken); + await sut.UpsertAsync(model); // Assert // TODO: Fix issue where NotAnnotated is being included in the JSON. 
@@ -294,27 +245,19 @@ await sut.UpsertAsync( } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanUpsertManyRecordsAsync(bool useDefinition) { // Arrange SetupExecuteMock(this._redisDatabaseMock, "OK"); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); var model1 = CreateModel(TestRecordKey1, true); var model2 = CreateModel(TestRecordKey2, true); // Act - var actual = await sut.UpsertBatchAsync( - [model1, model2], - new() - { - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, - this._testCancellationToken).ToListAsync(); + var actual = await sut.UpsertBatchAsync([model1, model2]).ToListAsync(); // Assert Assert.NotNull(actual); @@ -348,9 +291,9 @@ public async Task CanUpsertRecordWithCustomMapperAsync() // Arrange target with custom mapper. var sut = new RedisVectorRecordStore( this._redisDatabaseMock.Object, + TestCollectionName, new() { - DefaultCollectionName = TestCollectionName, MapperType = RedisRecordMapperType.JsonNodeCustomMapper, JsonNodeCustomMapper = mapperMock.Object }); @@ -358,10 +301,7 @@ public async Task CanUpsertRecordWithCustomMapperAsync() var model = CreateModel(TestRecordKey1, true); // Act - await sut.UpsertAsync( - model, - null, - this._testCancellationToken); + await sut.UpsertAsync(model); // Assert mapperMock @@ -370,13 +310,13 @@ await sut.UpsertAsync( Times.Once); } - private RedisVectorRecordStore CreateVectorRecordStore(bool useDefinition, bool passCollectionToMethod) + private RedisVectorRecordStore CreateVectorRecordStore(bool useDefinition) { return new RedisVectorRecordStore( this._redisDatabaseMock.Object, + TestCollectionName, new() { - DefaultCollectionName = passCollectionToMethod ? 
null : TestCollectionName, VectorStoreRecordDefinition = useDefinition ? this._singlePropsDefinition : null }); } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs index 7e97c97d33ae..28f7666aa11f 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs @@ -32,10 +32,9 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition // Arrange var options = new AzureAISearchVectorRecordStoreOptions { - DefaultCollectionName = fixture.TestIndexName, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName, options); // Act var hotel = CreateTestHotel("Upsert-1"); @@ -67,11 +66,7 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition public async Task ItCanUpsertManyDocumentsToVectorStoreAsync() { // Arrange - var options = new AzureAISearchVectorRecordStoreOptions - { - DefaultCollectionName = fixture.TestIndexName - }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName); // Act var results = sut.UpsertBatchAsync( @@ -107,10 +102,9 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool // Arrange var options = new AzureAISearchVectorRecordStoreOptions { - DefaultCollectionName = fixture.TestIndexName, VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName, options); // Act var getResult = await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = includeVectors }); @@ -140,11 +134,7 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool public async Task ItCanGetManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new AzureAISearchVectorRecordStoreOptions - { - DefaultCollectionName = fixture.TestIndexName - }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName); // Act // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. @@ -170,10 +160,9 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti // Arrange var options = new AzureAISearchVectorRecordStoreOptions { - DefaultCollectionName = fixture.TestIndexName, VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName, options); await sut.UpsertAsync(CreateTestHotel("Remove-1")); // Act @@ -189,11 +178,7 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new AzureAISearchVectorRecordStoreOptions - { - DefaultCollectionName = fixture.TestIndexName - }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName); await sut.UpsertAsync(CreateTestHotel("RemoveMany-1")); await sut.UpsertAsync(CreateTestHotel("RemoveMany-2")); await sut.UpsertAsync(CreateTestHotel("RemoveMany-3")); @@ -212,8 +197,7 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() { // Arrange - var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName); // Act & Assert Assert.Null(await sut.GetAsync("BaseSet-5", new GetRecordOptions { IncludeVectors = true })); @@ -223,9 +207,8 @@ public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() public async Task ItThrowsOperationExceptionForFailedConnectionAsync() { // Arrange - var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; var searchIndexClient = new SearchIndexClient(new Uri("https://localhost:12345"), new AzureKeyCredential("12345")); - var sut = new AzureAISearchVectorRecordStore(searchIndexClient, options); + var sut = new
AzureAISearchVectorRecordStore(searchIndexClient, fixture.TestIndexName); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); @@ -235,9 +218,8 @@ public async Task ItThrowsOperationExceptionForFailedConnectionAsync() public async Task ItThrowsOperationExceptionForFailedAuthenticationAsync() { // Arrange - var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName }; var searchIndexClient = new SearchIndexClient(new Uri(fixture.Config.ServiceUrl), new AzureKeyCredential("12345")); - var sut = new AzureAISearchVectorRecordStore(searchIndexClient, options); + var sut = new AzureAISearchVectorRecordStore(searchIndexClient, fixture.TestIndexName); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); @@ -247,8 +229,8 @@ public async Task ItThrowsOperationExceptionForFailedAuthenticationAsync() public async Task ItThrowsMappingExceptionForFailedMapperAsync() { // Arrange - var options = new AzureAISearchVectorRecordStoreOptions { DefaultCollectionName = fixture.TestIndexName, MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = new FailingMapper() }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, options); + var options = new AzureAISearchVectorRecordStoreOptions { MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = new FailingMapper() }; + var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName, options); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs 
index 13ffe798cef0..f14a3234a308 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs @@ -32,10 +32,9 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = hasNamedVectors, - DefaultCollectionName = collectionName, VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, collectionName, options); var record = this.CreateTestHotel(20); @@ -65,8 +64,8 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition public async Task ItCanUpsertAndRemoveDocumentWithGuidIdToVectorStoreAsync() { // Arrange. - var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = false, DefaultCollectionName = "singleVectorGuidIdHotels" }; - IVectorRecordStore sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = false }; + IVectorRecordStore sut = new QdrantVectorRecordStore(fixture.QdrantClient, "singleVectorGuidIdHotels", options); var record = new HotelInfoWithGuidId { @@ -112,10 +111,9 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool useRecordDefinition, var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = hasNamedVectors, - DefaultCollectionName = collectionName, VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, collectionName, options); // Act. 
var getResult = await sut.GetAsync(11, new GetRecordOptions { IncludeVectors = withEmbeddings }); @@ -154,10 +152,9 @@ public async Task ItCanGetDocumentWithGuidIdFromVectorStoreAsync(bool useRecordD var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = false, - DefaultCollectionName = "singleVectorGuidIdHotels", VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelWithGuidIdVectorStoreRecordDefinition : null }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, "singleVectorGuidIdHotels", options); // Act. var getResult = await sut.GetAsync(Guid.Parse("11111111-1111-1111-1111-111111111111"), new GetRecordOptions { IncludeVectors = withEmbeddings }); @@ -183,8 +180,8 @@ public async Task ItCanGetDocumentWithGuidIdFromVectorStoreAsync(bool useRecordD public async Task ItCanGetManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = true, DefaultCollectionName = "namedVectorsHotels" }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = true }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, "namedVectorsHotels", options); // Act // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. @@ -213,10 +210,9 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = hasNamedVectors, - DefaultCollectionName = collectionName, VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.HotelVectorStoreRecordDefinition : null }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, collectionName, options); await sut.UpsertAsync(this.CreateTestHotel(20)); @@ -240,10 +236,9 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync(bool useRecordDef var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = hasNamedVectors, - DefaultCollectionName = collectionName, VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, collectionName, options); await sut.UpsertAsync(this.CreateTestHotel(20)); @@ -259,8 +254,8 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync(bool useRecordDef public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() { // Arrange - var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = false, DefaultCollectionName = "singleVectorHotels" }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = false }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, "singleVectorHotels", options); // Act & Assert Assert.Null(await sut.GetAsync(15, new GetRecordOptions { IncludeVectors = true })); @@ -270,8 +265,8 @@ public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() public async Task ItThrowsMappingExceptionForFailedMapperAsync() { // Arrange - var options = new QdrantVectorRecordStoreOptions { DefaultCollectionName = "singleVectorHotels", MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, PointStructCustomMapper = new FailingMapper() }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, options); + var options = new QdrantVectorRecordStoreOptions { MapperType = 
QdrantRecordMapperType.QdrantPointStructCustomMapper, PointStructCustomMapper = new FailingMapper() }; + var sut = new QdrantVectorRecordStore(fixture.QdrantClient, "singleVectorHotels", options); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync(11, new GetRecordOptions { IncludeVectors = true })); diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs index af1c089fb870..2cf8605777de 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs @@ -28,11 +28,10 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition // Arrange. var options = new RedisVectorRecordStoreOptions { - DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorRecordStore(fixture.Database, options); + var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); Hotel record = CreateTestHotel("Upsert-1", 1); // Act. @@ -66,11 +65,10 @@ public async Task ItCanUpsertManyDocumentsToVectorStoreAsync(bool useRecordDefin // Arrange. var options = new RedisVectorRecordStoreOptions { - DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorRecordStore(fixture.Database, options); + var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); // Act. var results = sut.UpsertBatchAsync( @@ -106,11 +104,10 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool // Arrange. 
var options = new RedisVectorRecordStoreOptions { - DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorRecordStore(fixture.Database, options); + var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); // Act. var getResult = await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = includeVectors }); @@ -142,8 +139,8 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool public async Task ItCanGetManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorRecordStore(fixture.Database, options); + var options = new RedisVectorRecordStoreOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); // Act // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. @@ -165,8 +162,8 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() public async Task ItFailsToGetDocumentsWithInvalidSchemaAsync() { // Arrange. - var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorRecordStore(fixture.Database, options); + var options = new RedisVectorRecordStoreOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); // Act & Assert. await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-4-Invalid", new GetRecordOptions { IncludeVectors = true })); @@ -180,11 +177,10 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti // Arrange. 
var options = new RedisVectorRecordStoreOptions { - DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorRecordStore(fixture.Database, options); + var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); var address = new HotelAddress { City = "Seattle", Country = "USA" }; var record = new Hotel { @@ -210,8 +206,8 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorRecordStore(fixture.Database, options); + var options = new RedisVectorRecordStoreOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); await sut.UpsertAsync(CreateTestHotel("RemoveMany-1", 1)); await sut.UpsertAsync(CreateTestHotel("RemoveMany-2", 2)); await sut.UpsertAsync(CreateTestHotel("RemoveMany-3", 3)); @@ -230,8 +226,8 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() { // Arrange - var options = new RedisVectorRecordStoreOptions { DefaultCollectionName = "hotels", PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorRecordStore(fixture.Database, options); + var options = new RedisVectorRecordStoreOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); // Act & Assert Assert.Null(await sut.GetAsync("BaseSet-5", new GetRecordOptions { IncludeVectors = true })); @@ -243,12 +239,11 @@ public async Task ItThrowsMappingExceptionForFailedMapperAsync() // Arrange var options = new RedisVectorRecordStoreOptions { - DefaultCollectionName = 
"hotels", PrefixCollectionNameToKeyNames = true, MapperType = RedisRecordMapperType.JsonNodeCustomMapper, JsonNodeCustomMapper = new FailingMapper() }; - var sut = new RedisVectorRecordStore(fixture.Database, options); + var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs index 2169c6a5051b..13e62eb84a71 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs @@ -6,6 +6,7 @@ namespace Microsoft.SemanticKernel.Data; /// /// Optional options when calling . +/// Reserved for future use. /// [Experimental("SKEXP0001")] public class DeleteRecordOptions @@ -23,11 +24,5 @@ public DeleteRecordOptions() /// The options to clone public DeleteRecordOptions(DeleteRecordOptions source) { - this.CollectionName = source.CollectionName; } - - /// - /// Get or sets an optional collection name to use for this operation that is different to the default. - /// - public string? 
CollectionName { get; init; } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs index 83ea8ee5b359..5d99580cb13b 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs @@ -23,15 +23,9 @@ public GetRecordOptions() /// The options to clone public GetRecordOptions(GetRecordOptions source) { - this.CollectionName = source.CollectionName; this.IncludeVectors = source.IncludeVectors; } - /// - /// Get or sets an optional collection name to use for this operation that is different to the default. - /// - public string? CollectionName { get; init; } - /// /// Get or sets a value indicating whether to include vectors in the retrieval result. /// diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs index 8447b2fc82eb..d291506635ff 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs @@ -6,6 +6,7 @@ namespace Microsoft.SemanticKernel.Data; /// /// Optional options when calling . +/// Reserved for future use. /// [Experimental("SKEXP0001")] public class UpsertRecordOptions @@ -23,11 +24,5 @@ public UpsertRecordOptions() /// The options to clone public UpsertRecordOptions(UpsertRecordOptions source) { - this.CollectionName = source.CollectionName; } - - /// - /// Get or sets an optional collection name to use for this operation that is different to the default. - /// - public string? 
CollectionName { get; init; } } diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStore.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStore.cs index dadaa6e76937..5f87bc1a93f6 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStore.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStore.cs @@ -25,6 +25,9 @@ public sealed class VolatileVectorRecordStore : IVectorRecordStoreOptional configuration options for this class. private readonly VolatileVectorRecordStoreOptions _options; + /// The name of the collection that this will access. + private readonly string _collectionName; + /// A set of types that a key on the provided model may have. private static readonly HashSet s_supportedKeyTypes = [ @@ -37,10 +40,15 @@ public sealed class VolatileVectorRecordStore : IVectorRecordStore /// Initializes a new instance of the class. /// + /// The name of the collection that this will access. /// Optional configuration options for this class. - public VolatileVectorRecordStore(VolatileVectorRecordStoreOptions? options = default) + public VolatileVectorRecordStore(string collectionName, VolatileVectorRecordStoreOptions? options = default) { + // Verify. + Verify.NotNullOrWhiteSpace(collectionName); + // Assign. + this._collectionName = collectionName; this._internalCollection = new(); this._options = options ?? new VolatileVectorRecordStoreOptions(); @@ -64,9 +72,10 @@ public VolatileVectorRecordStore(VolatileVectorRecordStoreOptions? options = def /// Initializes a new instance of the class. /// /// Allows passing in the dictionary used for storage, for testing purposes. + /// The name of the collection that this will access. /// Optional configuration options for this class. - internal VolatileVectorRecordStore(ConcurrentDictionary> internalCollection, VolatileVectorRecordStoreOptions? 
options = default) - : this(options) + internal VolatileVectorRecordStore(ConcurrentDictionary> internalCollection, string collectionName, VolatileVectorRecordStoreOptions? options = default) + : this(collectionName, options) { this._internalCollection = internalCollection; } @@ -74,7 +83,7 @@ internal VolatileVectorRecordStore(ConcurrentDictionary public Task GetAsync(string key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) { - var collectionDictionary = this.GetCollectionDictionary(options?.CollectionName); + var collectionDictionary = this.GetCollectionDictionary(); if (collectionDictionary.TryGetValue(key, out var record)) { @@ -101,7 +110,7 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G /// public Task DeleteAsync(string key, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) { - var collectionDictionary = this.GetCollectionDictionary(options?.CollectionName); + var collectionDictionary = this.GetCollectionDictionary(); collectionDictionary.TryRemove(key, out _); return Task.CompletedTask; @@ -110,7 +119,7 @@ public Task DeleteAsync(string key, DeleteRecordOptions? options = null, Cancell /// public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) { - var collectionDictionary = this.GetCollectionDictionary(options?.CollectionName); + var collectionDictionary = this.GetCollectionDictionary(); foreach (var key in keys) { @@ -123,7 +132,7 @@ public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? opti /// public Task UpsertAsync(TRecord record, UpsertRecordOptions? 
options = null, CancellationToken cancellationToken = default) { - var collectionDictionary = this.GetCollectionDictionary(options?.CollectionName); + var collectionDictionary = this.GetCollectionDictionary(); var key = this._keyPropertyInfo.GetValue(record) as string; collectionDictionary.AddOrUpdate(key!, record, (key, currentValue) => record); @@ -141,30 +150,16 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco } /// - /// Get a collection dictionary from the internal storage, creating it if it does not exist. - /// Use the provided collection name if not null, and fall back to the default collection name otherwise. + /// Get the collection dictionary from the internal storage, throws if it does not exist. /// - /// The collection name passed to the operation. /// The retrieved collection dictionary. - private ConcurrentDictionary GetCollectionDictionary(string? collectionName) + private ConcurrentDictionary GetCollectionDictionary() { - string? chosenCollectionName = null; - - if (collectionName is not null) - { - chosenCollectionName = collectionName; - } - else if (this._options.DefaultCollectionName is not null) - { - chosenCollectionName = this._options.DefaultCollectionName; - } - else + if (!this._internalCollection.TryGetValue(this._collectionName, out var collectionDictionary)) { -#pragma warning disable CA2208 // Instantiate argument exceptions correctly - throw new ArgumentException("Collection name must be provided in the operation options, since no default was provided at construction time.", "options"); -#pragma warning restore CA2208 // Instantiate argument exceptions correctly + throw new VectorStoreOperationException($"Call to vector store failed. 
Collection '{this._collectionName}' does not exist."); } - return this._internalCollection.GetOrAdd(chosenCollectionName, _ => new()); + return collectionDictionary; } } diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStoreOptions.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStoreOptions.cs index 80506d02ede8..621aaf41e8ae 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStoreOptions.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStoreOptions.cs @@ -10,12 +10,6 @@ namespace Microsoft.SemanticKernel.Data; [Experimental("SKEXP0001")] public sealed class VolatileVectorRecordStoreOptions { - /// - /// Gets or sets the default collection name to use. - /// If not provided here, the collection name will need to be provided for each operation or the operation will throw. - /// - public string? DefaultCollectionName { get; init; } = null; - /// /// Gets or sets an optional record definition that defines the schema of the record type. 
/// diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorRecordStoreTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorRecordStoreTests.cs index e6dc633bed02..b42b797255d5 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorRecordStoreTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorRecordStoreTests.cs @@ -29,11 +29,9 @@ public VolatileVectorRecordStoreTests() } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanGetRecordWithVectorsAsync(bool useDefinition) { // Arrange var record = CreateModel(TestRecordKey1, withVectors: true); @@ -41,15 +39,14 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passColl collection.TryAdd(TestRecordKey1, record); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act var actual = await sut.GetAsync( TestRecordKey1, new() { - IncludeVectors = true, - CollectionName = passCollectionToMethod ? 
TestCollectionName : null + IncludeVectors = true }, this._testCancellationToken); @@ -63,11 +60,9 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool passColl } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) { // Arrange var record1 = CreateModel(TestRecordKey1, withVectors: true); @@ -77,15 +72,14 @@ public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool pas collection.TryAdd(TestRecordKey2, record2); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act var actual = await sut.GetBatchAsync( [TestRecordKey1, TestRecordKey2], new() { - IncludeVectors = true, - CollectionName = passCollectionToMethod ? 
TestCollectionName : null + IncludeVectors = true }, this._testCancellationToken).ToListAsync(); @@ -99,11 +93,9 @@ public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool pas } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanDeleteRecordAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanDeleteRecordAsync(bool useDefinition) { // Arrange var record1 = CreateModel(TestRecordKey1, withVectors: true); @@ -113,16 +105,12 @@ public async Task CanDeleteRecordAsync(bool useDefinition, bool passCollectionTo collection.TryAdd(TestRecordKey2, record2); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act await sut.DeleteAsync( TestRecordKey1, - new() - { - CollectionName = passCollectionToMethod ? 
TestCollectionName : null - }, - this._testCancellationToken); + cancellationToken: this._testCancellationToken); // Assert Assert.False(collection.ContainsKey(TestRecordKey1)); @@ -130,11 +118,9 @@ await sut.DeleteAsync( } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition) { // Arrange var record1 = CreateModel(TestRecordKey1, withVectors: true); @@ -144,16 +130,12 @@ public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition, bool collection.TryAdd(TestRecordKey2, record2); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act await sut.DeleteBatchAsync( [TestRecordKey1, TestRecordKey2], - new() - { - CollectionName = passCollectionToMethod ? 
TestCollectionName : null - }, - this._testCancellationToken); + cancellationToken: this._testCancellationToken); // Assert Assert.False(collection.ContainsKey(TestRecordKey1)); @@ -161,27 +143,21 @@ await sut.DeleteBatchAsync( } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanUpsertRecordAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanUpsertRecordAsync(bool useDefinition) { // Arrange var record1 = CreateModel(TestRecordKey1, withVectors: true); var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); // Act var upsertResult = await sut.UpsertAsync( record1, - new() - { - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, - this._testCancellationToken); + cancellationToken: this._testCancellationToken); // Assert Assert.Equal(TestRecordKey1, upsertResult); @@ -190,11 +166,9 @@ public async Task CanUpsertRecordAsync(bool useDefinition, bool passCollectionTo } [Theory] - [InlineData(true, true)] - [InlineData(true, false)] - [InlineData(false, true)] - [InlineData(false, false)] - public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passCollectionToMethod) + [InlineData(true)] + [InlineData(false)] + public async Task CanUpsertManyRecordsAsync(bool useDefinition) { // Arrange var record1 = CreateModel(TestRecordKey1, withVectors: true); @@ -203,16 +177,12 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool passCollect var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition, passCollectionToMethod); + var sut = this.CreateVectorRecordStore(useDefinition); 
// Act var actual = await sut.UpsertBatchAsync( [record1, record2], - new() - { - CollectionName = passCollectionToMethod ? TestCollectionName : null - }, - this._testCancellationToken).ToListAsync(); + cancellationToken: this._testCancellationToken).ToListAsync(); // Assert Assert.NotNull(actual); @@ -235,13 +205,13 @@ private static SinglePropsModel CreateModel(string key, bool withVectors) }; } - private VolatileVectorRecordStore CreateVectorRecordStore(bool useDefinition, bool passCollectionToMethod) + private VolatileVectorRecordStore CreateVectorRecordStore(bool useDefinition) { return new VolatileVectorRecordStore( this._collectionStore, + TestCollectionName, new() { - DefaultCollectionName = passCollectionToMethod ? null : TestCollectionName, VectorStoreRecordDefinition = useDefinition ? this._singlePropsDefinition : null }); } From 9759ec36754e28063703e73fcf820d2ab4e08547 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 15 Jul 2024 10:02:31 +0100 Subject: [PATCH 15/48] .Net: Improve support for enumerable types and remove complex object support from azure ai search. (#7210) ### Motivation and Context Supporting complex types is difficult in Azure AI Search for create via the abstraction, so for now, removing support for it, until someone requests it. This means doing type checking on data fields, so adding better support for enumerable types like Arrays, List, IEnumerable, etc. 
### Description ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../AzureAISearchVectorRecordStore.cs | 19 ++++++++ .../QdrantVectorStoreRecordMapper.cs | 8 +--- .../AzureAISearchVectorRecordStoreTests.cs | 11 +---- .../AzureAISearchVectorStoreFixture.cs | 46 +++---------------- .../Data/VectorStoreRecordPropertyReader.cs | 42 ++++++++++++++++- .../VectorStoreRecordPropertyReaderTests.cs | 33 +++++++++++++ 6 files changed, 100 insertions(+), 59 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs index 785d18fa6f7a..cb86e1ae3c17 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs @@ -33,6 +33,24 @@ public sealed class AzureAISearchVectorRecordStore : IVectorRecordStore typeof(string) ]; + /// A set of types that data properties on the provided model may have. + private static readonly HashSet s_supportedDataTypes = + [ + typeof(string), + typeof(int), + typeof(long), + typeof(double), + typeof(float), + typeof(bool), + typeof(DateTimeOffset), + typeof(int?), + typeof(long?), + typeof(double?), + typeof(float?), + typeof(bool?), + typeof(DateTimeOffset?), + ]; + /// A set of types that vectors on the provided model may have. 
/// /// Azure AI Search is adding support for more types than just float32, but these are not available for use via the @@ -103,6 +121,7 @@ public AzureAISearchVectorRecordStore(SearchIndexClient searchIndexClient, strin // Validate property types and store for later use. var jsonSerializerOptions = this._options.JsonSerializerOptions ?? JsonSerializerOptions.Default; VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); this._keyPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, properties.keyProperty); diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs index 1881e38293c4..266501e3bfab 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -29,12 +29,6 @@ internal sealed class QdrantVectorStoreRecordMapper : IVectorStoreRecor /// A set of types that data properties on the provided model may have. private static readonly HashSet s_supportedDataTypes = [ - typeof(List), - typeof(List), - typeof(List), - typeof(List), - typeof(List), - typeof(List), typeof(string), typeof(int), typeof(long), @@ -98,7 +92,7 @@ public QdrantVectorStoreRecordMapper(QdrantVectorStoreRecordMapperOptions option // Validate property types and store for later use. 
VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); this._keyPropertyInfo = properties.keyProperty; diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs index 28f7666aa11f..ecd8c4ee6d5f 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs @@ -54,8 +54,6 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition Assert.Equal(hotel.ParkingIncluded, getResult.ParkingIncluded); Assert.Equal(hotel.LastRenovationDate, getResult.LastRenovationDate); Assert.Equal(hotel.Rating, getResult.Rating); - Assert.Equal(hotel.Address.City, getResult.Address.City); - Assert.Equal(hotel.Address.Country, getResult.Address.Country); // Output output.WriteLine(upsertResult); @@ -123,8 +121,6 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool Assert.False(getResult.ParkingIncluded); Assert.Equal(new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), getResult.LastRenovationDate); Assert.Equal(3.6, getResult.Rating); - Assert.Equal("New York", getResult.Address.City); - Assert.Equal("USA", getResult.Address.Country); // Output output.WriteLine(getResult.ToString()); @@ -245,12 +241,7 @@ public async Task ItThrowsMappingExceptionForFailedMapperAsync() Tags = ["pool", "air conditioning", "concierge"], 
ParkingIncluded = true, LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), - Rating = 3.6, - Address = new Address - { - City = "New York", - Country = "USA" - } + Rating = 3.6 }; private sealed class FailingMapper : IVectorStoreRecordMapper diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs index 3d94b67571bf..f5df69c24023 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs @@ -60,8 +60,7 @@ public AzureAISearchVectorStoreFixture() new VectorStoreRecordDataProperty("Tags"), new VectorStoreRecordDataProperty("ParkingIncluded"), new VectorStoreRecordDataProperty("LastRenovationDate"), - new VectorStoreRecordDataProperty("Rating"), - new VectorStoreRecordDataProperty("Address") + new VectorStoreRecordDataProperty("Rating") } }; } @@ -139,7 +138,7 @@ public static async Task CreateIndexAsync(string indexName, SearchIndexClient ad definition.VectorSearch.Algorithms.Add(new HnswAlgorithmConfiguration("my-hnsw-vector-config-1") { Parameters = new HnswParameters { Metric = VectorSearchAlgorithmMetric.Cosine } }); definition.VectorSearch.Profiles.Add(new VectorSearchProfile("my-vector-profile", "my-hnsw-vector-config-1")); - var suggester = new SearchSuggester("sg", new[] { "HotelName", "Address/City" }); + var suggester = new SearchSuggester("sg", new[] { "HotelName" }); definition.Suggesters.Add(suggester); await adminClient.CreateOrUpdateIndexAsync(definition); @@ -162,12 +161,7 @@ public static void UploadDocuments(SearchClient searchClient) Tags = new[] { "pool", "air conditioning", "concierge" }, ParkingIncluded = false, LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), - Rating = 3.6, - Address = new 
Address() - { - City = "New York", - Country = "USA" - } + Rating = 3.6 }), IndexDocumentsAction.Upload( new Hotel() @@ -179,12 +173,7 @@ public static void UploadDocuments(SearchClient searchClient) Tags = new[] { "pool", "free wifi", "concierge" }, ParkingIncluded = false, LastRenovationDate = new DateTimeOffset(1979, 2, 18, 0, 0, 0, TimeSpan.Zero), - Rating = 3.60, - Address = new Address() - { - City = "Sarasota", - Country = "USA" - } + Rating = 3.60 }), IndexDocumentsAction.Upload( new Hotel() @@ -196,12 +185,7 @@ public static void UploadDocuments(SearchClient searchClient) Tags = new[] { "air conditioning", "bar", "continental breakfast" }, ParkingIncluded = true, LastRenovationDate = new DateTimeOffset(2015, 9, 20, 0, 0, 0, TimeSpan.Zero), - Rating = 4.80, - Address = new Address() - { - City = "Atlanta", - Country = "USA" - } + Rating = 4.80 }), IndexDocumentsAction.Upload( new Hotel() @@ -213,12 +197,7 @@ public static void UploadDocuments(SearchClient searchClient) Tags = new[] { "concierge", "view", "24-hour front desk service" }, ParkingIncluded = true, LastRenovationDate = new DateTimeOffset(1960, 2, 06, 0, 0, 0, TimeSpan.Zero), - Rating = 4.60, - Address = new Address() - { - City = "San Antonio", - Country = "USA" - } + Rating = 4.60 }) ); @@ -261,19 +240,6 @@ public class Hotel [SimpleField(IsFilterable = true, IsSortable = true, IsFacetable = true)] [VectorStoreRecordData] public double? Rating { get; set; } - - [SearchableField] - [VectorStoreRecordData] - public Address Address { get; set; } - } - - public record Address - { - [SearchableField(IsFilterable = true, IsSortable = true, IsFacetable = true)] - public string City { get; set; } - - [SearchableField(IsFilterable = true, IsSortable = true, IsFacetable = true)] - public string Country { get; set; } } #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. 
} diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index 4cbfe15622bc..883c2de9ea9c 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using System; +using System.Collections; using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; @@ -245,12 +246,49 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT /// The properties to check. /// A set of supported types that the provided properties may have. /// A description of the category of properties being checked. Used for error messaging. + /// A value indicating whether versions of all the types should also be supported. /// Thrown if any of the properties are not in the given set of types. - public static void VerifyPropertyTypes(List properties, HashSet supportedTypes, string propertyCategoryDescription) + public static void VerifyPropertyTypes(List properties, HashSet supportedTypes, string propertyCategoryDescription, bool? supportEnumerable = false) { foreach (var property in properties) { - if (!supportedTypes.Contains(property.PropertyType)) + // Add shortcut before testing all the more expensive scenarios. + if (supportedTypes.Contains(property.PropertyType)) + { + continue; + } + + // Check all collection scenarios and get stored type. 
+ Type typeToCheck; + if (typeof(IEnumerable).IsAssignableFrom(property.PropertyType) && supportEnumerable == true) + { + if (property.PropertyType is IEnumerable) + { + typeToCheck = typeof(object); + } + else if (property.PropertyType.IsArray) + { + typeToCheck = property.PropertyType.GetElementType()!; + } + else if (property.PropertyType.IsGenericType && property.PropertyType.GetGenericTypeDefinition() == typeof(IEnumerable<>)) + { + typeToCheck = property.PropertyType.GetGenericArguments()[0]; + } + else if (property.PropertyType.GetInterfaces().FirstOrDefault(i => i.IsGenericType && i.GetGenericTypeDefinition() == typeof(IEnumerable<>)) is Type enumerableInterface) + { + typeToCheck = enumerableInterface.GetGenericArguments()[0]; + } + else + { + typeToCheck = property.PropertyType; + } + } + else + { + typeToCheck = property.PropertyType; + } + + if (!supportedTypes.Contains(typeToCheck)) { var supportedTypesString = string.Join(", ", supportedTypes.Select(t => t.FullName)); throw new ArgumentException($"{propertyCategoryDescription} properties must be one of the supported types: {supportedTypesString}. Type of {property.Name} is {property.PropertyType.FullName}."); diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index 0fe82c113d35..3fc33bb5d935 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. 
using System; +using System.Collections.Generic; using System.Linq; using System.Reflection; using System.Text.Json; @@ -180,6 +181,16 @@ public void VerifyPropertyTypesPassForAllowedTypes() VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(string)], "Data"); } + [Fact] + public void VerifyPropertyTypesPassForAllowedEnumerableTypes() + { + // Arrange. + var properties = VectorStoreRecordPropertyReader.FindProperties(typeof(EnumerablePropsModel), true); + + // Act. + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(string)], "Data", supportEnumerable: true); + } + [Fact] public void VerifyPropertyTypesFailsForDisallowedTypes() { @@ -244,6 +255,7 @@ public void VerifyGetJsonPropertyNameChecksJsonOptionsAndJsonAttributesAndFallsB } #pragma warning disable CA1812 // Invalid unused classes error, since I am using these for testing purposes above. + private sealed class NoKeyModel { } @@ -338,5 +350,26 @@ private sealed class MultiPropsModel new VectorStoreRecordVectorProperty("Vector2") ] }; + + private sealed class EnumerablePropsModel + { + [VectorStoreRecordKey] + public string Key { get; set; } = string.Empty; + + [VectorStoreRecordData] + public IEnumerable EnumerableData { get; set; } = new List(); + + [VectorStoreRecordData] + public string[] ArrayData { get; set; } = Array.Empty(); + + [VectorStoreRecordData] + public List ListData { get; set; } = new List(); + + [VectorStoreRecordVector] + public ReadOnlyMemory Vector { get; set; } + + public string NotAnnotated { get; set; } = string.Empty; + } + #pragma warning restore CA1812 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. 
} From 6ffc0c7cf46b8ea6111ba61388941bfcbe98d88c Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 15 Jul 2024 13:46:57 +0100 Subject: [PATCH 16/48] .Net: Rename VectorRecordStore to VectorStoreRecordCollection (#7261) ### Motivation and Context After long discussion we decided to move to a model whereby we have a VectorStore that can provide VectorStoreRecordCollection instances. All record and collection specific operations will reside on VectorStoreRecordCollection and all cross collection operations will reside on VectorStore. ### Description This change renames VectorRecordStore to VectorStoreRecordCollection as agreed. The required collection operations will be moved to VectorStoreRecordCollection in a separate PR. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../0045-updated-vector-store-design.md | 212 ++++++++---------- ...SearchVectorStoreRecordCollectionTests.cs} | 14 +- .../AzureAISearchRecordMapperType.cs | 2 +- ...ureAISearchVectorStoreRecordCollection.cs} | 18 +- ...archVectorStoreRecordCollectionOptions.cs} | 4 +- .../QdrantRecordMapperType.cs | 2 +- ...s => QdrantVectorStoreRecordCollection.cs} | 28 +-- ...rantVectorStoreRecordCollectionOptions.cs} | 4 +- .../RedisRecordMapperType.cs | 2 +- ...cs => RedisVectorStoreRecordCollection.cs} | 18 +- ...edisVectorStoreRecordCollectionOptions.cs} | 4 +- ...QdrantVectorStoreRecordCollectionTests.cs} | 16 +- ... 
RedisVectorStoreRecordCollectionTests.cs} | 14 +- .../AzureAISearchVectorStoreFixture.cs | 2 +- ...SearchVectorStoreRecordCollectionTests.cs} | 32 +-- ...QdrantVectorStoreRecordCollectionTests.cs} | 40 ++-- ... RedisVectorStoreRecordCollectionTests.cs} | 40 ++-- ...ore.cs => IVectorStoreRecordCollection.cs} | 6 +- .../Data/RecordOptions/DeleteRecordOptions.cs | 2 +- .../Data/RecordOptions/GetRecordOptions.cs | 2 +- .../Data/RecordOptions/UpsertRecordOptions.cs | 2 +- ...=> VolatileVectorStoreRecordCollection.cs} | 24 +- ...tileVectorStoreRecordCollectionOptions.cs} | 4 +- ...latileVectorStoreRecordCollectionTests.cs} | 10 +- 24 files changed, 247 insertions(+), 255 deletions(-) rename dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/{AzureAISearchVectorRecordStoreTests.cs => AzureAISearchVectorStoreRecordCollectionTests.cs} (96%) rename dotnet/src/Connectors/Connectors.Memory.AzureAISearch/{AzureAISearchVectorRecordStore.cs => AzureAISearchVectorStoreRecordCollection.cs} (93%) rename dotnet/src/Connectors/Connectors.Memory.AzureAISearch/{AzureAISearchVectorRecordStoreOptions.cs => AzureAISearchVectorStoreRecordCollectionOptions.cs} (92%) rename dotnet/src/Connectors/Connectors.Memory.Qdrant/{QdrantVectorRecordStore.cs => QdrantVectorStoreRecordCollection.cs} (88%) rename dotnet/src/Connectors/Connectors.Memory.Qdrant/{QdrantVectorRecordStoreOptions.cs => QdrantVectorStoreRecordCollectionOptions.cs} (92%) rename dotnet/src/Connectors/Connectors.Memory.Redis/{RedisVectorRecordStore.cs => RedisVectorStoreRecordCollection.cs} (93%) rename dotnet/src/Connectors/Connectors.Memory.Redis/{RedisVectorRecordStoreOptions.cs => RedisVectorStoreRecordCollectionOptions.cs} (94%) rename dotnet/src/Connectors/Connectors.Qdrant.UnitTests/{QdrantVectorRecordStoreTests.cs => QdrantVectorStoreRecordCollectionTests.cs} (97%) rename dotnet/src/Connectors/Connectors.Redis.UnitTests/{RedisVectorRecordStoreTests.cs => RedisVectorStoreRecordCollectionTests.cs} (96%) rename 
dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/{AzureAISearchVectorRecordStoreTests.cs => AzureAISearchVectorStoreRecordCollectionTests.cs} (81%) rename dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/{QdrantVectorRecordStoreTests.cs => QdrantVectorStoreRecordCollectionTests.cs} (81%) rename dotnet/src/IntegrationTests/Connectors/Memory/Redis/{RedisVectorRecordStoreTests.cs => RedisVectorStoreRecordCollectionTests.cs} (83%) rename dotnet/src/SemanticKernel.Abstractions/Data/{IVectorRecordStore.cs => IVectorStoreRecordCollection.cs} (96%) rename dotnet/src/SemanticKernel.Core/Data/{VolatileVectorRecordStore.cs => VolatileVectorStoreRecordCollection.cs} (83%) rename dotnet/src/SemanticKernel.Core/Data/{VolatileVectorRecordStoreOptions.cs => VolatileVectorStoreRecordCollectionOptions.cs} (84%) rename dotnet/src/SemanticKernel.UnitTests/Data/{VolatileVectorRecordStoreTests.cs => VolatileVectorStoreRecordCollectionTests.cs} (95%) diff --git a/docs/decisions/0045-updated-vector-store-design.md b/docs/decisions/0045-updated-vector-store-design.md index 0e06d1a05b6d..c008068b1e95 100644 --- a/docs/decisions/0045-updated-vector-store-design.md +++ b/docs/decisions/0045-updated-vector-store-design.md @@ -18,7 +18,7 @@ The current abstractions are experimental and the purpose of this ADR is to prog ### Problems with current design -1. The `IMemoryStore` interface has four responsibilities with different cardinalities. +1. The `IMemoryStore` interface has four responsibilities with different cardinalities. Some are schema aware and others schema agnostic. 2. The `IMemoryStore` interface only supports a fixed schema for data storage, retrieval and search, which limits its usability by customers with existing data sets. 2. The `IMemoryStore` implementations are opinionated around key encoding / decoding and collection name sanitization, which limits its usability by customers with existing data sets. @@ -70,10 +70,10 @@ interface IMemoryStore ### Actions -1. 
The `IMemoryStore` should be split into four different interfaces, one for each responsibility. +1. The `IMemoryStore` should be split into different interfaces, so that schema aware and schema agnostic operations are separated. 2. The **Data Storage and Retrieval** and **Vector Search** areas should allow typed access to data and support any schema that is currently available in the customer's data store. -3. The collection / index create functionality should allow developers to create their own implementations and support creating first party collections for built in functionality. Each implementation would be for a specific schema and data store type. -4. The collection / index list/exists/delete functionality should allow management of any collection regardless of schema. There should be one implementation for each data store type. +3. The collection / index create functionality should allow developers to use a common definition that is part of the abstraction to create collections. +4. The collection / index list/exists/delete functionality should allow management of any collection regardless of schema. 5. Remove opinionated behaviors from connectors. The opinionated behavior limits the ability of these connectors to be used with pre-existing vector databases. As far as possible these behaviors should be moved into decorators or be injectable. Examples of opinionated behaviors: 1. The AzureAISearch connector encodes keys before storing and decodes them after retrieval since keys in Azure AI Search supports a limited set of characters. 2. The AzureAISearch connector sanitizes collection names before using them, since Azure AI Search supports a limited set of characters. 
@@ -282,7 +282,7 @@ Footnotes: |Id Type|String|UUID|string with collection name prefix|string||string|UUID|64Bit Int / UUID / ULID|64Bit Unsigned Int / UUID|Int64 / varchar| |Supported Vector Types|[Collection(Edm.Byte) / Collection(Edm.Single) / Collection(Edm.Half) / Collection(Edm.Int16) / Collection(Edm.SByte)](https://learn.microsoft.com/en-us/rest/api/searchservice/supported-data-types)|float32|FLOAT32 and FLOAT64|||[Rust f32](https://docs.pinecone.io/troubleshooting/embedding-values-changed-when-upserted)||[single-precision (4 byte float) / half-precision (2 byte float) / binary (1bit) / sparse vectors (4 bytes)](https://github.com/pgvector/pgvector?tab=readme-ov-file#pgvector)|UInt8 / Float32|Binary / Float32 / Float16 / BFloat16 / SparseFloat| |Supported Distance Functions|[Cosine / dot prod / euclidean dist (l2 norm)](https://learn.microsoft.com/en-us/azure/search/vector-search-ranking#similarity-metrics-used-to-measure-nearness)|[Cosine dist / dot prod / Squared L2 dist / hamming (num of diffs) / manhattan dist](https://weaviate.io/developers/weaviate/config-refs/distances#available-distance-metrics)|[Euclidean dist (L2) / Inner prod (IP) / Cosine dist](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/vectors/)|[Squared L2 / Inner prod / Cosine similarity](https://docs.trychroma.com/guides#changing-the-distance-function)||[cosine sim / euclidean dist / dot prod](https://docs.pinecone.io/reference/api/control-plane/create_index)||[L2 dist / inner prod / cosine dist / L1 dist / Hamming dist / Jaccard dist (NB: Specified at query time, not index creation time)](https://github.com/pgvector/pgvector?tab=readme-ov-file#pgvector)|[Dot prod / Cosine sim / Euclidean dist (L2) / Manhattan dist](https://qdrant.tech/documentation/concepts/search/)|[Cosine sim / Euclidean dist / Inner Prod](https://milvus.io/docs/index-vector-fields.md)| -|Supported index types|[Exhaustive KNN / 
HNSW](https://learn.microsoft.com/en-us/azure/search/vector-search-ranking#algorithms-used-in-vector-search)|[HNSW / Flat / Dynamic](https://weaviate.io/developers/weaviate/config-refs/schema/vector-index)|[HNSW / FLAT](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/vectors/#create-a-vector-field)|[HNSW not configurable](https://cookbook.chromadb.dev/core/concepts/#vector-index-hnsw-index)||[PGA](https://www.pinecone.io/blog/hnsw-not-enough/)||[HNSW / IVFFlat](https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing)|[HNSW for dense](https://qdrant.tech/documentation/concepts/indexing/#vector-index)|

[In Memory: FLAT / IVF_FLAT / IVF_SQ8 / IVF_PQ / HNSW / SCANN](https://milvus.io/docs/index.md)

[On Disk: DiskANN](https://milvus.io/docs/disk_index.md)

[GPU: GPU_CAGRA / GPU_IVF_FLAT / GPU_IVF_PQ / GPU_BRUTE_FORCE](https://milvus.io/docs/gpu_index.md)

| +|Supported index types|[Exhaustive KNN (FLAT) / HNSW](https://learn.microsoft.com/en-us/azure/search/vector-search-ranking#algorithms-used-in-vector-search)|[HNSW / Flat / Dynamic](https://weaviate.io/developers/weaviate/config-refs/schema/vector-index)|[HNSW / FLAT](https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/vectors/#create-a-vector-field)|[HNSW not configurable](https://cookbook.chromadb.dev/core/concepts/#vector-index-hnsw-index)||[PGA](https://www.pinecone.io/blog/hnsw-not-enough/)||[HNSW / IVFFlat](https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing)|[HNSW for dense](https://qdrant.tech/documentation/concepts/indexing/#vector-index)|

[In Memory: FLAT / IVF_FLAT / IVF_SQ8 / IVF_PQ / HNSW / SCANN](https://milvus.io/docs/index.md)

[On Disk: DiskANN](https://milvus.io/docs/disk_index.md)

[GPU: GPU_CAGRA / GPU_IVF_FLAT / GPU_IVF_PQ / GPU_BRUTE_FORCE](https://milvus.io/docs/gpu_index.md)

| Footnotes: - HNSW = Hierarchical Navigable Small World (HNSW performs an [approximate nearest neighbor (ANN)](https://learn.microsoft.com/en-us/azure/search/vector-search-overview#approximate-nearest-neighbors) search) @@ -310,7 +310,7 @@ Footnotes: Mapping between data models and the storage models can also require custom logic depending on the type of data model and storage model involved. -I'm therefore proposing that we allow mappers to be injectable for each `VectorRecordStore` instance. The interfaces for these would vary depending +I'm therefore proposing that we allow mappers to be injectable for each `VectorStoreCollection` instance. The interfaces for these would vary depending on the storage models used by each vector store and any unique capabilities that each vector store may have, e.g. qdrant can operate in `single` or `multiple named vector` modes, which means the mapper needs to know whether to set a single vector or fill a vector map. @@ -390,7 +390,7 @@ consistency and scalability. |Criteria|Current SK Implementation|Proposed SK Implementation|Spring AI|LlamaIndex|Langchain| |-|-|-|-|-|-| |Support for Custom Schemas|N|Y|N|N|N| -|Naming of store|MemoryStore|VectorRecordStore, VectorCollectionCreate, VectorCollectionNonSchema, VectorCollectionStore, VectorStore|VectorStore|VectorStore|VectorStore| +|Naming of store|MemoryStore|VectorStore, VectorStoreCollection|VectorStore|VectorStore|VectorStore| |MultiVector support|N|Y|N|N|N| |Support Multiple Collections via SDK params|Y|Y|N (via app config)|Y|Y| @@ -580,6 +580,39 @@ internal class VectorStore(IVectorCollectionCreate create, IVectorColle ``` +#### Option 6 - Collection store acts as factory for record store. + +`IVectorStore` acts as a factory for `IVectorStoreCollection`, and any schema agnostic multi-collection operations are kept on `IVectorStore`. + + +```cs +public interface IVectorStore +{ + IVectorStoreCollection GetCollection(string name, VectorStoreRecordDefinition? 
vectorStoreRecordDefinition = null); + IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default); +} + +public interface IVectorStoreCollection +{ + public string Name { get; } + + // Collection Operations + Task CreateCollectionAsync(); + Task CreateCollectionIfNotExistsAsync(); + Task CollectionExistsAsync(); + Task DeleteCollectionAsync(); + + // Data manipulation + Task GetAsync(TKey key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, CancellationToken cancellationToken = default); + Task DeleteAsync(TKey key, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); + Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); + Task UpsertAsync(TRecord record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default); + IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default); +} +``` + + #### Decision Outcome Option 1 is problematic on its own, since we have to allow consumers to create custom implementations of collection create for break glass scenarios. With @@ -597,14 +630,10 @@ as input, e.g. Azure-ML YAML. Therefore separating create, which may have many i Option 3 provides us this separation, but Option 4 + 5 builds on top of this, and allows us to combine different implementations together for simpler consumption. -Chosen option: 4 + 5. - -- Collection create, configuration and supported options vary considerably across different schemas and database types. -- Collection list, exists and delete is the same across different schemas, but varies by database type. -- Vector storage, even with custom schemas can be supported using a single implementation per database type. 
-- We will need to support multiple collection create implementations per store type, a single collection nonschema implementation per store type, and a single vector store implementation per store type. -- At the same time we can layer interfaces on top that allow easy combined access to collection and record management. +Chosen option: 6 +- Easy to use, and similar to many SDK implementations. +- Can pass a single object around for both collection and record access. ### Question 2: Collection name and key value normalization in store, decorator or via injection. @@ -614,7 +643,7 @@ Chosen option: 4 + 5. - Cons: The normalization needs to vary separately from the record store, so this will not work ```cs - public class AzureAISearchVectorRecordStore : IVectorRecordStore + public class AzureAISearchVectorStoreCollection : IVectorStoreCollection { ... @@ -637,24 +666,24 @@ Chosen option: 4 + 5. - Pros: No code executed when no normalization required. - Pros: Easy to package matching encoders/decoders together. - Pros: Easier to obsolete encoding/normalization as a concept. -- Cons: Not a major con, but need to implement the full VectorRecordStore interface, instead of e.g. just providing the two translation functions, if we go with option 3. +- Cons: Not a major con, but need to implement the full VectorStoreCollection interface, instead of e.g. just providing the two translation functions, if we go with option 3. - Cons: Hard to have a generic implementation that can work with any model, without either changing the data in the provided object on upsert or doing cloning in an expensive way. ```cs - new KeyNormalizingAISearchVectorRecordStore( + new KeyNormalizingAISearchVectorStoreCollection( "keyField", - new AzureAISearchVectorRecordStore(...)); + new AzureAISearchVectorStoreCollection(...)); ``` #### Option 3 - Normalization via optional function parameters to record store constructor - Pros: Allows normalization to vary separately from the record store. 
-- Pros: No need to implement the full VectorRecordStore interface. +- Pros: No need to implement the full VectorStoreCollection interface. - Pros: Can modify values on serialization without changing the incoming record, if supported by DB SDK. - Cons: Harder to package matching encoders/decoders together. ```cs -public class AzureAISearchVectorRecordStore(StoreOptions options); +public class AzureAISearchVectorStoreCollection(StoreOptions options); public class StoreOptions { @@ -686,7 +715,7 @@ provide their own encoding / decoding behavior. #### Option 1 - Collection name as method param ```cs -public class MyVectorRecordStore() +public class MyVectorStoreCollection() { public async Task GetAsync(string collectionName, string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); } @@ -695,7 +724,7 @@ public class MyVectorRecordStore() #### Option 2 - Collection name via constructor ```cs -public class MyVectorRecordStore(string defaultCollectionName) +public class MyVectorStoreCollection(string defaultCollectionName) { public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); } @@ -704,7 +733,7 @@ public class MyVectorRecordStore(string defaultCollectionName) #### Option 3 - Collection name via either ```cs -public class MyVectorRecordStore(string defaultCollectionName) +public class MyVectorStoreCollection(string defaultCollectionName) { public async Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default); } @@ -717,7 +746,7 @@ public class GetRecordOptions #### Decision Outcome -Chosen option 3, to allow developers more choice. +Chosen option 2. None of the other options work with the decision outcome of Question 1, since that design requires the `VectorStoreCollection` to be tied to a single collection instance. 
### Question 4: How to normalize ids across different vector stores where different types are supported. @@ -846,117 +875,70 @@ interface IMemoryCollectionCreateService {} ### Option 3 - VectorStore ```cs -interface IVectorRecordStore {} +interface IVectorRecordStore {} interface IVectorCollectionNonSchema {} interface IVectorCollectionCreate {} interface IVectorCollectionStore {}: IVectorCollectionCreate, IVectorCollectionNonSchema -interface IVectorStore {}: IVectorCollectionStore, IVectorRecordStore +interface IVectorStore {}: IVectorCollectionStore, IVectorRecordStore ``` -#### Decision Outcome - -Chosen option 3. The word memory is broad enough to encompass any data, so using it seems arbitrary. All competitors are using the term vector store, so using something similar is good for recognition. - -## Usage Examples - -Common Code across all examples +### Option 4 - VectorStore + VectorStoreCollection ```cs -class CacheEntryModel(string prompt, string result, ReadOnlyMemory promptEmbedding); - -class SemanticTextMemory(IVectorRecordStore recordStore, IVectorCollectionStore collectionStore, ITextEmbeddingGenerationService embeddingGenerator): ISemanticTextMemory; - -class CacheSetFunctionFilter(ISemanticTextMemory memory); // Saves results to cache. -class CacheGetPromptFilter(ISemanticTextMemory memory); // Check cache for entries. - -var builder = Kernel.CreateBuilder(); +interface IVectorStore +{ + IVectorStoreCollection GetCollection() +} +interface IVectorStoreCollection +{ + Get() + Delete() + Upsert() +} ``` -### DI Framework: Named Instances - -Similar to HttpClient, register implementations using names, that can only be constructed again -using a specific factory implementation. - -```cs -builder - .AddAzureOpenAITextEmbeddingGeneration(textEmbeddingDeploymentName, azureAIEndpoint, apiKey) - - // Collection Registration: - // Variant 1: Register just create. 
- .AddNamedAzureAISearchCollectionCreate(name: "CacheCreate", azureAISearchEndpoint, apiKey, createConfiguration) // Config - .AddNamedAzureAISearchCollectionCreate(name: "CacheCreate", sp => new CacheCreate(...)); // Custom implementation - // Create combined collection management that references the previously registered create instance. - .AddNamedAzureAISearchCollectionStore(name: "Cache", azureAISearchEndpoint, apiKey, createName: "CacheCreate") - - // Variant 2: Register collection store in one line with config or custom create implementation. - .AddNamedAzureAISearchCollectionStore(name: "Cache", azureAISearchEndpoint, apiKey, createConfiguration) // Config - .AddNamedAzureAISearchCollectionStore(name: "Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)) // Custom implementation - - // Record Registration with variants 1 and 2: - // Add record stores. - .AddAzureAISearchRecordStore(name: "Cache", azureAISearchEndpoint, apiKey) - - // Variant 3: Register collection and record store in one line with config or custom create implementation. - // Does all of the preious variants in one line. - .AddAzureAISearchVectorStore(name: "Cache", azureAISearchEndpoint, apiKey, createConfiguration) // Config - .AddAzureAISearchVectorStore(name: "Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)) // Custom implementation - - // Add semantic text memory referencing collection and record stores. - // This would register ISemanticTextMemory in the services container. - .AddSemanticTextMemory(collectionStoreName: "Cache", recordServiceName: "Cache"); - -// Add filter to retrieve items from cache and one to add items to cache. -// Since these filters depend on ISemanticTextMemory and that is already registered, it should get matched automatically. 
-builder.Services.AddTransient(); -builder.Services.AddTransient(); - -var kernel = - .Build(); +#### Decision Outcome -var vectorStoreFactory = kernel.Services.GetRequiredService(); -var cacheCollectionStore = vectorStoreFactory.CreateCollectionStore(name: "Cache"); -var cacheRecordStore = vectorStoreFactory.CreateRecordStore(name: "Cache"); -``` +Chosen option 4. The word memory is broad enough to encompass any data, so using it seems arbitrary. All competitors are using the term vector store, so using something similar is good for recognition. +Option 4 also matches our design as chosen in question 1. -### DI Framework: Registration based on consumer type. +## Usage Examples -Similar to `AddHttpClient`, this approach will register a specific implementation of -the storage implementations, for a provided consumer type. +### DI Framework: .net 8 Keyed Services ```cs -builder - .AddAzureOpenAITextEmbeddingGeneration(textEmbeddingDeploymentName, azureAIEndpoint, apiKey) - - // Collection and record registration with config or custom create implementation. - // This will register both IVectorCollectionStore and IVectorRecordStore and tie it to usage with SemanticTextMemory. - .AddAzureAISearchStorage>(azureAISearchEndpoint, apiKey, createConfiguration) // Config - .AddAzureAISearchStorage>(azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)); // Custom implementation +class CacheEntryModel(string prompt, string result, ReadOnlyMemory promptEmbedding); -// Add Semantic Cache Memory for the cache entry model. -builder.Services.AddTransient, SemanticTextMemory>(); +class SemanticTextMemory(IVectorStore configuredVectorStore, VectorStoreRecordDefinition? 
vectorStoreRecordDefinition): ISemanticTextMemory +{ + public async Task SaveInformation(string collectionName, TDataType record) + { + var collection = configuredVectorStore.GetCollection(collectionName, vectorStoreRecordDefinition); + if (!await collection.CollectionExistsAsync()) + { + await collection.CreateCollectionAsync(); + } + await collection.UpsertAsync(record); + } +} -// Add filter to retrieve items from cache and one to add items to cache. -// Since these filters depend on ISemanticTextMemory and that is already registered, it should get matched automatically. -builder.Services.AddTransient(); -builder.Services.AddTransient(); -``` +class CacheSetFunctionFilter(ISemanticTextMemory memory); // Saves results to cache. +class CacheGetPromptFilter(ISemanticTextMemory memory); // Check cache for entries. -### DI Framework: .net 8 Keyed Services +var builder = Kernel.CreateBuilder(); -```cs builder - .AddAzureOpenAITextEmbeddingGeneration(textEmbeddingDeploymentName, azureAIEndpoint, apiKey) + // Existing registration: + .AddAzureOpenAITextEmbeddingGeneration(textEmbeddingDeploymentName, azureAIEndpoint, apiKey, serviceId: "AzureOpenAI:text-embedding-ada-002") - // Collection and record registration with config or custom create implementation. - .AddAzureAISearchVectorStoreKeyedTransient("Cache", azureAISearchEndpoint, apiKey, createConfiguration) - .AddAzureAISearchVectorStoreKeyedTransient("Cache", azureAISearchEndpoint, apiKey, sp => new CacheCreate(...)); + // Register an IVectorStore implementation under the given key. + .AddAzureAISearch("Cache", azureAISearchEndpoint, apiKey, new Options() { withEmbeddingGeneration = true }); // Add Semantic Cache Memory for the cache entry model. 
-builder.Services.AddTransient>(sp => { - return new SemanticTextMemory( - sp.GetKeyedService>("Cache"), - sp.GetKeyedService("Cache"), - sp.GetRequiredService()); +builder.Services.AddTransient(sp => { + return new SemanticTextMemory( + sp.GetKeyedService("Cache"), + cacheRecordDefinition); }); // Add filter to retrieve items from cache and one to add items to cache. @@ -969,9 +951,9 @@ builder.Services.AddTransient ### Record Management -1. Release VectorRecordStore public interface and implementations for Azure AI Search, Qdrant and Redis. +1. Release VectorStoreCollection public interface and implementations for Azure AI Search, Qdrant and Redis. 2. Add support for registering record stores with SK container to allow automatic dependency injection. -3. Add VectorRecordStore implementations for remaining stores. +3. Add VectorStoreCollection implementations for remaining stores. ### Collection Management diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs similarity index 96% rename from dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs rename to dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs index 8994a8271e81..0023f85975f4 100644 --- a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorRecordStoreTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -19,9 +19,9 @@ namespace SemanticKernel.Connectors.AzureAISearch.UnitTests; /// -/// Contains tests for the class. +/// Contains tests for the class. 
/// -public class AzureAISearchVectorRecordStoreTests +public class AzureAISearchVectorStoreRecordCollectionTests { private const string TestCollectionName = "testcollection"; private const string TestRecordKey1 = "testid1"; @@ -32,7 +32,7 @@ public class AzureAISearchVectorRecordStoreTests private readonly CancellationToken _testCancellationToken = new(false); - public AzureAISearchVectorRecordStoreTests() + public AzureAISearchVectorStoreRecordCollectionTests() { this._searchClientMock = new Mock(MockBehavior.Strict); this._searchIndexClientMock = new Mock(MockBehavior.Strict); @@ -150,7 +150,7 @@ public async Task CanGetRecordWithCustomMapperAsync() .Returns(CreateModel(TestRecordKey1, true)); // Arrange target with custom mapper. - var sut = new AzureAISearchVectorRecordStore( + var sut = new AzureAISearchVectorStoreRecordCollection( this._searchIndexClientMock.Object, TestCollectionName, new() @@ -364,7 +364,7 @@ public async Task CanUpsertRecordWithCustomMapperAsync() .Returns(storageObject); // Arrange target with custom mapper. 
- var sut = new AzureAISearchVectorRecordStore( + var sut = new AzureAISearchVectorStoreRecordCollection( this._searchIndexClientMock.Object, TestCollectionName, new() @@ -386,9 +386,9 @@ await sut.UpsertAsync( Times.Once); } - private AzureAISearchVectorRecordStore CreateVectorRecordStore(bool useDefinition) + private AzureAISearchVectorStoreRecordCollection CreateVectorRecordStore(bool useDefinition) { - return new AzureAISearchVectorRecordStore( + return new AzureAISearchVectorStoreRecordCollection( this._searchIndexClientMock.Object, TestCollectionName, new() diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchRecordMapperType.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchRecordMapperType.cs index e9d99fc87bdb..856d666ae864 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchRecordMapperType.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchRecordMapperType.cs @@ -5,7 +5,7 @@ namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; /// -/// The types of mapper supported by . +/// The types of mapper supported by . 
/// public enum AzureAISearchRecordMapperType { diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs similarity index 93% rename from dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs rename to dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs index cb86e1ae3c17..55344eb02b17 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs @@ -21,7 +21,9 @@ namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; /// Service for storing and retrieving vector records, that uses Azure AI Search as the underlying storage. ///
/// The data model to use for adding, updating and retrieving data from storage. -public sealed class AzureAISearchVectorRecordStore : IVectorRecordStore +#pragma warning disable CA1711 // Identifiers should not have incorrect suffix +public sealed class AzureAISearchVectorStoreRecordCollection : IVectorStoreRecordCollection +#pragma warning restore CA1711 // Identifiers should not have incorrect suffix where TRecord : class { /// The name of this database for telemetry purposes. @@ -69,11 +71,11 @@ public sealed class AzureAISearchVectorRecordStore : IVectorRecordStore /// Azure AI Search client that can be used to manage data in an Azure AI Search Service index. private readonly SearchClient _searchClient; - /// The name of the collection that this will access. + /// The name of the collection that this will access. private readonly string _collectionName; /// Optional configuration options for this class. - private readonly AzureAISearchVectorRecordStoreOptions _options; + private readonly AzureAISearchVectorStoreRecordCollectionOptions _options; /// The name of the key field for the collections that this class is used with. private readonly string _keyPropertyName; @@ -82,14 +84,14 @@ public sealed class AzureAISearchVectorRecordStore : IVectorRecordStore private readonly List _nonVectorPropertyNames; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// Azure AI Search client that can be used to manage the list of indices in an Azure AI Search Service. - /// The name of the collection that this will access. + /// The name of the collection that this will access. /// Optional configuration options for this class. /// Thrown when is null. /// Thrown when options are misconfigured. - public AzureAISearchVectorRecordStore(SearchIndexClient searchIndexClient, string collectionName, AzureAISearchVectorRecordStoreOptions? 
options = default) + public AzureAISearchVectorStoreRecordCollection(SearchIndexClient searchIndexClient, string collectionName, AzureAISearchVectorStoreRecordCollectionOptions? options = default) { // Verify. Verify.NotNull(searchIndexClient); @@ -98,13 +100,13 @@ public AzureAISearchVectorRecordStore(SearchIndexClient searchIndexClient, strin // Assign. this._searchIndexClient = searchIndexClient; this._collectionName = collectionName; - this._options = options ?? new AzureAISearchVectorRecordStoreOptions(); + this._options = options ?? new AzureAISearchVectorStoreRecordCollectionOptions(); this._searchClient = this._searchIndexClient.GetSearchClient(collectionName); // Verify custom mapper. if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper && this._options.JsonObjectCustomMapper is null) { - throw new ArgumentException($"The {nameof(AzureAISearchVectorRecordStoreOptions.JsonObjectCustomMapper)} option needs to be set if a {nameof(AzureAISearchVectorRecordStoreOptions.MapperType)} of {nameof(AzureAISearchRecordMapperType.JsonObjectCustomMapper)} has been chosen.", nameof(options)); + throw new ArgumentException($"The {nameof(AzureAISearchVectorStoreRecordCollectionOptions.JsonObjectCustomMapper)} option needs to be set if a {nameof(AzureAISearchVectorStoreRecordCollectionOptions.MapperType)} of {nameof(AzureAISearchRecordMapperType.JsonObjectCustomMapper)} has been chosen.", nameof(options)); } // Enumerate public properties using configuration or attributes. 
diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollectionOptions.cs similarity index 92% rename from dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs rename to dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollectionOptions.cs index 2f9ca0257132..a5ed542ddb88 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorRecordStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollectionOptions.cs @@ -8,9 +8,9 @@ namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; /// -/// Options when creating a . +/// Options when creating a . /// -public sealed class AzureAISearchVectorRecordStoreOptions +public sealed class AzureAISearchVectorStoreRecordCollectionOptions where TRecord : class { /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs index cb8f7bf8b14c..3fa9e5985353 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs @@ -5,7 +5,7 @@ namespace Microsoft.SemanticKernel.Connectors.Qdrant; /// -/// The types of mapper supported by . +/// The types of mapper supported by . 
/// public enum QdrantRecordMapperType { diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs similarity index 88% rename from dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs rename to dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs index 63d38d3f6e9c..10513eb05198 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs @@ -17,7 +17,9 @@ namespace Microsoft.SemanticKernel.Connectors.Qdrant; /// Service for storing and retrieving vector records, that uses Qdrant as the underlying storage. /// /// The data model to use for adding, updating and retrieving data from storage. -public sealed class QdrantVectorRecordStore : IVectorRecordStore, IVectorRecordStore +#pragma warning disable CA1711 // Identifiers should not have incorrect suffix +public sealed class QdrantVectorStoreRecordCollection : IVectorStoreRecordCollection, IVectorStoreRecordCollection +#pragma warning restore CA1711 // Identifiers should not have incorrect suffix where TRecord : class { /// The name of this database for telemetry purposes. @@ -32,37 +34,37 @@ public sealed class QdrantVectorRecordStore : IVectorRecordStoreQdrant client that can be used to manage the collections and points in a Qdrant store.
private readonly MockableQdrantClient _qdrantClient; - /// The name of the collection that this will access. + /// The name of the collection that this will access. private readonly string _collectionName; /// Optional configuration options for this class. - private readonly QdrantVectorRecordStoreOptions _options; + private readonly QdrantVectorStoreRecordCollectionOptions _options; /// A mapper to use for converting between qdrant point and consumer models. private readonly IVectorStoreRecordMapper _mapper; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// Qdrant client that can be used to manage the collections and points in a Qdrant store. - /// The name of the collection that this will access. + /// The name of the collection that this will access. /// Optional configuration options for this class. /// Thrown if the is null. /// Thrown for any misconfigured options. - public QdrantVectorRecordStore(QdrantClient qdrantClient, string collectionName, QdrantVectorRecordStoreOptions? options = null) + public QdrantVectorStoreRecordCollection(QdrantClient qdrantClient, string collectionName, QdrantVectorStoreRecordCollectionOptions? options = null) : this(new MockableQdrantClient(qdrantClient), collectionName, options) { } /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// Qdrant client that can be used to manage the collections and points in a Qdrant store. - /// The name of the collection that this will access. + /// The name of the collection that this will access. /// Optional configuration options for this class. /// Thrown if the is null. /// Thrown for any misconfigured options. - internal QdrantVectorRecordStore(MockableQdrantClient qdrantClient, string collectionName, QdrantVectorRecordStoreOptions? 
options = null) + internal QdrantVectorStoreRecordCollection(MockableQdrantClient qdrantClient, string collectionName, QdrantVectorStoreRecordCollectionOptions? options = null) { // Verify. Verify.NotNull(qdrantClient); @@ -71,7 +73,7 @@ internal QdrantVectorRecordStore(MockableQdrantClient qdrantClient, string colle // Assign. this._qdrantClient = qdrantClient; this._collectionName = collectionName; - this._options = options ?? new QdrantVectorRecordStoreOptions(); + this._options = options ?? new QdrantVectorStoreRecordCollectionOptions(); // Assign Mapper. if (this._options.MapperType == QdrantRecordMapperType.QdrantPointStructCustomMapper) @@ -79,7 +81,7 @@ internal QdrantVectorRecordStore(MockableQdrantClient qdrantClient, string colle // Custom Mapper. if (this._options.PointStructCustomMapper is null) { - throw new ArgumentException($"The {nameof(QdrantVectorRecordStoreOptions.PointStructCustomMapper)} option needs to be set if a {nameof(QdrantVectorRecordStoreOptions.MapperType)} of {nameof(QdrantRecordMapperType.QdrantPointStructCustomMapper)} has been chosen.", nameof(options)); + throw new ArgumentException($"The {nameof(QdrantVectorStoreRecordCollectionOptions.PointStructCustomMapper)} option needs to be set if a {nameof(QdrantVectorStoreRecordCollectionOptions.MapperType)} of {nameof(QdrantRecordMapperType.QdrantPointStructCustomMapper)} has been chosen.", nameof(options)); } this._mapper = this._options.PointStructCustomMapper; @@ -201,7 +203,7 @@ await this.RunOperationAsync( } /// - async Task IVectorRecordStore.UpsertAsync(TRecord record, UpsertRecordOptions? options, CancellationToken cancellationToken) + async Task IVectorStoreRecordCollection.UpsertAsync(TRecord record, UpsertRecordOptions? options, CancellationToken cancellationToken) { Verify.NotNull(record); @@ -243,7 +245,7 @@ await this.RunOperationAsync( } /// - async IAsyncEnumerable IVectorRecordStore.UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? 
options, [EnumeratorCancellation] CancellationToken cancellationToken) + async IAsyncEnumerable IVectorStoreRecordCollection.UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options, [EnumeratorCancellation] CancellationToken cancellationToken) { Verify.NotNull(records); diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollectionOptions.cs similarity index 92% rename from dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs rename to dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollectionOptions.cs index 2eb6f6c3f53c..a8052e25ff7b 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorRecordStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollectionOptions.cs @@ -6,9 +6,9 @@ namespace Microsoft.SemanticKernel.Connectors.Qdrant; /// -/// Options when creating a . +/// Options when creating a . /// -public sealed class QdrantVectorRecordStoreOptions +public sealed class QdrantVectorStoreRecordCollectionOptions where TRecord : class { /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs index 9518c2e228a4..3f0e4af02d99 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs @@ -5,7 +5,7 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; /// -/// The types of mapper supported by . +/// The types of mapper supported by . 
/// public enum RedisRecordMapperType { diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs similarity index 93% rename from dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs rename to dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs index c82ec9e8d296..f4512062dd9c 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs @@ -20,7 +20,9 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; /// Service for storing and retrieving vector records, that uses Redis as the underlying storage. /// /// The data model to use for adding, updating and retrieving data from storage. -public sealed class RedisVectorRecordStore : IVectorRecordStore +#pragma warning disable CA1711 // Identifiers should not have incorrect suffix +public sealed class RedisVectorStoreRecordCollection : IVectorStoreRecordCollection +#pragma warning restore CA1711 // Identifiers should not have incorrect suffix where TRecord : class { /// The name of this database for telemetry purposes. @@ -44,11 +46,11 @@ public sealed class RedisVectorRecordStore : IVectorRecordStoreThe Redis database to read/write records from.
private readonly IDatabase _database; - /// The name of the collection that this will access. + /// The name of the collection that this will access. private readonly string _collectionName; /// Optional configuration options for this class. - private readonly RedisVectorRecordStoreOptions _options; + private readonly RedisVectorStoreRecordCollectionOptions _options; /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. private readonly PropertyInfo _keyPropertyInfo; @@ -66,13 +68,13 @@ public sealed class RedisVectorRecordStore : IVectorRecordStore - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. ///
/// The Redis database to read/write records from. - /// The name of the collection that this will access. + /// The name of the collection that this will access. /// Optional configuration options for this class. /// Throw when parameters are invalid. - public RedisVectorRecordStore(IDatabase database, string collectionName, RedisVectorRecordStoreOptions? options = null) + public RedisVectorStoreRecordCollection(IDatabase database, string collectionName, RedisVectorStoreRecordCollectionOptions? options = null) { // Verify. Verify.NotNull(database); @@ -81,7 +83,7 @@ public RedisVectorRecordStore(IDatabase database, string collectionName, RedisVe // Assign. this._database = database; this._collectionName = collectionName; - this._options = options ?? new RedisVectorRecordStoreOptions(); + this._options = options ?? new RedisVectorStoreRecordCollectionOptions(); this._jsonSerializerOptions = this._options.JsonSerializerOptions ?? JsonSerializerOptions.Default; // Enumerate public properties using configuration or attributes. 
@@ -112,7 +114,7 @@ public RedisVectorRecordStore(IDatabase database, string collectionName, RedisVe { if (this._options.JsonNodeCustomMapper is null) { - throw new ArgumentException($"The {nameof(RedisVectorRecordStoreOptions.JsonNodeCustomMapper)} option needs to be set if a {nameof(RedisVectorRecordStoreOptions.MapperType)} of {nameof(RedisRecordMapperType.JsonNodeCustomMapper)} has been chosen.", nameof(options)); + throw new ArgumentException($"The {nameof(RedisVectorStoreRecordCollectionOptions.JsonNodeCustomMapper)} option needs to be set if a {nameof(RedisVectorStoreRecordCollectionOptions.MapperType)} of {nameof(RedisRecordMapperType.JsonNodeCustomMapper)} has been chosen.", nameof(options)); } this._mapper = this._options.JsonNodeCustomMapper; diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollectionOptions.cs similarity index 94% rename from dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs rename to dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollectionOptions.cs index 5cdf6496e628..1c62d94dd6cc 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorRecordStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollectionOptions.cs @@ -7,9 +7,9 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; /// -/// Options when creating a . +/// Options when creating a . 
/// -public sealed class RedisVectorRecordStoreOptions +public sealed class RedisVectorStoreRecordCollectionOptions where TRecord : class { /// diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs similarity index 97% rename from dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs rename to dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs index cd17cdf48e04..c6d302c50388 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorRecordStoreTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs @@ -13,9 +13,9 @@ namespace Microsoft.SemanticKernel.Connectors.Qdrant.UnitTests; /// -/// Contains tests for the class. +/// Contains tests for the class. /// -public class QdrantVectorRecordStoreTests +public class QdrantVectorStoreRecordCollectionTests { private const string TestCollectionName = "testcollection"; private const ulong UlongTestRecordKey1 = 1; @@ -27,7 +27,7 @@ public class QdrantVectorRecordStoreTests private readonly CancellationToken _testCancellationToken = new(false); - public QdrantVectorRecordStoreTests() + public QdrantVectorStoreRecordCollectionTests() { this._qdrantClientMock = new Mock(MockBehavior.Strict); } @@ -155,7 +155,7 @@ public async Task CanGetRecordWithCustomMapperAsync() .Returns(CreateModel(UlongTestRecordKey1, true)); // Arrange target with custom mapper. - var sut = new QdrantVectorRecordStore>( + var sut = new QdrantVectorStoreRecordCollection>( this._qdrantClientMock.Object, TestCollectionName, new() @@ -382,7 +382,7 @@ public async Task CanUpsertRecordWithCustomMapperAsync() .Returns(pointStruct); // Arrange target with custom mapper. 
- var sut = new QdrantVectorRecordStore>( + var sut = new QdrantVectorStoreRecordCollection>( this._qdrantClientMock.Object, TestCollectionName, new() @@ -512,16 +512,16 @@ private static RetrievedPoint CreateRetrievedPoint(bool hasNamedVectors, T return point; } - private IVectorRecordStore> CreateVectorRecordStore(bool useDefinition, bool hasNamedVectors) + private IVectorStoreRecordCollection> CreateVectorRecordStore(bool useDefinition, bool hasNamedVectors) { - var store = new QdrantVectorRecordStore>( + var store = new QdrantVectorStoreRecordCollection>( this._qdrantClientMock.Object, TestCollectionName, new() { VectorStoreRecordDefinition = useDefinition ? this._singlePropsDefinition : null, HasNamedVectors = hasNamedVectors - }) as IVectorRecordStore>; + }) as IVectorStoreRecordCollection>; return store!; } diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs similarity index 96% rename from dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs rename to dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs index 07b79ab7790d..983036d8075f 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorRecordStoreTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs @@ -14,9 +14,9 @@ namespace Microsoft.SemanticKernel.Connectors.Redis.UnitTests; /// -/// Contains tests for the class. +/// Contains tests for the class. 
/// -public class RedisVectorRecordStoreTests +public class RedisVectorStoreRecordCollectionTests { private const string TestCollectionName = "testcollection"; private const string TestRecordKey1 = "testid1"; @@ -24,7 +24,7 @@ public class RedisVectorRecordStoreTests private readonly Mock _redisDatabaseMock; - public RedisVectorRecordStoreTests() + public RedisVectorStoreRecordCollectionTests() { this._redisDatabaseMock = new Mock(MockBehavior.Strict); @@ -141,7 +141,7 @@ public async Task CanGetRecordWithCustomMapperAsync() .Returns(CreateModel(TestRecordKey1, true)); // Arrange target with custom mapper. - var sut = new RedisVectorRecordStore( + var sut = new RedisVectorStoreRecordCollection( this._redisDatabaseMock.Object, TestCollectionName, new() @@ -289,7 +289,7 @@ public async Task CanUpsertRecordWithCustomMapperAsync() .Returns((TestRecordKey1, JsonNode.Parse(jsonNode)!)); // Arrange target with custom mapper. - var sut = new RedisVectorRecordStore( + var sut = new RedisVectorStoreRecordCollection( this._redisDatabaseMock.Object, TestCollectionName, new() @@ -310,9 +310,9 @@ public async Task CanUpsertRecordWithCustomMapperAsync() Times.Once); } - private RedisVectorRecordStore CreateVectorRecordStore(bool useDefinition) + private RedisVectorStoreRecordCollection CreateVectorRecordStore(bool useDefinition) { - return new RedisVectorRecordStore( + return new RedisVectorStoreRecordCollection( this._redisDatabaseMock.Object, TestCollectionName, new() diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs index f5df69c24023..6ac1d80e262c 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs @@ -37,7 +37,7 @@ public class AzureAISearchVectorStoreFixture : 
IAsyncLifetime .AddJsonFile(path: "testsettings.json", optional: false, reloadOnChange: true) .AddJsonFile(path: "testsettings.development.json", optional: true, reloadOnChange: true) .AddEnvironmentVariables() - .AddUserSecrets() + .AddUserSecrets() .Build(); /// diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs similarity index 81% rename from dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs rename to dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs index ecd8c4ee6d5f..db8989df733b 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -15,11 +15,11 @@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; /// -/// Integration tests for class. +/// Integration tests for class. /// Tests work with Azure AI Search Instance. 
/// [Collection("AzureAISearchVectorStoreCollection")] -public sealed class AzureAISearchVectorRecordStoreTests(ITestOutputHelper output, AzureAISearchVectorStoreFixture fixture) : IClassFixture +public sealed class AzureAISearchVectorStoreRecordCollectionTests(ITestOutputHelper output, AzureAISearchVectorStoreFixture fixture) : IClassFixture { // If null, all tests will be enabled private const string SkipReason = null; //"Requires Azure AI Search Service instance up and running"; @@ -30,11 +30,11 @@ public sealed class AzureAISearchVectorRecordStoreTests(ITestOutputHelper output public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition) { // Arrange - var options = new AzureAISearchVectorRecordStoreOptions + var options = new AzureAISearchVectorStoreRecordCollectionOptions { VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName, options); + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, fixture.TestIndexName, options); // Act var hotel = CreateTestHotel("Upsert-1"); @@ -64,7 +64,7 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition public async Task ItCanUpsertManyDocumentsToVectorStoreAsync() { // Arrange - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName); + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, fixture.TestIndexName); // Act var results = sut.UpsertBatchAsync( @@ -98,11 +98,11 @@ public async Task ItCanUpsertManyDocumentsToVectorStoreAsync() public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool useRecordDefinition) { // Arrange - var options = new AzureAISearchVectorRecordStoreOptions + var options = new AzureAISearchVectorStoreRecordCollectionOptions { VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName, options); + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, fixture.TestIndexName, options); // Act var getResult = await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = includeVectors }); @@ -130,7 +130,7 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool public async Task ItCanGetManyDocumentsFromVectorStoreAsync() { // Arrange - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName); + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, fixture.TestIndexName); // Act // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. @@ -154,11 +154,11 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefinition) { // Arrange - var options = new AzureAISearchVectorRecordStoreOptions + var options = new AzureAISearchVectorStoreRecordCollectionOptions { VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName); + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, fixture.TestIndexName); await sut.UpsertAsync(CreateTestHotel("Remove-1")); // Act @@ -174,7 +174,7 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() { // Arrange - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName); + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, fixture.TestIndexName); await sut.UpsertAsync(CreateTestHotel("RemoveMany-1")); await sut.UpsertAsync(CreateTestHotel("RemoveMany-2")); await sut.UpsertAsync(CreateTestHotel("RemoveMany-3")); @@ -193,7 +193,7 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() { // Arrange - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName); + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, fixture.TestIndexName); // Act & Assert Assert.Null(await sut.GetAsync("BaseSet-5", new GetRecordOptions { IncludeVectors = true })); @@ -204,7 +204,7 @@ public async Task ItThrowsOperationExceptionForFailedConnectionAsync() { // Arrange var searchIndexClient = new SearchIndexClient(new Uri("https://localhost:12345"), new AzureKeyCredential("12345")); - var sut = new AzureAISearchVectorRecordStore(searchIndexClient, fixture.TestIndexName); + var sut = new AzureAISearchVectorStoreRecordCollection(searchIndexClient, fixture.TestIndexName); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); @@ -215,7 +215,7 @@ public async Task ItThrowsOperationExceptionForFailedAuthenticationAsync() { // Arrange 
var searchIndexClient = new SearchIndexClient(new Uri(fixture.Config.ServiceUrl), new AzureKeyCredential("12345")); - var sut = new AzureAISearchVectorRecordStore(searchIndexClient, fixture.TestIndexName); + var sut = new AzureAISearchVectorStoreRecordCollection(searchIndexClient, fixture.TestIndexName); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); @@ -225,8 +225,8 @@ public async Task ItThrowsOperationExceptionForFailedAuthenticationAsync() public async Task ItThrowsMappingExceptionForFailedMapperAsync() { // Arrange - var options = new AzureAISearchVectorRecordStoreOptions { MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = new FailingMapper() }; - var sut = new AzureAISearchVectorRecordStore(fixture.SearchIndexClient, fixture.TestIndexName, options); + var options = new AzureAISearchVectorStoreRecordCollectionOptions { MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = new FailingMapper() }; + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, fixture.TestIndexName, options); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs similarity index 81% rename from dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs rename to dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs index f14a3234a308..d5b462284596 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs @@ -14,12 +14,12 
@@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; /// -/// Contains tests for the class. +/// Contains tests for the class. /// /// Used for logging. /// Qdrant setup and teardown. [Collection("QdrantVectorStoreCollection")] -public sealed class QdrantVectorRecordStoreTests(ITestOutputHelper output, QdrantVectorStoreFixture fixture) +public sealed class QdrantVectorStoreRecordCollectionTests(ITestOutputHelper output, QdrantVectorStoreFixture fixture) { [Theory] [InlineData(true, "singleVectorHotels", false)] @@ -29,12 +29,12 @@ public sealed class QdrantVectorRecordStoreTests(ITestOutputHelper output, Qdran public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition, string collectionName, bool hasNamedVectors) { // Arrange. - var options = new QdrantVectorRecordStoreOptions + var options = new QdrantVectorStoreRecordCollectionOptions { HasNamedVectors = hasNamedVectors, VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, collectionName, options); + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options); var record = this.CreateTestHotel(20); @@ -64,8 +64,8 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition public async Task ItCanUpsertAndRemoveDocumentWithGuidIdToVectorStoreAsync() { // Arrange. 
- var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = false }; - IVectorRecordStore sut = new QdrantVectorRecordStore(fixture.QdrantClient, "singleVectorGuidIdHotels", options); + var options = new QdrantVectorStoreRecordCollectionOptions { HasNamedVectors = false }; + IVectorStoreRecordCollection sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, "singleVectorGuidIdHotels", options); var record = new HotelInfoWithGuidId { @@ -108,12 +108,12 @@ public async Task ItCanUpsertAndRemoveDocumentWithGuidIdToVectorStoreAsync() public async Task ItCanGetDocumentFromVectorStoreAsync(bool useRecordDefinition, bool withEmbeddings, string collectionName, bool hasNamedVectors) { // Arrange. - var options = new QdrantVectorRecordStoreOptions + var options = new QdrantVectorStoreRecordCollectionOptions { HasNamedVectors = hasNamedVectors, VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, collectionName, options); + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options); // Act. var getResult = await sut.GetAsync(11, new GetRecordOptions { IncludeVectors = withEmbeddings }); @@ -149,12 +149,12 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool useRecordDefinition, public async Task ItCanGetDocumentWithGuidIdFromVectorStoreAsync(bool useRecordDefinition, bool withEmbeddings) { // Arrange. - var options = new QdrantVectorRecordStoreOptions + var options = new QdrantVectorStoreRecordCollectionOptions { HasNamedVectors = false, VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelWithGuidIdVectorStoreRecordDefinition : null }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, "singleVectorGuidIdHotels", options); + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, "singleVectorGuidIdHotels", options); // Act. 
var getResult = await sut.GetAsync(Guid.Parse("11111111-1111-1111-1111-111111111111"), new GetRecordOptions { IncludeVectors = withEmbeddings }); @@ -180,8 +180,8 @@ public async Task ItCanGetDocumentWithGuidIdFromVectorStoreAsync(bool useRecordD public async Task ItCanGetManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = true }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, "namedVectorsHotels", options); + var options = new QdrantVectorStoreRecordCollectionOptions { HasNamedVectors = true }; + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, "namedVectorsHotels", options); // Act // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. @@ -207,12 +207,12 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefinition, string collectionName, bool hasNamedVectors) { // Arrange. - var options = new QdrantVectorRecordStoreOptions + var options = new QdrantVectorStoreRecordCollectionOptions { HasNamedVectors = hasNamedVectors, VectorStoreRecordDefinition = useRecordDefinition ? fixture.HotelVectorStoreRecordDefinition : null }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, collectionName, options); + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options); await sut.UpsertAsync(this.CreateTestHotel(20)); @@ -233,12 +233,12 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync(bool useRecordDefinition, string collectionName, bool hasNamedVectors) { // Arrange. - var options = new QdrantVectorRecordStoreOptions + var options = new QdrantVectorStoreRecordCollectionOptions { HasNamedVectors = hasNamedVectors, VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.HotelVectorStoreRecordDefinition : null }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, collectionName, options); + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options); await sut.UpsertAsync(this.CreateTestHotel(20)); @@ -254,8 +254,8 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync(bool useRecordDef public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() { // Arrange - var options = new QdrantVectorRecordStoreOptions { HasNamedVectors = false }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, "singleVectorHotels", options); + var options = new QdrantVectorStoreRecordCollectionOptions { HasNamedVectors = false }; + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, "singleVectorHotels", options); // Act & Assert Assert.Null(await sut.GetAsync(15, new GetRecordOptions { IncludeVectors = true })); @@ -265,8 +265,8 @@ public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() public async Task ItThrowsMappingExceptionForFailedMapperAsync() { // Arrange - var options = new QdrantVectorRecordStoreOptions { MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, PointStructCustomMapper = new FailingMapper() }; - var sut = new QdrantVectorRecordStore(fixture.QdrantClient, "singleVectorHotels", options); + var options = new QdrantVectorStoreRecordCollectionOptions { MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, PointStructCustomMapper = new FailingMapper() }; + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, "singleVectorHotels", options); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync(11, new GetRecordOptions { IncludeVectors = true })); diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs similarity index 83% rename from 
dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs rename to dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs index 2cf8605777de..3957a4387d9f 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorRecordStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs @@ -13,12 +13,12 @@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.Redis; /// -/// Contains tests for the class. +/// Contains tests for the class. /// /// Used for logging. /// Redis setup and teardown. [Collection("RedisVectorStoreCollection")] -public sealed class RedisVectorRecordStoreTests(ITestOutputHelper output, RedisVectorStoreFixture fixture) +public sealed class RedisVectorStoreRecordCollectionTests(ITestOutputHelper output, RedisVectorStoreFixture fixture) { [Theory] [InlineData(true)] @@ -26,12 +26,12 @@ public sealed class RedisVectorRecordStoreTests(ITestOutputHelper output, RedisV public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition) { // Arrange. - var options = new RedisVectorRecordStoreOptions + var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); + var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); Hotel record = CreateTestHotel("Upsert-1", 1); // Act. @@ -63,12 +63,12 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition public async Task ItCanUpsertManyDocumentsToVectorStoreAsync(bool useRecordDefinition) { // Arrange. - var options = new RedisVectorRecordStoreOptions + var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); + var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act. var results = sut.UpsertBatchAsync( @@ -102,12 +102,12 @@ public async Task ItCanUpsertManyDocumentsToVectorStoreAsync(bool useRecordDefin public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool useRecordDefinition) { // Arrange. - var options = new RedisVectorRecordStoreOptions + var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); + var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act. var getResult = await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = includeVectors }); @@ -139,8 +139,8 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool public async Task ItCanGetManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new RedisVectorRecordStoreOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); + var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. @@ -162,8 +162,8 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() public async Task ItFailsToGetDocumentsWithInvalidSchemaAsync() { // Arrange. 
- var options = new RedisVectorRecordStoreOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); + var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act & Assert. await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-4-Invalid", new GetRecordOptions { IncludeVectors = true })); @@ -175,12 +175,12 @@ public async Task ItFailsToGetDocumentsWithInvalidSchemaAsync() public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefinition) { // Arrange. - var options = new RedisVectorRecordStoreOptions + var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); + var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); var address = new HotelAddress { City = "Seattle", Country = "USA" }; var record = new Hotel { @@ -206,8 +206,8 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new RedisVectorRecordStoreOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); + var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); await sut.UpsertAsync(CreateTestHotel("RemoveMany-1", 1)); await sut.UpsertAsync(CreateTestHotel("RemoveMany-2", 2)); await sut.UpsertAsync(CreateTestHotel("RemoveMany-3", 3)); @@ -226,8 +226,8 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() 
public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() { // Arrange - var options = new RedisVectorRecordStoreOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); + var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act & Assert Assert.Null(await sut.GetAsync("BaseSet-5", new GetRecordOptions { IncludeVectors = true })); @@ -237,13 +237,13 @@ public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() public async Task ItThrowsMappingExceptionForFailedMapperAsync() { // Arrange - var options = new RedisVectorRecordStoreOptions + var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, MapperType = RedisRecordMapperType.JsonNodeCustomMapper, JsonNodeCustomMapper = new FailingMapper() }; - var sut = new RedisVectorRecordStore(fixture.Database, "hotels", options); + var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorRecordStore.cs b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs similarity index 96% rename from dotnet/src/SemanticKernel.Abstractions/Data/IVectorRecordStore.cs rename to dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs index c88821ccb106..4c0bb8d3fa73 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorRecordStore.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs @@ -8,12 +8,14 @@ namespace Microsoft.SemanticKernel.Data; /// -/// An interface for adding, updating, deleting and retrieving records from a vector store. 
+/// An interface for managing a collection of records in a vector store. /// /// The data type of the record key. /// The record data model to use for adding, updating and retrieving data from the store. [Experimental("SKEXP0001")] -public interface IVectorRecordStore +#pragma warning disable CA1711 // Identifiers should not have incorrect suffix +public interface IVectorStoreRecordCollection +#pragma warning restore CA1711 // Identifiers should not have incorrect suffix where TRecord : class { /// diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs index 13e62eb84a71..6de2e07f66c4 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs @@ -5,7 +5,7 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Optional options when calling . +/// Optional options when calling . /// Reserved for future use. /// [Experimental("SKEXP0001")] diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs index 5d99580cb13b..fa86c2b6d5db 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs @@ -5,7 +5,7 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Optional options when calling . +/// Optional options when calling . 
/// [Experimental("SKEXP0001")] public class GetRecordOptions diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs index d291506635ff..8655cc25fde5 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs @@ -5,7 +5,7 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Optional options when calling . +/// Optional options when calling . /// Reserved for future use. /// [Experimental("SKEXP0001")] diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStore.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs similarity index 83% rename from dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStore.cs rename to dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs index 5f87bc1a93f6..fcb5398a3a5f 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStore.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs @@ -16,16 +16,18 @@ namespace Microsoft.SemanticKernel.Data; /// /// The data model to use for adding, updating and retrieving data from storage. [Experimental("SKEXP0001")] -public sealed class VolatileVectorRecordStore : IVectorRecordStore +#pragma warning disable CA1711 // Identifiers should not have incorrect suffix +public sealed class VolatileVectorStoreRecordCollection : IVectorStoreRecordCollection +#pragma warning restore CA1711 // Identifiers should not have incorrect suffix where TRecord : class { - /// Internal storage for the record store. + /// Internal storage for the record collection. private readonly ConcurrentDictionary> _internalCollection; /// Optional configuration options for this class. 
- private readonly VolatileVectorRecordStoreOptions _options; + private readonly VolatileVectorStoreRecordCollectionOptions _options; - /// The name of the collection that this will access. + /// The name of the collection that this will access. private readonly string _collectionName; /// A set of types that a key on the provided model may have. @@ -38,11 +40,11 @@ public sealed class VolatileVectorRecordStore : IVectorRecordStore - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// - /// The name of the collection that this will access. + /// The name of the collection that this will access. /// Optional configuration options for this class. - public VolatileVectorRecordStore(string collectionName, VolatileVectorRecordStoreOptions? options = default) + public VolatileVectorStoreRecordCollection(string collectionName, VolatileVectorStoreRecordCollectionOptions? options = default) { // Verify. Verify.NotNullOrWhiteSpace(collectionName); @@ -50,7 +52,7 @@ public VolatileVectorRecordStore(string collectionName, VolatileVectorRecordStor // Assign. this._collectionName = collectionName; this._internalCollection = new(); - this._options = options ?? new VolatileVectorRecordStoreOptions(); + this._options = options ?? new VolatileVectorStoreRecordCollectionOptions(); // Enumerate public properties using configuration or attributes. (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; @@ -69,12 +71,12 @@ public VolatileVectorRecordStore(string collectionName, VolatileVectorRecordStor } /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// Allows passing in the dictionary used for storage, for testing purposes. - /// The name of the collection that this will access. + /// The name of the collection that this will access. /// Optional configuration options for this class. 
- internal VolatileVectorRecordStore(ConcurrentDictionary> internalCollection, string collectionName, VolatileVectorRecordStoreOptions? options = default) + internal VolatileVectorStoreRecordCollection(ConcurrentDictionary> internalCollection, string collectionName, VolatileVectorStoreRecordCollectionOptions? options = default) : this(collectionName, options) { this._internalCollection = internalCollection; diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStoreOptions.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollectionOptions.cs similarity index 84% rename from dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStoreOptions.cs rename to dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollectionOptions.cs index 621aaf41e8ae..ef825c2e9ec6 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorRecordStoreOptions.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollectionOptions.cs @@ -5,10 +5,10 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Options when creating a . +/// Options when creating a . /// [Experimental("SKEXP0001")] -public sealed class VolatileVectorRecordStoreOptions +public sealed class VolatileVectorStoreRecordCollectionOptions { /// /// Gets or sets an optional record definition that defines the schema of the record type. 
diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorRecordStoreTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs similarity index 95% rename from dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorRecordStoreTests.cs rename to dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs index b42b797255d5..00e71179efa6 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorRecordStoreTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs @@ -11,9 +11,9 @@ namespace SemanticKernel.UnitTests.Data; /// -/// Contains tests for the class. +/// Contains tests for the class. /// -public class VolatileVectorRecordStoreTests +public class VolatileVectorStoreRecordCollectionTests { private const string TestCollectionName = "testcollection"; private const string TestRecordKey1 = "testid1"; @@ -23,7 +23,7 @@ public class VolatileVectorRecordStoreTests private readonly ConcurrentDictionary> _collectionStore; - public VolatileVectorRecordStoreTests() + public VolatileVectorStoreRecordCollectionTests() { this._collectionStore = new(); } @@ -205,9 +205,9 @@ private static SinglePropsModel CreateModel(string key, bool withVectors) }; } - private VolatileVectorRecordStore CreateVectorRecordStore(bool useDefinition) + private VolatileVectorStoreRecordCollection CreateVectorRecordStore(bool useDefinition) { - return new VolatileVectorRecordStore( + return new VolatileVectorStoreRecordCollection( this._collectionStore, TestCollectionName, new() From 0ef9b16566e953be6ca848101abe83a967b737b3 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:37:32 +0100 Subject: [PATCH 17/48] .Net: Adding collectionName, exists and delete to collection interface (#7262) ### Motivation and Context As part of the memory connector redesign we have fixed on a design where we have a VectorStore that 
produces VectorStoreRecordCollection instances. These are tied to a collection and will expose single collection operations. ### Description This PR adds some of the single collection operations to the VectorStoreRecordCollection, namely: 1. CollectionExists 2. DeleteCollection As well as exposing the name of the collection. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...ISearchVectorStoreRecordCollectionTests.cs | 64 +++++++++++++-- ...zureAISearchVectorStoreRecordCollection.cs | 34 ++++++++ .../MockableQdrantClient.cs | 30 ++++++- .../QdrantVectorStoreRecordCollection.cs | 42 ++++++++++ .../RedisVectorStoreRecordCollection.cs | 55 +++++++++++++ .../QdrantVectorStoreRecordCollectionTests.cs | 67 +++++++++++++--- .../RedisVectorStoreRecordCollectionTests.cs | 78 +++++++++++++++++-- ...ISearchVectorStoreRecordCollectionTests.cs | 31 ++++++++ .../QdrantVectorStoreRecordCollectionTests.cs | 33 ++++++++ .../RedisVectorStoreRecordCollectionTests.cs | 37 +++++++++ .../Data/IVectorStoreRecordCollection.cs | 19 +++++ .../VolatileVectorStoreRecordCollection.cs | 16 ++++ ...olatileVectorStoreRecordCollectionTests.cs | 50 ++++++++++-- 13 files changed, 521 insertions(+), 35 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs index 0023f85975f4..bb5328200c46 100644 --- 
a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -10,6 +10,7 @@ using Azure; using Azure.Search.Documents; using Azure.Search.Documents.Indexes; +using Azure.Search.Documents.Indexes.Models; using Azure.Search.Documents.Models; using Microsoft.SemanticKernel.Connectors.AzureAISearch; using Microsoft.SemanticKernel.Data; @@ -39,6 +40,53 @@ public AzureAISearchVectorStoreRecordCollectionTests() this._searchIndexClientMock.Setup(x => x.GetSearchClient(TestCollectionName)).Returns(this._searchClientMock.Object); } + [Theory] + [InlineData(TestCollectionName, true)] + [InlineData("nonexistentcollection", false)] + public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) + { + this._searchIndexClientMock.Setup(x => x.GetSearchClient(collectionName)).Returns(this._searchClientMock.Object); + + // Arrange. + if (expectedExists) + { + this._searchIndexClientMock + .Setup(x => x.GetIndexAsync(collectionName, this._testCancellationToken)) + .Returns(Task.FromResult?>(null)); + } + else + { + this._searchIndexClientMock + .Setup(x => x.GetIndexAsync(collectionName, this._testCancellationToken)) + .ThrowsAsync(new RequestFailedException(404, "Index not found")); + } + + var sut = new AzureAISearchVectorStoreRecordCollection(this._searchIndexClientMock.Object, collectionName); + + // Act. + var actual = await sut.CollectionExistsAsync(this._testCancellationToken); + + // Assert. + Assert.Equal(expectedExists, actual); + } + + [Fact] + public async Task CanDeleteCollectionAsync() + { + // Arrange. + this._searchIndexClientMock + .Setup(x => x.DeleteIndexAsync(TestCollectionName, this._testCancellationToken)) + .Returns(Task.FromResult(null)); + + var sut = this.CreateRecordCollection(false); + + // Act. 
+ await sut.DeleteCollectionAsync(this._testCancellationToken); + + // Assert. + this._searchIndexClientMock.Verify(x => x.DeleteIndexAsync(TestCollectionName, this._testCancellationToken), Times.Once); + } + [Theory] [InlineData(true)] [InlineData(false)] @@ -52,7 +100,7 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition) this._testCancellationToken)) .ReturnsAsync(Response.FromValue(CreateModel(TestRecordKey1, true), Mock.Of())); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act. var actual = await sut.GetAsync( @@ -82,7 +130,7 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) this._testCancellationToken)) .ReturnsAsync(Response.FromValue(CreateModel(TestRecordKey1, true), Mock.Of())); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act. var actual = await sut.GetAsync( @@ -112,7 +160,7 @@ public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) return Response.FromValue(CreateModel(id, true), Mock.Of()); }); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act. var actual = await sut.GetBatchAsync( @@ -187,7 +235,7 @@ public async Task CanDeleteRecordAsync(bool useDefinition) this._testCancellationToken)) .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act. await sut.DeleteAsync( @@ -222,7 +270,7 @@ public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition) this._testCancellationToken)) .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act. 
await sut.DeleteBatchAsync( @@ -261,7 +309,7 @@ public async Task CanUpsertRecordAsync(bool useDefinition) .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); // Arrange sut. - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); var model = CreateModel(TestRecordKey1, true); @@ -306,7 +354,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition) .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); // Arrange sut. - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); var model1 = CreateModel(TestRecordKey1, true); var model2 = CreateModel(TestRecordKey2, true); @@ -386,7 +434,7 @@ await sut.UpsertAsync( Times.Once); } - private AzureAISearchVectorStoreRecordCollection CreateVectorRecordStore(bool useDefinition) + private AzureAISearchVectorStoreRecordCollection CreateRecordCollection(bool useDefinition) { return new AzureAISearchVectorStoreRecordCollection( this._searchIndexClientMock.Object, diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs index 55344eb02b17..7cb58de20041 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs @@ -131,6 +131,40 @@ public AzureAISearchVectorStoreRecordCollection(SearchIndexClient searchIndexCli this._nonVectorPropertyNames = properties.dataProperties.Concat([properties.keyProperty]).Select(x => VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, x)).ToList(); } + /// + public string CollectionName => this._collectionName; + + /// + public async Task CollectionExistsAsync(CancellationToken cancellationToken = 
default) + { + try + { + await this._searchIndexClient.GetIndexAsync(this._collectionName, cancellationToken).ConfigureAwait(false); + return true; + } + catch (RequestFailedException ex) when (ex.Status == 404) + { + return false; + } + catch (RequestFailedException ex) + { + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + CollectionName = this._collectionName, + OperationName = "GetIndex" + }; + } + } + + /// + public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) + { + return this.RunOperationAsync( + "DeleteIndex", + () => this._searchIndexClient.DeleteIndexAsync(this._collectionName, cancellationToken)); + } + /// public Task GetAsync(string key, GetRecordOptions? options = default, CancellationToken cancellationToken = default) { diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs index c12d51bcf028..bbda7a838abd 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs @@ -36,14 +36,40 @@ internal MockableQdrantClient() } #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + /// + /// Check if a collection exists. + /// + /// The name of the collection. + /// + /// The token to monitor for cancellation requests. The default value is . + /// + public virtual Task CollectionExistsAsync( + string collectionName, + CancellationToken cancellationToken = default) + => this._qdrantClient.CollectionExistsAsync(collectionName, cancellationToken); + + /// + /// Drop a collection and all its associated data. + /// + /// The name of the collection. 
+ /// Wait timeout for operation commit in seconds, if not specified - default value will be supplied + /// + /// The token to monitor for cancellation requests. The default value is . + /// + public virtual Task DeleteCollectionAsync( + string collectionName, + TimeSpan? timeout = null, + CancellationToken cancellationToken = default) + => this._qdrantClient.DeleteCollectionAsync(collectionName, timeout, cancellationToken); + /// /// Delete a point. /// /// The name of the collection. /// The ID to delete. /// Whether to wait until the changes have been applied. Defaults to true. - /// Write ordering guarantees. Defaults to Weak. - /// Option for custom sharding to specify used shard keys. + /// Write ordering guarantees. Defaults to Weak. + /// Option for custom sharding to specify used shard keys. /// /// The token to monitor for cancellation requests. The default value is . /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs index 10513eb05198..8620055b5a8b 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs @@ -97,6 +97,25 @@ internal QdrantVectorStoreRecordCollection(MockableQdrantClient qdrantClient, st } } + /// + public string CollectionName => this._collectionName; + + /// + public Task CollectionExistsAsync(CancellationToken cancellationToken = default) + { + return this.RunOperationAsync( + "CollectionExists", + () => this._qdrantClient.CollectionExistsAsync(this._collectionName, cancellationToken)); + } + + /// + public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) + { + return this.RunOperationAsync( + "DeleteCollection", + () => this._qdrantClient.DeleteCollectionAsync(this._collectionName, null, cancellationToken)); + } + /// public async Task 
GetAsync(ulong key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) { @@ -316,6 +335,29 @@ private async IAsyncEnumerable GetBatchByPointIdAsync( } } + /// + /// Run the given operation and wrap any with ."/> + /// + /// The type of database operation being run. + /// The operation to run. + /// The result of the operation. + private async Task RunOperationAsync(string operationName, Func operation) + { + try + { + await operation.Invoke().ConfigureAwait(false); + } + catch (RpcException ex) + { + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + CollectionName = this._collectionName, + OperationName = operationName + }; + } + } + /// /// Run the given operation and wrap any with ."/> /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs index f4512062dd9c..2b3cd6902f77 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs @@ -125,6 +125,38 @@ public RedisVectorStoreRecordCollection(IDatabase database, string collectionNam } } + /// + public string CollectionName => this._collectionName; + + /// + public async Task CollectionExistsAsync(CancellationToken cancellationToken = default) + { + try + { + await this._database.FT().InfoAsync(this._collectionName).ConfigureAwait(false); + return true; + } + catch (RedisServerException ex) when (ex.Message.Contains("Unknown index name")) + { + return false; + } + catch (RedisConnectionException ex) + { + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + CollectionName = this._collectionName, + OperationName = "FT.INFO" + }; + } + } + + /// + public Task DeleteCollectionAsync(CancellationToken cancellationToken 
= default) + { + return this.RunOperationAsync("FT.DROPINDEX", () => this._database.FT().DropIndexAsync(this._collectionName)); + } + /// public async Task GetAsync(string key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) { @@ -331,6 +363,29 @@ private string PrefixKeyIfNeeded(string key) return key; } + /// + /// Run the given operation and wrap any Redis exceptions with ."/> + /// + /// The type of database operation being run. + /// The operation to run. + /// The result of the operation. + private async Task RunOperationAsync(string operationName, Func operation) + { + try + { + await operation.Invoke().ConfigureAwait(false); + } + catch (RedisConnectionException ex) + { + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + CollectionName = this._collectionName, + OperationName = operationName + }; + } + } + /// /// Run the given operation and wrap any Redis exceptions with ."/> /// diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs index c6d302c50388..988575d25996 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs @@ -32,11 +32,58 @@ public QdrantVectorStoreRecordCollectionTests() this._qdrantClientMock = new Mock(MockBehavior.Strict); } + [Theory] + [InlineData(TestCollectionName, true)] + [InlineData("nonexistentcollection", false)] + public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) + { + // Arrange. 
+ var sut = new QdrantVectorStoreRecordCollection>(this._qdrantClientMock.Object, collectionName); + + this._qdrantClientMock + .Setup(x => x.CollectionExistsAsync( + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(expectedExists); + + // Act. + var actual = await sut.CollectionExistsAsync(this._testCancellationToken); + + // Assert. + Assert.Equal(expectedExists, actual); + } + + [Fact] + public async Task CanDeleteCollectionAsync() + { + // Arrange. + var sut = new QdrantVectorStoreRecordCollection>(this._qdrantClientMock.Object, TestCollectionName); + + this._qdrantClientMock + .Setup(x => x.DeleteCollectionAsync( + It.IsAny(), + null, + this._testCancellationToken)) + .Returns(Task.CompletedTask); + + // Act. + await sut.DeleteCollectionAsync(this._testCancellationToken); + + // Assert. + this._qdrantClientMock + .Verify( + x => x.DeleteCollectionAsync( + TestCollectionName, + null, + this._testCancellationToken), + Times.Once); + } + [Theory] [MemberData(nameof(TestOptions))] public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool hasNamedVectors, TKey testRecordKey) { - var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); + var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); // Arrange. var retrievedPoint = CreateRetrievedPoint(hasNamedVectors, testRecordKey); @@ -72,7 +119,7 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool ha public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool hasNamedVectors, TKey testRecordKey) { // Arrange. 
- var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); + var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); var retrievedPoint = CreateRetrievedPoint(hasNamedVectors, testRecordKey); this.SetupRetrieveMock([retrievedPoint]); @@ -106,7 +153,7 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool hasNamedVectors, TKey[] testRecordKeys) { // Arrange. - var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); + var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); var retrievedPoint1 = CreateRetrievedPoint(hasNamedVectors, UlongTestRecordKey1); var retrievedPoint2 = CreateRetrievedPoint(hasNamedVectors, UlongTestRecordKey2); this.SetupRetrieveMock(testRecordKeys.Select(x => CreateRetrievedPoint(hasNamedVectors, x)).ToList()); @@ -193,7 +240,7 @@ public async Task CanGetRecordWithCustomMapperAsync() public async Task CanDeleteUlongRecordAsync(bool useDefinition, bool hasNamedVectors) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); + var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); this.SetupDeleteMocks(); // Act @@ -222,7 +269,7 @@ await sut.DeleteAsync( public async Task CanDeleteGuidRecordAsync(bool useDefinition, bool hasNamedVectors) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); + var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); this.SetupDeleteMocks(); // Act @@ -251,7 +298,7 @@ await sut.DeleteAsync( public async Task CanDeleteManyUlongRecordsAsync(bool useDefinition, bool hasNamedVectors) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); + var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); this.SetupDeleteMocks(); // Act @@ -280,7 +327,7 @@ await sut.DeleteBatchAsync( public async Task 
CanDeleteManyGuidRecordsAsync(bool useDefinition, bool hasNamedVectors) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); + var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); this.SetupDeleteMocks(); // Act @@ -306,7 +353,7 @@ await sut.DeleteBatchAsync( public async Task CanUpsertRecordAsync(bool useDefinition, bool hasNamedVectors, TKey testRecordKey) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); + var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); this.SetupUpsertMock(); // Act @@ -332,7 +379,7 @@ await sut.UpsertAsync( public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool hasNamedVectors, TKey[] testRecordKeys) { // Arrange - var sut = this.CreateVectorRecordStore(useDefinition, hasNamedVectors); + var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); this.SetupUpsertMock(); var models = testRecordKeys.Select(x => CreateModel(x, true)); @@ -512,7 +559,7 @@ private static RetrievedPoint CreateRetrievedPoint(bool hasNamedVectors, T return point; } - private IVectorStoreRecordCollection> CreateVectorRecordStore(bool useDefinition, bool hasNamedVectors) + private IVectorStoreRecordCollection> CreateRecordCollection(bool useDefinition, bool hasNamedVectors) { var store = new QdrantVectorStoreRecordCollection>( this._qdrantClientMock.Object, diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs index 983036d8075f..ee64128e9b4b 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs @@ -32,6 +32,58 @@ public RedisVectorStoreRecordCollectionTests() this._redisDatabaseMock.Setup(x => x.CreateBatch(It.IsAny())).Returns(batchMock.Object); } + 
[Theory] + [InlineData(TestCollectionName, true)] + [InlineData("nonexistentcollection", false)] + public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) + { + // Arrange + if (expectedExists) + { + SetupExecuteMock(this._redisDatabaseMock, ["index_name", collectionName]); + } + else + { + SetupExecuteMock(this._redisDatabaseMock, new RedisServerException("Unknown index name")); + } + var sut = new RedisVectorStoreRecordCollection( + this._redisDatabaseMock.Object, + collectionName); + + // Act + var actual = await sut.CollectionExistsAsync(); + + // Assert + var expectedArgs = new object[] { collectionName }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "FT.INFO", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + Assert.Equal(expectedExists, actual); + } + + [Fact] + public async Task CanDeleteCollectionAsync() + { + // Arrange + SetupExecuteMock(this._redisDatabaseMock, string.Empty); + var sut = this.CreateRecordCollection(false); + + // Act + await sut.DeleteCollectionAsync(); + + // Assert + var expectedArgs = new object[] { TestCollectionName }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "FT.DROPINDEX", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + } + [Theory] [InlineData(true)] [InlineData(false)] @@ -40,7 +92,7 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition) // Arrange var redisResultString = """{ "Data": "data 1", "Vector": [1, 2, 3, 4] }"""; SetupExecuteMock(this._redisDatabaseMock, redisResultString); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act var actual = await sut.GetAsync( @@ -70,7 +122,7 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) // Arrange var redisResultString = """{ "Data": "data 1" }"""; SetupExecuteMock(this._redisDatabaseMock, redisResultString); - var sut = this.CreateVectorRecordStore(useDefinition); 
+ var sut = this.CreateRecordCollection(useDefinition); // Act var actual = await sut.GetAsync( @@ -101,7 +153,7 @@ public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) var redisResultString1 = """{ "Data": "data 1", "Vector": [1, 2, 3, 4] }"""; var redisResultString2 = """{ "Data": "data 2", "Vector": [5, 6, 7, 8] }"""; SetupExecuteMock(this._redisDatabaseMock, [redisResultString1, redisResultString2]); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act var actual = await sut.GetBatchAsync( @@ -176,7 +228,7 @@ public async Task CanDeleteRecordAsync(bool useDefinition) { // Arrange SetupExecuteMock(this._redisDatabaseMock, "200"); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act await sut.DeleteAsync(TestRecordKey1); @@ -198,7 +250,7 @@ public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition) { // Arrange SetupExecuteMock(this._redisDatabaseMock, "200"); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act await sut.DeleteBatchAsync([TestRecordKey1, TestRecordKey2]); @@ -227,7 +279,7 @@ public async Task CanUpsertRecordAsync(bool useDefinition) { // Arrange SetupExecuteMock(this._redisDatabaseMock, "OK"); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); var model = CreateModel(TestRecordKey1, true); // Act @@ -251,7 +303,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition) { // Arrange SetupExecuteMock(this._redisDatabaseMock, "OK"); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); var model1 = CreateModel(TestRecordKey1, true); var model2 = CreateModel(TestRecordKey2, true); @@ -310,7 +362,7 @@ public async Task CanUpsertRecordWithCustomMapperAsync() Times.Once); } - private 
RedisVectorStoreRecordCollection CreateVectorRecordStore(bool useDefinition) + private RedisVectorStoreRecordCollection CreateRecordCollection(bool useDefinition) { return new RedisVectorStoreRecordCollection( this._redisDatabaseMock.Object, @@ -321,6 +373,16 @@ private RedisVectorStoreRecordCollection CreateVectorRecordSto }); } + private static void SetupExecuteMock(Mock redisDatabaseMock, Exception exception) + { + redisDatabaseMock + .Setup( + x => x.ExecuteAsync( + It.IsAny(), + It.IsAny())) + .ThrowsAsync(exception); + } + private static void SetupExecuteMock(Mock redisDatabaseMock, IEnumerable redisResultStrings) { var results = redisResultStrings diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs index db8989df733b..7772c7359202 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -24,6 +24,37 @@ public sealed class AzureAISearchVectorStoreRecordCollectionTests(ITestOutputHel // If null, all tests will be enabled private const string SkipReason = null; //"Requires Azure AI Search Service instance up and running"; + [Theory(Skip = SkipReason)] + [InlineData(true)] + [InlineData(false)] + public async Task CollectionExistsReturnsCollectionStateAsync(bool expectedExists) + { + // Arrange. + var collectionName = expectedExists ? fixture.TestIndexName : "nonexistentcollection"; + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, collectionName); + + // Act. + var actual = await sut.CollectionExistsAsync(); + + // Assert. 
+ Assert.Equal(expectedExists, actual); + } + + [Fact(Skip = SkipReason)] + public async Task ItCanDeleteCollectionAsync() + { + // Arrange + var tempCollectionName = fixture.TestIndexName + "-delete"; + await AzureAISearchVectorStoreFixture.CreateIndexAsync(tempCollectionName, fixture.SearchIndexClient); + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, tempCollectionName); + + // Act + await sut.DeleteCollectionAsync(); + + // Assert + Assert.False(await sut.CollectionExistsAsync()); + } + [Theory(Skip = SkipReason)] [InlineData(true)] [InlineData(false)] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs index d5b462284596..7249bdb0e93e 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs @@ -21,6 +21,39 @@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; [Collection("QdrantVectorStoreCollection")] public sealed class QdrantVectorStoreRecordCollectionTests(ITestOutputHelper output, QdrantVectorStoreFixture fixture) { + [Theory] + [InlineData("singleVectorHotels", true)] + [InlineData("nonexistentcollection", false)] + public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) + { + // Arrange. + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName); + + // Act. + var actual = await sut.CollectionExistsAsync(); + + // Assert. 
+ Assert.Equal(expectedExists, actual); + } + + [Fact] + public async Task ItCanDeleteCollectionAsync() + { + // Arrange + var tempCollectionName = "temp-test"; + await fixture.QdrantClient.CreateCollectionAsync( + tempCollectionName, + new VectorParams { Size = 4, Distance = Distance.Cosine }); + + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, tempCollectionName); + + // Act + await sut.DeleteCollectionAsync(); + + // Assert + Assert.False(await sut.CollectionExistsAsync()); + } + [Theory] [InlineData(true, "singleVectorHotels", false)] [InlineData(false, "singleVectorHotels", false)] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs index 3957a4387d9f..7eb0eb7454be 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs @@ -6,6 +6,8 @@ using System.Threading.Tasks; using Microsoft.SemanticKernel.Connectors.Redis; using Microsoft.SemanticKernel.Data; +using NRedisStack.RedisStackCommands; +using NRedisStack.Search; using Xunit; using Xunit.Abstractions; using static SemanticKernel.IntegrationTests.Connectors.Memory.Redis.RedisVectorStoreFixture; @@ -20,6 +22,41 @@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.Redis; [Collection("RedisVectorStoreCollection")] public sealed class RedisVectorStoreRecordCollectionTests(ITestOutputHelper output, RedisVectorStoreFixture fixture) { + [Theory] + [InlineData("hotels", true)] + [InlineData("nonexistentcollection", false)] + public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) + { + // Arrange. + var sut = new RedisVectorStoreRecordCollection(fixture.Database, collectionName); + + // Act. 
+ var actual = await sut.CollectionExistsAsync(); + + // Assert. + Assert.Equal(expectedExists, actual); + } + + [Fact] + public async Task ItCanDeleteCollectionAsync() + { + // Arrange + var tempCollectionName = "temp-test"; + var schema = new Schema(); + schema.AddTextField("HotelName"); + var createParams = new FTCreateParams(); + createParams.AddPrefix(tempCollectionName); + await fixture.Database.FT().CreateAsync(tempCollectionName, createParams, schema); + + var sut = new RedisVectorStoreRecordCollection(fixture.Database, tempCollectionName); + + // Act + await sut.DeleteCollectionAsync(); + + // Assert + Assert.False(await sut.CollectionExistsAsync()); + } + [Theory] [InlineData(true)] [InlineData(false)] diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs index 4c0bb8d3fa73..78f588e53c7e 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs @@ -18,6 +18,25 @@ public interface IVectorStoreRecordCollection #pragma warning restore CA1711 // Identifiers should not have incorrect suffix where TRecord : class { + /// + /// Gets the name of the collection. + /// + public string CollectionName { get; } + + /// + /// Check if the collection exists in the vector store. + /// + /// The to monitor for cancellation requests. The default is . + /// if the collection exists, otherwise. + Task CollectionExistsAsync(CancellationToken cancellationToken = default); + + /// + /// Delete the collection from the vector store. + /// + /// The to monitor for cancellation requests. The default is . + /// A task that completes when the collection has been deleted. + Task DeleteCollectionAsync(CancellationToken cancellationToken = default); + /// /// Gets a record from the vector store. Does not guarantee that the collection exists. 
/// Returns null if the record is not found. diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs index fcb5398a3a5f..ae940067fd1b 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs @@ -82,6 +82,22 @@ internal VolatileVectorStoreRecordCollection(ConcurrentDictionary + public string CollectionName => this._collectionName; + + /// + public Task CollectionExistsAsync(CancellationToken cancellationToken = default) + { + return this._internalCollection.ContainsKey(this._collectionName) ? Task.FromResult(true) : Task.FromResult(false); + } + + /// + public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) + { + this._internalCollection.TryRemove(this._collectionName, out _); + return Task.CompletedTask; + } + /// public Task GetAsync(string key, GetRecordOptions? 
options = null, CancellationToken cancellationToken = default) { diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs index 00e71179efa6..47fa2e5355df 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs @@ -28,6 +28,42 @@ public VolatileVectorStoreRecordCollectionTests() this._collectionStore = new(); } + [Theory] + [InlineData(TestCollectionName, true)] + [InlineData("nonexistentcollection", false)] + public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) + { + // Arrange + var collection = new ConcurrentDictionary(); + this._collectionStore.TryAdd(TestCollectionName, collection); + + var sut = new VolatileVectorStoreRecordCollection( + this._collectionStore, + collectionName); + + // Act + var actual = await sut.CollectionExistsAsync(this._testCancellationToken); + + // Assert + Assert.Equal(expectedExists, actual); + } + + [Fact] + public async Task DeleteCollectionRemovesCollectionFromDictionaryAsync() + { + // Arrange + var collection = new ConcurrentDictionary(); + this._collectionStore.TryAdd(TestCollectionName, collection); + + var sut = this.CreateRecordCollection(false); + + // Act + await sut.DeleteCollectionAsync(this._testCancellationToken); + + // Assert + Assert.Empty(this._collectionStore); + } + [Theory] [InlineData(true)] [InlineData(false)] @@ -39,7 +75,7 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition) collection.TryAdd(TestRecordKey1, record); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act var actual = await sut.GetAsync( @@ -72,7 +108,7 @@ public async Task 
CanGetManyRecordsWithVectorsAsync(bool useDefinition) collection.TryAdd(TestRecordKey2, record2); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act var actual = await sut.GetBatchAsync( @@ -105,7 +141,7 @@ public async Task CanDeleteRecordAsync(bool useDefinition) collection.TryAdd(TestRecordKey2, record2); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act await sut.DeleteAsync( @@ -130,7 +166,7 @@ public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition) collection.TryAdd(TestRecordKey2, record2); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act await sut.DeleteBatchAsync( @@ -152,7 +188,7 @@ public async Task CanUpsertRecordAsync(bool useDefinition) var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act var upsertResult = await sut.UpsertAsync( @@ -177,7 +213,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition) var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateVectorRecordStore(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act var actual = await sut.UpsertBatchAsync( @@ -205,7 +241,7 @@ private static SinglePropsModel CreateModel(string key, bool withVectors) }; } - private VolatileVectorStoreRecordCollection CreateVectorRecordStore(bool useDefinition) + private VolatileVectorStoreRecordCollection CreateRecordCollection(bool useDefinition) { return new 
VolatileVectorStoreRecordCollection( this._collectionStore, From eb54ca25ea3d0d52b59fd0ab9751e96b12cb30f9 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Tue, 16 Jul 2024 18:25:30 +0100 Subject: [PATCH 18/48] .Net: Adding Azure AI Search CollectionCreate (#7287) ### Motivation and Context As part of the memory connector redesign we have settled on a design where we have a VectorStore that produces VectorStoreRecordCollection instances. These are tied to a collection and will expose single-collection operations. ### Description This PR adds: - The ability to create Azure AI Search collections - Additional properties to the attributes and record definitions to support vector configuration and index creation. - Code to parse and map those new properties I'll update the IVectorStoreRecordCollection interface with the CreateCollection and CreateCollectionIfNotExists methods once I've added create to more implementations, to avoid having one very large PR.
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...VectorStoreCollectionCreateMappingTests.cs | 181 ++++++++++++++++++ ...ISearchVectorStoreRecordCollectionTests.cs | 81 +++++++- ...earchVectorStoreCollectionCreateMapping.cs | 172 +++++++++++++++++ ...zureAISearchVectorStoreRecordCollection.cs | 56 ++++++ .../AzureAISearchVectorStoreFixture.cs | 16 +- ...ISearchVectorStoreRecordCollectionTests.cs | 27 +++ .../Data/VectorStoreRecordPropertyReader.cs | 5 + .../VectorStoreRecordDataAttribute.cs | 5 + .../VectorStoreRecordVectorAttribute.cs | 46 +++++ .../Data/RecordDefinition/DistanceFunction.cs | 53 +++++ .../Data/RecordDefinition/IndexKind.cs | 28 +++ .../VectorStoreRecordDataProperty.cs | 11 ++ .../VectorStoreRecordVectorProperty.cs | 17 ++ .../VectorStoreRecordPropertyReaderTests.cs | 18 +- 14 files changed, 701 insertions(+), 15 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs create mode 100644 dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs new file mode 100644 
index 000000000000..da7733720551 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs @@ -0,0 +1,181 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using Azure.Search.Documents.Indexes.Models; +using Microsoft.SemanticKernel.Connectors.AzureAISearch; +using Microsoft.SemanticKernel.Data; +using Xunit; + +namespace SemanticKernel.Connectors.AzureAISearch.UnitTests; + +/// +/// Contains tests for the class. +/// +public class AzureAISearchVectorStoreCollectionCreateMappingTests +{ + [Fact] + public void MapKeyFieldCreatesSearchableField() + { + // Arrange + var keyProperty = new VectorStoreRecordKeyProperty("testkey"); + + // Act + var result = AzureAISearchVectorStoreCollectionCreateMapping.MapKeyField(keyProperty); + + // Assert + Assert.NotNull(result); + Assert.Equal(keyProperty.PropertyName, result.Name); + Assert.True(result.IsKey); + Assert.True(result.IsFilterable); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void MapStringDataFieldCreatesSearchableField(bool isFilterable) + { + // Arrange + var dataProperty = new VectorStoreRecordDataProperty("testdata") { IsFilterable = isFilterable, PropertyType = typeof(string) }; + + // Act + var result = AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty); + + // Assert + Assert.NotNull(result); + Assert.IsType(result); + Assert.Equal(dataProperty.PropertyName, result.Name); + Assert.False(result.IsKey); + Assert.Equal(isFilterable, result.IsFilterable); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void MapDataFieldCreatesSimpleField(bool isFilterable) + { + // Arrange + var dataProperty = new VectorStoreRecordDataProperty("testdata") { IsFilterable = isFilterable, PropertyType = typeof(int) }; + + // Act + var result = AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty); + + // Assert + 
Assert.NotNull(result); + Assert.IsType(result); + Assert.Equal(dataProperty.PropertyName, result.Name); + Assert.Equal(SearchFieldDataType.Int32, result.Type); + Assert.False(result.IsKey); + Assert.Equal(isFilterable, result.IsFilterable); + } + + [Fact] + public void MapDataFieldFailsForNullType() + { + // Arrange + var dataProperty = new VectorStoreRecordDataProperty("testdata"); + + // Act & Assert + Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty)); + } + + [Fact] + public void MapVectorFieldCreatesVectorSearchField() + { + // Arrange + var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 10, IndexKind = IndexKind.Flat, DistanceFunction = DistanceFunction.DotProductSimilarity }; + + // Act + var (vectorSearchField, algorithmConfiguration, vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty); + + // Assert + Assert.NotNull(vectorSearchField); + Assert.NotNull(algorithmConfiguration); + Assert.NotNull(vectorSearchProfile); + Assert.Equal(vectorProperty.PropertyName, vectorSearchField.Name); + Assert.Equal(vectorProperty.Dimensions, vectorSearchField.VectorSearchDimensions); + + Assert.Equal("testvectorAlgoConfig", algorithmConfiguration.Name); + Assert.IsType(algorithmConfiguration); + var flatConfig = algorithmConfiguration as ExhaustiveKnnAlgorithmConfiguration; + Assert.Equal(VectorSearchAlgorithmMetric.DotProduct, flatConfig!.Parameters.Metric); + + Assert.Equal("testvectorProfile", vectorSearchProfile.Name); + Assert.Equal("testvectorAlgoConfig", vectorSearchProfile.AlgorithmConfigurationName); + } + + [Theory] + [InlineData(IndexKind.Hnsw, typeof(HnswAlgorithmConfiguration))] + [InlineData(IndexKind.Flat, typeof(ExhaustiveKnnAlgorithmConfiguration))] + public void MapVectorFieldCreatesExpectedAlgoConfigTypes(string indexKind, Type algoConfigType) + { + // Arrange + var vectorProperty = new 
VectorStoreRecordVectorProperty("testvector") { Dimensions = 10, IndexKind = indexKind, DistanceFunction = DistanceFunction.DotProductSimilarity }; + + // Act + var (vectorSearchField, algorithmConfiguration, vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty); + + // Assert + Assert.Equal("testvectorAlgoConfig", algorithmConfiguration.Name); + Assert.Equal(algoConfigType, algorithmConfiguration.GetType()); + } + + [Fact] + public void MapVectorFieldDefaultsToHsnwAndCosine() + { + // Arrange + var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 10 }; + + // Act + var (vectorSearchField, algorithmConfiguration, vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty); + + // Assert + Assert.IsType(algorithmConfiguration); + var hnswConfig = algorithmConfiguration as HnswAlgorithmConfiguration; + Assert.Equal(VectorSearchAlgorithmMetric.Cosine, hnswConfig!.Parameters.Metric); + } + + [Fact] + public void MapVectorFieldThrowsForUnsupportedDistanceFunction() + { + // Arrange + var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 10, DistanceFunction = DistanceFunction.ManhattanDistance }; + + // Act + Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty)); + } + + [Fact] + public void MapVectorFieldThrowsForMissingDimensionsCount() + { + // Arrange + var vectorProperty = new VectorStoreRecordVectorProperty("testvector"); + + // Act + Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty)); + } + + [Theory] + [MemberData(nameof(DataTypeMappingOptions))] + public void GetSDKFieldDataTypeMapsTypesCorrectly(Type propertyType, SearchFieldDataType searchFieldDataType) + { + // Act & Assert + Assert.Equal(searchFieldDataType, AzureAISearchVectorStoreCollectionCreateMapping.GetSDKFieldDataType(propertyType)); + } + + public static 
IEnumerable DataTypeMappingOptions() + { + yield return new object[] { typeof(string), SearchFieldDataType.String }; + yield return new object[] { typeof(bool), SearchFieldDataType.Boolean }; + yield return new object[] { typeof(int), SearchFieldDataType.Int32 }; + yield return new object[] { typeof(long), SearchFieldDataType.Int64 }; + yield return new object[] { typeof(float), SearchFieldDataType.Double }; + yield return new object[] { typeof(double), SearchFieldDataType.Double }; + yield return new object[] { typeof(DateTime), SearchFieldDataType.DateTimeOffset }; + yield return new object[] { typeof(DateTimeOffset), SearchFieldDataType.DateTimeOffset }; + + yield return new object[] { typeof(string[]), SearchFieldDataType.Collection(SearchFieldDataType.String) }; + yield return new object[] { typeof(IEnumerable), SearchFieldDataType.Collection(SearchFieldDataType.String) }; + yield return new object[] { typeof(List), SearchFieldDataType.Collection(SearchFieldDataType.String) }; + } +} diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs index bb5328200c46..ad894f829e43 100644 --- a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -70,6 +70,81 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN Assert.Equal(expectedExists, actual); } + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task CreateCollectionCallsSDKAsync(bool useDefinition) + { + // Arrange. 
+ this._searchIndexClientMock + .Setup(x => x.CreateIndexAsync(It.IsAny(), this._testCancellationToken)) + .ReturnsAsync(Response.FromValue(new SearchIndex(TestCollectionName), Mock.Of())); + + var sut = this.CreateRecordCollection(useDefinition); + + // Act. + await sut.CreateCollectionAsync(); + + // Assert. + this._searchIndexClientMock + .Verify( + x => x.CreateIndexAsync( + It.Is(si => si.Fields.Count == 3 && si.Name == TestCollectionName && si.VectorSearch.Profiles.Count == 1 && si.VectorSearch.Algorithms.Count == 1), + this._testCancellationToken), + Times.Once); + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CreateCollectionIfNotExistsSDKAsync(bool useDefinition, bool expectedExists) + { + // Arrange. + if (expectedExists) + { + this._searchIndexClientMock + .Setup(x => x.GetIndexAsync(TestCollectionName, this._testCancellationToken)) + .Returns(Task.FromResult?>(null)); + } + else + { + this._searchIndexClientMock + .Setup(x => x.GetIndexAsync(TestCollectionName, this._testCancellationToken)) + .ThrowsAsync(new RequestFailedException(404, "Index not found")); + } + + this._searchIndexClientMock + .Setup(x => x.CreateIndexAsync(It.IsAny(), this._testCancellationToken)) + .ReturnsAsync(Response.FromValue(new SearchIndex(TestCollectionName), Mock.Of())); + + var sut = this.CreateRecordCollection(useDefinition); + + // Act. + await sut.CreateCollectionIfNotExistsAsync(); + + // Assert. 
+ if (expectedExists) + { + this._searchIndexClientMock + .Verify( + x => x.CreateIndexAsync( + It.IsAny(), + this._testCancellationToken), + Times.Never); + } + else + { + this._searchIndexClientMock + .Verify( + x => x.CreateIndexAsync( + It.Is(si => si.Fields.Count == 3 && si.Name == TestCollectionName && si.VectorSearch.Profiles.Count == 1 && si.VectorSearch.Algorithms.Count == 1), + this._testCancellationToken), + Times.Once); + } + } + [Fact] public async Task CanDeleteCollectionAsync() { @@ -461,8 +536,8 @@ private static SinglePropsModel CreateModel(string key, bool withVectors) Properties = [ new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data"), - new VectorStoreRecordVectorProperty("Vector") + new VectorStoreRecordDataProperty("Data") { PropertyType = typeof(string) }, + new VectorStoreRecordVectorProperty("Vector") { Dimensions = 4 } ] }; @@ -474,7 +549,7 @@ public sealed class SinglePropsModel [VectorStoreRecordData] public string Data { get; set; } = string.Empty; - [VectorStoreRecordVector] + [VectorStoreRecordVector(4)] public ReadOnlyMemory? Vector { get; set; } public string? NotAnnotated { get; set; } diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs new file mode 100644 index 000000000000..2d8cab78ccd1 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs @@ -0,0 +1,172 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; +using Azure.Search.Documents.Indexes.Models; +using Microsoft.SemanticKernel.Data; + +namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; + +/// +/// Contains mapping helpers to use when creating a Azure AI Search vector collection. 
+/// +internal static class AzureAISearchVectorStoreCollectionCreateMapping +{ + /// + /// Map from a to an Azure AI Search . + /// + /// The key property definition. + /// The for the provided property definition. + public static SearchableField MapKeyField(VectorStoreRecordKeyProperty keyProperty) + { + return new SearchableField(keyProperty.PropertyName) { IsKey = true, IsFilterable = true }; + } + + /// + /// Map from a to an Azure AI Search . + /// + /// The data property definition. + /// The for the provided property definition. + /// Throws when the definition is missing required information. + public static SimpleField MapDataField(VectorStoreRecordDataProperty dataProperty) + { + if (dataProperty.PropertyType == typeof(string)) + { + return new SearchableField(dataProperty.PropertyName) { IsFilterable = dataProperty.IsFilterable }; + } + + if (dataProperty.PropertyType is null) + { + throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.PropertyName}' must be set to create a collection."); + } + + return new SimpleField(dataProperty.PropertyName, AzureAISearchVectorStoreCollectionCreateMapping.GetSDKFieldDataType(dataProperty.PropertyType)) { IsFilterable = dataProperty.IsFilterable }; + } + + /// + /// Map from a to an Azure AI Search and generate the required index configuration. + /// + /// The vector property definition. + /// The and required index configuration. + /// Throws when the definition is missing required information, or unsupported options are configured.
+ public static (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfiguration algorithmConfiguration, VectorSearchProfile vectorSearchProfile) MapVectorField(VectorStoreRecordVectorProperty vectorProperty) + { + if (vectorProperty.Dimensions is not > 0) + { + throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive ingeteger to create a collection."); + } + + // Build a name for the profile and algorithm configuration based on the property name + // since we'll just create a separate one for each vector property. + var vectorSearchProfileName = $"{vectorProperty.PropertyName}Profile"; + var algorithmConfigName = $"{vectorProperty.PropertyName}AlgoConfig"; + + // Read the vector index settings from the property definition and create the right index configuration. + var indexKind = AzureAISearchVectorStoreCollectionCreateMapping.GetSKIndexKind(vectorProperty); + var algorithmMetric = AzureAISearchVectorStoreCollectionCreateMapping.GetSDKDistanceAlgorithm(vectorProperty); + + VectorSearchAlgorithmConfiguration algorithmConfiguration = indexKind switch + { + IndexKind.Hnsw => new HnswAlgorithmConfiguration(algorithmConfigName) { Parameters = new HnswParameters { Metric = algorithmMetric } }, + IndexKind.Flat => new ExhaustiveKnnAlgorithmConfiguration(algorithmConfigName) { Parameters = new ExhaustiveKnnParameters { Metric = algorithmMetric } }, + _ => throw new InvalidOperationException($"Unsupported index kind '{indexKind}' on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.") + }; + var vectorSearchProfile = new VectorSearchProfile(vectorSearchProfileName, algorithmConfigName); + + return (new VectorSearchField(vectorProperty.PropertyName, vectorProperty.Dimensions.Value, vectorSearchProfileName), algorithmConfiguration, vectorSearchProfile); + } + + /// + /// Get the configured from the given . 
+ /// If none is configured the default is . + /// + /// The vector property definition. + /// The configured or default . + public static string GetSKIndexKind(VectorStoreRecordVectorProperty vectorProperty) + { + if (vectorProperty.IndexKind is null) + { + return IndexKind.Hnsw; + } + + return vectorProperty.IndexKind; + } + + /// + /// Get the configured from the given . + /// If none is configured, the default is . + /// + /// The vector property definition. + /// The chosen . + /// Thrown if a distance function is chosen that isn't supported by Azure AI Search. + public static VectorSearchAlgorithmMetric GetSDKDistanceAlgorithm(VectorStoreRecordVectorProperty vectorProperty) + { + if (vectorProperty.DistanceFunction is null) + { + return VectorSearchAlgorithmMetric.Cosine; + } + + return vectorProperty.DistanceFunction switch + { + DistanceFunction.CosineSimilarity => VectorSearchAlgorithmMetric.Cosine, + DistanceFunction.DotProductSimilarity => VectorSearchAlgorithmMetric.DotProduct, + DistanceFunction.EuclideanDistance => VectorSearchAlgorithmMetric.Euclidean, + _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.") + }; + } + + /// + /// Maps the given property type to the corresponding . + /// + /// The property type to map. + /// The that corresponds to the given property type." + /// Thrown if the given type is not supported. + public static SearchFieldDataType GetSDKFieldDataType(Type propertyType) + { + return propertyType switch + { + Type stringType when stringType == typeof(string) => SearchFieldDataType.String, + Type boolType when boolType == typeof(bool) || boolType == typeof(bool?) => SearchFieldDataType.Boolean, + Type intType when intType == typeof(int) || intType == typeof(int?) => SearchFieldDataType.Int32, + Type longType when longType == typeof(long) || longType == typeof(long?) 
=> SearchFieldDataType.Int64, + Type floatType when floatType == typeof(float) || floatType == typeof(float?) => SearchFieldDataType.Double, + Type doubleType when doubleType == typeof(double) || doubleType == typeof(double?) => SearchFieldDataType.Double, + Type dateTimeType when dateTimeType == typeof(DateTime) || dateTimeType == typeof(DateTime?) => SearchFieldDataType.DateTimeOffset, + Type dateTimeOffsetType when dateTimeOffsetType == typeof(DateTimeOffset) || dateTimeOffsetType == typeof(DateTimeOffset?) => SearchFieldDataType.DateTimeOffset, + Type collectionType when typeof(IEnumerable).IsAssignableFrom(collectionType) => SearchFieldDataType.Collection(GetSDKFieldDataType(GetEnumerableType(propertyType))), + _ => throw new InvalidOperationException($"Unsupported data type '{propertyType}' for {nameof(VectorStoreRecordDataProperty)}.") + }; + } + + /// + /// Gets the type of object stored in the given enumerable type. + /// + /// The enumerable to get the stored type for. + /// The type of object stored in the given enumerable type. + /// Thrown when the given type is not enumerable. 
+ public static Type GetEnumerableType(Type type) + { + if (type is IEnumerable) + { + return typeof(object); + } + else if (type.IsArray) + { + return type.GetElementType()!; + } + + if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(IEnumerable<>)) + { + return type.GetGenericArguments()[0]; + } + + if (type.GetInterfaces().FirstOrDefault(i => i.IsGenericType && i.GetGenericTypeDefinition() == typeof(IEnumerable<>)) is Type enumerableInterface) + { + return enumerableInterface.GetGenericArguments()[0]; + } + + throw new InvalidOperationException($"Unsupported data type '{type}' for {nameof(VectorStoreRecordDataProperty)}."); + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs index 7cb58de20041..4b80c814f752 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs @@ -12,6 +12,7 @@ using Azure; using Azure.Search.Documents; using Azure.Search.Documents.Indexes; +using Azure.Search.Documents.Indexes.Models; using Azure.Search.Documents.Models; using Microsoft.SemanticKernel.Data; @@ -77,6 +78,9 @@ public sealed class AzureAISearchVectorStoreRecordCollection : IVectorS /// Optional configuration options for this class. private readonly AzureAISearchVectorStoreRecordCollectionOptions _options; + /// A definition of the current storage model. + private readonly VectorStoreRecordDefinition _vectorStoreRecordDefinition; + /// The name of the key field for the collections that this class is used with. private readonly string _keyPropertyName; @@ -102,6 +106,7 @@ public AzureAISearchVectorStoreRecordCollection(SearchIndexClient searchIndexCli this._collectionName = collectionName; this._options = options ?? 
new AzureAISearchVectorStoreRecordCollectionOptions(); this._searchClient = this._searchIndexClient.GetSearchClient(collectionName); + this._vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true); // Verify custom mapper. if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper && this._options.JsonObjectCustomMapper is null) @@ -157,6 +162,57 @@ public async Task CollectionExistsAsync(CancellationToken cancellationToke } } + /// + public Task CreateCollectionAsync(CancellationToken cancellationToken = default) + { + var vectorSearchConfig = new VectorSearch(); + var searchFields = new List(); + + // Loop through all properties and create the search fields. + foreach (var property in this._vectorStoreRecordDefinition.Properties) + { + // Key property. + if (property is VectorStoreRecordKeyProperty keyProperty) + { + searchFields.Add(AzureAISearchVectorStoreCollectionCreateMapping.MapKeyField(keyProperty)); + } + + // Data property. + if (property is VectorStoreRecordDataProperty dataProperty) + { + searchFields.Add(AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty)); + } + + // Vector property. + if (property is VectorStoreRecordVectorProperty vectorProperty) + { + (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfiguration algorithmConfiguration, VectorSearchProfile vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty); + + // Add the search field, plus its profile and algorithm configuration to the search config. + searchFields.Add(vectorSearchField); + vectorSearchConfig.Algorithms.Add(algorithmConfiguration); + vectorSearchConfig.Profiles.Add(vectorSearchProfile); + } + } + + // Create the index. 
+ var searchIndex = new SearchIndex(this._collectionName, searchFields); + searchIndex.VectorSearch = vectorSearchConfig; + + return this.RunOperationAsync( + "CreateIndex", + () => this._searchIndexClient.CreateIndexAsync(searchIndex, cancellationToken)); + } + + /// + public async Task CreateCollectionIfNotExistsAsync(CancellationToken cancellationToken = default) + { + if (!await this.CollectionExistsAsync(cancellationToken).ConfigureAwait(false)) + { + await this.CreateCollectionAsync(cancellationToken).ConfigureAwait(false); + } + } + /// public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) { diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs index 6ac1d80e262c..e91943b1d47a 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs @@ -54,13 +54,13 @@ public AzureAISearchVectorStoreFixture() Properties = new List { new VectorStoreRecordKeyProperty("HotelId"), - new VectorStoreRecordDataProperty("HotelName"), - new VectorStoreRecordDataProperty("Description"), - new VectorStoreRecordVectorProperty("DescriptionEmbedding"), - new VectorStoreRecordDataProperty("Tags"), - new VectorStoreRecordDataProperty("ParkingIncluded"), - new VectorStoreRecordDataProperty("LastRenovationDate"), - new VectorStoreRecordDataProperty("Rating") + new VectorStoreRecordDataProperty("HotelName") { PropertyType = typeof(string) }, + new VectorStoreRecordDataProperty("Description") { PropertyType = typeof(string) }, + new VectorStoreRecordVectorProperty("DescriptionEmbedding") { Dimensions = 4 }, + new VectorStoreRecordDataProperty("Tags") { PropertyType = typeof(string[]) }, + new VectorStoreRecordDataProperty("ParkingIncluded") { PropertyType = typeof(bool?) 
}, + new VectorStoreRecordDataProperty("LastRenovationDate") { PropertyType = typeof(DateTimeOffset?) }, + new VectorStoreRecordDataProperty("Rating") { PropertyType = typeof(float?) } } }; } @@ -219,7 +219,7 @@ public class Hotel [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] public string Description { get; set; } - [VectorStoreRecordVector] + [VectorStoreRecordVector(4)] public ReadOnlyMemory? DescriptionEmbedding { get; set; } [SearchableField(IsFilterable = true, IsFacetable = true)] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs index 7772c7359202..c18aef3c4653 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -40,6 +40,33 @@ public async Task CollectionExistsReturnsCollectionStateAsync(bool expectedExist Assert.Equal(expectedExists, actual); } + [Theory(Skip = SkipReason)] + [InlineData(true)] + [InlineData(false)] + public async Task ItCanCreateACollectionAsync(bool useRecordDefinition) + { + // Arrange + var testCollectionName = $"{fixture.TestIndexName}-createtest"; + var options = new AzureAISearchVectorStoreRecordCollectionOptions + { + VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null + }; + var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, testCollectionName, options); + + await sut.DeleteCollectionAsync(); + + // Act + await sut.CreateCollectionAsync(); + + // Assert + var existResult = await sut.CollectionExistsAsync(); + Assert.True(existResult); + await sut.DeleteCollectionAsync(); + + // Output + output.WriteLine(existResult.ToString()); + } + [Fact(Skip = SkipReason)] public async Task ItCanDeleteCollectionAsync() { diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index 883c2de9ea9c..19dfe2b882d1 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -219,6 +219,8 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT { HasEmbedding = dataAttribute.HasEmbedding, EmbeddingPropertyName = dataAttribute.EmbeddingPropertyName, + IsFilterable = dataAttribute.IsFilterable, + PropertyType = dataProperty.PropertyType, StoragePropertyName = dataAttribute.StoragePropertyName }); } @@ -232,6 +234,9 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT { definitionProperties.Add(new VectorStoreRecordVectorProperty(vectorProperty.Name) { + Dimensions = vectorAttribute.Dimensions, + IndexKind = vectorAttribute.IndexKind, + DistanceFunction = vectorAttribute.DistanceFunction, StoragePropertyName = vectorAttribute.StoragePropertyName }); } diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs index 9df2365c78e0..61a74443b6a2 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs +++ 
b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs @@ -23,6 +23,11 @@ public sealed class VectorStoreRecordDataAttribute : Attribute /// public string? EmbeddingPropertyName { get; init; } + /// + /// Gets or sets a value indicating whether this data property is filterable. + /// + public bool IsFilterable { get; init; } + /// /// Gets or sets an optional name to use for the property in storage, if different from the property name. /// E.g. the property name might be "MyProperty" but the storage name might be "my_property". diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs index b7f059173c20..f5b190417c1b 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs @@ -12,6 +12,52 @@ namespace Microsoft.SemanticKernel.Data; [AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] public sealed class VectorStoreRecordVectorAttribute : Attribute { + /// + /// Initializes a new instance of the class. + /// + public VectorStoreRecordVectorAttribute() + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The number of dimensions that the vector has. + public VectorStoreRecordVectorAttribute(int Dimensions) + { + this.Dimensions = Dimensions; + } + + /// + /// Initializes a new instance of the class. + /// + /// The number of dimensions that the vector has. + /// The kind of index to use. + /// The distance function to use when comparing vectors. 
+ public VectorStoreRecordVectorAttribute(int Dimensions, string IndexKind, string DistanceFunction) + { + this.Dimensions = Dimensions; + this.IndexKind = IndexKind; + this.DistanceFunction = DistanceFunction; + } + + /// + /// Gets or sets the number of dimensions that the vector has. + /// + public int? Dimensions { get; private set; } + + /// + /// Gets the kind of index to use. + /// + /// + public string? IndexKind { get; private set; } + + /// + /// Gets the distance function to use when comparing vectors. + /// + /// + public string? DistanceFunction { get; private set; } + /// /// Gets or sets an optional name to use for the property in storage, if different from the property name. /// E.g. the property name might be "MyProperty" but the storage name might be "my_property". diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs new file mode 100644 index 000000000000..1d87f9d2a7f2 --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.SemanticKernel.Data; + +/// +/// Defines the distance functions that can be used to compare vectors. +/// +public static class DistanceFunction +{ + /// + /// The cosine (angular) similarity between two vectors. + /// + /// + /// Measures only the angle between the two vectors, without taking into account the length of the vectors. + /// CosineSimilarity = 1 - CosineDistance. + /// -1 means vectors are opposite. + /// 0 means vectors are orthogonal. + /// 1 means vectors are identical. + /// + public const string CosineSimilarity = nameof(CosineSimilarity); + + /// + /// The cosine (angular) distance between two vectors. + /// + /// + /// CosineDistance = 1 - CosineSimilarity. + /// 2 means vectors are opposite. + /// 1 means vectors are orthogonal.
+ /// 0 means vectors are identical. + /// + public const string CosineDistance = nameof(CosineDistance); + + /// + /// Measures both the length and angle between two vectors. + /// + /// + /// Same as cosine similarity if the vectors are the same length, but more performant. + /// + public const string DotProductSimilarity = nameof(DotProductSimilarity); + + /// + /// Measures the Euclidean distance between two vectors. + /// + /// + /// Also known as l2-norm. + /// + public const string EuclideanDistance = nameof(EuclideanDistance); + + /// + /// Measures the Manhattan distance between two vectors. + /// + public const string ManhattanDistance = nameof(ManhattanDistance); +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs new file mode 100644 index 000000000000..02451513b9ea --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.SemanticKernel.Data; + +/// +/// Defines the index types that can be used to index vectors. +/// +public static class IndexKind +{ + /// + /// Hierarchical Navigable Small World, which performs an approximate nearest neighbour (ANN) search. + /// + /// + /// Lower accuracy than exhaustive k nearest neighbor, but faster and more efficient. + /// + public const string Hnsw = nameof(Hnsw); + + /// + /// Does a brute force search to find the nearest neighbors. + /// Calculates the distances between all pairs of data points, so has a linear time complexity, that grows directly proportional to the number of points. + /// Also referred to as exhaustive k nearest neighbor in some databases. + /// + /// + /// High recall accuracy, but slower and more expensive than HNSW. + /// Better with smaller datasets. 
+ /// + public const string Flat = nameof(Flat); +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs index d7ac00b5ad4c..eca2c72ced59 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using System; using System.Diagnostics.CodeAnalysis; namespace Microsoft.SemanticKernel.Data; @@ -40,4 +41,14 @@ public VectorStoreRecordDataProperty(VectorStoreRecordDataProperty source) /// Gets or sets the name of the property that contains the embedding for this data property. /// public string? EmbeddingPropertyName { get; init; } + + /// + /// Gets or sets a value indicating whether this data property is filterable. + /// + public bool IsFilterable { get; init; } + + /// + /// Gets or sets the type of the data property. + /// + public Type? PropertyType { get; init; } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs index f4c90ef319cc..9b973f9d3c3e 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs @@ -27,4 +27,21 @@ public VectorStoreRecordVectorProperty(VectorStoreRecordVectorProperty source) : base(source.PropertyName) { } + + /// + /// Gets or sets the number of dimensions that the vector has. + /// + public int? Dimensions { get; init; } + + /// + /// Gets the kind of index to use. + /// + /// + public string? 
IndexKind { get; init; } + + /// + /// Gets the distance function to use when comparing vectors. + /// + /// + public string? DistanceFunction { get; init; } } diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index 3fc33bb5d935..b804f35a442d 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -165,10 +165,20 @@ public void CreateVectorStoreRecordDefinitionFromTypeConvertsAllProps() var data1 = (VectorStoreRecordDataProperty)definition.Properties[1]; var data2 = (VectorStoreRecordDataProperty)definition.Properties[2]; + Assert.True(data1.IsFilterable); + Assert.False(data2.IsFilterable); + Assert.True(data1.HasEmbedding); Assert.False(data2.HasEmbedding); Assert.Equal("Vector1", data1.EmbeddingPropertyName); + + Assert.Equal(typeof(string), data1.PropertyType); + Assert.Equal(typeof(string), data2.PropertyType); + + var vector1 = (VectorStoreRecordVectorProperty)definition.Properties[3]; + + Assert.Equal(4, vector1.Dimensions); } [Fact] @@ -323,13 +333,13 @@ private sealed class MultiPropsModel [VectorStoreRecordKey] public string Key { get; set; } = string.Empty; - [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "Vector1")] + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "Vector1", IsFilterable = true)] public string Data1 { get; set; } = string.Empty; [VectorStoreRecordData] public string Data2 { get; set; } = string.Empty; - [VectorStoreRecordVector] + [VectorStoreRecordVector(4, IndexKind.Flat, DistanceFunction.DotProductSimilarity)] public ReadOnlyMemory Vector1 { get; set; } [VectorStoreRecordVector] @@ -344,9 +354,9 @@ private sealed class MultiPropsModel Properties = [ new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data1") { 
HasEmbedding = true, EmbeddingPropertyName = "Vector1" }, + new VectorStoreRecordDataProperty("Data1") { HasEmbedding = true, EmbeddingPropertyName = "Vector1", IsFilterable = true }, new VectorStoreRecordDataProperty("Data2") { StoragePropertyName = "data_2" }, - new VectorStoreRecordVectorProperty("Vector1"), + new VectorStoreRecordVectorProperty("Vector1") { Dimensions = 4, IndexKind = IndexKind.Flat, DistanceFunction = DistanceFunction.DotProductSimilarity }, new VectorStoreRecordVectorProperty("Vector2") ] }; From c2792704a35c7c2b57e019e82584fcf71bb4e20a Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Wed, 17 Jul 2024 14:34:48 +0100 Subject: [PATCH 19/48] .Net: Add redis collection create support. (#7315) ### Motivation and Context As part of the memory connector redesign we have fixed on a design where we have a VectorStore that produces VectorStoreRecordCollection instances. These are tied to a collection and will expose single collection operations. 
### Description This PR contains: - The ability to create Redis collections ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...RedisVectorStoreCollectionCreateMapping.cs | 152 ++++++++++++++++++ .../RedisVectorStoreRecordCollection.cs | 31 ++++ ...VectorStoreCollectionCreateMappingTests.cs | 112 +++++++++++++ .../RedisVectorStoreRecordCollectionTests.cs | 22 ++- .../Memory/Redis/RedisVectorStoreFixture.cs | 12 +- .../RedisVectorStoreRecordCollectionTests.cs | 28 ++++ 6 files changed, 350 insertions(+), 7 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs create mode 100644 dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs new file mode 100644 index 000000000000..f7a4e362eafa --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs @@ -0,0 +1,152 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Globalization; +using Microsoft.SemanticKernel.Data; +using NRedisStack.Search; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Contains mapping helpers to use when creating a redis vector collection. 
+/// +internal static class RedisVectorStoreCollectionCreateMapping +{ + /// A set of number types that are supported for filtering. + public static readonly HashSet s_supportedFilterableNumericDataTypes = + [ + typeof(short), + typeof(sbyte), + typeof(byte), + typeof(ushort), + typeof(int), + typeof(uint), + typeof(long), + typeof(ulong), + typeof(float), + typeof(double), + typeof(decimal), + + typeof(short?), + typeof(sbyte?), + typeof(byte?), + typeof(ushort?), + typeof(int?), + typeof(uint?), + typeof(long?), + typeof(ulong?), + typeof(float?), + typeof(double?), + typeof(decimal?), + ]; + + /// + /// Map from the given list of items to the Redis . + /// + /// The property definitions to map from. + /// The mapped Redis . + /// Thrown if there are missing required or unsupported configuration options set. + public static Schema MapToSchema(IEnumerable properties) + { + var schema = new Schema(); + + // Loop through all properties and create the index fields. + foreach (var property in properties) + { + // Key property. + if (property is VectorStoreRecordKeyProperty keyProperty) + { + // Do nothing, since key is not stored as part of the payload and therefore doesn't have to be added to the index. + continue; + } + + // Data property. 
+ if (property is VectorStoreRecordDataProperty dataProperty && dataProperty.IsFilterable) + { + if (dataProperty.PropertyType is null) + { + throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.PropertyName}' must be set to create a collection, since the property is filterable."); + } + + if (dataProperty.PropertyType == typeof(string)) + { + schema.AddTextField(new FieldName($"$.{dataProperty.PropertyName}", dataProperty.PropertyName)); + } + + if (RedisVectorStoreCollectionCreateMapping.s_supportedFilterableNumericDataTypes.Contains(dataProperty.PropertyType)) + { + schema.AddNumericField(new FieldName($"$.{dataProperty.PropertyName}", dataProperty.PropertyName)); + } + + continue; + } + + // Vector property. + if (property is VectorStoreRecordVectorProperty vectorProperty) + { + if (vectorProperty.Dimensions is not > 0) + { + throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive ingeteger to create a collection."); + } + + var indexKind = GetSDKIndexKind(vectorProperty); + var distanceAlgorithm = GetSDKDistanceAlgorithm(vectorProperty); + var dimensions = vectorProperty.Dimensions.Value.ToString(CultureInfo.InvariantCulture); + schema.AddVectorField(new FieldName($"$.{vectorProperty.PropertyName}", vectorProperty.PropertyName), indexKind, new Dictionary() + { + ["TYPE"] = "FLOAT32", + ["DIM"] = dimensions, + ["DISTANCE_METRIC"] = distanceAlgorithm + }); + } + } + + return schema; + } + + /// + /// Get the configured from the given . + /// If none is configured the default is . + /// + /// The vector property definition. + /// The chosen . + /// Thrown if an index type was chosen that isn't supported by Redis.
+ public static Schema.VectorField.VectorAlgo GetSDKIndexKind(VectorStoreRecordVectorProperty vectorProperty) + { + if (vectorProperty.IndexKind is null) + { + return Schema.VectorField.VectorAlgo.HNSW; + } + + return vectorProperty.IndexKind switch + { + IndexKind.Hnsw => Schema.VectorField.VectorAlgo.HNSW, + IndexKind.Flat => Schema.VectorField.VectorAlgo.FLAT, + _ => throw new InvalidOperationException($"Unsupported index kind '{vectorProperty.IndexKind}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.") + }; + } + + /// + /// Get the configured distance metric from the given . + /// If none is configured, the default is cosine. + /// + /// The vector property definition. + /// The chosen distance metric. + /// Thrown if a distance function is chosen that isn't supported by Redis. + public static string GetSDKDistanceAlgorithm(VectorStoreRecordVectorProperty vectorProperty) + { + if (vectorProperty.DistanceFunction is null) + { + return "COSINE"; + } + + return vectorProperty.DistanceFunction switch + { + DistanceFunction.CosineSimilarity => "COSINE", + DistanceFunction.DotProductSimilarity => "IP", + DistanceFunction.EuclideanDistance => "L2", + _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.") + }; + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs index 2b3cd6902f77..61637a6a1e6f 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs @@ -12,6 +12,8 @@ using Microsoft.SemanticKernel.Data; using NRedisStack.Json.DataTypes; using NRedisStack.RedisStackCommands; +using NRedisStack.Search; +using NRedisStack.Search.Literals.Enums; using 
StackExchange.Redis; namespace Microsoft.SemanticKernel.Connectors.Redis; @@ -52,6 +54,9 @@ public sealed class RedisVectorStoreRecordCollection : IVectorStoreReco /// Optional configuration options for this class. private readonly RedisVectorStoreRecordCollectionOptions _options; + /// A definition of the current storage model. + private readonly VectorStoreRecordDefinition _vectorStoreRecordDefinition; + /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. private readonly PropertyInfo _keyPropertyInfo; @@ -85,6 +90,7 @@ public RedisVectorStoreRecordCollection(IDatabase database, string collectionNam this._collectionName = collectionName; this._options = options ?? new RedisVectorStoreRecordCollectionOptions(); this._jsonSerializerOptions = this._options.JsonSerializerOptions ?? JsonSerializerOptions.Default; + this._vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true); // Enumerate public properties using configuration or attributes. (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; @@ -151,6 +157,31 @@ public async Task CollectionExistsAsync(CancellationToken cancellationToke } } + /// + public Task CreateCollectionAsync(CancellationToken cancellationToken = default) + { + // Map the record definition to a schema. + var schema = RedisVectorStoreCollectionCreateMapping.MapToSchema(this._vectorStoreRecordDefinition.Properties); + + // Create the index creation params. + // Add the collection name and colon as the index prefix, which means that any record where the key is prefixed with this text will be indexed by this index + var createParams = new FTCreateParams() + .AddPrefix($"{this._collectionName}:") + .On(IndexDataType.JSON); + + // Create the index. 
+ return this.RunOperationAsync("FT.CREATE", () => this._database.FT().CreateAsync(this._collectionName, createParams, schema)); + } + + /// + public async Task CreateCollectionIfNotExistsAsync(CancellationToken cancellationToken = default) + { + if (!await this.CollectionExistsAsync(cancellationToken).ConfigureAwait(false)) + { + await this.CreateCollectionAsync(cancellationToken).ConfigureAwait(false); + } + } + /// public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) { diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs new file mode 100644 index 000000000000..277a8f57a983 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs @@ -0,0 +1,112 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using Microsoft.SemanticKernel.Data; +using NRedisStack.Search; +using Xunit; +using static NRedisStack.Search.Schema; + +namespace Microsoft.SemanticKernel.Connectors.Redis.UnitTests; + +/// +/// Contains tests for the class. +/// +public class RedisVectorStoreCollectionCreateMappingTests +{ + [Fact] + public void MapToSchemaCreatesSchema() + { + // Arrange. 
+ var properties = new VectorStoreRecordProperty[] + { + new VectorStoreRecordKeyProperty("Key"), + + new VectorStoreRecordDataProperty("FilterableString") { PropertyType = typeof(string), IsFilterable = true }, + new VectorStoreRecordDataProperty("FilterableInt") { PropertyType = typeof(int), IsFilterable = true }, + new VectorStoreRecordDataProperty("FilterableNullableInt") { PropertyType = typeof(int?), IsFilterable = true }, + + new VectorStoreRecordDataProperty("NonFilterableString") { PropertyType = typeof(string) }, + + new VectorStoreRecordVectorProperty("VectorDefaultIndexingOptions") { Dimensions = 10 }, + new VectorStoreRecordVectorProperty("VectorSpecificIndexingOptions") { Dimensions = 20, IndexKind = IndexKind.Flat, DistanceFunction = DistanceFunction.EuclideanDistance }, + }; + + // Act. + var schema = RedisVectorStoreCollectionCreateMapping.MapToSchema(properties); + + // Assert. + Assert.NotNull(schema); + Assert.Equal(5, schema.Fields.Count); + + Assert.IsType(schema.Fields[0]); + Assert.IsType(schema.Fields[1]); + Assert.IsType(schema.Fields[2]); + Assert.IsType(schema.Fields[3]); + Assert.IsType(schema.Fields[4]); + + VerifyFieldName(schema.Fields[0].FieldName, new List { "$.FilterableString", "AS", "FilterableString" }); + VerifyFieldName(schema.Fields[1].FieldName, new List { "$.FilterableInt", "AS", "FilterableInt" }); + VerifyFieldName(schema.Fields[2].FieldName, new List { "$.FilterableNullableInt", "AS", "FilterableNullableInt" }); + + VerifyFieldName(schema.Fields[3].FieldName, new List { "$.VectorDefaultIndexingOptions", "AS", "VectorDefaultIndexingOptions" }); + VerifyFieldName(schema.Fields[4].FieldName, new List { "$.VectorSpecificIndexingOptions", "AS", "VectorSpecificIndexingOptions" }); + + Assert.Equal("10", ((VectorField)schema.Fields[3]).Attributes!["DIM"]); + Assert.Equal("FLOAT32", ((VectorField)schema.Fields[3]).Attributes!["TYPE"]); + Assert.Equal("COSINE", ((VectorField)schema.Fields[3]).Attributes!["DISTANCE_METRIC"]); + + 
Assert.Equal("20", ((VectorField)schema.Fields[4]).Attributes!["DIM"]); + Assert.Equal("FLOAT32", ((VectorField)schema.Fields[4]).Attributes!["TYPE"]); + Assert.Equal("L2", ((VectorField)schema.Fields[4]).Attributes!["DISTANCE_METRIC"]); + } + + [Fact] + public void MapToSchemaThrowsOnMissingPropertyType() + { + // Arrange. + var properties = new VectorStoreRecordProperty[] { new VectorStoreRecordDataProperty("FilterableString") { IsFilterable = true } }; + + // Act and assert. + Assert.Throws(() => RedisVectorStoreCollectionCreateMapping.MapToSchema(properties)); + } + + [Theory] + [InlineData(null)] + [InlineData(0)] + public void MapToSchemaThrowsOnInvalidVectorDimensions(int? dimensions) + { + // Arrange. + var properties = new VectorStoreRecordProperty[] { new VectorStoreRecordVectorProperty("VectorProperty") { Dimensions = dimensions } }; + + // Act and assert. + Assert.Throws(() => RedisVectorStoreCollectionCreateMapping.MapToSchema(properties)); + } + + [Fact] + public void GetSDKIndexKindThrowsOnUnsupportedIndexKind() + { + // Arrange. + var vectorProperty = new VectorStoreRecordVectorProperty("VectorProperty") { IndexKind = "Unsupported" }; + + // Act and assert. + Assert.Throws(() => RedisVectorStoreCollectionCreateMapping.GetSDKIndexKind(vectorProperty)); + } + + [Fact] + public void GetSDKDistanceAlgorithmThrowsOnUnsupportedDistanceFunction() + { + // Arrange. + var vectorProperty = new VectorStoreRecordVectorProperty("VectorProperty") { DistanceFunction = "Unsupported" }; + + // Act and assert. 
+ Assert.Throws(() => RedisVectorStoreCollectionCreateMapping.GetSDKDistanceAlgorithm(vectorProperty)); + } + + private static void VerifyFieldName(FieldName fieldName, List expected) + { + var args = new List(); + fieldName.AddCommandArguments(args); + Assert.Equal(expected, args); + } +} diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs index ee64128e9b4b..9fde8c6f300d 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs @@ -64,6 +64,26 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN Assert.Equal(expectedExists, actual); } + [Fact] + public async Task CanCreateCollectionAsync() + { + // Arrange. + SetupExecuteMock(this._redisDatabaseMock, string.Empty); + var sut = new RedisVectorStoreRecordCollection(this._redisDatabaseMock.Object, TestCollectionName); + + // Act. + await sut.CreateCollectionAsync(); + + // Assert. + var expectedArgs = new object[] { "testcollection", "ON", "JSON", "PREFIX", 1, "testcollection:", "SCHEMA", "$.Vector", "AS", "Vector", "VECTOR", "HNSW", 6, "TYPE", "FLOAT32", "DIM", "4", "DISTANCE_METRIC", "COSINE" }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "FT.CREATE", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + } + [Fact] public async Task CanDeleteCollectionAsync() { @@ -435,7 +455,7 @@ public sealed class SinglePropsModel [VectorStoreRecordData] public string Data { get; set; } = string.Empty; - [VectorStoreRecordVector] + [VectorStoreRecordVector(4)] public ReadOnlyMemory? Vector { get; set; } public string? 
NotAnnotated { get; set; } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs index 478ad52b9f56..c79069982824 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs @@ -38,10 +38,10 @@ public RedisVectorStoreFixture() Properties = new List { new VectorStoreRecordKeyProperty("HotelId"), - new VectorStoreRecordDataProperty("HotelName"), - new VectorStoreRecordDataProperty("HotelCode"), + new VectorStoreRecordDataProperty("HotelName") { IsFilterable = true, PropertyType = typeof(string) }, + new VectorStoreRecordDataProperty("HotelCode") { IsFilterable = true, PropertyType = typeof(int) }, new VectorStoreRecordDataProperty("Description"), - new VectorStoreRecordVectorProperty("DescriptionEmbedding"), + new VectorStoreRecordVectorProperty("DescriptionEmbedding") { Dimensions = 4 }, new VectorStoreRecordDataProperty("Tags"), new VectorStoreRecordDataProperty("ParkingIncluded"), new VectorStoreRecordDataProperty("LastRenovationDate"), @@ -166,16 +166,16 @@ public class Hotel [VectorStoreRecordKey] public string HotelId { get; init; } - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] public string HotelName { get; init; } - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] public int HotelCode { get; init; } [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] public string Description { get; init; } - [VectorStoreRecordVector] + [VectorStoreRecordVector(4)] public ReadOnlyMemory? 
DescriptionEmbedding { get; init; } #pragma warning disable CA1819 // Properties should not return arrays diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs index 7eb0eb7454be..1bd13895c1d4 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs @@ -37,6 +37,34 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN Assert.Equal(expectedExists, actual); } + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task ItCanCreateACollectionAsync(bool useRecordDefinition) + { + // Arrange + var collectionNamePostfix = useRecordDefinition ? "WithDefinition" : "WithType"; + var testCollectionName = $"createtest{collectionNamePostfix}"; + + var options = new RedisVectorStoreRecordCollectionOptions + { + PrefixCollectionNameToKeyNames = true, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null + }; + var sut = new RedisVectorStoreRecordCollection(fixture.Database, testCollectionName, options); + + // Act + await sut.CreateCollectionAsync(); + + // Assert + var existResult = await sut.CollectionExistsAsync(); + Assert.True(existResult); + await sut.DeleteCollectionAsync(); + + // Output + output.WriteLine(existResult.ToString()); + } + [Fact] public async Task ItCanDeleteCollectionAsync() { From b2d34eea423f54ab40217658bd3e58fcd76f0c9f Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Wed, 17 Jul 2024 15:23:14 +0100 Subject: [PATCH 20/48] Adding qdrant collection create. (#7302) ### Motivation and Context As part of the memory connector redesign we have fixed on a design where we have a VectorStore that produces VectorStoreRecordCollection instances. 
These are tied to a collection and will expose single collection operations. ### Description This PR contains: - The ability to create Qdrant collections - Additional methods on mockable qdrant client to allow unit testing create - Had to move record property enumeration from the mapper to the collection class, since create requires the property information as well, specifically storage property names. - Removed the mapper options class, since the mapper is internal, and all the parameters are now required anyway. I'll update the IVectorStoreRecordCollection interface with the CreateCollection and CreateCollectionIfNotExists methods once I've added create to more implementations, to avoid having one very large pr. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../MockableQdrantClient.cs | 72 +++++++++++ ...drantVectorStoreCollectionCreateMapping.cs | 118 ++++++++++++++++++ .../QdrantVectorStoreRecordCollection.cs | 110 +++++++++++++++- .../QdrantVectorStoreRecordMapper.cs | 76 +++++------ .../QdrantVectorStoreRecordMapperOptions.cs | 27 ---- ...VectorStoreCollectionCreateMappingTests.cs | 93 ++++++++++++++ .../QdrantVectorStoreRecordCollectionTests.cs | 47 ++++++- .../QdrantVectorStoreRecordMapperTests.cs | 116 ++++++++++------- .../Memory/Qdrant/QdrantVectorStoreFixture.cs | 20 +-- .../QdrantVectorStoreRecordCollectionTests.cs | 31 +++++ 10 files changed, 576 insertions(+), 134 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs delete mode 100644 
dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs create mode 100644 dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreCollectionCreateMappingTests.cs diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs index bbda7a838abd..020455558b7d 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/MockableQdrantClient.cs @@ -28,14 +28,21 @@ public MockableQdrantClient(QdrantClient qdrantClient) } #pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + /// /// Constructor for mocking purposes only. /// internal MockableQdrantClient() { } + #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + /// + /// Gets the internal that this mockable instance wraps. + /// + public QdrantClient QdrantClient => this._qdrantClient; + /// /// Check if a collection exists. /// @@ -48,6 +55,62 @@ public virtual Task CollectionExistsAsync( CancellationToken cancellationToken = default) => this._qdrantClient.CollectionExistsAsync(collectionName, cancellationToken); + /// + /// Creates a new collection with the given parameters. + /// + /// The name of the collection to be created. + /// + /// Configuration of the vector storage. Vector params contains size and distance for the vector storage. + /// This overload creates a single anonymous vector storage. + /// + /// + /// The token to monitor for cancellation requests. The default value is . 
+ /// + public virtual Task CreateCollectionAsync( + string collectionName, + VectorParams vectorsConfig, + CancellationToken cancellationToken = default) + => this._qdrantClient.CreateCollectionAsync( + collectionName, + vectorsConfig, + cancellationToken: cancellationToken); + + /// + /// Creates a new collection with the given parameters. + /// + /// The name of the collection to be created. + /// + /// Configuration of the vector storage. Vector params contains size and distance for the vector storage. + /// This overload creates a vector storage for each key in the provided map. + /// + /// + /// The token to monitor for cancellation requests. The default value is . + /// + public virtual Task CreateCollectionAsync( + string collectionName, + VectorParamsMap? vectorsConfig = null, + CancellationToken cancellationToken = default) + => this._qdrantClient.CreateCollectionAsync( + collectionName, + vectorsConfig, + cancellationToken: cancellationToken); + + /// + /// Creates a payload field index in a collection. + /// + /// The name of the collection. + /// Field name to index. + /// The schema type of the field. + /// + /// The token to monitor for cancellation requests. The default value is . + /// + public virtual Task CreatePayloadIndexAsync( + string collectionName, + string fieldName, + PayloadSchemaType schemaType = PayloadSchemaType.Keyword, + CancellationToken cancellationToken = default) + => this._qdrantClient.CreatePayloadIndexAsync(collectionName, fieldName, schemaType, cancellationToken: cancellationToken); + /// /// Drop a collection and all its associated data. /// @@ -62,6 +125,15 @@ public virtual Task DeleteCollectionAsync( CancellationToken cancellationToken = default) => this._qdrantClient.DeleteCollectionAsync(collectionName, timeout, cancellationToken); + /// + /// Gets the names of all existing collections. + /// + /// + /// The token to monitor for cancellation requests. The default value is . 
+ /// + public virtual Task> ListCollectionsAsync(CancellationToken cancellationToken = default) + => this._qdrantClient.ListCollectionsAsync(cancellationToken); + /// /// Delete a point. /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs new file mode 100644 index 000000000000..4984cae771fb --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs @@ -0,0 +1,118 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using Microsoft.SemanticKernel.Data; +using Qdrant.Client.Grpc; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Contains mapping helpers to use when creating a qdrant vector collection. +/// +internal static class QdrantVectorStoreCollectionCreateMapping +{ + /// A dictionary of types and their matching qdrant index schema type. 
+ public static readonly Dictionary s_schemaTypeMap = new() + { + { typeof(short), PayloadSchemaType.Integer }, + { typeof(sbyte), PayloadSchemaType.Integer }, + { typeof(byte), PayloadSchemaType.Integer }, + { typeof(ushort), PayloadSchemaType.Integer }, + { typeof(int), PayloadSchemaType.Integer }, + { typeof(uint), PayloadSchemaType.Integer }, + { typeof(long), PayloadSchemaType.Integer }, + { typeof(ulong), PayloadSchemaType.Integer }, + { typeof(float), PayloadSchemaType.Float }, + { typeof(double), PayloadSchemaType.Float }, + { typeof(decimal), PayloadSchemaType.Float }, + + { typeof(short?), PayloadSchemaType.Integer }, + { typeof(sbyte?), PayloadSchemaType.Integer }, + { typeof(byte?), PayloadSchemaType.Integer }, + { typeof(ushort?), PayloadSchemaType.Integer }, + { typeof(int?), PayloadSchemaType.Integer }, + { typeof(uint?), PayloadSchemaType.Integer }, + { typeof(long?), PayloadSchemaType.Integer }, + { typeof(ulong?), PayloadSchemaType.Integer }, + { typeof(float?), PayloadSchemaType.Float }, + { typeof(double?), PayloadSchemaType.Float }, + { typeof(decimal?), PayloadSchemaType.Float }, + + { typeof(string), PayloadSchemaType.Text }, + { typeof(DateTime), PayloadSchemaType.Datetime }, + { typeof(bool), PayloadSchemaType.Bool }, + + { typeof(DateTime?), PayloadSchemaType.Datetime }, + { typeof(bool?), PayloadSchemaType.Bool }, + }; + + /// + /// Maps a single to a qdrant . + /// + /// The property to map. + /// The mapped . + /// Thrown if the property is missing information or has unsupported options specified. 
+ public static VectorParams MapSingleVector(VectorStoreRecordVectorProperty vectorProperty) + { + if (vectorProperty!.Dimensions is not > 0) + { + throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive ingeteger to create a collection."); + } + + if (vectorProperty!.IndexKind is not null && vectorProperty!.IndexKind != IndexKind.Hnsw) + { + throw new InvalidOperationException($"Unsupported index kind '{vectorProperty!.IndexKind}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'."); + } + + return new VectorParams { Size = (ulong)vectorProperty.Dimensions, Distance = QdrantVectorStoreCollectionCreateMapping.GetSDKDistanceAlgorithm(vectorProperty) }; + } + + /// + /// Maps a collection of to a qdrant . + /// + /// The properties to map. + /// The mapping of property names to storage names. + /// The mapped . + /// Thrown if the property is missing information or has unsupported options specified. + public static VectorParamsMap MapNamedVectors(IEnumerable vectorProperties, Dictionary storagePropertyNames) + { + var vectorParamsMap = new VectorParamsMap(); + + foreach (var vectorProperty in vectorProperties) + { + var storageName = storagePropertyNames[vectorProperty.PropertyName]; + + // Add each vector property to the vectors map. + vectorParamsMap.Map.Add( + storageName, + MapSingleVector(vectorProperty)); + } + + return vectorParamsMap; + } + + /// + /// Get the configured from the given . + /// If none is configured, the default is . + /// + /// The vector property definition. + /// The chosen . + /// Thrown if a distance function is chosen that isn't supported by Qdrant.
+ public static Distance GetSDKDistanceAlgorithm(VectorStoreRecordVectorProperty vectorProperty) + { + if (vectorProperty.DistanceFunction is null) + { + return Distance.Cosine; + } + + return vectorProperty.DistanceFunction switch + { + DistanceFunction.CosineSimilarity => Distance.Cosine, + DistanceFunction.DotProductSimilarity => Distance.Dot, + DistanceFunction.EuclideanDistance => Distance.Euclid, + DistanceFunction.ManhattanDistance => Distance.Manhattan, + _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.") + }; + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs index 8620055b5a8b..b5d68c180786 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Reflection; using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; @@ -22,6 +23,13 @@ public sealed class QdrantVectorStoreRecordCollection : IVectorStoreRec #pragma warning restore CA1711 // Identifiers should not have incorrect suffix where TRecord : class { + /// A set of types that a key on the provided model may have. + private static readonly HashSet s_supportedKeyTypes = + [ + typeof(ulong), + typeof(Guid) + ]; + /// The name of this database for telemetry purposes. private const string DatabaseName = "Qdrant"; @@ -40,9 +48,15 @@ public sealed class QdrantVectorStoreRecordCollection : IVectorStoreRec /// Optional configuration options for this class. private readonly QdrantVectorStoreRecordCollectionOptions _options; + /// A definition of the current storage model. 
+ private readonly VectorStoreRecordDefinition _vectorStoreRecordDefinition; + /// A mapper to use for converting between qdrant point and consumer models. private readonly IVectorStoreRecordMapper _mapper; + /// A dictionary that maps from a property name to the configured name that should be used when storing it. + private readonly Dictionary _storagePropertyNames = new(); + /// /// Initializes a new instance of the class. /// @@ -74,6 +88,24 @@ internal QdrantVectorStoreRecordCollection(MockableQdrantClient qdrantClient, st this._qdrantClient = qdrantClient; this._collectionName = collectionName; this._options = options ?? new QdrantVectorStoreRecordCollectionOptions(); + this._vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true); + + // Enumerate public properties using configuration or attributes. + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; + if (this._options.VectorStoreRecordDefinition is not null) + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: this._options.HasNamedVectors); + } + else + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: this._options.HasNamedVectors); + } + + // Validate key property types. + VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); + + // Build a map of property names to storage names. + this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties, this._options.VectorStoreRecordDefinition); // Assign Mapper. 
if (this._options.MapperType == QdrantRecordMapperType.QdrantPointStructCustomMapper) @@ -89,11 +121,12 @@ internal QdrantVectorStoreRecordCollection(MockableQdrantClient qdrantClient, st else { // Default Mapper. - this._mapper = new QdrantVectorStoreRecordMapper(new QdrantVectorStoreRecordMapperOptions - { - HasNamedVectors = this._options.HasNamedVectors, - VectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition - }); + this._mapper = new QdrantVectorStoreRecordMapper( + this._options.HasNamedVectors, + properties.keyProperty, + properties.dataProperties, + properties.vectorProperties, + this._storagePropertyNames); } }
@@ -108,6 +141,82 @@ public Task CollectionExistsAsync(CancellationToken cancellationToken = de
             () => this._qdrantClient.CollectionExistsAsync(this._collectionName, cancellationToken));
     }
 
+    /// <inheritdoc />
+    public async Task CreateCollectionAsync(CancellationToken cancellationToken = default)
+    {
+        // Resolve every payload index up front, so that an invalid record definition fails before
+        // anything has been created in qdrant and no half-configured collection is left behind.
+        var payloadIndexes = new List<(string StorageFieldName, PayloadSchemaType SchemaType)>();
+        foreach (var dataProperty in this._vectorStoreRecordDefinition.Properties.OfType<VectorStoreRecordDataProperty>().Where(x => x.IsFilterable))
+        {
+            if (dataProperty.PropertyType is null)
+            {
+                throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.PropertyName}' must be set to create a collection, since the property is filterable.");
+            }
+
+            if (!QdrantVectorStoreCollectionCreateMapping.s_schemaTypeMap.TryGetValue(dataProperty.PropertyType, out var schemaType))
+            {
+                throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.PropertyName}' has type '{dataProperty.PropertyType}', which cannot be indexed for filtering.");
+            }
+
+            payloadIndexes.Add((this._storagePropertyNames[dataProperty.PropertyName], schemaType));
+        }
+
+        if (!this._options.HasNamedVectors)
+        {
+            // If we are not using named vectors, we can only have one vector property. We can assume we have at least one, since this is already verified in the constructor.
+            var singleVectorProperty = this._vectorStoreRecordDefinition.Properties.OfType<VectorStoreRecordVectorProperty>().First();
+
+            // Map the single vector property to the qdrant config.
+            var vectorParams = QdrantVectorStoreCollectionCreateMapping.MapSingleVector(singleVectorProperty);
+
+            // Create the collection with the single unnamed vector.
+            await this.RunOperationAsync(
+                "CreateCollection",
+                () => this._qdrantClient.CreateCollectionAsync(
+                    this._collectionName,
+                    vectorParams,
+                    cancellationToken: cancellationToken)).ConfigureAwait(false);
+        }
+        else
+        {
+            // Since we are using named vectors, iterate over all vector properties.
+            var vectorProperties = this._vectorStoreRecordDefinition.Properties.OfType<VectorStoreRecordVectorProperty>();
+
+            // Map the named vectors to the qdrant config.
+            var vectorParamsMap = QdrantVectorStoreCollectionCreateMapping.MapNamedVectors(vectorProperties, this._storagePropertyNames);
+
+            // Create the collection with named vectors.
+            await this.RunOperationAsync(
+                "CreateCollection",
+                () => this._qdrantClient.CreateCollectionAsync(
+                    this._collectionName,
+                    vectorParamsMap,
+                    cancellationToken: cancellationToken)).ConfigureAwait(false);
+        }
+
+        // Add indexes for each of the data properties that require filtering.
+        foreach (var (fieldName, fieldSchemaType) in payloadIndexes)
+        {
+            await this.RunOperationAsync(
+                "CreatePayloadIndex",
+                () => this._qdrantClient.CreatePayloadIndexAsync(
+                    this._collectionName,
+                    fieldName,
+                    fieldSchemaType,
+                    cancellationToken: cancellationToken)).ConfigureAwait(false);
+        }
+    }
+
+    /// <inheritdoc />
+    public async Task CreateCollectionIfNotExistsAsync(CancellationToken cancellationToken = default)
+    {
+        if (!await this.CollectionExistsAsync(cancellationToken).ConfigureAwait(false))
+        {
+            await this.CreateCollectionAsync(cancellationToken).ConfigureAwait(false);
+        }
+    }
+
     ///
     public Task DeleteCollectionAsync(CancellationToken cancellationToken =
default) { diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs index 266501e3bfab..fdc4d1ab60f6 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -19,13 +19,6 @@ namespace Microsoft.SemanticKernel.Connectors.Qdrant; internal sealed class QdrantVectorStoreRecordMapper : IVectorStoreRecordMapper where TRecord : class { - /// A set of types that a key on the provided model may have. - private static readonly HashSet s_supportedKeyTypes = - [ - typeof(ulong), - typeof(Guid) - ]; - /// A set of types that data properties on the provided model may have. private static readonly HashSet s_supportedDataTypes = [ @@ -55,51 +48,40 @@ internal sealed class QdrantVectorStoreRecordMapper : IVectorStoreRecor typeof(ReadOnlyMemory?) ]; - /// A list of property info objects that point at the payload properties in the current model, and allows easy reading and writing of these properties. - private readonly List _payloadPropertiesInfo = new(); + /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. + private readonly PropertyInfo _keyPropertyInfo; + + /// A list of property info objects that point at the data properties in the current model, and allows easy reading and writing of these properties. + private readonly List _dataPropertiesInfo = new(); /// A list of property info objects that point at the vector properties in the current model, and allows easy reading and writing of these properties. private readonly List _vectorPropertiesInfo = new(); - /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. 
- private readonly PropertyInfo _keyPropertyInfo; - /// A dictionary that maps from a property name to the configured name that should be used when storing it. private readonly Dictionary _storagePropertyNames = new(); - /// Configuration options for this class. - private readonly QdrantVectorStoreRecordMapperOptions _options; + /// A value indicating whether the vectors in the store are named, or whether there is just a single unnamed vector per qdrant point. + private readonly bool _hasNamedVectors; /// /// Initializes a new instance of the class. /// - /// Options to use when doing the model conversion. - public QdrantVectorStoreRecordMapper(QdrantVectorStoreRecordMapperOptions options) + /// A value indicating whether the vectors in the store are named, or whether there is just a single unnamed vector per qdrant point. + /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. + /// A list of property info objects that point at the data properties in the current model, and allows easy reading and writing of these properties. + /// A list of property info objects that point at the vector properties in the current model, and allows easy reading and writing of these properties. + /// A dictionary that maps from a property name to the configured name that should be used when storing it. + public QdrantVectorStoreRecordMapper(bool hasNamedVectors, PropertyInfo keyProperty, List dataProperties, List vectorProperties, Dictionary storagePropertyNames) { - Verify.NotNull(options); - this._options = options; - - // Enumerate public properties using configuration or attributes. 
- (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; - if (this._options.VectorStoreRecordDefinition is not null) - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: this._options.HasNamedVectors); - } - else - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: this._options.HasNamedVectors); - } + this._hasNamedVectors = hasNamedVectors; + this._keyPropertyInfo = keyProperty; + this._dataPropertiesInfo = dataProperties; + this._vectorPropertiesInfo = vectorProperties; + this._storagePropertyNames = storagePropertyNames; // Validate property types and store for later use. - VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); - - this._keyPropertyInfo = properties.keyProperty; - this._payloadPropertiesInfo = properties.dataProperties; - this._vectorPropertiesInfo = properties.vectorProperties; - - this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties, this._options.VectorStoreRecordDefinition); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(dataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(vectorProperties, s_supportedVectorTypes, "Vector"); } /// @@ -130,15 +112,15 @@ public PointStruct MapFromDataToStorageModel(TRecord dataModel) }; // Add point payload. 
- foreach (var payloadPropertyInfo in this._payloadPropertiesInfo) + foreach (var dataPropertyInfo in this._dataPropertiesInfo) { - var propertyName = this._storagePropertyNames[payloadPropertyInfo.Name]; - var propertyValue = payloadPropertyInfo.GetValue(dataModel); + var propertyName = this._storagePropertyNames[dataPropertyInfo.Name]; + var propertyValue = dataPropertyInfo.GetValue(dataModel); pointStruct.Payload.Add(propertyName, ConvertToGrpcFieldValue(propertyValue)); } // Add vectors. - if (this._options.HasNamedVectors) + if (this._hasNamedVectors) { var namedVectors = new NamedVectors(); foreach (var vectorPropertyInfo in this._vectorPropertiesInfo) @@ -191,7 +173,7 @@ public TRecord MapFromStorageToDataModel(PointStruct storageModel, StorageToData { var propertyName = this._storagePropertyNames[vectorProperty.Name]; - if (this._options.HasNamedVectors) + if (this._hasNamedVectors) { if (storageModel.Vectors.Vectors_.Vectors.TryGetValue(propertyName, out var vector)) { @@ -205,13 +187,13 @@ public TRecord MapFromStorageToDataModel(PointStruct storageModel, StorageToData } } - // Add each payload property. - foreach (var payloadProperty in this._payloadPropertiesInfo) + // Add each data property. 
+ foreach (var dataProperty in this._dataPropertiesInfo) { - var propertyName = this._storagePropertyNames[payloadProperty.Name]; + var propertyName = this._storagePropertyNames[dataProperty.Name]; if (storageModel.Payload.TryGetValue(propertyName, out var value)) { - outputJsonObject.Add(payloadProperty.Name, ConvertFromGrpcFieldValueToJsonNode(value)); + outputJsonObject.Add(dataProperty.Name, ConvertFromGrpcFieldValueToJsonNode(value)); } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs deleted file mode 100644 index bb6d5d837d05..000000000000 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapperOptions.cs +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using Microsoft.SemanticKernel.Data; - -namespace Microsoft.SemanticKernel.Connectors.Qdrant; - -/// -/// Options when creating a . -/// -internal sealed class QdrantVectorStoreRecordMapperOptions -{ - /// - /// Gets or sets a value indicating whether the vectors in the store are named, or whether there is just a single vector per qdrant point. - /// Defaults to single vector per point. - /// - public bool HasNamedVectors { get; set; } = false; - - /// - /// Gets or sets an optional record definition that defines the schema of the record type. - /// - /// - /// If not provided, the schema will be inferred from the record model class using reflection. - /// In this case, the record model properties must be annotated with the appropriate attributes to indicate their usage. - /// See , and . - /// - public VectorStoreRecordDefinition? 
VectorStoreRecordDefinition { get; init; } = null; -} diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreCollectionCreateMappingTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreCollectionCreateMappingTests.cs new file mode 100644 index 000000000000..6a7b9c705b8a --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreCollectionCreateMappingTests.cs @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using Microsoft.SemanticKernel.Data; +using Qdrant.Client.Grpc; +using Xunit; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant.UnitTests; + +/// +/// Contains tests for the class. +/// +public class QdrantVectorStoreCollectionCreateMappingTests +{ + [Fact] + public void MapSingleVectorCreatesVectorParams() + { + // Arrange. + var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 4, DistanceFunction = DistanceFunction.DotProductSimilarity }; + + // Act. + var actual = QdrantVectorStoreCollectionCreateMapping.MapSingleVector(vectorProperty); + + // Assert. + Assert.NotNull(actual); + Assert.Equal(Distance.Dot, actual.Distance); + Assert.Equal(4ul, actual.Size); + } + + [Fact] + public void MapSingleVectorDefaultsToCosine() + { + // Arrange. + var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 4 }; + + // Act. + var actual = QdrantVectorStoreCollectionCreateMapping.MapSingleVector(vectorProperty); + + // Assert. + Assert.Equal(Distance.Cosine, actual.Distance); + } + + [Fact] + public void MapSingleVectorThrowsForUnsupportedDistanceFunction() + { + // Arrange. + var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 4, DistanceFunction = DistanceFunction.CosineDistance }; + + // Act and assert. 
+ Assert.Throws(() => QdrantVectorStoreCollectionCreateMapping.MapSingleVector(vectorProperty)); + } + + [Theory] + [InlineData(null)] + [InlineData(0)] + public void MapSingleVectorThrowsIfDimensionsIsInvalid(int? dimensions) + { + // Arrange. + var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = dimensions }; + + // Act and assert. + Assert.Throws(() => QdrantVectorStoreCollectionCreateMapping.MapSingleVector(vectorProperty)); + } + + [Fact] + public void MapNamedVectorsCreatesVectorParamsMap() + { + // Arrange. + var vectorProperties = new VectorStoreRecordVectorProperty[] + { + new("testvector1") { Dimensions = 10, DistanceFunction = DistanceFunction.EuclideanDistance }, + new("testvector2") { Dimensions = 20 } + }; + + var storagePropertyNames = new Dictionary + { + { "testvector1", "storage_testvector1" }, + { "testvector2", "storage_testvector2" } + }; + + // Act. + var actual = QdrantVectorStoreCollectionCreateMapping.MapNamedVectors(vectorProperties, storagePropertyNames); + + // Assert. 
+ Assert.NotNull(actual); + Assert.Equal(2, actual.Map.Count); + Assert.Equal(10ul, actual.Map["storage_testvector1"].Size); + Assert.Equal(Distance.Euclid, actual.Map["storage_testvector1"].Distance); + Assert.Equal(20ul, actual.Map["storage_testvector2"].Size); + Assert.Equal(Distance.Cosine, actual.Map["storage_testvector2"].Distance); + } +} diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs index 988575d25996..59c5e73e2f56 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs @@ -53,6 +53,49 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN Assert.Equal(expectedExists, actual); } + [Fact] + public async Task CanCreateCollectionAsync() + { + // Arrange. + var sut = new QdrantVectorStoreRecordCollection>(this._qdrantClientMock.Object, TestCollectionName); + + this._qdrantClientMock + .Setup(x => x.CreateCollectionAsync( + It.IsAny(), + It.IsAny(), + this._testCancellationToken)) + .Returns(Task.CompletedTask); + + this._qdrantClientMock + .Setup(x => x.CreatePayloadIndexAsync( + It.IsAny(), + It.IsAny(), + It.IsAny(), + this._testCancellationToken)) + .ReturnsAsync(new UpdateResult()); + + // Act. + await sut.CreateCollectionAsync(this._testCancellationToken); + + // Assert. 
+ this._qdrantClientMock + .Verify( + x => x.CreateCollectionAsync( + TestCollectionName, + It.Is(x => x.Size == 4), + this._testCancellationToken), + Times.Once); + + this._qdrantClientMock + .Verify( + x => x.CreatePayloadIndexAsync( + TestCollectionName, + "Data", + PayloadSchemaType.Text, + this._testCancellationToken), + Times.Once); + } + [Fact] public async Task CanDeleteCollectionAsync() { @@ -598,10 +641,10 @@ public sealed class SinglePropsModel [VectorStoreRecordKey] public required T Key { get; set; } - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] public string Data { get; set; } = string.Empty; - [VectorStoreRecordVector] + [VectorStoreRecordVector(4)] public ReadOnlyMemory? Vector { get; set; } public string? NotAnnotated { get; set; } diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs index e3ce7c19cbfd..6f67c43e87eb 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Reflection; using Microsoft.SemanticKernel.Connectors.Qdrant; using Microsoft.SemanticKernel.Data; using Qdrant.Client.Grpc; @@ -21,7 +22,8 @@ public class QdrantVectorStoreRecordMapperTests public void MapsSinglePropsFromDataToStorageModelWithUlong(bool hasNamedVectors) { // Arrange. 
- var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = hasNamedVectors }); + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), supportsMultipleVectors: hasNamedVectors); + var sut = new QdrantVectorStoreRecordMapper>(hasNamedVectors, keyProperty, dataProperties, vectorProperties, s_singlePropsModelStorageNamesMap); // Act. var actual = sut.MapFromDataToStorageModel(CreateSinglePropsModel(5ul)); @@ -30,11 +32,11 @@ public void MapsSinglePropsFromDataToStorageModelWithUlong(bool hasNamedVectors) Assert.NotNull(actual); Assert.Equal(5ul, actual.Id.Num); Assert.Single(actual.Payload); - Assert.Equal("data", actual.Payload["Data"].StringValue); + Assert.Equal("data value", actual.Payload["data"].StringValue); if (hasNamedVectors) { - Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vectors.Vectors_.Vectors["Vector"].Data.ToArray()); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vectors.Vectors_.Vectors["vector"].Data.ToArray()); } else { @@ -48,7 +50,8 @@ public void MapsSinglePropsFromDataToStorageModelWithUlong(bool hasNamedVectors) public void MapsSinglePropsFromDataToStorageModelWithGuid(bool hasNamedVectors) { // Arrange. - var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = hasNamedVectors }); + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), supportsMultipleVectors: hasNamedVectors); + var sut = new QdrantVectorStoreRecordMapper>(hasNamedVectors, keyProperty, dataProperties, vectorProperties, s_singlePropsModelStorageNamesMap); // Act. 
var actual = sut.MapFromDataToStorageModel(CreateSinglePropsModel(Guid.Parse("11111111-1111-1111-1111-111111111111"))); @@ -57,7 +60,7 @@ public void MapsSinglePropsFromDataToStorageModelWithGuid(bool hasNamedVectors) Assert.NotNull(actual); Assert.Equal(Guid.Parse("11111111-1111-1111-1111-111111111111"), Guid.Parse(actual.Id.Uuid)); Assert.Single(actual.Payload); - Assert.Equal("data", actual.Payload["Data"].StringValue); + Assert.Equal("data value", actual.Payload["data"].StringValue); } [Theory] @@ -68,7 +71,8 @@ public void MapsSinglePropsFromDataToStorageModelWithGuid(bool hasNamedVectors) public void MapsSinglePropsFromStorageToDataModelWithUlong(bool hasNamedVectors, bool includeVectors) { // Arrange. - var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = hasNamedVectors }); + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), supportsMultipleVectors: hasNamedVectors); + var sut = new QdrantVectorStoreRecordMapper>(hasNamedVectors, keyProperty, dataProperties, vectorProperties, s_singlePropsModelStorageNamesMap); // Act. var actual = sut.MapFromStorageToDataModel(CreateSinglePropsPointStruct(5, hasNamedVectors), new() { IncludeVectors = includeVectors }); @@ -76,7 +80,7 @@ public void MapsSinglePropsFromStorageToDataModelWithUlong(bool hasNamedVectors, // Assert. Assert.NotNull(actual); Assert.Equal(5ul, actual.Key); - Assert.Equal("data", actual.Data); + Assert.Equal("data value", actual.Data); if (includeVectors) { @@ -96,7 +100,8 @@ public void MapsSinglePropsFromStorageToDataModelWithUlong(bool hasNamedVectors, public void MapsSinglePropsFromStorageToDataModelWithGuid(bool hasNamedVectors, bool includeVectors) { // Arrange. 
- var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = hasNamedVectors }); + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), supportsMultipleVectors: hasNamedVectors); + var sut = new QdrantVectorStoreRecordMapper>(hasNamedVectors, keyProperty, dataProperties, vectorProperties, s_singlePropsModelStorageNamesMap); // Act. var actual = sut.MapFromStorageToDataModel(CreateSinglePropsPointStruct(Guid.Parse("11111111-1111-1111-1111-111111111111"), hasNamedVectors), new() { IncludeVectors = includeVectors }); @@ -104,7 +109,7 @@ public void MapsSinglePropsFromStorageToDataModelWithGuid(bool hasNamedVectors, // Assert. Assert.NotNull(actual); Assert.Equal(Guid.Parse("11111111-1111-1111-1111-111111111111"), actual.Key); - Assert.Equal("data", actual.Data); + Assert.Equal("data value", actual.Data); if (includeVectors) { @@ -120,7 +125,8 @@ public void MapsSinglePropsFromStorageToDataModelWithGuid(bool hasNamedVectors, public void MapsMultiPropsFromDataToStorageModelWithUlong() { // Arrange. - var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = true }); + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), supportsMultipleVectors: true); + var sut = new QdrantVectorStoreRecordMapper>(true, keyProperty, dataProperties, vectorProperties, s_multiPropsModelStorageNamesMap); // Act. 
var actual = sut.MapFromDataToStorageModel(CreateMultiPropsModel(5ul)); @@ -129,22 +135,23 @@ public void MapsMultiPropsFromDataToStorageModelWithUlong() Assert.NotNull(actual); Assert.Equal(5ul, actual.Id.Num); Assert.Equal(7, actual.Payload.Count); - Assert.Equal("data 1", actual.Payload["DataString"].StringValue); - Assert.Equal(5, actual.Payload["DataInt"].IntegerValue); - Assert.Equal(5, actual.Payload["DataLong"].IntegerValue); - Assert.Equal(5.5f, actual.Payload["DataFloat"].DoubleValue); - Assert.Equal(5.5d, actual.Payload["DataDouble"].DoubleValue); - Assert.True(actual.Payload["DataBool"].BoolValue); - Assert.Equal(new int[] { 1, 2, 3, 4 }, actual.Payload["DataArrayInt"].ListValue.Values.Select(x => (int)x.IntegerValue).ToArray()); - Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vectors.Vectors_.Vectors["Vector1"].Data.ToArray()); - Assert.Equal(new float[] { 5, 6, 7, 8 }, actual.Vectors.Vectors_.Vectors["Vector2"].Data.ToArray()); + Assert.Equal("data 1", actual.Payload["dataString"].StringValue); + Assert.Equal(5, actual.Payload["dataInt"].IntegerValue); + Assert.Equal(5, actual.Payload["dataLong"].IntegerValue); + Assert.Equal(5.5f, actual.Payload["dataFloat"].DoubleValue); + Assert.Equal(5.5d, actual.Payload["dataDouble"].DoubleValue); + Assert.True(actual.Payload["dataBool"].BoolValue); + Assert.Equal(new int[] { 1, 2, 3, 4 }, actual.Payload["dataArrayInt"].ListValue.Values.Select(x => (int)x.IntegerValue).ToArray()); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vectors.Vectors_.Vectors["vector1"].Data.ToArray()); + Assert.Equal(new float[] { 5, 6, 7, 8 }, actual.Vectors.Vectors_.Vectors["vector2"].Data.ToArray()); } [Fact] public void MapsMultiPropsFromDataToStorageModelWithGuid() { // Arrange. 
- var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = true }); + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), supportsMultipleVectors: true); + var sut = new QdrantVectorStoreRecordMapper>(true, keyProperty, dataProperties, vectorProperties, s_multiPropsModelStorageNamesMap); // Act. var actual = sut.MapFromDataToStorageModel(CreateMultiPropsModel(Guid.Parse("11111111-1111-1111-1111-111111111111"))); @@ -153,15 +160,15 @@ public void MapsMultiPropsFromDataToStorageModelWithGuid() Assert.NotNull(actual); Assert.Equal(Guid.Parse("11111111-1111-1111-1111-111111111111"), Guid.Parse(actual.Id.Uuid)); Assert.Equal(7, actual.Payload.Count); - Assert.Equal("data 1", actual.Payload["DataString"].StringValue); - Assert.Equal(5, actual.Payload["DataInt"].IntegerValue); - Assert.Equal(5, actual.Payload["DataLong"].IntegerValue); - Assert.Equal(5.5f, actual.Payload["DataFloat"].DoubleValue); - Assert.Equal(5.5d, actual.Payload["DataDouble"].DoubleValue); - Assert.True(actual.Payload["DataBool"].BoolValue); - Assert.Equal(new int[] { 1, 2, 3, 4 }, actual.Payload["DataArrayInt"].ListValue.Values.Select(x => (int)x.IntegerValue).ToArray()); - Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vectors.Vectors_.Vectors["Vector1"].Data.ToArray()); - Assert.Equal(new float[] { 5, 6, 7, 8 }, actual.Vectors.Vectors_.Vectors["Vector2"].Data.ToArray()); + Assert.Equal("data 1", actual.Payload["dataString"].StringValue); + Assert.Equal(5, actual.Payload["dataInt"].IntegerValue); + Assert.Equal(5, actual.Payload["dataLong"].IntegerValue); + Assert.Equal(5.5f, actual.Payload["dataFloat"].DoubleValue); + Assert.Equal(5.5d, actual.Payload["dataDouble"].DoubleValue); + Assert.True(actual.Payload["dataBool"].BoolValue); + Assert.Equal(new int[] { 1, 2, 3, 4 }, actual.Payload["dataArrayInt"].ListValue.Values.Select(x => (int)x.IntegerValue).ToArray()); + Assert.Equal(new float[] { 
1, 2, 3, 4 }, actual.Vectors.Vectors_.Vectors["vector1"].Data.ToArray()); + Assert.Equal(new float[] { 5, 6, 7, 8 }, actual.Vectors.Vectors_.Vectors["vector2"].Data.ToArray()); } [Theory] @@ -170,7 +177,8 @@ public void MapsMultiPropsFromDataToStorageModelWithGuid() public void MapsMultiPropsFromStorageToDataModelWithUlong(bool includeVectors) { // Arrange. - var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = true }); + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), supportsMultipleVectors: true); + var sut = new QdrantVectorStoreRecordMapper>(true, keyProperty, dataProperties, vectorProperties, s_multiPropsModelStorageNamesMap); // Act. var actual = sut.MapFromStorageToDataModel(CreateMultiPropsPointStruct(5), new() { IncludeVectors = includeVectors }); @@ -204,7 +212,8 @@ public void MapsMultiPropsFromStorageToDataModelWithUlong(bool includeVectors) public void MapsMultiPropsFromStorageToDataModelWithGuid(bool includeVectors) { // Arrange. - var sut = new QdrantVectorStoreRecordMapper>(new() { HasNamedVectors = true }); + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), supportsMultipleVectors: true); + var sut = new QdrantVectorStoreRecordMapper>(true, keyProperty, dataProperties, vectorProperties, s_multiPropsModelStorageNamesMap); // Act. 
var actual = sut.MapFromStorageToDataModel(CreateMultiPropsPointStruct(Guid.Parse("11111111-1111-1111-1111-111111111111")), new() { IncludeVectors = includeVectors }); @@ -237,7 +246,7 @@ private static SinglePropsModel CreateSinglePropsModel(TKey key) return new SinglePropsModel { Key = key, - Data = "data", + Data = "data value", Vector = new float[] { 1, 2, 3, 4 }, NotAnnotated = "notAnnotated", }; @@ -279,12 +288,12 @@ private static PointStruct CreateSinglePropsPointStruct(Guid id, bool hasNamedVe private static void AddDataToSinglePropsPointStruct(PointStruct pointStruct, bool hasNamedVectors) { - pointStruct.Payload.Add("Data", "data"); + pointStruct.Payload.Add("data", "data value"); if (hasNamedVectors) { var namedVectors = new NamedVectors(); - namedVectors.Vectors.Add("Vector", new[] { 1f, 2f, 3f, 4f }); + namedVectors.Vectors.Add("vector", new[] { 1f, 2f, 3f, 4f }); pointStruct.Vectors = new Vectors() { Vectors_ = namedVectors }; } else @@ -311,26 +320,33 @@ private static PointStruct CreateMultiPropsPointStruct(Guid id) private static void AddDataToMultiPropsPointStruct(PointStruct pointStruct) { - pointStruct.Payload.Add("DataString", "data 1"); - pointStruct.Payload.Add("DataInt", 5); - pointStruct.Payload.Add("DataLong", 5L); - pointStruct.Payload.Add("DataFloat", 5.5f); - pointStruct.Payload.Add("DataDouble", 5.5d); - pointStruct.Payload.Add("DataBool", true); + pointStruct.Payload.Add("dataString", "data 1"); + pointStruct.Payload.Add("dataInt", 5); + pointStruct.Payload.Add("dataLong", 5L); + pointStruct.Payload.Add("dataFloat", 5.5f); + pointStruct.Payload.Add("dataDouble", 5.5d); + pointStruct.Payload.Add("dataBool", true); var dataIntArray = new ListValue(); dataIntArray.Values.Add(1); dataIntArray.Values.Add(2); dataIntArray.Values.Add(3); dataIntArray.Values.Add(4); - pointStruct.Payload.Add("DataArrayInt", new Value { ListValue = dataIntArray }); + pointStruct.Payload.Add("dataArrayInt", new Value { ListValue = dataIntArray }); var 
namedVectors = new NamedVectors(); - namedVectors.Vectors.Add("Vector1", new[] { 1f, 2f, 3f, 4f }); - namedVectors.Vectors.Add("Vector2", new[] { 5f, 6f, 7f, 8f }); + namedVectors.Vectors.Add("vector1", new[] { 1f, 2f, 3f, 4f }); + namedVectors.Vectors.Add("vector2", new[] { 5f, 6f, 7f, 8f }); pointStruct.Vectors = new Vectors() { Vectors_ = namedVectors }; } + private static readonly Dictionary s_singlePropsModelStorageNamesMap = new() + { + { "Key", "key" }, + { "Data", "data" }, + { "Vector", "vector" }, + }; + private sealed class SinglePropsModel { [VectorStoreRecordKey] @@ -345,6 +361,20 @@ private sealed class SinglePropsModel public string NotAnnotated { get; set; } = string.Empty; } + private static readonly Dictionary s_multiPropsModelStorageNamesMap = new() + { + { "Key", "key" }, + { "DataString", "dataString" }, + { "DataInt", "dataInt" }, + { "DataLong", "dataLong" }, + { "DataFloat", "dataFloat" }, + { "DataDouble", "dataDouble" }, + { "DataBool", "dataBool" }, + { "DataArrayInt", "dataArrayInt" }, + { "Vector1", "vector1" }, + { "Vector2", "vector2" }, + }; + private sealed class MultiPropsModel { [VectorStoreRecordKey] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs index 26c82c311228..15136f8d95e6 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs @@ -35,13 +35,13 @@ public QdrantVectorStoreFixture() Properties = new List { new VectorStoreRecordKeyProperty("HotelId"), - new VectorStoreRecordDataProperty("HotelName"), - new VectorStoreRecordDataProperty("HotelCode"), - new VectorStoreRecordDataProperty("ParkingIncluded") { StoragePropertyName = "parking_is_included" }, - new VectorStoreRecordDataProperty("HotelRating"), + new VectorStoreRecordDataProperty("HotelName") { IsFilterable = true, 
PropertyType = typeof(string) }, + new VectorStoreRecordDataProperty("HotelCode") { IsFilterable = true, PropertyType = typeof(int) }, + new VectorStoreRecordDataProperty("ParkingIncluded") { IsFilterable = true, PropertyType = typeof(bool), StoragePropertyName = "parking_is_included" }, + new VectorStoreRecordDataProperty("HotelRating") { IsFilterable = true, PropertyType = typeof(float) }, new VectorStoreRecordDataProperty("Tags"), new VectorStoreRecordDataProperty("Description"), - new VectorStoreRecordVectorProperty("DescriptionEmbedding") + new VectorStoreRecordVectorProperty("DescriptionEmbedding") { Dimensions = 4, DistanceFunction = DistanceFunction.ManhattanDistance } } }; this.HotelWithGuidIdVectorStoreRecordDefinition = new VectorStoreRecordDefinition @@ -272,19 +272,19 @@ public record HotelInfo() public ulong HotelId { get; init; } /// A string metadata field. - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] public string? HotelName { get; set; } /// An int metadata field. - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] public int HotelCode { get; set; } /// A float metadata field. - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] public float? HotelRating { get; set; } /// A bool metadata field. - [VectorStoreRecordData(StoragePropertyName = "parking_is_included")] + [VectorStoreRecordData(IsFilterable = true, StoragePropertyName = "parking_is_included")] public bool ParkingIncluded { get; set; } [VectorStoreRecordData] @@ -295,7 +295,7 @@ public record HotelInfo() public string Description { get; set; } /// A vector field. - [VectorStoreRecordVector] + [VectorStoreRecordVector(4, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)] public ReadOnlyMemory? 
DescriptionEmbedding { get; set; } } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs index 7249bdb0e93e..fbe2c997ab80 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs @@ -36,6 +36,37 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN Assert.Equal(expectedExists, actual); } + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task ItCanCreateACollectionAsync(bool hasNamedVectors, bool useRecordDefinition) + { + // Arrange + var collectionNamePostfix1 = useRecordDefinition ? "WithDefinition" : "WithType"; + var collectionNamePostfix2 = hasNamedVectors ? "HasNamedVectors" : "SingleUnnamedVector"; + var testCollectionName = $"createtest{collectionNamePostfix1}{collectionNamePostfix2}"; + + var options = new QdrantVectorStoreRecordCollectionOptions + { + HasNamedVectors = hasNamedVectors, + VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.HotelVectorStoreRecordDefinition : null + }; + var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, testCollectionName, options); + + // Act + await sut.CreateCollectionAsync(); + + // Assert + var existResult = await sut.CollectionExistsAsync(); + Assert.True(existResult); + await sut.DeleteCollectionAsync(); + + // Output + output.WriteLine(existResult.ToString()); + } + [Fact] public async Task ItCanDeleteCollectionAsync() { From 16dd96281f90ffa042157a1b45acf3230f5ce3b4 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Wed, 17 Jul 2024 16:34:11 +0100 Subject: [PATCH 21/48] .Net: Add CreateCollection to Volatile vectorstore and abstraction (#7318) ### Motivation and Context As part of the memory connector redesign we have fixed on a design where we have a VectorStore that produces VectorStoreRecordCollection instances. These are tied to a collection and will expose single collection operations. ### Description This PR contains: - The ability to create Volatile collections - Updating the abstraction to include the new methods. With this pr, all implementations now support create. 
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../Data/IVectorStoreRecordCollection.cs | 17 ++++++++++++++++- .../Data/VolatileVectorStoreRecordCollection.cs | 16 ++++++++++++++++ .../VolatileVectorStoreRecordCollectionTests.cs | 13 +++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs index 78f588e53c7e..7ca6b1896d8f 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs @@ -30,11 +30,25 @@ public interface IVectorStoreRecordCollection /// if the collection exists, otherwise. Task CollectionExistsAsync(CancellationToken cancellationToken = default); + /// + /// Create this collection in the vector store. + /// + /// The to monitor for cancellation requests. The default is . + /// A that completes when the collection has been created. + Task CreateCollectionAsync(CancellationToken cancellationToken = default); + + /// + /// Create this collection in the vector store if it does not already exist. + /// + /// The to monitor for cancellation requests. The default is . + /// A that completes when the collection has been created. + Task CreateCollectionIfNotExistsAsync(CancellationToken cancellationToken = default); + /// /// Delete the collection from the vector store. /// /// The to monitor for cancellation requests. The default is . 
- /// A task that completes when the collection has been deleted. + /// A that completes when the collection has been deleted. Task DeleteCollectionAsync(CancellationToken cancellationToken = default); /// @@ -82,6 +96,7 @@ public interface IVectorStoreRecordCollection /// The unique ids associated with the records to remove. /// Optional options for removing the records. /// The to monitor for cancellation requests. The default is . + /// A that completes when the records have been deleted. /// Throw when the command fails to execute for any reason other than that a record does not exist. Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default); diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs index ae940067fd1b..209b2ca664eb 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs @@ -91,6 +91,22 @@ public Task CollectionExistsAsync(CancellationToken cancellationToken = de return this._internalCollection.ContainsKey(this._collectionName) ? 
Task.FromResult(true) : Task.FromResult(false); } + /// + public Task CreateCollectionAsync(CancellationToken cancellationToken = default) + { + this._internalCollection.TryAdd(this._collectionName, new ConcurrentDictionary()); + return Task.CompletedTask; + } + + /// + public async Task CreateCollectionIfNotExistsAsync(CancellationToken cancellationToken = default) + { + if (!await this.CollectionExistsAsync(cancellationToken).ConfigureAwait(false)) + { + await this.CreateCollectionAsync(cancellationToken).ConfigureAwait(false); + } + } + /// public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) { diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs index 47fa2e5355df..6486d64c7b59 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs @@ -48,6 +48,19 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN Assert.Equal(expectedExists, actual); } + [Fact] + public async Task CanCreateCollectionAsync() + { + // Arrange + var sut = this.CreateRecordCollection(false); + + // Act + await sut.CreateCollectionAsync(this._testCancellationToken); + + // Assert + Assert.True(this._collectionStore.ContainsKey(TestCollectionName)); + } + [Fact] public async Task DeleteCollectionRemovesCollectionFromDictionaryAsync() { From b0b6ece48b76f2b2acab8dd6ea3a1e8768e121d7 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Wed, 17 Jul 2024 18:28:34 +0100 Subject: [PATCH 22/48] .Net: Add IVectorStore interface and Volatile implementation (#7332) ### Motivation and Context As part of the memory connector redesign we have fixed on a design where we have a VectorStore that produces VectorStoreRecordCollection instances. 
These are tied to a collection and will expose single collection operations. ### Description This PR adds: - The new IVectorStore interface that can serve collection instances and list collection names. - An in memory (Volatile) implementation of the new interface. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../Data/IVectorStore.cs | 40 ++++++++++ .../VectorStoreRecordDataAttribute.cs | 6 +- .../Data/VolatileVectorStore.cs | 56 ++++++++++++++ .../Data/VolatileVectorStoreTests.cs | 73 +++++++++++++++++++ 4 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs create mode 100644 dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs create mode 100644 dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreTests.cs diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs new file mode 100644 index 000000000000..9146c20d2c4a --- /dev/null +++ b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; +using System.Threading; + +namespace Microsoft.SemanticKernel.Data; + +/// +/// Interface for accessing the list of collections in a vector store. +/// +/// +/// This interface can be used with collections of any schema type, but requires you to provide schema information when getting a collection. 
+/// +public interface IVectorStore +{ + /// + /// Get a collection from the vector store. + /// + /// The data type of the record key. + /// The record data model to use for adding, updating and retrieving data from the collection. + /// The name of the collection. + /// Defines the schema of the record type. + /// A new instance for managing the records in the collection. + /// + /// To successfully request a collection, either must be annotated with attributes that define the schema of + /// the record type, or must be provided. + /// + /// + /// + /// + IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition = null) + where TRecord : class; + + /// + /// Retrieve the names of all the collections in the vector store. + /// + /// The to monitor for cancellation requests. The default is . + /// The list of names of all the collections in the vector store. + IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default); +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs index 61a74443b6a2..eb1ead024a2a 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs @@ -6,8 +6,12 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Attribute to mark a property on a record class as data. +/// Attribute to mark a property on a record class as 'data'. /// +/// +/// Marking a property as 'data' means that the property is not a key, and not a vector, but optionally +/// this property may have an associated vector field containing an embedding for this data. 
+/// [Experimental("SKEXP0001")] [AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] public sealed class VectorStoreRecordDataAttribute : Attribute diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs new file mode 100644 index 000000000000..cb3ef8ec6e0c --- /dev/null +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Linq; +using System.Threading; + +namespace Microsoft.SemanticKernel.Data; + +/// +/// Service for storing and retrieving vector records, and managing vector record collections, that uses an in memory dictionary as the underlying storage. +/// +[Experimental("SKEXP0001")] +public sealed class VolatileVectorStore : IVectorStore +{ + /// Internal storage for the record collection. + private readonly ConcurrentDictionary> _internalCollection; + + /// + /// Initializes a new instance of the class. + /// + public VolatileVectorStore() + { + this._internalCollection = new(); + } + + /// + /// Initializes a new instance of the class. + /// + /// Allows passing in the dictionary used for storage, for testing purposes. + internal VolatileVectorStore(ConcurrentDictionary> internalCollection) + { + this._internalCollection = internalCollection; + } + + /// + public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? 
vectorStoreRecordDefinition = null) where TRecord : class + { + if (typeof(TKey) != typeof(string)) + { + throw new NotSupportedException("Only string keys are supported."); + } + + var typedInternalCollection = this._internalCollection as ConcurrentDictionary>; + var collection = new VolatileVectorStoreRecordCollection(typedInternalCollection!, name, new() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; + return collection!; + } + + /// + public IAsyncEnumerable ListCollectionNamesAsync(CancellationToken cancellationToken = default) + { + return this._internalCollection.Keys.ToAsyncEnumerable(); + } +} diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreTests.cs new file mode 100644 index 000000000000..6174f24bec6c --- /dev/null +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreTests.cs @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Concurrent; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Data; +using Xunit; + +namespace SemanticKernel.UnitTests.Data; + +/// +/// Contains tests for the class. +/// +public class VolatileVectorStoreTests +{ + private const string TestCollectionName = "testcollection"; + + [Fact] + public void GetCollectionReturnsCollection() + { + // Arrange. + var sut = new VolatileVectorStore(); + + // Act. + var actual = sut.GetCollection>(TestCollectionName); + + // Assert. + Assert.NotNull(actual); + Assert.IsType>>(actual); + } + + [Fact] + public void GetCollectionThrowsForInvalidKeyType() + { + // Arrange. + var sut = new VolatileVectorStore(); + + // Act & Assert. + Assert.Throws(() => sut.GetCollection>(TestCollectionName)); + } + + [Fact] + public async Task ListCollectionNamesReadsDictionaryAsync() + { + // Arrange. 
+ var collectionStore = new ConcurrentDictionary>(); + collectionStore.TryAdd("collection1", new ConcurrentDictionary()); + collectionStore.TryAdd("collection2", new ConcurrentDictionary()); + var sut = new VolatileVectorStore(collectionStore); + + // Act. + var collectionNames = sut.ListCollectionNamesAsync(); + + // Assert. + var collectionNamesList = await collectionNames.ToListAsync(); + Assert.Equal(new[] { "collection1", "collection2" }, collectionNamesList); + } + + public sealed class SinglePropsModel + { + [VectorStoreRecordKey] + public required TKey Key { get; set; } + + [VectorStoreRecordData] + public string Data { get; set; } = string.Empty; + + [VectorStoreRecordVector(4)] + public ReadOnlyMemory? Vector { get; set; } + + public string? NotAnnotated { get; set; } + } +} From ba8edc84fe37bc564eaa8ecf8dfc05405736b2f1 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 18 Jul 2024 09:55:15 +0100 Subject: [PATCH 23/48] .Net: Adding VectorStore for Azure AI Search. (#7335) ### Motivation and Context As part of the memory connector redesign we have fixed on a design where we have a VectorStore that produces VectorStoreRecordCollection instances. These are tied to a collection and will expose single collection operations. 
### Description This PR adds - An IVectorStore implementation for AzureAISearch ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../AzureAISearchVectorStoreTests.cs | 121 ++++++++++++++++++ .../AzureAISearchVectorStore.cs | 108 ++++++++++++++++ .../AzureAISearchVectorStoreOptions.cs | 14 ++ ...earchVectorStoreRecordCollectionFactory.cs | 23 ++++ ...ISearchVectorStoreRecordCollectionTests.cs | 4 +- .../AzureAISearchVectorStoreTests.cs | 45 +++++++ 6 files changed, 313 insertions(+), 2 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreTests.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStore.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreOptions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/IAzureAISearchVectorStoreRecordCollectionFactory.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreTests.cs diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreTests.cs new file mode 100644 index 000000000000..889b486da2ad --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreTests.cs @@ -0,0 +1,121 @@ +// Copyright (c) Microsoft. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Azure; +using Azure.Search.Documents; +using Azure.Search.Documents.Indexes; +using Microsoft.SemanticKernel.Connectors.AzureAISearch; +using Microsoft.SemanticKernel.Data; +using Moq; +using Xunit; + +namespace SemanticKernel.Connectors.AzureAISearch.UnitTests; + +/// +/// Contains tests for the class. +/// +public class AzureAISearchVectorStoreTests +{ + private const string TestCollectionName = "testcollection"; + + private readonly Mock _searchIndexClientMock; + private readonly Mock _searchClientMock; + + private readonly CancellationToken _testCancellationToken = new(false); + + public AzureAISearchVectorStoreTests() + { + this._searchClientMock = new Mock(MockBehavior.Strict); + this._searchIndexClientMock = new Mock(MockBehavior.Strict); + this._searchIndexClientMock.Setup(x => x.GetSearchClient(TestCollectionName)).Returns(this._searchClientMock.Object); + } + + [Fact] + public void GetCollectionReturnsCollection() + { + // Arrange. + var sut = new AzureAISearchVectorStore(this._searchIndexClientMock.Object); + + // Act. + var actual = sut.GetCollection(TestCollectionName); + + // Assert. + Assert.NotNull(actual); + Assert.IsType>(actual); + } + + [Fact] + public void GetCollectionCallsFactoryIfProvided() + { + // Arrange. + var factoryMock = new Mock(MockBehavior.Strict); + var collectionMock = new Mock>(MockBehavior.Strict); + factoryMock + .Setup(x => x.CreateVectorStoreRecordCollection(this._searchIndexClientMock.Object, TestCollectionName, null)) + .Returns(collectionMock.Object); + var sut = new AzureAISearchVectorStore(this._searchIndexClientMock.Object, new() { VectorStoreCollectionFactory = factoryMock.Object }); + + // Act. + var actual = sut.GetCollection(TestCollectionName); + + // Assert. 
+ Assert.Equal(collectionMock.Object, actual); + } + + [Fact] + public void GetCollectionThrowsForInvalidKeyType() + { + // Arrange. + var sut = new AzureAISearchVectorStore(this._searchIndexClientMock.Object); + + // Act & Assert. + Assert.Throws(() => sut.GetCollection(TestCollectionName)); + } + + [Fact] + public async Task ListCollectionNamesCallsSDKAsync() + { + // Arrange async enumerator mock. + var iterationCounter = 0; + var asyncEnumeratorMock = new Mock>(MockBehavior.Strict); + asyncEnumeratorMock.Setup(x => x.MoveNextAsync()).Returns(() => ValueTask.FromResult(iterationCounter++ <= 4)); + asyncEnumeratorMock.Setup(x => x.Current).Returns(() => $"testcollection{iterationCounter}"); + + // Arrange pageable mock. + var pageableMock = new Mock>(MockBehavior.Strict); + pageableMock.Setup(x => x.GetAsyncEnumerator(this._testCancellationToken)).Returns(asyncEnumeratorMock.Object); + + // Arrange search index client mock and sut. + this._searchIndexClientMock + .Setup(x => x.GetIndexNamesAsync(this._testCancellationToken)) + .Returns(pageableMock.Object); + var sut = new AzureAISearchVectorStore(this._searchIndexClientMock.Object); + + // Act. + var actual = sut.ListCollectionNamesAsync(this._testCancellationToken); + + // Assert. + Assert.NotNull(actual); + var actualList = await actual.ToListAsync(); + Assert.Equal(5, actualList.Count); + Assert.All(actualList, (value, index) => Assert.Equal($"testcollection{index + 1}", value)); + } + + public sealed class SinglePropsModel + { + [VectorStoreRecordKey] + public string Key { get; set; } = string.Empty; + + [VectorStoreRecordData] + public string Data { get; set; } = string.Empty; + + [VectorStoreRecordVector(4)] + public ReadOnlyMemory? Vector { get; set; } + + public string? 
NotAnnotated { get; set; } + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStore.cs new file mode 100644 index 000000000000..aa5cc82d4360 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStore.cs @@ -0,0 +1,108 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using Azure; +using Azure.Search.Documents.Indexes; +using Microsoft.SemanticKernel.Data; + +namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; + +/// +/// Class for accessing the list of collections in an Azure AI Search vector store. +/// +/// +/// This class can be used with collections of any schema type, but requires you to provide schema information when getting a collection. +/// +public sealed class AzureAISearchVectorStore : IVectorStore +{ + /// The name of this database for telemetry purposes. + private const string DatabaseName = "AzureAISearch"; + + /// Azure AI Search client that can be used to manage the list of indices in an Azure AI Search Service. + private readonly SearchIndexClient _searchIndexClient; + + /// Optional configuration options for this class. + private readonly AzureAISearchVectorStoreOptions _options; + + /// + /// Initializes a new instance of the class. + /// + /// Azure AI Search client that can be used to manage the list of indices in an Azure AI Search Service. + /// Optional configuration options for this class. + public AzureAISearchVectorStore(SearchIndexClient searchIndexClient, AzureAISearchVectorStoreOptions? options = default) + { + Verify.NotNull(searchIndexClient); + + this._searchIndexClient = searchIndexClient; + this._options = options ?? 
new AzureAISearchVectorStoreOptions(); + } + + /// + public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition = null) where TRecord : class + { + if (typeof(TKey) != typeof(string)) + { + throw new NotSupportedException("Only string keys are supported."); + } + + if (this._options.VectorStoreCollectionFactory is not null) + { + return this._options.VectorStoreCollectionFactory.CreateVectorStoreRecordCollection(this._searchIndexClient, name, vectorStoreRecordDefinition); + } + + var directlyCreatedStore = new AzureAISearchVectorStoreRecordCollection(this._searchIndexClient, name, new AzureAISearchVectorStoreRecordCollectionOptions() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; + return directlyCreatedStore!; + } + + /// + public async IAsyncEnumerable ListCollectionNamesAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var indexNamesEnumerable = this._searchIndexClient.GetIndexNamesAsync(cancellationToken).ConfigureAwait(false); + var indexNamesEnumerator = indexNamesEnumerable.GetAsyncEnumerator(); + + var nextResult = await GetNextIndexNameAsync(indexNamesEnumerator).ConfigureAwait(false); + while (nextResult.more) + { + yield return nextResult.name; + nextResult = await GetNextIndexNameAsync(indexNamesEnumerator).ConfigureAwait(false); + } + } + + /// + /// Helper method to get the next index name from the enumerator with a try catch around the move next call to convert + /// any <see cref="RequestFailedException"/> to <see cref="VectorStoreOperationException"/>, since try catch is not supported + /// around a yield return. + /// + /// The enumerator to get the next result from. + /// A value indicating whether there are more results and the current string if true. 
+ private static async Task<(string name, bool more)> GetNextIndexNameAsync(ConfiguredCancelableAsyncEnumerable.Enumerator enumerator) + { + const string OperationName = "GetIndexNames"; + + try + { + var more = await enumerator.MoveNextAsync(); + return (enumerator.Current, more); + } + catch (AggregateException ex) when (ex.InnerException is RequestFailedException innerEx) + { + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + OperationName = OperationName + }; + } + catch (RequestFailedException ex) + { + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + OperationName = OperationName + }; + } + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreOptions.cs new file mode 100644 index 000000000000..e8d54c8b7740 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreOptions.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; + +/// +/// Options when creating a . +/// +public sealed class AzureAISearchVectorStoreOptions +{ + /// + /// An optional factory to use for constructing instances, if custom options are required. + /// + public IAzureAISearchVectorStoreRecordCollectionFactory? VectorStoreCollectionFactory { get; init; } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/IAzureAISearchVectorStoreRecordCollectionFactory.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/IAzureAISearchVectorStoreRecordCollectionFactory.cs new file mode 100644 index 000000000000..ae83ec11b9fc --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/IAzureAISearchVectorStoreRecordCollectionFactory.cs @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft. 
All rights reserved. + +using Azure.Search.Documents.Indexes; +using Microsoft.SemanticKernel.Data; + +namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; + +/// +/// Interface for constructing Azure AI Search instances when using to retrieve these. +/// +public interface IAzureAISearchVectorStoreRecordCollectionFactory +{ + /// + /// Constructs a new instance of the . + /// + /// The data type of the record key. + /// The data model to use for adding, updating and retrieving data from storage. + /// Azure AI Search client that can be used to manage the list of indices in an Azure AI Search Service. + /// The name of the collection to connect to. + /// An optional record definition that defines the schema of the record type. If not present, attributes on will be used. + /// The new instance of . + IVectorStoreRecordCollection CreateVectorStoreRecordCollection(SearchIndexClient searchIndexClient, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TRecord : class; +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs index c18aef3c4653..30eb3545e7e0 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -16,10 +16,10 @@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; /// /// Integration tests for class. -/// Tests work with Azure AI Search Instance. +/// Tests work with an Azure AI Search Instance. 
/// [Collection("AzureAISearchVectorStoreCollection")] -public sealed class AzureAISearchVectorStoreRecordCollectionTests(ITestOutputHelper output, AzureAISearchVectorStoreFixture fixture) : IClassFixture +public sealed class AzureAISearchVectorStoreRecordCollectionTests(ITestOutputHelper output, AzureAISearchVectorStoreFixture fixture) { // If null, all tests will be enabled private const string SkipReason = null; //"Requires Azure AI Search Service instance up and running"; diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreTests.cs new file mode 100644 index 000000000000..1b198da2b2a0 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreTests.cs @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.AzureAISearch; +using Xunit; +using Xunit.Abstractions; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; + +/// +/// Contains integration tests for the class. +/// Tests work with an Azure AI Search Instance. 
+/// +[Collection("AzureAISearchVectorStoreCollection")] +public class AzureAISearchVectorStoreTests(ITestOutputHelper output, AzureAISearchVectorStoreFixture fixture) +{ + // If null, all tests will be enabled + private const string SkipReason = null; //"Requires Azure AI Search Service instance up and running"; + + [Fact(Skip = SkipReason)] + public async Task ItCanGetAListOfExistingCollectionNamesAsync() + { + // Arrange + var additionalCollectionName = fixture.TestIndexName + "-listnames"; + await AzureAISearchVectorStoreFixture.DeleteIndexIfExistsAsync(additionalCollectionName, fixture.SearchIndexClient); + await AzureAISearchVectorStoreFixture.CreateIndexAsync(additionalCollectionName, fixture.SearchIndexClient); + var sut = new AzureAISearchVectorStore(fixture.SearchIndexClient); + + // Act + var collectionNames = await sut.ListCollectionNamesAsync().ToListAsync(); + + // Assert + Assert.Equal(2, collectionNames.Where(x => x.StartsWith(fixture.TestIndexName, StringComparison.InvariantCultureIgnoreCase)).Count()); + Assert.Contains(fixture.TestIndexName, collectionNames); + Assert.Contains(additionalCollectionName, collectionNames); + + // Output + output.WriteLine(string.Join(",", collectionNames)); + + // Cleanup + await AzureAISearchVectorStoreFixture.DeleteIndexIfExistsAsync(additionalCollectionName, fixture.SearchIndexClient); + } +} From 6969ec0b8d01c1f1487dcadb304899aa8506d6d5 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 18 Jul 2024 16:07:35 +0100 Subject: [PATCH 24/48] .Net: Adding VectorStore for Redis. (#7345) ### Motivation and Context As part of the memory connector redesign we have fixed on a design where we have a VectorStore that produces VectorStoreRecordCollection instances. These are tied to a collection and will expose single collection operations. 
### Description This pr adds: - An IVectorStore implementation for Redis ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...RedisVectorStoreRecordCollectionFactory.cs | 23 ++++ .../RedisVectorStore.cs | 88 ++++++++++++++ .../RedisVectorStoreOptions.cs | 14 +++ .../RedisVectorStoreRecordCollection.cs | 4 +- .../RedisVectorStoreTests.cs | 110 ++++++++++++++++++ .../Memory/Redis/RedisVectorStoreTests.cs | 35 ++++++ .../Data/IVectorStoreRecordCollection.cs | 2 +- 7 files changed, 273 insertions(+), 3 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/IRedisVectorStoreRecordCollectionFactory.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs create mode 100644 dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreTests.cs diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/IRedisVectorStoreRecordCollectionFactory.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/IRedisVectorStoreRecordCollectionFactory.cs new file mode 100644 index 000000000000..6ad90b5e61f1 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/IRedisVectorStoreRecordCollectionFactory.cs @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using Microsoft.SemanticKernel.Data; +using StackExchange.Redis; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Interface for constructing Redis instances when using to retrieve these. +/// +public interface IRedisVectorStoreRecordCollectionFactory +{ + /// + /// Constructs a new instance of the . + /// + /// The data type of the record key. + /// The data model to use for adding, updating and retrieving data from storage. + /// The Redis database to read/write records from. + /// The name of the collection to connect to. + /// An optional record definition that defines the schema of the record type. If not present, attributes on will be used. + /// The new instance of . + IVectorStoreRecordCollection CreateVectorStoreRecordCollection(IDatabase database, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TRecord : class; +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs new file mode 100644 index 000000000000..498e4be9bb94 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs @@ -0,0 +1,88 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Threading; +using Microsoft.SemanticKernel.Data; +using NRedisStack.RedisStackCommands; +using StackExchange.Redis; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Class for accessing the list of collections in a Redis vector store. +/// +/// +/// This class can be used with collections of any schema type, but requires you to provide schema information when getting a collection. +/// +public sealed class RedisVectorStore : IVectorStore +{ + /// The name of this database for telemetry purposes. + private const string DatabaseName = "Redis"; + + /// The redis database to read/write indices from. 
+ private readonly IDatabase _database; + + /// Optional configuration options for this class. + private readonly RedisVectorStoreOptions _options; + + /// + /// Initializes a new instance of the class. + /// + /// The redis database to read/write indices from. + /// Optional configuration options for this class. + public RedisVectorStore(IDatabase database, RedisVectorStoreOptions? options = default) + { + Verify.NotNull(database); + + this._database = database; + this._options = options ?? new RedisVectorStoreOptions(); + } + + /// + public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition = null) where TRecord : class + { + if (typeof(TKey) != typeof(string)) + { + throw new NotSupportedException("Only string keys are supported."); + } + + if (this._options.VectorStoreCollectionFactory is not null) + { + return this._options.VectorStoreCollectionFactory.CreateVectorStoreRecordCollection(this._database, name, vectorStoreRecordDefinition); + } + + var directlyCreatedStore = new RedisVectorStoreRecordCollection(this._database, name, new RedisVectorStoreRecordCollectionOptions() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; + return directlyCreatedStore!; + } + + /// + public async IAsyncEnumerable ListCollectionNamesAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) + { + const string OperationName = "FT._LIST"; + RedisResult[] listResult; + + try + { + listResult = await this._database.FT()._ListAsync().ConfigureAwait(false); + } + catch (RedisException ex) + { + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + OperationName = OperationName + }; + } + + foreach (var item in listResult) + { + var name = item.ToString(); + if (name != null) + { + yield return name; + } + } + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs
b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs new file mode 100644 index 000000000000..b36747bb29e4 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Options when creating a . +/// +public sealed class RedisVectorStoreOptions +{ + /// + /// An optional factory to use for constructing instances, if custom options are required. + /// + public IRedisVectorStoreRecordCollectionFactory? VectorStoreCollectionFactory { get; init; } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs index 61637a6a1e6f..e526cfbb67fb 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs @@ -406,7 +406,7 @@ private async Task RunOperationAsync(string operationName, Func operation) { await operation.Invoke().ConfigureAwait(false); } - catch (RedisConnectionException ex) + catch (RedisException ex) { throw new VectorStoreOperationException("Call to vector store failed.", ex) { @@ -430,7 +430,7 @@ private async Task RunOperationAsync(string operationName, Func> o { return await operation.Invoke().ConfigureAwait(false); } - catch (RedisConnectionException ex) + catch (RedisException ex) { throw new VectorStoreOperationException("Call to vector store failed.", ex) { diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs new file mode 100644 index 000000000000..91ecd526dcfd --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs @@ -0,0 +1,110 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Data; +using Moq; +using StackExchange.Redis; +using Xunit; + +namespace Microsoft.SemanticKernel.Connectors.Redis.UnitTests; + +/// +/// Contains tests for the class. +/// +public class RedisVectorStoreTests +{ + private const string TestCollectionName = "testcollection"; + + private readonly Mock _redisDatabaseMock; + + public RedisVectorStoreTests() + { + this._redisDatabaseMock = new Mock(MockBehavior.Strict); + + var batchMock = new Mock(); + this._redisDatabaseMock.Setup(x => x.CreateBatch(It.IsAny())).Returns(batchMock.Object); + } + + [Fact] + public void GetCollectionReturnsCollection() + { + // Arrange. + var sut = new RedisVectorStore(this._redisDatabaseMock.Object); + + // Act. + var actual = sut.GetCollection>(TestCollectionName); + + // Assert. + Assert.NotNull(actual); + Assert.IsType>>(actual); + } + + [Fact] + public void GetCollectionCallsFactoryIfProvided() + { + // Arrange. + var factoryMock = new Mock(MockBehavior.Strict); + var collectionMock = new Mock>>(MockBehavior.Strict); + factoryMock + .Setup(x => x.CreateVectorStoreRecordCollection>(It.IsAny(), TestCollectionName, null)) + .Returns(collectionMock.Object); + var sut = new RedisVectorStore(this._redisDatabaseMock.Object, new() { VectorStoreCollectionFactory = factoryMock.Object }); + + // Act. + var actual = sut.GetCollection>(TestCollectionName); + + // Assert. + Assert.Equal(collectionMock.Object, actual); + factoryMock.Verify(x => x.CreateVectorStoreRecordCollection>(It.IsAny(), TestCollectionName, null), Times.Once); + } + + [Fact] + public void GetCollectionThrowsForInvalidKeyType() + { + // Arrange. + var sut = new RedisVectorStore(this._redisDatabaseMock.Object); + + // Act & Assert. + Assert.Throws(() => sut.GetCollection>(TestCollectionName)); + } + + [Fact] + public async Task ListCollectionNamesCallsSDKAsync() + { + // Arrange. 
+ var redisResultStrings = new string[] { "collection1", "collection2" }; + var results = redisResultStrings + .Select(x => RedisResult.Create(new RedisValue(x))) + .ToArray(); + this._redisDatabaseMock + .Setup( + x => x.ExecuteAsync( + It.IsAny(), + It.IsAny())) + .ReturnsAsync(RedisResult.Create(results)); + var sut = new RedisVectorStore(this._redisDatabaseMock.Object); + + // Act. + var collectionNames = sut.ListCollectionNamesAsync(); + + // Assert. + var collectionNamesList = await collectionNames.ToListAsync(); + Assert.Equal(new[] { "collection1", "collection2" }, collectionNamesList); + } + + public sealed class SinglePropsModel + { + [VectorStoreRecordKey] + public required TKey Key { get; set; } + + [VectorStoreRecordData] + public string Data { get; set; } = string.Empty; + + [VectorStoreRecordVector(4)] + public ReadOnlyMemory? Vector { get; set; } + + public string? NotAnnotated { get; set; } + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreTests.cs new file mode 100644 index 000000000000..cbc1ce74c01c --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreTests.cs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Linq; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.Redis; +using Xunit; +using Xunit.Abstractions; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Redis; + +/// +/// Contains tests for the class. +/// +/// Used to write to the test output stream. +/// The test fixture. 
+[Collection("RedisVectorStoreCollection")] +public class RedisVectorStoreTests(ITestOutputHelper output, RedisVectorStoreFixture fixture) +{ + [Fact] + public async Task ItCanGetAListOfExistingCollectionNamesAsync() + { + // Arrange + var sut = new RedisVectorStore(fixture.Database); + + // Act + var collectionNames = await sut.ListCollectionNamesAsync().ToListAsync(); + + // Assert + Assert.Single(collectionNames); + Assert.Contains("hotels", collectionNames); + + // Output + output.WriteLine(string.Join(",", collectionNames)); + } +} diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs index 7ca6b1896d8f..0ab29fb8ac84 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs @@ -8,7 +8,7 @@ namespace Microsoft.SemanticKernel.Data; /// -/// An interface for managing a collection of records in a vector store. +/// A schema aware interface for managing a named collection of records in a vector store and for creating or deleting the collection itself. /// /// The data type of the record key. /// The record data model to use for adding, updating and retrieving data from the store. From 8909d87f012ff1cf3e7c8c39d46ded0ee499cc6c Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 18 Jul 2024 17:04:30 +0100 Subject: [PATCH 25/48] .Net: Adding a VectorStore implementation for Qdrant (#7354) ### Motivation and Context As part of the memory connector redesign we have fixed on a design where we have a VectorStore that produces VectorStoreRecordCollection instances. These are tied to a collection and will expose single collection operations. 
### Description This pr adds: - An IVectorStore implementation for Qdrant - Fix a bug in the mapping where using the json parser to create the caller object model needs json property names in order to correctly deserialize the json object. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...drantVectorStoreRecordCollectionFactory.cs | 23 ++++ .../QdrantVectorStore.cs | 94 ++++++++++++++++ .../QdrantVectorStoreOptions.cs | 20 ++++ .../QdrantVectorStoreRecordMapper.cs | 27 +++-- .../QdrantVectorStoreRecordMapperTests.cs | 2 + .../QdrantVectorStoreTests.cs | 103 ++++++++++++++++++ .../Memory/Qdrant/QdrantVectorStoreTests.cs | 32 ++++++ 7 files changed, 293 insertions(+), 8 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/IQdrantVectorStoreRecordCollectionFactory.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStore.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreOptions.cs create mode 100644 dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreTests.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreTests.cs diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/IQdrantVectorStoreRecordCollectionFactory.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/IQdrantVectorStoreRecordCollectionFactory.cs new file mode 100644 index 000000000000..a94e472da3eb --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/IQdrantVectorStoreRecordCollectionFactory.cs @@ -0,0 +1,23 @@ +// 
Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel.Data; +using Qdrant.Client; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Interface for constructing Qdrant instances when using to retrieve these. +/// +public interface IQdrantVectorStoreRecordCollectionFactory +{ + /// + /// Constructs a new instance of the . + /// + /// The data type of the record key. + /// The data model to use for adding, updating and retrieving data from storage. + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + /// The name of the collection to connect to. + /// An optional record definition that defines the schema of the record type. If not present, attributes on will be used. + /// The new instance of . + IVectorStoreRecordCollection CreateVectorStoreRecordCollection(QdrantClient qdrantClient, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TRecord : class; +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStore.cs new file mode 100644 index 000000000000..3ec3e44ee6e4 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStore.cs @@ -0,0 +1,94 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Threading; +using Grpc.Core; +using Microsoft.SemanticKernel.Data; +using Qdrant.Client; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Class for accessing the list of collections in a Qdrant vector store. +/// +/// +/// This class can be used with collections of any schema type, but requires you to provide schema information when getting a collection. +/// +public sealed class QdrantVectorStore : IVectorStore +{ + /// The name of this database for telemetry purposes. 
+ private const string DatabaseName = "Qdrant"; + + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + private readonly MockableQdrantClient _qdrantClient; + + /// Optional configuration options for this class. + private readonly QdrantVectorStoreOptions _options; + + /// + /// Initializes a new instance of the class. + /// + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + /// Optional configuration options for this class. + public QdrantVectorStore(QdrantClient qdrantClient, QdrantVectorStoreOptions? options = default) + : this(new MockableQdrantClient(qdrantClient), options) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// Qdrant client that can be used to manage the collections and points in a Qdrant store. + /// Optional configuration options for this class. + internal QdrantVectorStore(MockableQdrantClient qdrantClient, QdrantVectorStoreOptions? options = default) + { + Verify.NotNull(qdrantClient); + + this._qdrantClient = qdrantClient; + this._options = options ?? new QdrantVectorStoreOptions(); + } + + /// + public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? 
vectorStoreRecordDefinition = null) where TRecord : class + { + if (typeof(TKey) != typeof(ulong) && typeof(TKey) != typeof(Guid)) + { + throw new NotSupportedException("Only ulong and Guid keys are supported."); + } + + if (this._options.VectorStoreCollectionFactory is not null) + { + return this._options.VectorStoreCollectionFactory.CreateVectorStoreRecordCollection(this._qdrantClient.QdrantClient, name, vectorStoreRecordDefinition); + } + + var directlyCreatedStore = new QdrantVectorStoreRecordCollection(this._qdrantClient, name, new QdrantVectorStoreRecordCollectionOptions() { HasNamedVectors = this._options.HasNamedVectors, VectorStoreRecordDefinition = vectorStoreRecordDefinition }); + var castCreatedStore = directlyCreatedStore as IVectorStoreRecordCollection; + return castCreatedStore!; + } + + /// + public async IAsyncEnumerable ListCollectionNamesAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) + { + IReadOnlyList collections; + + try + { + collections = await this._qdrantClient.ListCollectionsAsync(cancellationToken).ConfigureAwait(false); + } + catch (RpcException ex) + { + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + OperationName = "ListCollections" + }; + } + + foreach (var collection in collections) + { + yield return collection; + } + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreOptions.cs new file mode 100644 index 000000000000..c3ead1bdee2d --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreOptions.cs @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Options when creating a .
+/// +public sealed class QdrantVectorStoreOptions +{ + /// + /// Gets or sets a value indicating whether the vectors in the store are named and multiple vectors are supported, or whether there is just a single unnamed vector per qdrant point. + /// Defaults to single vector per point. + /// + public bool HasNamedVectors { get; set; } = false; + + /// + /// An optional factory to use for constructing instances, if custom options are required. + /// + public IQdrantVectorStoreRecordCollectionFactory? VectorStoreCollectionFactory { get; init; } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs index fdc4d1ab60f6..b40dacbb47e7 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -52,13 +52,16 @@ internal sealed class QdrantVectorStoreRecordMapper : IVectorStoreRecor private readonly PropertyInfo _keyPropertyInfo; /// A list of property info objects that point at the data properties in the current model, and allows easy reading and writing of these properties. - private readonly List _dataPropertiesInfo = new(); + private readonly List _dataPropertiesInfo; /// A list of property info objects that point at the vector properties in the current model, and allows easy reading and writing of these properties. - private readonly List _vectorPropertiesInfo = new(); + private readonly List _vectorPropertiesInfo; /// A dictionary that maps from a property name to the configured name that should be used when storing it. - private readonly Dictionary _storagePropertyNames = new(); + private readonly Dictionary _storagePropertyNames; + + /// A dictionary that maps from a property name to the configured name that should be used when serializing it to json. 
+ private readonly Dictionary _jsonPropertyNames = new(); /// A value indicating whether the vectors in the store are named, or whether there is just a single unnamed vector per qdrant point. private readonly bool _hasNamedVectors; @@ -82,6 +85,11 @@ public QdrantVectorStoreRecordMapper(bool hasNamedVectors, PropertyInfo keyPrope // Validate property types and store for later use. VectorStoreRecordPropertyReader.VerifyPropertyTypes(dataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); VectorStoreRecordPropertyReader.VerifyPropertyTypes(vectorProperties, s_supportedVectorTypes, "Vector"); + + foreach (var property in dataProperties.Concat(vectorProperties).Concat([keyProperty])) + { + this._jsonPropertyNames[property.Name] = VectorStoreRecordPropertyReader.GetJsonPropertyName(JsonSerializerOptions.Default, property); + } } /// @@ -157,13 +165,13 @@ public PointStruct MapFromDataToStorageModel(TRecord dataModel) public TRecord MapFromStorageToDataModel(PointStruct storageModel, StorageToDataModelMapperOptions options) { // Get the key property name and value. - var keyPropertyName = this._storagePropertyNames[this._keyPropertyInfo.Name]; + var keyJsonName = this._jsonPropertyNames[this._keyPropertyInfo.Name]; var keyPropertyValue = storageModel.Id.HasNum ? storageModel.Id.Num as object : storageModel.Id.Uuid as object; // Create a json object to represent the point. var outputJsonObject = new JsonObject { - { this._keyPropertyInfo.Name, JsonValue.Create(keyPropertyValue) }, + { keyJsonName, JsonValue.Create(keyPropertyValue) }, }; // Add each vector property if embeddings are included in the point. 
@@ -172,17 +180,18 @@ public TRecord MapFromStorageToDataModel(PointStruct storageModel, StorageToData foreach (var vectorProperty in this._vectorPropertiesInfo) { var propertyName = this._storagePropertyNames[vectorProperty.Name]; + var jsonName = this._jsonPropertyNames[vectorProperty.Name]; if (this._hasNamedVectors) { if (storageModel.Vectors.Vectors_.Vectors.TryGetValue(propertyName, out var vector)) { - outputJsonObject.Add(vectorProperty.Name, new JsonArray(vector.Data.Select(x => JsonValue.Create(x)).ToArray())); + outputJsonObject.Add(jsonName, new JsonArray(vector.Data.Select(x => JsonValue.Create(x)).ToArray())); } } else { - outputJsonObject.Add(vectorProperty.Name, new JsonArray(storageModel.Vectors.Vector.Data.Select(x => JsonValue.Create(x)).ToArray())); + outputJsonObject.Add(jsonName, new JsonArray(storageModel.Vectors.Vector.Data.Select(x => JsonValue.Create(x)).ToArray())); } } } @@ -191,9 +200,11 @@ public TRecord MapFromStorageToDataModel(PointStruct storageModel, StorageToData foreach (var dataProperty in this._dataPropertiesInfo) { var propertyName = this._storagePropertyNames[dataProperty.Name]; + var jsonName = this._jsonPropertyNames[dataProperty.Name]; + if (storageModel.Payload.TryGetValue(propertyName, out var value)) { - outputJsonObject.Add(dataProperty.Name, ConvertFromGrpcFieldValueToJsonNode(value)); + outputJsonObject.Add(jsonName, ConvertFromGrpcFieldValueToJsonNode(value)); } } diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs index 6f67c43e87eb..3a3569a85dab 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.Linq; using System.Reflection; +using System.Text.Json.Serialization; using 
Microsoft.SemanticKernel.Connectors.Qdrant; using Microsoft.SemanticKernel.Data; using Qdrant.Client.Grpc; @@ -383,6 +384,7 @@ private sealed class MultiPropsModel [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "Vector1")] public string DataString { get; set; } = string.Empty; + [JsonPropertyName("data_int_json")] [VectorStoreRecordData] public int DataInt { get; set; } = 0; diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreTests.cs new file mode 100644 index 000000000000..2a234f08442a --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreTests.cs @@ -0,0 +1,103 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Data; +using Moq; +using Qdrant.Client; +using Xunit; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant.UnitTests; + +/// +/// Contains tests for the class. +/// +public class QdrantVectorStoreTests +{ + private const string TestCollectionName = "testcollection"; + + private readonly Mock _qdrantClientMock; + + private readonly CancellationToken _testCancellationToken = new(false); + + public QdrantVectorStoreTests() + { + this._qdrantClientMock = new Mock(MockBehavior.Strict); + } + + [Fact] + public void GetCollectionReturnsCollection() + { + // Arrange. + var sut = new QdrantVectorStore(this._qdrantClientMock.Object); + + // Act. + var actual = sut.GetCollection>(TestCollectionName); + + // Assert. + Assert.NotNull(actual); + Assert.IsType>>(actual); + } + + [Fact] + public void GetCollectionCallsFactoryIfProvided() + { + // Arrange. 
+ var factoryMock = new Mock(MockBehavior.Strict); + var collectionMock = new Mock>>(MockBehavior.Strict); + factoryMock + .Setup(x => x.CreateVectorStoreRecordCollection>(It.IsAny(), TestCollectionName, null)) + .Returns(collectionMock.Object); + var sut = new QdrantVectorStore(this._qdrantClientMock.Object, new() { VectorStoreCollectionFactory = factoryMock.Object }); + + // Act. + var actual = sut.GetCollection>(TestCollectionName); + + // Assert. + Assert.Equal(collectionMock.Object, actual); + factoryMock.Verify(x => x.CreateVectorStoreRecordCollection>(It.IsAny(), TestCollectionName, null), Times.Once); + } + + [Fact] + public void GetCollectionThrowsForInvalidKeyType() + { + // Arrange. + var sut = new QdrantVectorStore(this._qdrantClientMock.Object); + + // Act & Assert. + Assert.Throws(() => sut.GetCollection>(TestCollectionName)); + } + + [Fact] + public async Task ListCollectionNamesCallsSDKAsync() + { + // Arrange. + this._qdrantClientMock + .Setup(x => x.ListCollectionsAsync(It.IsAny())) + .ReturnsAsync(new[] { "collection1", "collection2" }); + var sut = new QdrantVectorStore(this._qdrantClientMock.Object); + + // Act. + var collectionNames = sut.ListCollectionNamesAsync(this._testCancellationToken); + + // Assert. + var collectionNamesList = await collectionNames.ToListAsync(); + Assert.Equal(new[] { "collection1", "collection2" }, collectionNamesList); + } + + public sealed class SinglePropsModel + { + [VectorStoreRecordKey] + public required TKey Key { get; set; } + + [VectorStoreRecordData] + public string Data { get; set; } = string.Empty; + + [VectorStoreRecordVector(4)] + public ReadOnlyMemory? Vector { get; set; } + + public string? 
NotAnnotated { get; set; } + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreTests.cs new file mode 100644 index 000000000000..0da44530f5c0 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreTests.cs @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Linq; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.Qdrant; +using Xunit; +using Xunit.Abstractions; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; + +[Collection("QdrantVectorStoreCollection")] +public class QdrantVectorStoreTests(ITestOutputHelper output, QdrantVectorStoreFixture fixture) +{ + [Fact] + public async Task ItCanGetAListOfExistingCollectionNamesAsync() + { + // Arrange + var sut = new QdrantVectorStore(fixture.QdrantClient); + + // Act + var collectionNames = await sut.ListCollectionNamesAsync().ToListAsync(); + + // Assert + Assert.Equal(3, collectionNames.Count); + Assert.Contains("namedVectorsHotels", collectionNames); + Assert.Contains("singleVectorHotels", collectionNames); + Assert.Contains("singleVectorGuidIdHotels", collectionNames); + + // Output + output.WriteLine(string.Join(",", collectionNames)); + } +} From 9f71a4dfd3a18f625b7cda842499de64539c943d Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Fri, 19 Jul 2024 11:13:50 +0100 Subject: [PATCH 26/48] .Net: Remove mapper type option and default redis prefixing to true. (#7356) ### Motivation and Context As part of the new VectorStore pattern, we support passing custom models to RecordCollection implementations. There was an enum to specify which one you want to use, default or custom, but we can just use the custom if provided and fall back to the built in version if not. 
Redis requires you to add a configurable prefix to record keys in order for the record to be indexed by an index. Whether you want prefixing to happen automatically is currently optional. Since records cannot be indexed without it, and configuring the setting is painful with the new vector store design, changing the default to true. ### Description - Removing mapper type enums - Switching redis prefix setting to true ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...ISearchVectorStoreRecordCollectionTests.cs | 2 -- .../AzureAISearchRecordMapperType.cs | 21 ------------------- ...zureAISearchVectorStoreRecordCollection.cs | 10 ++------- ...earchVectorStoreRecordCollectionOptions.cs | 7 +------ .../QdrantRecordMapperType.cs | 21 ------------------- .../QdrantVectorStoreRecordCollection.cs | 7 +------ ...drantVectorStoreRecordCollectionOptions.cs | 7 +------ .../RedisRecordMapperType.cs | 21 ------------------- .../RedisVectorStoreRecordCollection.cs | 7 +------ ...RedisVectorStoreRecordCollectionOptions.cs | 11 +++------- .../QdrantVectorStoreRecordCollectionTests.cs | 2 -- .../RedisVectorStoreRecordCollectionTests.cs | 3 +-- ...ISearchVectorStoreRecordCollectionTests.cs | 2 +- .../QdrantVectorStoreRecordCollectionTests.cs | 2 +- .../RedisVectorStoreRecordCollectionTests.cs | 1 - 15 files changed, 12 insertions(+), 112 deletions(-) delete mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchRecordMapperType.cs delete mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs delete mode 100644 
dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs index ad894f829e43..585b38de81a7 100644 --- a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -278,7 +278,6 @@ public async Task CanGetRecordWithCustomMapperAsync() TestCollectionName, new() { - MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = mapperMock.Object }); @@ -492,7 +491,6 @@ public async Task CanUpsertRecordWithCustomMapperAsync() TestCollectionName, new() { - MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = mapperMock.Object }); diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchRecordMapperType.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchRecordMapperType.cs deleted file mode 100644 index 856d666ae864..000000000000 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchRecordMapperType.cs +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using System.Text.Json.Nodes; - -namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; - -/// -/// The types of mapper supported by . -/// -public enum AzureAISearchRecordMapperType -{ - /// - /// Use the default mapper that is provided by the Azure AI Search client SDK. - /// - Default, - - /// - /// Use a custom mapper between and the data model. 
- /// - JsonObjectCustomMapper -} diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs index 4b80c814f752..679b663f87b1 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs @@ -108,12 +108,6 @@ public AzureAISearchVectorStoreRecordCollection(SearchIndexClient searchIndexCli this._searchClient = this._searchIndexClient.GetSearchClient(collectionName); this._vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true); - // Verify custom mapper. - if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper && this._options.JsonObjectCustomMapper is null) - { - throw new ArgumentException($"The {nameof(AzureAISearchVectorStoreRecordCollectionOptions.JsonObjectCustomMapper)} option needs to be set if a {nameof(AzureAISearchVectorStoreRecordCollectionOptions.MapperType)} of {nameof(AzureAISearchRecordMapperType.JsonObjectCustomMapper)} has been chosen.", nameof(options)); - } - // Enumerate public properties using configuration or attributes. (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; if (this._options.VectorStoreRecordDefinition is not null) @@ -323,7 +317,7 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco const string OperationName = "GetDocument"; // Use the user provided mapper. 
- if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper) + if (this._options.JsonObjectCustomMapper is not null) { var jsonObject = await this.RunOperationAsync( OperationName, @@ -362,7 +356,7 @@ private Task> MapToStorageModelAndUploadDocumentA const string OperationName = "UploadDocuments"; // Use the user provided mapper. - if (this._options.MapperType == AzureAISearchRecordMapperType.JsonObjectCustomMapper) + if (this._options.JsonObjectCustomMapper is not null) { var jsonObjects = VectorStoreErrorHandler.RunModelConversion( DatabaseName, diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollectionOptions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollectionOptions.cs index a5ed542ddb88..462dcd5d6e66 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollectionOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollectionOptions.cs @@ -13,16 +13,11 @@ namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; public sealed class AzureAISearchVectorStoreRecordCollectionOptions where TRecord : class { - /// - /// Gets or sets the choice of mapper to use when converting between the data model and the Azure AI Search record. - /// - public AzureAISearchRecordMapperType MapperType { get; init; } = AzureAISearchRecordMapperType.Default; - /// /// Gets or sets an optional custom mapper to use when converting between the data model and the Azure AI Search record. /// /// - /// Set to to use this mapper."/> + /// If not set, the default mapper that is provided by the Azure AI Search client SDK will be used. /// public IVectorStoreRecordMapper? 
JsonObjectCustomMapper { get; init; } = null; diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs deleted file mode 100644 index 3fa9e5985353..000000000000 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantRecordMapperType.cs +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using Qdrant.Client.Grpc; - -namespace Microsoft.SemanticKernel.Connectors.Qdrant; - -/// -/// The types of mapper supported by . -/// -public enum QdrantRecordMapperType -{ - /// - /// Use the default mapper that is provided by the semantic kernel SDK that uses json as an intermediary to allows automatic mapping to a wide variety of types. - /// - Default, - - /// - /// Use a custom mapper between and the data model. - /// - QdrantPointStructCustomMapper -} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs index b5d68c180786..0197346a8228 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs @@ -108,14 +108,9 @@ internal QdrantVectorStoreRecordCollection(MockableQdrantClient qdrantClient, st this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties, this._options.VectorStoreRecordDefinition); // Assign Mapper. - if (this._options.MapperType == QdrantRecordMapperType.QdrantPointStructCustomMapper) + if (this._options.PointStructCustomMapper is not null) { // Custom Mapper. 
- if (this._options.PointStructCustomMapper is null) - { - throw new ArgumentException($"The {nameof(QdrantVectorStoreRecordCollectionOptions.PointStructCustomMapper)} option needs to be set if a {nameof(QdrantVectorStoreRecordCollectionOptions.MapperType)} of {nameof(QdrantRecordMapperType.QdrantPointStructCustomMapper)} has been chosen.", nameof(options)); - } - this._mapper = this._options.PointStructCustomMapper; } else diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollectionOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollectionOptions.cs index a8052e25ff7b..e6c51c97f6a6 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollectionOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollectionOptions.cs @@ -17,16 +17,11 @@ public sealed class QdrantVectorStoreRecordCollectionOptions /// public bool HasNamedVectors { get; set; } = false; - /// - /// Gets or sets the choice of mapper to use when converting between the data model and the qdrant point. - /// - public QdrantRecordMapperType MapperType { get; init; } = QdrantRecordMapperType.Default; - /// /// Gets or sets an optional custom mapper to use when converting between the data model and the qdrant point. /// /// - /// Set to to use this mapper."/> + /// If not set, a default mapper that uses json as an intermediary to allow automatic mapping to a wide variety of types will be used. /// public IVectorStoreRecordMapper? PointStructCustomMapper { get; init; } = null; diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs deleted file mode 100644 index 3f0e4af02d99..000000000000 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisRecordMapperType.cs +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. 
- -using System.Text.Json.Nodes; - -namespace Microsoft.SemanticKernel.Connectors.Redis; - -/// -/// The types of mapper supported by . -/// -public enum RedisRecordMapperType -{ - /// - /// Use the default semantic kernel mapper that uses property attributes to determine how to map fields. - /// - Default, - - /// - /// Use a custom mapper between and the data model. - /// - JsonNodeCustomMapper -} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs index e526cfbb67fb..fdb293b429b6 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs @@ -116,13 +116,8 @@ public RedisVectorStoreRecordCollection(IDatabase database, string collectionNam .ToArray(); // Assign Mapper. - if (this._options.MapperType == RedisRecordMapperType.JsonNodeCustomMapper) + if (this._options.JsonNodeCustomMapper is not null) { - if (this._options.JsonNodeCustomMapper is null) - { - throw new ArgumentException($"The {nameof(RedisVectorStoreRecordCollectionOptions.JsonNodeCustomMapper)} option needs to be set if a {nameof(RedisVectorStoreRecordCollectionOptions.MapperType)} of {nameof(RedisRecordMapperType.JsonNodeCustomMapper)} has been chosen.", nameof(options)); - } - this._mapper = this._options.JsonNodeCustomMapper; } else diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollectionOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollectionOptions.cs index 1c62d94dd6cc..3e3d647d20e9 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollectionOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollectionOptions.cs @@ -14,24 +14,19 @@ public sealed class RedisVectorStoreRecordCollectionOptions { /// /// Gets or sets a 
value indicating whether the collection name should be prefixed to the - /// key names before reading or writing to the Redis store. Default is false. + /// key names before reading or writing to the Redis store. Default is true. /// /// /// For a record to be indexed by a specific Redis index, the key name must be prefixed with the matching prefix configured on the Redis index. /// You can either pass in keys that are already prefixed, or set this option to true to have the collection name prefixed to the key names automatically. /// - public bool PrefixCollectionNameToKeyNames { get; init; } = false; - - /// - /// Gets or sets the choice of mapper to use when converting between the data model and the Redis record. - /// - public RedisRecordMapperType MapperType { get; init; } = RedisRecordMapperType.Default; + public bool PrefixCollectionNameToKeyNames { get; init; } = true; /// /// Gets or sets an optional custom mapper to use when converting between the data model and the Redis record. /// /// - /// Set to to use this mapper."/> + /// If not set, the default built in mapper will be used, which uses record attributes or the provided to map the record. /// public IVectorStoreRecordMapper? 
JsonNodeCustomMapper { get; init; } = null; diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs index 59c5e73e2f56..9db90fb7fe82 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs @@ -251,7 +251,6 @@ public async Task CanGetRecordWithCustomMapperAsync() new() { HasNamedVectors = true, - MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, PointStructCustomMapper = mapperMock.Object }); @@ -478,7 +477,6 @@ public async Task CanUpsertRecordWithCustomMapperAsync() new() { HasNamedVectors = false, - MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, PointStructCustomMapper = mapperMock.Object }); diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs index 9fde8c6f300d..2ecc113c60f6 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs @@ -218,7 +218,6 @@ public async Task CanGetRecordWithCustomMapperAsync() TestCollectionName, new() { - MapperType = RedisRecordMapperType.JsonNodeCustomMapper, JsonNodeCustomMapper = mapperMock.Object }); @@ -366,7 +365,6 @@ public async Task CanUpsertRecordWithCustomMapperAsync() TestCollectionName, new() { - MapperType = RedisRecordMapperType.JsonNodeCustomMapper, JsonNodeCustomMapper = mapperMock.Object }); @@ -389,6 +387,7 @@ private RedisVectorStoreRecordCollection CreateRecordCollectio TestCollectionName, new() { + PrefixCollectionNameToKeyNames = false, VectorStoreRecordDefinition = useDefinition ? 
this._singlePropsDefinition : null }); } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs index 30eb3545e7e0..84d24af2bd1f 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -283,7 +283,7 @@ public async Task ItThrowsOperationExceptionForFailedAuthenticationAsync() public async Task ItThrowsMappingExceptionForFailedMapperAsync() { // Arrange - var options = new AzureAISearchVectorStoreRecordCollectionOptions { MapperType = AzureAISearchRecordMapperType.JsonObjectCustomMapper, JsonObjectCustomMapper = new FailingMapper() }; + var options = new AzureAISearchVectorStoreRecordCollectionOptions { JsonObjectCustomMapper = new FailingMapper() }; var sut = new AzureAISearchVectorStoreRecordCollection(fixture.SearchIndexClient, fixture.TestIndexName, options); // Act & Assert diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs index fbe2c997ab80..906d9b41b957 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs @@ -329,7 +329,7 @@ public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() public async Task ItThrowsMappingExceptionForFailedMapperAsync() { // Arrange - var options = new QdrantVectorStoreRecordCollectionOptions { MapperType = QdrantRecordMapperType.QdrantPointStructCustomMapper, PointStructCustomMapper = new FailingMapper() }; + var options = new 
QdrantVectorStoreRecordCollectionOptions { PointStructCustomMapper = new FailingMapper() }; var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, "singleVectorHotels", options); // Act & Assert diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs index 1bd13895c1d4..b53321433daf 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs @@ -305,7 +305,6 @@ public async Task ItThrowsMappingExceptionForFailedMapperAsync() var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, - MapperType = RedisRecordMapperType.JsonNodeCustomMapper, JsonNodeCustomMapper = new FailingMapper() }; var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); From 288c57d77752c3c62018cb418f636454b0f4ab2d Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:29:02 +0100 Subject: [PATCH 27/48] .Net: Fix casting bug in VolatileVectorStore (#7385) ### Motivation and Context The VolatileVectorStore uses a ConcurrentDictionary of ConcurrentDictionary instances to store records. Each collection can contain different types of records. It's not possible to cast the entire sub dictionary to the right generic type, so switching to casting on a per record basis. ### Description Fixing bug where casting at the dictionary level was causing a null reference exception. 
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../Data/VolatileVectorStore.cs | 3 +-- .../VolatileVectorStoreRecordCollection.cs | 10 ++++---- ...olatileVectorStoreRecordCollectionTests.cs | 24 ++++++++++--------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs index cb3ef8ec6e0c..d8da4508c386 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs @@ -43,8 +43,7 @@ public IVectorStoreRecordCollection GetCollection( throw new NotSupportedException("Only string keys are supported."); } - var typedInternalCollection = this._internalCollection as ConcurrentDictionary>; - var collection = new VolatileVectorStoreRecordCollection(typedInternalCollection!, name, new() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; + var collection = new VolatileVectorStoreRecordCollection(this._internalCollection, name, new() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; return collection!; } diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs index 209b2ca664eb..abe55996abd9 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs @@ 
-22,7 +22,7 @@ public sealed class VolatileVectorStoreRecordCollection : IVectorStoreR where TRecord : class { /// Internal storage for the record collection. - private readonly ConcurrentDictionary> _internalCollection; + private readonly ConcurrentDictionary> _internalCollection; /// Optional configuration options for this class. private readonly VolatileVectorStoreRecordCollectionOptions _options; @@ -76,7 +76,7 @@ public VolatileVectorStoreRecordCollection(string collectionName, VolatileVector /// Allows passing in the dictionary used for storage, for testing purposes. /// The name of the collection that this will access. /// Optional configuration options for this class. - internal VolatileVectorStoreRecordCollection(ConcurrentDictionary> internalCollection, string collectionName, VolatileVectorStoreRecordCollectionOptions? options = default) + internal VolatileVectorStoreRecordCollection(ConcurrentDictionary> internalCollection, string collectionName, VolatileVectorStoreRecordCollectionOptions? options = default) : this(collectionName, options) { this._internalCollection = internalCollection; @@ -94,7 +94,7 @@ public Task CollectionExistsAsync(CancellationToken cancellationToken = de /// public Task CreateCollectionAsync(CancellationToken cancellationToken = default) { - this._internalCollection.TryAdd(this._collectionName, new ConcurrentDictionary()); + this._internalCollection.TryAdd(this._collectionName, new ConcurrentDictionary()); return Task.CompletedTask; } @@ -121,7 +121,7 @@ public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) if (collectionDictionary.TryGetValue(key, out var record)) { - return Task.FromResult(record); + return Task.FromResult(record as TRecord); } return Task.FromResult(null); @@ -187,7 +187,7 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco /// Get the collection dictionary from the internal storage, throws if it does not exist. /// /// The retrieved collection dictionary. 
- private ConcurrentDictionary GetCollectionDictionary() + private ConcurrentDictionary GetCollectionDictionary() { if (!this._internalCollection.TryGetValue(this._collectionName, out var collectionDictionary)) { diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs index 6486d64c7b59..35b3312afc87 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs @@ -21,7 +21,7 @@ public class VolatileVectorStoreRecordCollectionTests private readonly CancellationToken _testCancellationToken = new(false); - private readonly ConcurrentDictionary> _collectionStore; + private readonly ConcurrentDictionary> _collectionStore; public VolatileVectorStoreRecordCollectionTests() { @@ -34,7 +34,7 @@ public VolatileVectorStoreRecordCollectionTests() public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) { // Arrange - var collection = new ConcurrentDictionary(); + var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); var sut = new VolatileVectorStoreRecordCollection( @@ -65,7 +65,7 @@ public async Task CanCreateCollectionAsync() public async Task DeleteCollectionRemovesCollectionFromDictionaryAsync() { // Arrange - var collection = new ConcurrentDictionary(); + var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); var sut = this.CreateRecordCollection(false); @@ -84,7 +84,7 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition) { // Arrange var record = CreateModel(TestRecordKey1, withVectors: true); - var collection = new ConcurrentDictionary(); + var collection = new ConcurrentDictionary(); collection.TryAdd(TestRecordKey1, record); 
this._collectionStore.TryAdd(TestCollectionName, collection); @@ -116,7 +116,7 @@ public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) // Arrange var record1 = CreateModel(TestRecordKey1, withVectors: true); var record2 = CreateModel(TestRecordKey2, withVectors: true); - var collection = new ConcurrentDictionary(); + var collection = new ConcurrentDictionary(); collection.TryAdd(TestRecordKey1, record1); collection.TryAdd(TestRecordKey2, record2); this._collectionStore.TryAdd(TestCollectionName, collection); @@ -149,7 +149,7 @@ public async Task CanDeleteRecordAsync(bool useDefinition) // Arrange var record1 = CreateModel(TestRecordKey1, withVectors: true); var record2 = CreateModel(TestRecordKey2, withVectors: true); - var collection = new ConcurrentDictionary(); + var collection = new ConcurrentDictionary(); collection.TryAdd(TestRecordKey1, record1); collection.TryAdd(TestRecordKey2, record2); this._collectionStore.TryAdd(TestCollectionName, collection); @@ -174,7 +174,7 @@ public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition) // Arrange var record1 = CreateModel(TestRecordKey1, withVectors: true); var record2 = CreateModel(TestRecordKey2, withVectors: true); - var collection = new ConcurrentDictionary(); + var collection = new ConcurrentDictionary(); collection.TryAdd(TestRecordKey1, record1); collection.TryAdd(TestRecordKey2, record2); this._collectionStore.TryAdd(TestCollectionName, collection); @@ -198,7 +198,7 @@ public async Task CanUpsertRecordAsync(bool useDefinition) { // Arrange var record1 = CreateModel(TestRecordKey1, withVectors: true); - var collection = new ConcurrentDictionary(); + var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); var sut = this.CreateRecordCollection(useDefinition); @@ -211,7 +211,8 @@ public async Task CanUpsertRecordAsync(bool useDefinition) // Assert Assert.Equal(TestRecordKey1, upsertResult); 
Assert.True(collection.ContainsKey(TestRecordKey1)); - Assert.Equal("data testid1", collection[TestRecordKey1].Data); + Assert.IsType(collection[TestRecordKey1]); + Assert.Equal("data testid1", (collection[TestRecordKey1] as SinglePropsModel)!.Data); } [Theory] @@ -223,7 +224,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition) var record1 = CreateModel(TestRecordKey1, withVectors: true); var record2 = CreateModel(TestRecordKey2, withVectors: true); - var collection = new ConcurrentDictionary(); + var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); var sut = this.CreateRecordCollection(useDefinition); @@ -240,7 +241,8 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition) Assert.Equal(TestRecordKey2, actual[1]); Assert.True(collection.ContainsKey(TestRecordKey1)); - Assert.Equal("data testid1", collection[TestRecordKey1].Data); + Assert.IsType(collection[TestRecordKey1]); + Assert.Equal("data testid1", (collection[TestRecordKey1] as SinglePropsModel)!.Data); } private static SinglePropsModel CreateModel(string key, bool withVectors) From 7cce74088599a7b0ffcabad4f6d8b5fa0327a8d7 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 22 Jul 2024 17:17:41 +0100 Subject: [PATCH 28/48] .Net: Add basic KernelBuilder and ServiceCollection extensions for registering VectorStores. (#7386) ### Motivation and Context As part of the memory connector redesign, we are adding new VectorStore classes. These should be easy to add to service collections both on the Kernel or outside. ### Description Adding KernelBuilder and ServiceCollection extension methods to register VectorStore instances for each of the VectorStore implementations, and adding unit tests for these as well. 
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...ureAISearchKernelBuilderExtensionsTests.cs | 68 +++++++++++++ ...ISearchServiceCollectionExtensionsTests.cs | 67 +++++++++++++ .../AzureAISearchKernelBuilderExtensions.cs | 58 +++++++++++ ...zureAISearchServiceCollectionExtensions.cs | 98 +++++++++++++++++++ .../QdrantKernelBuilderExtensions.cs | 28 ++++++ .../QdrantServiceCollectionExtensions.cs | 41 ++++++++ .../RedisKernelBuilderExtensions.cs | 26 +++++ .../RedisServiceCollectionExtensions.cs | 59 +++++++++++ .../QdrantKernelBuilderExtensionsTests.cs | 65 ++++++++++++ .../QdrantServiceCollectionExtensionsTests.cs | 64 ++++++++++++ .../RedisKernelBuilderExtensionsTests.cs | 45 +++++++++ .../RedisServiceCollectionExtensionsTests.cs | 44 +++++++++ .../Data/KernelBuilderExtensions.cs | 21 ++++ .../Data/ServiceCollectionExtensions.cs | 23 +++++ .../Data/KernelBuilderExtensionsTests.cs | 34 +++++++ .../Data/ServiceCollectionExtensionsTests.cs | 33 +++++++ 16 files changed, 774 insertions(+) create mode 100644 dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchKernelBuilderExtensionsTests.cs create mode 100644 dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchServiceCollectionExtensionsTests.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchKernelBuilderExtensions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs create mode 100644 
dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs create mode 100644 dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantKernelBuilderExtensionsTests.cs create mode 100644 dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantServiceCollectionExtensionsTests.cs create mode 100644 dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisKernelBuilderExtensionsTests.cs create mode 100644 dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisServiceCollectionExtensionsTests.cs create mode 100644 dotnet/src/SemanticKernel.Core/Data/KernelBuilderExtensions.cs create mode 100644 dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs create mode 100644 dotnet/src/SemanticKernel.UnitTests/Data/KernelBuilderExtensionsTests.cs create mode 100644 dotnet/src/SemanticKernel.UnitTests/Data/ServiceCollectionExtensionsTests.cs diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchKernelBuilderExtensionsTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchKernelBuilderExtensionsTests.cs new file mode 100644 index 000000000000..740c3898ce03 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchKernelBuilderExtensionsTests.cs @@ -0,0 +1,68 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using Azure; +using Azure.Core; +using Azure.Search.Documents.Indexes; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.AzureAISearch; +using Microsoft.SemanticKernel.Data; +using Moq; +using Xunit; + +namespace SemanticKernel.Connectors.AzureAISearch.UnitTests; + +/// +/// Tests for the class. +/// +public class AzureAISearchKernelBuilderExtensionsTests +{ + private readonly IKernelBuilder _kernelBuilder; + + public AzureAISearchKernelBuilderExtensionsTests() + { + this._kernelBuilder = Kernel.CreateBuilder(); + } + + [Fact] + public void AddVectorStoreRegistersClass() + { + // Arrange. + this._kernelBuilder.Services.AddSingleton(Mock.Of()); + + // Act. + this._kernelBuilder.AddAzureAISearchVectorStore(); + + // Assert. + this.AssertVectorStoreCreated(); + } + + [Fact] + public void AddVectorStoreWithUriAndCredsRegistersClass() + { + // Act. + this._kernelBuilder.AddAzureAISearchVectorStore(new Uri("https://localhost"), new AzureKeyCredential("fakeKey")); + + // Assert. + this.AssertVectorStoreCreated(); + } + + [Fact] + public void AddVectorStoreWithUriAndTokenCredsRegistersClass() + { + // Act. + this._kernelBuilder.AddAzureAISearchVectorStore(new Uri("https://localhost"), Mock.Of()); + + // Assert. 
+ this.AssertVectorStoreCreated(); + } + + private void AssertVectorStoreCreated() + { + var kernel = this._kernelBuilder.Build(); + var vectorStore = kernel.Services.GetRequiredService(); + Assert.NotNull(vectorStore); + Assert.IsType(vectorStore); + } +} diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchServiceCollectionExtensionsTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchServiceCollectionExtensionsTests.cs new file mode 100644 index 000000000000..f2446ea7a809 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchServiceCollectionExtensionsTests.cs @@ -0,0 +1,67 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Azure; +using Azure.Core; +using Azure.Search.Documents.Indexes; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.Connectors.AzureAISearch; +using Microsoft.SemanticKernel.Data; +using Moq; +using Xunit; + +namespace SemanticKernel.Connectors.AzureAISearch.UnitTests; + +/// +/// Tests for the class. +/// +public class AzureAISearchServiceCollectionExtensionsTests +{ + private readonly IServiceCollection _serviceCollection; + + public AzureAISearchServiceCollectionExtensionsTests() + { + this._serviceCollection = new ServiceCollection(); + } + + [Fact] + public void AddVectorStoreRegistersClass() + { + // Arrange. + this._serviceCollection.AddSingleton(Mock.Of()); + + // Act. + this._serviceCollection.AddAzureAISearchVectorStore(); + + // Assert. + this.AssertVectorStoreCreated(); + } + + [Fact] + public void AddVectorStoreWithUriAndCredsRegistersClass() + { + // Act. + this._serviceCollection.AddAzureAISearchVectorStore(new Uri("https://localhost"), new AzureKeyCredential("fakeKey")); + + // Assert. + this.AssertVectorStoreCreated(); + } + + [Fact] + public void AddVectorStoreWithUriAndTokenCredsRegistersClass() + { + // Act. 
+ this._serviceCollection.AddAzureAISearchVectorStore(new Uri("https://localhost"), Mock.Of()); + + // Assert. + this.AssertVectorStoreCreated(); + } + + private void AssertVectorStoreCreated() + { + var serviceProvider = this._serviceCollection.BuildServiceProvider(); + var vectorStore = serviceProvider.GetRequiredService(); + Assert.NotNull(vectorStore); + Assert.IsType(vectorStore); + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchKernelBuilderExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchKernelBuilderExtensions.cs new file mode 100644 index 000000000000..c1eb8f2a2faf --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchKernelBuilderExtensions.cs @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Azure; +using Azure.Core; +using Azure.Search.Documents.Indexes; +using Microsoft.SemanticKernel.Data; + +namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; + +/// +/// Extension methods to register Azure AI Search instances on the . +/// +public static class AzureAISearchKernelBuilderExtensions +{ + /// + /// Register an Azure AI Search with the specified service ID and where is retrieved from the dependency injection container. + /// + /// The builder to register the on. + /// An optional service id to use as the service key. + /// Optional options to further configure the . + /// The kernel builder. + public static IKernelBuilder AddAzureAISearchVectorStore(this IKernelBuilder builder, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + { + builder.Services.AddAzureAISearchVectorStore(serviceId, options); + return builder; + } + + /// + /// Register an Azure AI Search with the provided and and the specified service ID. + /// + /// The builder to register the on. + /// The service endpoint for Azure AI Search. + /// The credential to authenticate to Azure AI Search with. 
+ /// An optional service id to use as the service key. + /// Optional options to further configure the . + /// The kernel builder. + public static IKernelBuilder AddAzureAISearchVectorStore(this IKernelBuilder builder, Uri endpoint, TokenCredential tokenCredential, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + { + builder.Services.AddAzureAISearchVectorStore(endpoint, tokenCredential, serviceId, options); + return builder; + } + + /// + /// Register an Azure AI Search with the provided and and the specified service ID. + /// + /// The builder to register the on. + /// The service endpoint for Azure AI Search. + /// The credential to authenticate to Azure AI Search with. + /// An optional service id to use as the service key. + /// Optional options to further configure the . + /// The kernel builder. + public static IKernelBuilder AddAzureAISearchVectorStore(this IKernelBuilder builder, Uri endpoint, AzureKeyCredential credential, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + { + builder.Services.AddAzureAISearchVectorStore(endpoint, credential, serviceId, options); + return builder; + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs new file mode 100644 index 000000000000..32ad0ed2b1fa --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs @@ -0,0 +1,98 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Azure; +using Azure.Core; +using Azure.Search.Documents.Indexes; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.Data; + +namespace Microsoft.SemanticKernel.Connectors.AzureAISearch; + +/// +/// Extension methods to register Azure AI Search instances on an . 
+/// +public static class AzureAISearchServiceCollectionExtensions +{ + /// + /// Register an Azure AI Search with the specified service ID and where is retrieved from the dependency injection container. + /// + /// The to register the on. + /// An optional service id to use as the service key. + /// Optional options to further configure the . + /// The kernel builder. + public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + { + services.AddKeyedTransient( + serviceId, + (sp, obj) => + { + var searchIndexClient = sp.GetRequiredService(); + var selectedOptions = options ?? sp.GetService(); + + return new AzureAISearchVectorStore( + searchIndexClient, + selectedOptions); + }); + + return services; + } + + /// + /// Register an Azure AI Search with the provided and and the specified service ID. + /// + /// The to register the on. + /// The service endpoint for Azure AI Search. + /// The credential to authenticate to Azure AI Search with. + /// An optional service id to use as the service key. + /// Optional options to further configure the . + /// The kernel builder. + public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, Uri endpoint, TokenCredential tokenCredential, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + { + Verify.NotNull(endpoint); + Verify.NotNull(tokenCredential); + + services.AddKeyedTransient( + serviceId, + (sp, obj) => + { + var searchIndexClient = new SearchIndexClient(endpoint, tokenCredential); + var selectedOptions = options ?? sp.GetService(); + + return new AzureAISearchVectorStore( + searchIndexClient, + selectedOptions); + }); + + return services; + } + + /// + /// Register an Azure AI Search with the provided and and the specified service ID. + /// + /// The to register the on. + /// The service endpoint for Azure AI Search. 
+ /// The credential to authenticate to Azure AI Search with. + /// An optional service id to use as the service key. + /// Optional options to further configure the . + /// The kernel builder. + public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, Uri endpoint, AzureKeyCredential credential, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + { + Verify.NotNull(endpoint); + Verify.NotNull(credential); + + services.AddKeyedTransient( + serviceId, + (sp, obj) => + { + var searchIndexClient = new SearchIndexClient(endpoint, credential); + var selectedOptions = options ?? sp.GetService(); + + return new AzureAISearchVectorStore( + searchIndexClient, + selectedOptions); + }); + + return services; + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs new file mode 100644 index 000000000000..4d0605cceca8 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel.Data; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Extension methods to register Qdrant instances on the . +/// +public static class QdrantKernelBuilderExtensions +{ + /// + /// Register a Qdrant with the specified service ID. + /// + /// The builder to register the on. + /// The Qdrant service host name. + /// The Qdrant service port. + /// A value indicating whether to use HTTPS for communicating with Qdrant. + /// The Qdrant service API key. + /// An optional service id to use as the service key. + /// Optional options to further configure the . + /// The kernel builder. + public static IKernelBuilder AddQdrantVectorStore(this IKernelBuilder builder, string? host = default, int port = 6334, bool https = false, string? apiKey = default, string? 
serviceId = default, QdrantVectorStoreOptions? options = default) + { + builder.Services.AddQdrantVectorStore(host, port, https, apiKey, serviceId, options); + return builder; + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs new file mode 100644 index 000000000000..3470274e7a02 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.Data; +using Qdrant.Client; + +namespace Microsoft.SemanticKernel.Connectors.Qdrant; + +/// +/// Extension methods to register Qdrant instances on an . +/// +public static class QdrantServiceCollectionExtensions +{ + /// + /// Register a Qdrant with the specified service ID. + /// + /// The to register the on. + /// The Qdrant service host name. + /// The Qdrant service port. + /// A value indicating whether to use HTTPS for communicating with Qdrant. + /// The Qdrant service API key. + /// An optional service id to use as the service key. + /// Optional options to further configure the . + /// The kernel builder. + public static IServiceCollection AddQdrantVectorStore(this IServiceCollection services, string? host = default, int port = 6334, bool https = false, string? apiKey = default, string? serviceId = default, QdrantVectorStoreOptions? options = default) + { + services.AddKeyedTransient( + serviceId, + (sp, obj) => + { + var qdrantClient = host == null ? sp.GetRequiredService() : new QdrantClient(host, port, https, apiKey); + var selectedOptions = options ?? 
sp.GetService(); + + return new QdrantVectorStore( + qdrantClient, + selectedOptions); + }); + + return services; + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs new file mode 100644 index 000000000000..1bc6374d6f93 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel.Data; +using StackExchange.Redis; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Extension methods to register Redis instances on the . +/// +public static class RedisKernelBuilderExtensions +{ + /// + /// Register a Redis with the specified service ID. + /// + /// The builder to register the on. + /// The Redis connection configuration string. If not provided, an instance will be requested from the dependency injection container. + /// An optional service id to use as the service key. + /// Optional options to further configure the . + /// The kernel builder. + public static IKernelBuilder AddRedisVectorStore(this IKernelBuilder builder, string? redisConnectionConfiguration = default, string? serviceId = default, RedisVectorStoreOptions? options = default) + { + builder.Services.AddRedisVectorStore(redisConnectionConfiguration, serviceId, options); + return builder; + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs new file mode 100644 index 000000000000..fb608884af10 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.Data; +using StackExchange.Redis; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Extension methods to register Redis instances on an . +/// +public static class RedisServiceCollectionExtensions +{ + /// + /// Register a Redis with the specified service ID. + /// + /// The to register the on. + /// The Redis connection configuration string. If not provided, an instance will be requested from the dependency injection container. + /// An optional service id to use as the service key. + /// Optional options to further configure the . + /// The kernel builder. + public static IServiceCollection AddRedisVectorStore(this IServiceCollection services, string? redisConnectionConfiguration = default, string? serviceId = default, RedisVectorStoreOptions? options = default) + { + if (redisConnectionConfiguration == null) + { + // If we are not constructing the ConnectionMultiplexer, add the IVectorStore as transient, since we + // cannot make assumptions about how IDatabase is being managed. + services.AddKeyedTransient( + serviceId, + (sp, obj) => + { + var database = sp.GetRequiredService(); + var selectedOptions = options ?? sp.GetService(); + + return new RedisVectorStore( + database, + selectedOptions); + }); + + return services; + } + + // If we are constructing the ConnectionMultiplexer, add the IVectorStore as singleton, since we are managing the lifetime + // of the ConnectionMultiplexer, and the recommendation from StackExchange.Redis is to share the ConnectionMultiplexer. + services.AddKeyedSingleton( + serviceId, + (sp, obj) => + { + var database = ConnectionMultiplexer.Connect(redisConnectionConfiguration).GetDatabase(); + var selectedOptions = options ?? 
sp.GetService(); + + return new RedisVectorStore( + database, + selectedOptions); + }); + + return services; + } +} diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantKernelBuilderExtensionsTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantKernelBuilderExtensionsTests.cs new file mode 100644 index 000000000000..f0b4f327c0f0 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantKernelBuilderExtensionsTests.cs @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.Qdrant; +using Microsoft.SemanticKernel.Data; +using Qdrant.Client; +using Xunit; + +namespace SemanticKernel.Connectors.Qdrant.UnitTests; + +/// +/// Tests for the class. +/// +public class QdrantKernelBuilderExtensionsTests +{ + private readonly IKernelBuilder _kernelBuilder; + + public QdrantKernelBuilderExtensionsTests() + { + this._kernelBuilder = Kernel.CreateBuilder(); + } + + [Fact] + public void AddVectorStoreRegistersClass() + { + // Arrange. + using var qdrantClient = new QdrantClient("localhost"); + this._kernelBuilder.Services.AddSingleton(qdrantClient); + + // Act. + this._kernelBuilder.AddQdrantVectorStore(); + + // Assert. + this.AssertVectorStoreCreated(); + } + + [Fact] + public void AddVectorStoreWithHostAndPortAndCredsRegistersClass() + { + // Act. + this._kernelBuilder.AddQdrantVectorStore("localhost", 8080, true, "apikey"); + + // Assert. + this.AssertVectorStoreCreated(); + } + + [Fact] + public void AddVectorStoreWithHostRegistersClass() + { + // Act. + this._kernelBuilder.AddQdrantVectorStore("localhost"); + + // Assert. 
+ this.AssertVectorStoreCreated(); + } + + private void AssertVectorStoreCreated() + { + var kernel = this._kernelBuilder.Build(); + var vectorStore = kernel.Services.GetRequiredService(); + Assert.NotNull(vectorStore); + Assert.IsType(vectorStore); + } +} diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantServiceCollectionExtensionsTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantServiceCollectionExtensionsTests.cs new file mode 100644 index 000000000000..056b8cfaf9d1 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantServiceCollectionExtensionsTests.cs @@ -0,0 +1,64 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.Connectors.Qdrant; +using Microsoft.SemanticKernel.Data; +using Qdrant.Client; +using Xunit; + +namespace SemanticKernel.Connectors.Qdrant.UnitTests; + +/// +/// Tests for the class. +/// +public class QdrantServiceCollectionExtensionsTests +{ + private readonly IServiceCollection _serviceCollection; + + public QdrantServiceCollectionExtensionsTests() + { + this._serviceCollection = new ServiceCollection(); + } + + [Fact] + public void AddVectorStoreRegistersClass() + { + // Arrange. + using var qdrantClient = new QdrantClient("localhost"); + this._serviceCollection.AddSingleton(qdrantClient); + + // Act. + this._serviceCollection.AddQdrantVectorStore(); + + // Assert. + this.AssertVectorStoreCreated(); + } + + [Fact] + public void AddVectorStoreWithHostAndPortAndCredsRegistersClass() + { + // Act. + this._serviceCollection.AddQdrantVectorStore("localhost", 8080, true, "apikey"); + + // Assert. + this.AssertVectorStoreCreated(); + } + + [Fact] + public void AddVectorStoreWithHostRegistersClass() + { + // Act. + this._serviceCollection.AddQdrantVectorStore("localhost"); + + // Assert. 
+ this.AssertVectorStoreCreated(); + } + + private void AssertVectorStoreCreated() + { + var serviceProvider = this._serviceCollection.BuildServiceProvider(); + var vectorStore = serviceProvider.GetRequiredService(); + Assert.NotNull(vectorStore); + Assert.IsType(vectorStore); + } +} diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisKernelBuilderExtensionsTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisKernelBuilderExtensionsTests.cs new file mode 100644 index 000000000000..dcb8383b1525 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisKernelBuilderExtensionsTests.cs @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.Redis; +using Microsoft.SemanticKernel.Data; +using Moq; +using StackExchange.Redis; +using Xunit; + +namespace SemanticKernel.Connectors.Redis.UnitTests; + +/// +/// Tests for the class. +/// +public class RedisKernelBuilderExtensionsTests +{ + private readonly IKernelBuilder _kernelBuilder; + + public RedisKernelBuilderExtensionsTests() + { + this._kernelBuilder = Kernel.CreateBuilder(); + } + + [Fact] + public void AddVectorStoreRegistersClass() + { + // Arrange. + this._kernelBuilder.Services.AddSingleton(Mock.Of()); + + // Act. + this._kernelBuilder.AddRedisVectorStore(); + + // Assert. 
+ this.AssertVectorStoreCreated(); + } + + private void AssertVectorStoreCreated() + { + var kernel = this._kernelBuilder.Build(); + var vectorStore = kernel.Services.GetRequiredService(); + Assert.NotNull(vectorStore); + Assert.IsType(vectorStore); + } +} diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisServiceCollectionExtensionsTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisServiceCollectionExtensionsTests.cs new file mode 100644 index 000000000000..fe08b6d568b6 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisServiceCollectionExtensionsTests.cs @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.Connectors.Redis; +using Microsoft.SemanticKernel.Data; +using Moq; +using StackExchange.Redis; +using Xunit; + +namespace SemanticKernel.Connectors.Redis.UnitTests; + +/// +/// Tests for the class. +/// +public class RedisServiceCollectionExtensionsTests +{ + private readonly IServiceCollection _serviceCollection; + + public RedisServiceCollectionExtensionsTests() + { + this._serviceCollection = new ServiceCollection(); + } + + [Fact] + public void AddVectorStoreRegistersClass() + { + // Arrange. + this._serviceCollection.AddSingleton(Mock.Of()); + + // Act. + this._serviceCollection.AddRedisVectorStore(); + + // Assert. 
+ this.AssertVectorStoreCreated(); + } + + private void AssertVectorStoreCreated() + { + var serviceProvider = this._serviceCollection.BuildServiceProvider(); + var vectorStore = serviceProvider.GetRequiredService(); + Assert.NotNull(vectorStore); + Assert.IsType(vectorStore); + } +} diff --git a/dotnet/src/SemanticKernel.Core/Data/KernelBuilderExtensions.cs b/dotnet/src/SemanticKernel.Core/Data/KernelBuilderExtensions.cs new file mode 100644 index 000000000000..75897402d6ee --- /dev/null +++ b/dotnet/src/SemanticKernel.Core/Data/KernelBuilderExtensions.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.SemanticKernel.Data; + +/// +/// Extension methods to register Data services on the . +/// +public static class KernelBuilderExtensions +{ + /// + /// Register a Volatile with the specified service ID. + /// + /// The builder to register the on. + /// An optional service id to use as the service key. + /// The kernel builder. + public static IKernelBuilder AddVolatileVectorStore(this IKernelBuilder builder, string? serviceId = default) + { + builder.Services.AddVolatileVectorStore(serviceId); + return builder; + } +} diff --git a/dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs b/dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs new file mode 100644 index 000000000000..3997a78ddd79 --- /dev/null +++ b/dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; + +namespace Microsoft.SemanticKernel.Data; + +/// +/// Extension methods to register Data services on an . +/// +public static class ServiceCollectionExtensions +{ + /// + /// Register a Volatile with the specified service ID. + /// + /// The to register the on. + /// An optional service id to use as the service key. + /// The kernel builder. 
+ public static IServiceCollection AddVolatileVectorStore(this IServiceCollection services, string? serviceId = default) + { + services.AddKeyedSingleton(serviceId); + return services; + } +} diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/KernelBuilderExtensionsTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/KernelBuilderExtensionsTests.cs new file mode 100644 index 000000000000..2f1f3923c3c4 --- /dev/null +++ b/dotnet/src/SemanticKernel.UnitTests/Data/KernelBuilderExtensionsTests.cs @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Data; +using Xunit; + +namespace SemanticKernel.UnitTests.Data; + +/// +/// Contains tests for . +/// +public class KernelBuilderExtensionsTests +{ + private readonly IKernelBuilder _kernelBuilder; + + public KernelBuilderExtensionsTests() + { + this._kernelBuilder = Kernel.CreateBuilder(); + } + + [Fact] + public void AddVectorStoreRegistersClass() + { + // Act. + this._kernelBuilder.AddVolatileVectorStore(); + + // Assert. + var kernel = this._kernelBuilder.Build(); + var vectorStore = kernel.Services.GetRequiredService(); + Assert.NotNull(vectorStore); + Assert.IsType(vectorStore); + } +} diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/ServiceCollectionExtensionsTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/ServiceCollectionExtensionsTests.cs new file mode 100644 index 000000000000..9b8e934c11ca --- /dev/null +++ b/dotnet/src/SemanticKernel.UnitTests/Data/ServiceCollectionExtensionsTests.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.Data; +using Xunit; + +namespace SemanticKernel.UnitTests.Data; + +/// +/// Contains tests for the class. 
+/// +public class ServiceCollectionExtensionsTests +{ + private readonly IServiceCollection _serviceCollection; + + public ServiceCollectionExtensionsTests() + { + this._serviceCollection = new ServiceCollection(); + } + + [Fact] + public void AddVectorStoreRegistersClass() + { + // Act. + this._serviceCollection.AddVolatileVectorStore(); + + // Assert. + var serviceProvider = this._serviceCollection.BuildServiceProvider(); + var vectorStore = serviceProvider.GetRequiredService(); + Assert.NotNull(vectorStore); + Assert.IsType(vectorStore); + } +} From bb91cc69e907698e4391a3a45ae1372eed1ffc20 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Tue, 23 Jul 2024 18:32:59 +0100 Subject: [PATCH 29/48] .Net: Add Data ingestion sample for VectorStore (#7388) ### Motivation and Context Adding a sample that uses the new VectorStore implementations. Alternative suggestions for the best sample category for these are welcome. For now they are in the Memory folder. I have not updated the samples README.md yet, and will update, once we have a plan around where these should go. 
#7349 ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- dotnet/Directory.Packages.props | 1 + dotnet/samples/Concepts/Concepts.csproj | 1 + .../VectorStoreFixtures/VectorStoreInfra.cs | 108 ++++++++++ .../VectorStoreQdrantContainerFixture.cs | 31 +++ .../VectorStoreRedisContainerFixture.cs | 31 +++ .../Memory/VectorStore_DataIngestion.cs | 192 ++++++++++++++++++ 6 files changed, 364 insertions(+) create mode 100644 dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreInfra.cs create mode 100644 dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs create mode 100644 dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreRedisContainerFixture.cs create mode 100644 dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props index 152d4da183b8..c7fa1af94e1f 100644 --- a/dotnet/Directory.Packages.props +++ b/dotnet/Directory.Packages.props @@ -68,6 +68,7 @@ + diff --git a/dotnet/samples/Concepts/Concepts.csproj b/dotnet/samples/Concepts/Concepts.csproj index dd43184b6612..89cc2c897d61 100644 --- a/dotnet/samples/Concepts/Concepts.csproj +++ b/dotnet/samples/Concepts/Concepts.csproj @@ -14,6 +14,7 @@ + diff --git a/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreInfra.cs b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreInfra.cs new file mode 100644 index 000000000000..ea498f20c5ab --- /dev/null +++ b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreInfra.cs @@ -0,0 +1,108 @@ +// 
Copyright (c) Microsoft. All rights reserved. + +using Docker.DotNet; +using Docker.DotNet.Models; + +namespace Memory.VectorStoreFixtures; + +/// +/// Helper class that creates and deletes containers for the vector store examples. +/// +internal static class VectorStoreInfra +{ + /// + /// Setup the qdrant container by pulling the image and running it. + /// + /// The docker client to create the container with. + /// The id of the container. + public static async Task SetupQdrantContainerAsync(DockerClient client) + { + await client.Images.CreateImageAsync( + new ImagesCreateParameters + { + FromImage = "qdrant/qdrant", + Tag = "latest", + }, + null, + new Progress()); + + var container = await client.Containers.CreateContainerAsync(new CreateContainerParameters() + { + Image = "qdrant/qdrant", + HostConfig = new HostConfig() + { + PortBindings = new Dictionary> + { + {"6333", new List {new() {HostPort = "6333" } }}, + {"6334", new List {new() {HostPort = "6334" } }} + }, + PublishAllPorts = true + }, + ExposedPorts = new Dictionary + { + { "6333", default }, + { "6334", default } + }, + }); + + await client.Containers.StartContainerAsync( + container.ID, + new ContainerStartParameters()); + + return container.ID; + } + + /// + /// Setup the redis container by pulling the image and running it. + /// + /// The docker client to create the container with. + /// The id of the container. 
+ public static async Task SetupRedisContainerAsync(DockerClient client) + { + await client.Images.CreateImageAsync( + new ImagesCreateParameters + { + FromImage = "redis/redis-stack", + Tag = "latest", + }, + null, + new Progress()); + + var container = await client.Containers.CreateContainerAsync(new CreateContainerParameters() + { + Image = "redis/redis-stack", + HostConfig = new HostConfig() + { + PortBindings = new Dictionary> + { + {"6379", new List {new() {HostPort = "6379"}}}, + {"8001", new List {new() {HostPort = "8001"}}} + }, + PublishAllPorts = true + }, + ExposedPorts = new Dictionary + { + { "6379", default }, + { "8001", default } + }, + }); + + await client.Containers.StartContainerAsync( + container.ID, + new ContainerStartParameters()); + + return container.ID; + } + + /// + /// Stop and delete the container with the specified id. + /// + /// The docker client to delete the container in. + /// The id of the container to delete. + /// An async task. + public static async Task DeleteContainerAsync(DockerClient client, string containerId) + { + await client.Containers.StopContainerAsync(containerId, new ContainerStopParameters()); + await client.Containers.RemoveContainerAsync(containerId, new ContainerRemoveParameters()); + } +} diff --git a/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs new file mode 100644 index 000000000000..3f69902cbcc9 --- /dev/null +++ b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Docker.DotNet; + +namespace Memory.VectorStoreFixtures; + +/// +/// Fixture to use for creating a Qdrant container before tests and delete it after tests. +/// +public class VectorStoreQdrantContainerFixture : IAsyncLifetime +{ + private DockerClient? _dockerClient; + private string? 
_qdrantContainerId; + + public async Task InitializeAsync() + { + // Connect to docker and start the docker container. + using var dockerClientConfiguration = new DockerClientConfiguration(); + this._dockerClient = dockerClientConfiguration.CreateClient(); + this._qdrantContainerId = await VectorStoreInfra.SetupQdrantContainerAsync(this._dockerClient); + } + + public async Task DisposeAsync() + { + if (this._dockerClient != null && this._qdrantContainerId != null) + { + // Delete docker container. + await VectorStoreInfra.DeleteContainerAsync(this._dockerClient, this._qdrantContainerId); + } + } +} diff --git a/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreRedisContainerFixture.cs b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreRedisContainerFixture.cs new file mode 100644 index 000000000000..2760e5aaabaa --- /dev/null +++ b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreRedisContainerFixture.cs @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Docker.DotNet; + +namespace Memory.VectorStoreFixtures; + +/// +/// Fixture to use for creating a Redis container before tests and delete it after tests. +/// +public class VectorStoreRedisContainerFixture : IAsyncLifetime +{ + private DockerClient? _dockerClient; + private string? _redisContainerId; + + public async Task InitializeAsync() + { + // Connect to docker and start the docker container. + using var dockerClientConfiguration = new DockerClientConfiguration(); + this._dockerClient = dockerClientConfiguration.CreateClient(); + this._redisContainerId = await VectorStoreInfra.SetupRedisContainerAsync(this._dockerClient); + } + + public async Task DisposeAsync() + { + if (this._dockerClient != null && this._redisContainerId != null) + { + // Delete docker container. 
+ await VectorStoreInfra.DeleteContainerAsync(this._dockerClient, this._redisContainerId); + } + } +} diff --git a/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs new file mode 100644 index 000000000000..a69c3c3ed743 --- /dev/null +++ b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs @@ -0,0 +1,192 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json; +using Memory.VectorStoreFixtures; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.Qdrant; +using Microsoft.SemanticKernel.Connectors.Redis; +using Microsoft.SemanticKernel.Data; +using Microsoft.SemanticKernel.Embeddings; + +namespace Memory; + +/// +/// An example showing how to ingest data into a vector store using , or . +/// Since Redis and Volatile supports string keys and Qdrant supports ulong or Guid keys, this example also shows how you can have common code +/// that works with both types of keys by using a generic key generator function. +/// +/// The example shows the following steps: +/// 1. Register a vector store and embedding generator with the DI container. +/// 2. Register a class (DataIngestor) with the DI container that uses the vector store and embedding generator to ingest data. +/// 3. Ingest some data into the vector store. +/// 4. Read the data back from the vector store. +/// +/// To run this sample, you need a local instance of Docker running, since the associated fixtures will try and start Redis and Qdrant containers in the local docker instance. +/// +public class VectorStore_DataIngestion(ITestOutputHelper output) : BaseTest(output), IClassFixture, IClassFixture +{ + /// + /// Main entry point for example. + /// + /// The type of database to run the example for. 
+ [Theory] + [InlineData("Redis")] + [InlineData("Qdrant")] + [InlineData("Volatile")] + public async Task ExampleAsync(string databaseType) + { + // Use the kernel for DI purposes. + var kernelBuilder = Kernel + .CreateBuilder(); + + // Register an embedding generation service with the DI container. + kernelBuilder.AddAzureOpenAITextEmbeddingGeneration( + deploymentName: TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, + endpoint: TestConfiguration.AzureOpenAIEmbeddings.Endpoint, + apiKey: TestConfiguration.AzureOpenAIEmbeddings.ApiKey); + + // Register the chosen vector store with the DI container. + if (databaseType == "Redis") + { + kernelBuilder.AddRedisVectorStore("localhost:6379"); + } + else if (databaseType == "Qdrant") + { + kernelBuilder.AddQdrantVectorStore("localhost"); + } + else if (databaseType == "Volatile") + { + kernelBuilder.AddVolatileVectorStore(); + } + + // Register the DataIngestor with the DI container. + kernelBuilder.Services.AddTransient(); + + // Build the kernel. + var kernel = kernelBuilder.Build(); + + // Build a DataIngestor object using the DI container. + var dataIngestor = kernel.GetRequiredService(); + + // Invoke the data ingestor using an appropriate key generator function for each database type. + // Redis and Volatile supports string keys, while Qdrant supports ulong or Guid keys, so we use a different key generator for each key type. + if (databaseType == "Redis" || databaseType == "Volatile") + { + await this.UpsertDataAndReadFromVectorStoreAsync(dataIngestor, () => Guid.NewGuid().ToString()); + } + else if (databaseType == "Qdrant") + { + await this.UpsertDataAndReadFromVectorStoreAsync(dataIngestor, () => Guid.NewGuid()); + } + } + + private async Task UpsertDataAndReadFromVectorStoreAsync(DataIngestor dataIngestor, Func uniqueKeyGenerator) + { + // Ingest some data into the vector store. + var upsertedKeys = dataIngestor.ImportDataAsync(uniqueKeyGenerator).Result; + + // Get one of the upserted records. 
+ var upsertedRecord = dataIngestor.GetGlossaryAsync(upsertedKeys.First()).Result; + + // Write upserted keys and one of the upserted records to the console. + Console.WriteLine($"Upserted keys: {string.Join(", ", upsertedKeys)}"); + Console.WriteLine($"Upserted record: {JsonSerializer.Serialize(upsertedRecord)}"); + } + + /// + /// Sample class that does ingestion of sample data into a vector store and allows retrieval of data from the vector store. + /// + /// The vector store to ingest data into. + /// Used to generate embeddings for the data being ingested. + private sealed class DataIngestor(IVectorStore vectorStore, ITextEmbeddingGenerationService textEmbeddingGenerationService) + { + /// + /// Create some glossary entries and upsert them into the vector store. + /// + /// The keys of the upserted glossary entries. + /// The type of the keys in the vector store. + public async Task> ImportDataAsync(Func uniqueKeyGenerator) + { + // Get and create collection if it doesn't exist. + var collection = vectorStore.GetCollection>("skglossary"); + await collection.CreateCollectionIfNotExistsAsync(); + + // Create glossary entries and generate embeddings for them. + var glossaryEntries = CreateGlossaryEntries(uniqueKeyGenerator).ToList(); + await Parallel.ForEachAsync(glossaryEntries, async (entry, cancellationToken) => + { + entry.DefinitionEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(entry.Definition, cancellationToken: cancellationToken); + }); + + // Upsert the glossary entries into the collection and return their keys. + var upsertedKeys = glossaryEntries.Select(x => collection.UpsertAsync(x)); + return await Task.WhenAll(upsertedKeys); + } + + /// + /// Get a glossary entry from the vector store. + /// + /// The key of the glossary entry to retrieve. + /// The glossary entry. + /// The type of the keys in the vector store. 
+ public Task?> GetGlossaryAsync(TKey key) + { + var collection = vectorStore.GetCollection>("skglossary"); + return collection.GetAsync(key, new() { IncludeVectors = true }); + } + } + + /// + /// Create some sample glossary entries. + /// + /// The type of the model key. + /// A function that can be used to generate unique keys for the model in the type that the model requires. + /// A list of sample glossary entries. + private static IEnumerable> CreateGlossaryEntries(Func uniqueKeyGenerator) + { + yield return new Glossary + { + Key = uniqueKeyGenerator(), + Term = "API", + Definition = "Application Programming Interface. A set of rules and specifications that allow software components to communicate and exchange data." + }; + + yield return new Glossary + { + Key = uniqueKeyGenerator(), + Term = "Connectors", + Definition = "Connectors allow you to integrate with various services provide AI capabilities, including LLM, AudioToText, TextToAudio, Embedding generation, etc." + }; + + yield return new Glossary + { + Key = uniqueKeyGenerator(), + Term = "RAG", + Definition = "Retrieval Augmented Generation - a term that refers to the process of retrieving additional data to provide as context to an LLM to use when generating a response (completion) to a user’s question (prompt)." + }; + } + + /// + /// Sample model class that represents a glossary entry. + /// + /// + /// Note that each property is decorated with an attribute that specifies how the property should be treated by the vector store. + /// This allows us to create a collection in the vector store and upsert and retrieve instances of this class without any further configuration. + /// + /// The type of the model key. 
+ private sealed class Glossary + { + [VectorStoreRecordKey] + public TKey Key { get; set; } + + [VectorStoreRecordData] + public string Term { get; set; } + + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = nameof(DefinitionEmbedding))] + public string Definition { get; set; } + + [VectorStoreRecordVector(1536)] + public ReadOnlyMemory DefinitionEmbedding { get; set; } + } +} From a2dbb2f1cc93b8ab96d9bffe0c65c48e6e6a3db3 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Wed, 24 Jul 2024 08:41:15 +0100 Subject: [PATCH 30/48] .Net: Add missing experimental attributes and fix typos. (#7411) ### Motivation and Context Adding experimental attributes to artifacts that were missed before and fixing a few typos. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../AzureAISearchVectorStoreCollectionCreateMapping.cs | 2 +- .../QdrantVectorStoreCollectionCreateMapping.cs | 2 +- .../RedisVectorStoreCollectionCreateMapping.cs | 2 +- dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs | 2 ++ .../Data/RecordDefinition/DistanceFunction.cs | 3 +++ .../Data/RecordDefinition/IndexKind.cs | 3 +++ .../Data/RecordOptions/GetRecordOptions.cs | 2 +- dotnet/src/SemanticKernel.Core/Data/KernelBuilderExtensions.cs | 3 +++ .../SemanticKernel.Core/Data/ServiceCollectionExtensions.cs | 2 ++ 9 files changed, 17 insertions(+), 4 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs 
b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs index 2d8cab78ccd1..3df9084a133b 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs @@ -55,7 +55,7 @@ public static (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfigu { if (vectorProperty.Dimensions is not > 0) { - throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive ingeteger to create a collection."); + throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive integer to create a collection."); } // Build a name for the profile and algorithm configuration based on the property name diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs index 4984cae771fb..348dc23ae3b1 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs @@ -57,7 +57,7 @@ public static VectorParams MapSingleVector(VectorStoreRecordVectorProperty vecto { if (vectorProperty!.Dimensions is not > 0) { - throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive ingeteger to create a collection."); + throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} 
'{vectorProperty.PropertyName}' must be set to a positive integer to create a collection."); } if (vectorProperty!.IndexKind is not null && vectorProperty!.IndexKind != IndexKind.Hnsw) diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs index f7a4e362eafa..8cfcc589d89a 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs @@ -87,7 +87,7 @@ public static Schema MapToSchema(IEnumerable properti { if (vectorProperty.Dimensions is not > 0) { - throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive ingeteger to create a collection."); + throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive integer to create a collection."); } var indexKind = GetSDKIndexKind(vectorProperty); diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs index 9146c20d2c4a..bf09077b5b7a 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; using System.Threading; namespace Microsoft.SemanticKernel.Data; @@ -11,6 +12,7 @@ namespace Microsoft.SemanticKernel.Data; /// /// This interface can be used with collections of any schema type, but requires you to provide schema information when getting a collection. 
/// +[Experimental("SKEXP0001")] public interface IVectorStore { /// diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs index 1d87f9d2a7f2..52d181337da3 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs @@ -1,10 +1,13 @@ // Copyright (c) Microsoft. All rights reserved. +using System.Diagnostics.CodeAnalysis; + namespace Microsoft.SemanticKernel.Data; /// /// Defines the distance functions that can be used to compare vectors. /// +[Experimental("SKEXP0001")] public static class DistanceFunction { /// diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs index 02451513b9ea..0a59454c42b7 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs @@ -1,10 +1,13 @@ // Copyright (c) Microsoft. All rights reserved. +using System.Diagnostics.CodeAnalysis; + namespace Microsoft.SemanticKernel.Data; /// /// Defines the index types that can be used to index vectors. /// +[Experimental("SKEXP0001")] public static class IndexKind { /// diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs index fa86c2b6d5db..18e59ec7b9d9 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs @@ -27,7 +27,7 @@ public GetRecordOptions(GetRecordOptions source) } /// - /// Get or sets a value indicating whether to include vectors in the retrieval result. 
+ /// Gets or sets a value indicating whether to include vectors in the retrieval result. /// public bool IncludeVectors { get; init; } = false; } diff --git a/dotnet/src/SemanticKernel.Core/Data/KernelBuilderExtensions.cs b/dotnet/src/SemanticKernel.Core/Data/KernelBuilderExtensions.cs index 75897402d6ee..251dee88a4f3 100644 --- a/dotnet/src/SemanticKernel.Core/Data/KernelBuilderExtensions.cs +++ b/dotnet/src/SemanticKernel.Core/Data/KernelBuilderExtensions.cs @@ -1,10 +1,13 @@ // Copyright (c) Microsoft. All rights reserved. +using System.Diagnostics.CodeAnalysis; + namespace Microsoft.SemanticKernel.Data; /// /// Extension methods to register Data services on the . /// +[Experimental("SKEXP0001")] public static class KernelBuilderExtensions { /// diff --git a/dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs b/dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs index 3997a78ddd79..9d789f8ce93f 100644 --- a/dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs +++ b/dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using System.Diagnostics.CodeAnalysis; using Microsoft.Extensions.DependencyInjection; namespace Microsoft.SemanticKernel.Data; @@ -7,6 +8,7 @@ namespace Microsoft.SemanticKernel.Data; /// /// Extension methods to register Data services on an . /// +[Experimental("SKEXP0001")] public static class ServiceCollectionExtensions { /// From 1ed097087e551779cb2f8e48092837dfdb9b025e Mon Sep 17 00:00:00 2001 From: Maurycy Markowski Date: Wed, 24 Jul 2024 02:07:52 -0700 Subject: [PATCH 31/48] .Net: dotnet Pinecone connector improvements: move to Pinecone.NET and new VectorRecordStore API (#7127) ### Motivation and Context SK memory connectors APIs are being re-designed and the individual connector implementations need to adjust accordingly. This work handles the transition for Pinecone database. 
To take advantage of the large breaking changes, this change also delegates low-level operations against the database to the Pinecone.NET package. Addresses https://github.com/microsoft/semantic-kernel/issues/6678 ### Description Pinecone recently went through a big overhaul of their APIs (https://docs.pinecone.io/guides/operations/migrate-to-the-new-api). The old API was deprecated and will be removed in the future. Additionally, Pinecone added support for serverless indexes, which are supposed to be much cheaper in terms of operating costs (up to 50x according to their blog post - https://www.pinecone.io/blog/serverless/). SK's low-level driver was lacking those improvements, while Pinecone.NET is up to date, so users can take advantage of the new features right away. This move also avoids future duplication of effort within the .NET ecosystem and lowers the maintenance cost on the SK side. Testing: Pinecone does not have a local development/testing story, so the integration tests must run against the live service. Added xunit infrastructure which allows for conditionally executing (or skipping) the tests.
In order to run them, Pinecone API key must be provided via user secrets, like so: dotnet user-secrets set "PineconeApiKey" "your_Pinecone_API_key" ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations (not in the code I added) - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- dotnet/Directory.Packages.props | 1 + .../Connectors.Memory.Pinecone.csproj | 1 + ...econeVectorStoreRecordCollectionFactory.cs | 23 + .../PineconeVectorStore.cs | 83 +++ ...econeVectorStoreCollectionCreateMapping.cs | 46 ++ .../PineconeVectorStoreOptions.cs | 14 + .../PineconeVectorStoreRecordCollection.cs | 285 +++++++++ ...econeVectorStoreRecordCollectionOptions.cs | 49 ++ .../PineconeVectorStoreRecordMapper.cs | 194 ++++++ ...drantVectorStoreCollectionCreateMapping.cs | 2 +- .../Memory/Pinecone/PineconeAllTypes.cs | 64 ++ .../Memory/Pinecone/PineconeHotel.cs | 39 ++ .../Pinecone/PineconeUserSecretsExtensions.cs | 37 ++ .../Pinecone/PineconeVectorStoreFixture.cs | 345 +++++++++++ ...ineconeVectorStoreRecordCollectionTests.cs | 564 ++++++++++++++++++ .../Pinecone/PineconeVectorStoreTests.cs | 59 ++ .../Memory/Pinecone/Xunit/ITestCondition.cs | 12 + .../PineconeApiKeySetConditionAttribute.cs | 20 + .../Pinecone/Xunit/PineconeFactAttribute.cs | 11 + .../Pinecone/Xunit/PineconeFactDiscoverer.cs | 19 + .../Pinecone/Xunit/PineconeFactTestCase.cs | 42 ++ .../Pinecone/Xunit/PineconeTheoryAttribute.cs | 11 + .../Xunit/PineconeTheoryDiscoverer.cs | 36 ++ .../Pinecone/Xunit/PineconeTheoryTestCase.cs | 41 ++ .../Pinecone/Xunit/XunitTestCaseExtensions.cs | 55 ++ .../IntegrationTests/IntegrationTests.csproj | 2 + 
.../Data/VectorStoreRecordPropertyReader.cs | 57 +- .../VectorStoreRecordVectorAttribute.cs | 2 +- .../Data/RecordDefinition/DistanceFunction.cs | 4 +- .../VectorStoreRecordPropertyReaderTests.cs | 2 +- 30 files changed, 2090 insertions(+), 30 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Memory.Pinecone/IPineconeVectorStoreRecordCollectionFactory.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStore.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreOptions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollectionOptions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeAllTypes.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeHotel.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeUserSecretsExtensions.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreFixture.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreTests.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/ITestCondition.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeApiKeySetConditionAttribute.cs create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactAttribute.cs create mode 100644 
dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactDiscoverer.cs
 create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactTestCase.cs
 create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryAttribute.cs
 create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryDiscoverer.cs
 create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryTestCase.cs
 create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/XunitTestCaseExtensions.cs

diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props
index c7fa1af94e1f..b59aa7714c51 100644
--- a/dotnet/Directory.Packages.props
+++ b/dotnet/Directory.Packages.props
@@ -30,6 +30,7 @@
+
diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/Connectors.Memory.Pinecone.csproj b/dotnet/src/Connectors/Connectors.Memory.Pinecone/Connectors.Memory.Pinecone.csproj
index 462a89b0bd8b..69b47fe172f0 100644
--- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/Connectors.Memory.Pinecone.csproj
+++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/Connectors.Memory.Pinecone.csproj
@@ -19,6 +19,7 @@
+
diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/IPineconeVectorStoreRecordCollectionFactory.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/IPineconeVectorStoreRecordCollectionFactory.cs
new file mode 100644
index 000000000000..cc993159b247
--- /dev/null
+++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/IPineconeVectorStoreRecordCollectionFactory.cs
@@ -0,0 +1,23 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using Microsoft.SemanticKernel.Data;
+using Sdk = Pinecone;
+
+namespace Microsoft.SemanticKernel.Connectors.Pinecone;
+
+/// <summary>
+/// Interface for constructing <see cref="IVectorStoreRecordCollection{TKey, TRecord}"/> Pinecone instances when using <see cref="IVectorStore"/> to retrieve these.
+/// </summary>
+public interface IPineconeVectorStoreRecordCollectionFactory
+{
+    /// <summary>
+    /// Constructs a new instance of the <see cref="IVectorStoreRecordCollection{TKey, TRecord}"/>.
+    /// </summary>
+    /// <typeparam name="TKey">The data type of the record key.</typeparam>
+    /// <typeparam name="TRecord">The data model to use for adding, updating and retrieving data from storage.</typeparam>
+    /// <param name="pineconeClient">Pinecone client that can be used to manage the collections and vectors in a Pinecone store.</param>
+    /// <param name="name">The name of the collection to connect to.</param>
+    /// <param name="vectorStoreRecordDefinition">An optional record definition that defines the schema of the record type. If not present, attributes on <typeparamref name="TRecord"/> will be used.</param>
+    /// <returns>The new instance of <see cref="IVectorStoreRecordCollection{TKey, TRecord}"/>.</returns>
+    IVectorStoreRecordCollection<TKey, TRecord> CreateVectorStoreRecordCollection<TKey, TRecord>(Sdk.PineconeClient pineconeClient, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TRecord : class;
+}
diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStore.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStore.cs
new file mode 100644
index 000000000000..4c2ce5d9ecc9
--- /dev/null
+++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStore.cs
@@ -0,0 +1,83 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Threading;
+using Grpc.Core;
+using Microsoft.SemanticKernel.Data;
+using Pinecone;
+using Sdk = Pinecone;
+
+namespace Microsoft.SemanticKernel.Connectors.Pinecone;
+
+/// <summary>
+/// Class for accessing the list of collections in a Pinecone vector store.
+/// </summary>
+/// <remarks>
+/// This class can be used with collections of any schema type, but requires you to provide schema information when getting a collection.
+/// </remarks>
+public sealed class PineconeVectorStore : IVectorStore
+{
+    private const string DatabaseName = "Pinecone";
+    private const string ListCollectionsName = "ListCollections";
+
+    private readonly Sdk.PineconeClient _pineconeClient;
+    private readonly PineconeVectorStoreOptions _options;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="PineconeVectorStore"/> class.
+    /// </summary>
+    /// <param name="pineconeClient">Pinecone client that can be used to manage the collections and vectors in a Pinecone store.</param>
+    /// <param name="options">Optional configuration options for this class.</param>
+    public PineconeVectorStore(Sdk.PineconeClient pineconeClient, PineconeVectorStoreOptions? options = default)
+    {
+        Verify.NotNull(pineconeClient);
+
+        this._pineconeClient = pineconeClient;
+        this._options = options ?? new PineconeVectorStoreOptions();
+    }
+
+    /// <inheritdoc />
+    public IVectorStoreRecordCollection<TKey, TRecord> GetCollection<TKey, TRecord>(string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition = null) where TRecord : class
+    {
+        if (typeof(TKey) != typeof(string))
+        {
+            throw new NotSupportedException("Only string keys are supported.");
+        }
+
+        if (this._options.VectorStoreCollectionFactory is not null)
+        {
+            return this._options.VectorStoreCollectionFactory.CreateVectorStoreRecordCollection<TKey, TRecord>(this._pineconeClient, name, vectorStoreRecordDefinition);
+        }
+
+        return (new PineconeVectorStoreRecordCollection<TRecord>(
+            this._pineconeClient,
+            name,
+            new PineconeVectorStoreRecordCollectionOptions<TRecord>() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection<TKey, TRecord>)!;
+    }
+
+    /// <inheritdoc />
+    public async IAsyncEnumerable<string> ListCollectionNamesAsync([EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        IndexDetails[] collections;
+
+        try
+        {
+            collections = await this._pineconeClient.ListIndexes(cancellationToken).ConfigureAwait(false);
+        }
+        catch (RpcException ex)
+        {
+            throw new VectorStoreOperationException("Call to vector store failed.", ex)
+            {
+                VectorStoreType = DatabaseName,
+                OperationName = ListCollectionsName
+            };
+        }
+
+        foreach (var collection in collections)
+        {
+            yield return collection.Name;
+        }
+    }
+}
diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs
new file mode 100644
index 000000000000..754c19f8eaa3
--- /dev/null
+++
b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs
@@ -0,0 +1,46 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using Microsoft.SemanticKernel.Data;
+using Pinecone;
+
+namespace Microsoft.SemanticKernel.Connectors.Pinecone;
+
+/// <summary>
+/// Contains mapping helpers to use when creating a Pinecone vector collection.
+/// </summary>
+internal static class PineconeVectorStoreCollectionCreateMapping
+{
+    /// <summary>
+    /// Maps information stored in <see cref="VectorStoreRecordVectorProperty"/> to a structure used by Pinecone SDK to create a serverless index.
+    /// </summary>
+    /// <param name="vectorProperty">The property to map.</param>
+    /// <returns>The structure containing settings used to create a serverless index.</returns>
+    /// <exception cref="InvalidOperationException">Thrown if the property is missing information or has unsupported options specified.</exception>
+    public static (uint Dimension, Metric Metric) MapServerlessIndex(VectorStoreRecordVectorProperty vectorProperty)
+    {
+        if (vectorProperty.Dimensions is not > 0)
+        {
+            throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive integer to create a collection.");
+        }
+
+        return (Dimension: (uint)vectorProperty.Dimensions, Metric: GetSDKMetricAlgorithm(vectorProperty));
+    }
+
+    /// <summary>
+    /// Get the configured <see cref="Metric"/> from the given <paramref name="vectorProperty"/>.
+    /// If none is configured, the default is <see cref="Metric.Cosine"/>.
+    /// </summary>
+    /// <param name="vectorProperty">The vector property definition.</param>
+    /// <returns>The chosen <see cref="Metric"/>.</returns>
+    /// <exception cref="InvalidOperationException">Thrown if a distance function is chosen that isn't supported by Pinecone.</exception>
+    public static Metric GetSDKMetricAlgorithm(VectorStoreRecordVectorProperty vectorProperty)
+        => vectorProperty.DistanceFunction switch
+        {
+            DistanceFunction.CosineSimilarity => Metric.Cosine,
+            DistanceFunction.DotProductSimilarity => Metric.DotProduct,
+            DistanceFunction.EuclideanDistance => Metric.Euclidean,
+            null => Metric.Cosine,
+            _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.")
+        };
+}
diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreOptions.cs
new file mode 100644
index 000000000000..7a6fc9767f62
--- /dev/null
+++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreOptions.cs
@@ -0,0 +1,14 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+namespace Microsoft.SemanticKernel.Connectors.Pinecone;
+
+/// <summary>
+/// Options when creating a <see cref="PineconeVectorStore"/>.
+/// </summary>
+public sealed class PineconeVectorStoreOptions
+{
+    /// <summary>
+    /// An optional factory to use for constructing <see cref="PineconeVectorStoreRecordCollection{TRecord}"/> instances, if custom options are required.
+    /// </summary>
+    public IPineconeVectorStoreRecordCollectionFactory? VectorStoreCollectionFactory { get; init; }
+}
diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs
new file mode 100644
index 000000000000..8a353ed6ea96
--- /dev/null
+++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs
@@ -0,0 +1,285 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Threading;
+using System.Threading.Tasks;
+using Grpc.Core;
+using Microsoft.SemanticKernel.Data;
+using Pinecone.Grpc;
+using Sdk = Pinecone;
+
+namespace Microsoft.SemanticKernel.Connectors.Pinecone;
+
+/// <summary>
+/// Service for storing and retrieving vector records, that uses Pinecone as the underlying storage.
+/// </summary>
+/// <typeparam name="TRecord">The data model to use for adding, updating and retrieving data from storage.</typeparam>
+#pragma warning disable CA1711 // Identifiers should not have incorrect suffix
+public sealed class PineconeVectorStoreRecordCollection<TRecord> : IVectorStoreRecordCollection<string, TRecord>
+#pragma warning restore CA1711 // Identifiers should not have incorrect suffix
+    where TRecord : class
+{
+    private const string DatabaseName = "Pinecone";
+    private const string CreateCollectionName = "CreateCollection";
+    private const string CollectionExistsName = "CollectionExists";
+    private const string DeleteCollectionName = "DeleteCollection";
+
+    private const string UpsertOperationName = "Upsert";
+    private const string DeleteOperationName = "Delete";
+    private const string GetOperationName = "Get";
+
+    private readonly Sdk.PineconeClient _pineconeClient;
+    private readonly PineconeVectorStoreRecordCollectionOptions<TRecord> _options;
+    private readonly VectorStoreRecordDefinition _vectorStoreRecordDefinition;
+    private readonly IVectorStoreRecordMapper<TRecord, Sdk.Vector> _mapper;
+
+    private Sdk.Index<GrpcTransport>? _index;
+
+    /// <inheritdoc />
+    public string CollectionName { get; }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="PineconeVectorStoreRecordCollection{TRecord}"/> class.
+    /// </summary>
+    /// <param name="pineconeClient">Pinecone client that can be used to manage the collections and vectors in a Pinecone store.</param>
+    /// <param name="collectionName">The name of the collection that this <see cref="PineconeVectorStoreRecordCollection{TRecord}"/> will access.</param>
+    /// <param name="options">Optional configuration options for this class.</param>
+    /// <exception cref="ArgumentNullException">Thrown if the <paramref name="pineconeClient"/> is null.</exception>
+    /// <exception cref="ArgumentException">Thrown for any misconfigured options.</exception>
+    public PineconeVectorStoreRecordCollection(Sdk.PineconeClient pineconeClient, string collectionName, PineconeVectorStoreRecordCollectionOptions<TRecord>? options = null)
+    {
+        Verify.NotNull(pineconeClient);
+        Verify.NotNullOrWhiteSpace(collectionName);
+        this._pineconeClient = pineconeClient;
+        this.CollectionName = collectionName;
+        this._options = options ?? new PineconeVectorStoreRecordCollectionOptions<TRecord>();
+        this._vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true);
+
+        if (this._options.VectorCustomMapper is null)
+        {
+            (PropertyInfo KeyProperty, List<PropertyInfo> DataProperties, List<PropertyInfo> VectorProperties) properties;
+            if (this._options.VectorStoreRecordDefinition is not null)
+            {
+                properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: false);
+            }
+            else
+            {
+                properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: false);
+            }
+
+            var storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties, this._options.VectorStoreRecordDefinition);
+            this._mapper = new PineconeVectorStoreRecordMapper<TRecord>(properties.KeyProperty, properties.DataProperties, properties.VectorProperties, storagePropertyNames);
+        }
+        else
+        {
+            this._mapper = this._options.VectorCustomMapper;
+        }
+    }
+
+    /// <inheritdoc />
+    public async Task<bool> CollectionExistsAsync(CancellationToken cancellationToken = default)
+    {
+        var result = await this.RunOperationAsync(
+            CollectionExistsName,
+            async () =>
+            {
+                var collections = await this._pineconeClient.ListIndexes(cancellationToken).ConfigureAwait(false);
+
+                return collections.Any(x => x.Name == this.CollectionName);
+            }).ConfigureAwait(false);
+
+        return result;
+    }
+
+    /// <inheritdoc />
+    public async Task CreateCollectionAsync(CancellationToken cancellationToken = default)
+    {
+        // we already run through record property validation, so a single VectorStoreRecordVectorProperty is guaranteed.
+        var vectorProperty = this._vectorStoreRecordDefinition.Properties.OfType<VectorStoreRecordVectorProperty>().First();
+        var (dimension, metric) = PineconeVectorStoreCollectionCreateMapping.MapServerlessIndex(vectorProperty);
+
+        await this.RunOperationAsync(
+            CreateCollectionName,
+            () => this._pineconeClient.CreateServerlessIndex(
+                this.CollectionName,
+                dimension,
+                metric,
+                this._options.ServerlessIndexCloud,
+                this._options.ServerlessIndexRegion,
+                cancellationToken)).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc />
+    public async Task CreateCollectionIfNotExistsAsync(CancellationToken cancellationToken = default)
+    {
+        if (!await this.CollectionExistsAsync(cancellationToken).ConfigureAwait(false))
+        {
+            await this.CreateCollectionAsync(cancellationToken).ConfigureAwait(false);
+        }
+    }
+
+    /// <inheritdoc />
+    public Task DeleteCollectionAsync(CancellationToken cancellationToken = default)
+        => this.RunOperationAsync(
+            DeleteCollectionName,
+            () => this._pineconeClient.DeleteIndex(this.CollectionName, cancellationToken));
+
+    /// <inheritdoc />
+    public async Task<TRecord?> GetAsync(string key, GetRecordOptions? options = null, CancellationToken cancellationToken = default)
+    {
+        Verify.NotNull(key);
+
+        var records = await this.GetBatchAsync([key], options, cancellationToken).ToListAsync(cancellationToken).ConfigureAwait(false);
+
+        return records.FirstOrDefault();
+    }
+
+    /// <inheritdoc />
+    public async IAsyncEnumerable<TRecord> GetBatchAsync(
+        IEnumerable<string> keys,
+        GetRecordOptions? options = default,
+        [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        Verify.NotNull(keys);
+
+        var indexNamespace = this.GetIndexNamespace();
+        var mapperOptions = new StorageToDataModelMapperOptions { IncludeVectors = options?.IncludeVectors ?? false };
+
+        var index = await this.GetIndexAsync(this.CollectionName, cancellationToken).ConfigureAwait(false);
+
+        var results = await this.RunOperationAsync(
+            GetOperationName,
+            () => index.Fetch(keys, indexNamespace, cancellationToken)).ConfigureAwait(false);
+
+        var records = VectorStoreErrorHandler.RunModelConversion(
+            DatabaseName,
+            this.CollectionName,
+            GetOperationName,
+            () => results.Values.Select(x => this._mapper.MapFromStorageToDataModel(x, mapperOptions)).ToList());
+
+        foreach (var record in records)
+        {
+            yield return record;
+        }
+    }
+
+    /// <inheritdoc />
+    public Task DeleteAsync(string key, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default)
+    {
+        Verify.NotNullOrWhiteSpace(key);
+
+        return this.DeleteBatchAsync([key], options, cancellationToken);
+    }
+
+    /// <inheritdoc />
+    public async Task DeleteBatchAsync(IEnumerable<string> keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default)
+    {
+        Verify.NotNull(keys);
+
+        var indexNamespace = this.GetIndexNamespace();
+
+        var index = await this.GetIndexAsync(this.CollectionName, cancellationToken).ConfigureAwait(false);
+
+        await this.RunOperationAsync(
+            DeleteOperationName,
+            () => index.Delete(keys, indexNamespace, cancellationToken)).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc />
+    public async Task<string> UpsertAsync(TRecord record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default)
+    {
+        Verify.NotNull(record);
+
+        var indexNamespace = this.GetIndexNamespace();
+
+        var index = await this.GetIndexAsync(this.CollectionName, cancellationToken).ConfigureAwait(false);
+
+        var vector = VectorStoreErrorHandler.RunModelConversion(
+            DatabaseName,
+            this.CollectionName,
+            UpsertOperationName,
+            () => this._mapper.MapFromDataToStorageModel(record));
+
+        await this.RunOperationAsync(
+            UpsertOperationName,
+            () => index.Upsert([vector], indexNamespace, cancellationToken)).ConfigureAwait(false);
+
+        return vector.Id;
+    }
+
+    /// <inheritdoc />
+    public async IAsyncEnumerable<string> UpsertBatchAsync(
+        IEnumerable<TRecord> records,
+        UpsertRecordOptions? options = default,
+        [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        Verify.NotNull(records);
+
+        var indexNamespace = this.GetIndexNamespace();
+
+        var index = await this.GetIndexAsync(this.CollectionName, cancellationToken).ConfigureAwait(false);
+
+        var vectors = VectorStoreErrorHandler.RunModelConversion(
+            DatabaseName,
+            this.CollectionName,
+            UpsertOperationName,
+            () => records.Select(this._mapper.MapFromDataToStorageModel).ToList());
+
+        await this.RunOperationAsync(
+            UpsertOperationName,
+            () => index.Upsert(vectors, indexNamespace, cancellationToken)).ConfigureAwait(false);
+
+        foreach (var vector in vectors)
+        {
+            yield return vector.Id;
+        }
+    }
+
+    private async Task<T> RunOperationAsync<T>(string operationName, Func<Task<T>> operation)
+    {
+        try
+        {
+            return await operation.Invoke().ConfigureAwait(false);
+        }
+        catch (RpcException ex)
+        {
+            throw new VectorStoreOperationException("Call to vector store failed.", ex)
+            {
+                VectorStoreType = DatabaseName,
+                CollectionName = this.CollectionName,
+                OperationName = operationName
+            };
+        }
+    }
+
+    private async Task RunOperationAsync(string operationName, Func<Task> operation)
+    {
+        try
+        {
+            await operation.Invoke().ConfigureAwait(false);
+        }
+        catch (RpcException ex)
+        {
+            throw new VectorStoreOperationException("Call to vector store failed.", ex)
+            {
+                VectorStoreType = DatabaseName,
+                CollectionName = this.CollectionName,
+                OperationName = operationName
+            };
+        }
+    }
+
+    private async Task<Sdk.Index<GrpcTransport>> GetIndexAsync(string indexName, CancellationToken cancellationToken)
+    {
+        this._index ??= await this._pineconeClient.GetIndex(indexName, cancellationToken).ConfigureAwait(false);
+
+        return this._index;
+    }
+
+    private string? GetIndexNamespace()
+        => this._options.IndexNamespace;
+}
diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollectionOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollectionOptions.cs
new file mode 100644
index 000000000000..f328524ec758
--- /dev/null
+++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollectionOptions.cs
@@ -0,0 +1,49 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using Microsoft.SemanticKernel.Data;
+using Pinecone;
+
+namespace Microsoft.SemanticKernel.Connectors.Pinecone;
+
+/// <summary>
+/// Options when creating a <see cref="PineconeVectorStoreRecordCollection{TRecord}"/>.
+/// </summary>
+public sealed class PineconeVectorStoreRecordCollectionOptions<TRecord>
+    where TRecord : class
+{
+    /// <summary>
+    /// Gets or sets an optional custom mapper to use when converting between the data model and the Pinecone vector.
+    /// </summary>
+    public IVectorStoreRecordMapper<TRecord, Vector>? VectorCustomMapper { get; init; } = null;
+
+    /// <summary>
+    /// Gets or sets an optional record definition that defines the schema of the record type.
+    /// </summary>
+    /// <remarks>
+    /// If not provided, the schema will be inferred from the record model class using reflection.
+    /// In this case, the record model properties must be annotated with the appropriate attributes to indicate their usage.
+    /// See <see cref="VectorStoreRecordKeyAttribute"/>, <see cref="VectorStoreRecordDataAttribute"/> and <see cref="VectorStoreRecordVectorAttribute"/>.
+    /// </remarks>
+    public VectorStoreRecordDefinition? VectorStoreRecordDefinition { get; init; } = null;
+
+    /// <summary>
+    /// Gets or sets the value for a namespace within the Pinecone index that will be used for operations involving records (Get, Upsert, Delete).
+    /// </summary>
+    public string? IndexNamespace { get; init; } = null;
+
+    /// <summary>
+    /// Gets or sets the value for public cloud where the serverless index is hosted.
+    /// </summary>
+    /// <remarks>
+    /// This value is only used when creating a new Pinecone index. Default value is 'aws'.
+    /// </remarks>
+    public string ServerlessIndexCloud { get; init; } = "aws";
+
+    /// <summary>
+    /// Gets or sets the value for region where the serverless index is created.
+    /// </summary>
+    /// <remarks>
+    /// This option is only used when creating a new Pinecone index. Default value is 'us-east-1'.
+    /// </remarks>
+    public string ServerlessIndexRegion { get; init; } = "us-east-1";
+}
diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs
new file mode 100644
index 000000000000..5eb3e6d5e8ca
--- /dev/null
+++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs
@@ -0,0 +1,193 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Reflection;
+using System.Text.Json;
+using System.Text.Json.Nodes;
+using Microsoft.SemanticKernel.Data;
+using Pinecone;
+
+namespace Microsoft.SemanticKernel.Connectors.Pinecone;
+
+/// <summary>
+/// Mapper between a Pinecone record and the consumer data model that uses json as an intermediary to allow supporting a wide range of models.
+/// </summary>
+/// <typeparam name="TRecord">The consumer data model to map to or from.</typeparam>
+internal sealed class PineconeVectorStoreRecordMapper<TRecord> : IVectorStoreRecordMapper<TRecord, Vector>
+    where TRecord : class
+{
+    /// <summary>A set of types that a key on the provided model may have.</summary>
+    private static readonly HashSet<Type> s_supportedKeyTypes = [typeof(string)];
+
+    /// <summary>A set of types that data properties on the provided model may have.</summary>
+    private static readonly HashSet<Type> s_supportedDataTypes =
+    [
+        typeof(bool),
+        typeof(bool?),
+        typeof(string),
+        typeof(int),
+        typeof(int?),
+        typeof(long),
+        typeof(long?),
+        typeof(float),
+        typeof(float?),
+        typeof(double),
+        typeof(double?),
+        typeof(decimal),
+        typeof(decimal?),
+    ];
+
+    /// <summary>A set of types that enumerable data properties on the provided model may use as their element types.</summary>
+    private static readonly HashSet<Type> s_supportedEnumerableDataElementTypes =
+    [
+        typeof(string)
+    ];
+
+    /// <summary>A set of types that vectors on the provided model may have.</summary>
+    private static readonly HashSet<Type> s_supportedVectorTypes =
+    [
+        typeof(ReadOnlyMemory<float>),
+        typeof(ReadOnlyMemory<float>?),
+    ];
+
+    private readonly PropertyInfo _keyPropertyInfo;
+
+    private readonly List<PropertyInfo> _dataPropertiesInfo;
+
+    private readonly PropertyInfo _vectorPropertyInfo;
+
+    private readonly Dictionary<string, string> _storagePropertyNames = [];
+
+    private readonly Dictionary<string, string> _jsonPropertyNames = [];
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="PineconeVectorStoreRecordMapper{TRecord}"/> class.
+    /// </summary>
+    /// <param name="keyProperty">A property info object that points at the key property for the current model, allowing easy reading and writing of this property.</param>
+    /// <param name="dataProperties">A list of property info objects that point at the data properties in the current model, and allows easy reading and writing of these properties.</param>
+    /// <param name="vectorProperties">A list of property info objects that point at the vector properties in the current model, and allows easy reading and writing of these properties.</param>
+    /// <param name="storagePropertyNames">A dictionary that maps from a property name to the configured name that should be used when storing it.</param>
+    public PineconeVectorStoreRecordMapper(PropertyInfo keyProperty, List<PropertyInfo> dataProperties, List<PropertyInfo> vectorProperties, Dictionary<string, string> storagePropertyNames)
+    {
+        Verify.True(vectorProperties.Count == 1, "There should be exactly one vector property in the data model.");
+
+        VectorStoreRecordPropertyReader.VerifyPropertyTypes([keyProperty], s_supportedKeyTypes, "Key");
+        VectorStoreRecordPropertyReader.VerifyPropertyTypes(dataProperties, s_supportedDataTypes, "Data", s_supportedEnumerableDataElementTypes);
+        VectorStoreRecordPropertyReader.VerifyPropertyTypes(vectorProperties, s_supportedVectorTypes, "Vector");
+
+        this._keyPropertyInfo = keyProperty;
+        this._dataPropertiesInfo = dataProperties;
+        this._vectorPropertyInfo = vectorProperties[0];
+        this._storagePropertyNames = storagePropertyNames;
+
+        foreach (var property in dataProperties.Concat(vectorProperties).Concat([keyProperty]))
+        {
+            this._jsonPropertyNames[property.Name] = VectorStoreRecordPropertyReader.GetJsonPropertyName(JsonSerializerOptions.Default, property);
+        }
+    }
+
+    /// <inheritdoc />
+    public Vector MapFromDataToStorageModel(TRecord dataModel)
+    {
+        var keyObject = this._keyPropertyInfo.GetValue(dataModel);
+        if (keyObject is null)
+        {
+            throw new VectorStoreRecordMappingException($"Key property {this._keyPropertyInfo.Name} on provided record of type {typeof(TRecord).FullName} may not be null.");
+        }
+
+        var metadata = new MetadataMap();
+        foreach (var dataPropertyInfo in this._dataPropertiesInfo)
+        {
+            var propertyName = this._storagePropertyNames[dataPropertyInfo.Name];
+            var propertyValue = dataPropertyInfo.GetValue(dataModel);
+            if (propertyValue != null)
+            {
+                metadata[propertyName] = ConvertToMetadataValue(propertyValue);
+            }
+        }
+
+        var valuesObject = this._vectorPropertyInfo.GetValue(dataModel);
+        if (valuesObject is not ReadOnlyMemory<float> values)
+        {
+            throw new VectorStoreRecordMappingException($"Vector property {this._vectorPropertyInfo.Name} on provided record of type {typeof(TRecord).FullName} may not be null.");
+        }
+
+        // TODO: what about sparse values?
+        var result = new Vector
+        {
+            Id = (string)keyObject,
+            Values = values.ToArray(),
+            Metadata = metadata,
+            SparseValues = null
+        };
+
+        return result;
+    }
+
+    /// <inheritdoc />
+    public TRecord MapFromStorageToDataModel(Vector storageModel, StorageToDataModelMapperOptions options)
+    {
+        var keyJsonName = this._jsonPropertyNames[this._keyPropertyInfo.Name];
+        var outputJsonObject = new JsonObject
+        {
+            { keyJsonName, JsonValue.Create(storageModel.Id) },
+        };
+
+        if (options?.IncludeVectors is true)
+        {
+            var jsonName = this._jsonPropertyNames[this._vectorPropertyInfo.Name];
+            outputJsonObject.Add(jsonName, new JsonArray(storageModel.Values.Select(x => JsonValue.Create(x)).ToArray()));
+        }
+
+        if (storageModel.Metadata != null)
+        {
+            foreach (var dataProperty in this._dataPropertiesInfo)
+            {
+                var propertyName = this._storagePropertyNames[dataProperty.Name];
+                var jsonName = this._jsonPropertyNames[dataProperty.Name];
+
+                if (storageModel.Metadata.TryGetValue(propertyName, out var value))
+                {
+                    outputJsonObject.Add(jsonName, ConvertFromMetadataValueToJsonNode(value));
+                }
+            }
+        }
+
+        return outputJsonObject.Deserialize<TRecord>()!;
+    }
+
+    private static JsonNode? ConvertFromMetadataValueToJsonNode(MetadataValue metadataValue)
+        => metadataValue.Inner switch
+        {
+            null => null,
+            bool boolValue => JsonValue.Create(boolValue),
+            string stringValue => JsonValue.Create(stringValue),
+            int intValue => JsonValue.Create(intValue),
+            long longValue => JsonValue.Create(longValue),
+            float floatValue => JsonValue.Create(floatValue),
+            double doubleValue => JsonValue.Create(doubleValue),
+            decimal decimalValue => JsonValue.Create(decimalValue),
+            MetadataValue[] array => new JsonArray(array.Select(ConvertFromMetadataValueToJsonNode).ToArray()),
+            List<MetadataValue> list => new JsonArray(list.Select(ConvertFromMetadataValueToJsonNode).ToArray()),
+            _ => throw new VectorStoreRecordMappingException($"Unsupported metadata type: '{metadataValue.Inner?.GetType().FullName}'."),
+        };
+
+    // TODO: take advantage of MetadataValue.TryCreate once we upgrade the version of Pinecone.NET
+    private static MetadataValue ConvertToMetadataValue(object? sourceValue)
+        => sourceValue switch
+        {
+            bool boolValue => boolValue,
+            string stringValue => stringValue,
+            int intValue => intValue,
+            long longValue => longValue,
+            float floatValue => floatValue,
+            double doubleValue => doubleValue,
+            decimal decimalValue => decimalValue,
+            string[] stringArray => stringArray,
+            List<string> stringList => stringList,
+            IEnumerable<string> stringEnumerable => stringEnumerable.ToArray(),
+            _ => throw new VectorStoreRecordMappingException($"Unsupported source value type '{sourceValue?.GetType().FullName}'.")
+        };
+}
diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs
index 348dc23ae3b1..d0319463422d 100644
--- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs
+++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs
@@ -98,7 +98,7 @@ public static VectorParamsMap
MapNamedVectors(IEnumerable<VectorStoreRecordVecto
     /// </summary>
     /// <param name="vectorProperty">The vector property definition.</param>
     /// <returns>The chosen <see cref="Distance"/>.</returns>
-    /// <exception cref="InvalidOperationException">Thrown if a distance function is chosen that isn't supported by Azure AI Search.</exception>
+    /// <exception cref="InvalidOperationException">Thrown if a distance function is chosen that isn't supported by Qdrant.</exception>
     public static Distance GetSDKDistanceAlgorithm(VectorStoreRecordVectorProperty vectorProperty)
     {
         if (vectorProperty.DistanceFunction is null)
diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeAllTypes.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeAllTypes.cs
new file mode 100644
index 000000000000..63216da7046f
--- /dev/null
+++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeAllTypes.cs
@@ -0,0 +1,64 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using Microsoft.SemanticKernel.Data;
+
+namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone;
+
+#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring as nullable.
+public record PineconeAllTypes()
+#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring as nullable.
+{
+    [VectorStoreRecordKey]
+    public string Id { get; init; }
+
+    [VectorStoreRecordData]
+    public bool BoolProperty { get; set; }
+    [VectorStoreRecordData]
+    public bool? NullableBoolProperty { get; set; }
+    [VectorStoreRecordData]
+    public string StringProperty { get; set; }
+    [VectorStoreRecordData]
+    public string? NullableStringProperty { get; set; }
+    [VectorStoreRecordData]
+    public int IntProperty { get; set; }
+    [VectorStoreRecordData]
+    public int? NullableIntProperty { get; set; }
+    [VectorStoreRecordData]
+    public long LongProperty { get; set; }
+    [VectorStoreRecordData]
+    public long? NullableLongProperty { get; set; }
+    [VectorStoreRecordData]
+    public float FloatProperty { get; set; }
+    [VectorStoreRecordData]
+    public float? NullableFloatProperty { get; set; }
+    [VectorStoreRecordData]
+    public double DoubleProperty { get; set; }
+    [VectorStoreRecordData]
+    public double? NullableDoubleProperty { get; set; }
+    [VectorStoreRecordData]
+    public decimal DecimalProperty { get; set; }
+    [VectorStoreRecordData]
+    public decimal? NullableDecimalProperty { get; set; }
+
+#pragma warning disable CA1819 // Properties should not return arrays
+    [VectorStoreRecordData]
+    public string[] StringArray { get; set; }
+    [VectorStoreRecordData]
+    public string[]? NullableStringArray { get; set; }
+#pragma warning restore CA1819 // Properties should not return arrays
+
+    [VectorStoreRecordData]
+    public List<string> StringList { get; set; }
+    [VectorStoreRecordData]
+    public List<string>? NullableStringList { get; set; }
+
+    [VectorStoreRecordData]
+    public IReadOnlyCollection<string> Collection { get; set; }
+    [VectorStoreRecordData]
+    public IEnumerable<string> Enumerable { get; set; }
+
+    [VectorStoreRecordVector(Dimensions: 8, IndexKind: null, DistanceFunction: DistanceFunction.DotProductSimilarity)]
+    public ReadOnlyMemory<float>? Embedding { get; set; }
+}
diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeHotel.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeHotel.cs
new file mode 100644
index 000000000000..c648b10f2c62
--- /dev/null
+++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeHotel.cs
@@ -0,0 +1,39 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Text.Json.Serialization;
+using Microsoft.SemanticKernel.Data;
+
+namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone;
+
+#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring as nullable.
+public record PineconeHotel() +#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring as nullable. +{ + [VectorStoreRecordKey] + public string HotelId { get; init; } + + [VectorStoreRecordData] + public string HotelName { get; set; } + + [JsonPropertyName("code_of_the_hotel")] + [VectorStoreRecordData] + public int HotelCode { get; set; } + + [VectorStoreRecordData] + public float HotelRating { get; set; } + + [JsonPropertyName("json_parking")] + [VectorStoreRecordData(StoragePropertyName = "parking_is_included")] + public bool ParkingIncluded { get; set; } + + [VectorStoreRecordData] + public List Tags { get; set; } = []; + + [VectorStoreRecordData] + public string Description { get; set; } + + [VectorStoreRecordVector(Dimensions: 8, IndexKind: null, DistanceFunction: DistanceFunction.DotProductSimilarity)] + public ReadOnlyMemory DescriptionEmbedding { get; set; } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeUserSecretsExtensions.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeUserSecretsExtensions.cs new file mode 100644 index 000000000000..1644b7427e99 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeUserSecretsExtensions.cs @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; +using System.IO; +using System.Reflection; +using System.Text.Json; +using Microsoft.Extensions.Configuration.UserSecrets; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone; +public static class PineconeUserSecretsExtensions +{ + public const string PineconeApiKeyUserSecretEntry = "PineconeApiKey"; + + public static string ReadPineconeApiKey() + => JsonSerializer.Deserialize>( + File.ReadAllText(PathHelper.GetSecretsPathFromSecretsId( + typeof(PineconeUserSecretsExtensions).Assembly.GetCustomAttribute()! 
+ .UserSecretsId)))![PineconeApiKeyUserSecretEntry].Trim(); + + public static bool ContainsPineconeApiKey() + { + var userSecretsIdAttribute = typeof(PineconeUserSecretsExtensions).Assembly.GetCustomAttribute(); + if (userSecretsIdAttribute == null) + { + return false; + } + + var path = PathHelper.GetSecretsPathFromSecretsId(userSecretsIdAttribute.UserSecretsId); + if (!File.Exists(path)) + { + return false; + } + + return JsonSerializer.Deserialize>( + File.ReadAllText(path))!.ContainsKey(PineconeApiKeyUserSecretEntry); + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreFixture.cs new file mode 100644 index 000000000000..dbae7d21bf56 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreFixture.cs @@ -0,0 +1,345 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net.Http; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.Pinecone; +using Microsoft.SemanticKernel.Data; +using Pinecone.Grpc; +using Xunit; +using Sdk = Pinecone; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone; + +public class PineconeVectorStoreFixture : IAsyncLifetime +{ + private const int MaxAttemptCount = 100; + private const int DelayInterval = 300; + + public string IndexName { get; } = "sk-index" +#pragma warning disable CA1308 // Normalize strings to uppercase + + new Regex("[^a-zA-Z0-9]", RegexOptions.None, matchTimeout: new TimeSpan(0, 0, 10)).Replace(Environment.MachineName.ToLowerInvariant(), ""); +#pragma warning restore CA1308 // Normalize strings to uppercase + + public Sdk.PineconeClient Client { get; private set; } = null!; + public PineconeVectorStore VectorStore { get; private set; } = null!; + public PineconeVectorStoreRecordCollection 
HotelRecordCollection { get; set; } = null!; + public PineconeVectorStoreRecordCollection AllTypesRecordCollection { get; set; } = null!; + public PineconeVectorStoreRecordCollection HotelRecordCollectionWithCustomNamespace { get; set; } = null!; + public IVectorStoreRecordCollection HotelRecordCollectionFromVectorStore { get; set; } = null!; + + public virtual Sdk.Index Index { get; set; } = null!; + + public virtual async Task InitializeAsync() + { + this.Client = new Sdk.PineconeClient(PineconeUserSecretsExtensions.ReadPineconeApiKey()); + this.VectorStore = new PineconeVectorStore(this.Client); + + var hotelRecordDefinition = new VectorStoreRecordDefinition + { + Properties = + [ + new VectorStoreRecordKeyProperty(nameof(PineconeHotel.HotelId)), + new VectorStoreRecordDataProperty(nameof(PineconeHotel.HotelName)), + new VectorStoreRecordDataProperty(nameof(PineconeHotel.HotelCode)), + new VectorStoreRecordDataProperty(nameof(PineconeHotel.ParkingIncluded)) { StoragePropertyName = "parking_is_included" }, + new VectorStoreRecordDataProperty(nameof(PineconeHotel.HotelRating)), + new VectorStoreRecordDataProperty(nameof(PineconeHotel.Tags)), + new VectorStoreRecordDataProperty(nameof(PineconeHotel.Description)), + new VectorStoreRecordVectorProperty(nameof(PineconeHotel.DescriptionEmbedding)) { Dimensions = 8, DistanceFunction = DistanceFunction.DotProductSimilarity } + ] + }; + + var allTypesRecordDefinition = new VectorStoreRecordDefinition + { + Properties = + [ + new VectorStoreRecordKeyProperty(nameof(PineconeAllTypes.Id)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.BoolProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableBoolProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.StringProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableStringProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.IntProperty)), + new 
VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableIntProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.LongProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableLongProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.FloatProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableFloatProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.DoubleProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableDoubleProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.DecimalProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableDecimalProperty)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.StringArray)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableStringArray)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.StringList)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableStringList)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.Collection)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.Enumerable)), + new VectorStoreRecordVectorProperty(nameof(PineconeAllTypes.Embedding)) { Dimensions = 8, DistanceFunction = DistanceFunction.DotProductSimilarity } + ] + }; + + this.HotelRecordCollection = new PineconeVectorStoreRecordCollection( + this.Client, + this.IndexName, + new PineconeVectorStoreRecordCollectionOptions + { + VectorStoreRecordDefinition = hotelRecordDefinition + }); + + this.AllTypesRecordCollection = new PineconeVectorStoreRecordCollection( + this.Client, + this.IndexName, + new PineconeVectorStoreRecordCollectionOptions + { + VectorStoreRecordDefinition = allTypesRecordDefinition + }); + + this.HotelRecordCollectionWithCustomNamespace = new PineconeVectorStoreRecordCollection( + this.Client, + this.IndexName, + new PineconeVectorStoreRecordCollectionOptions + { + 
VectorStoreRecordDefinition = hotelRecordDefinition, + IndexNamespace = "my-namespace" + }); + + this.HotelRecordCollectionFromVectorStore = this.VectorStore.GetCollection( + this.IndexName, + hotelRecordDefinition); + + await this.ClearIndexesAsync(); + await this.CreateIndexAndWaitAsync(); + await this.AddSampleDataAsync(); + } + + private async Task CreateIndexAndWaitAsync() + { + var attemptCount = 0; + + await this.HotelRecordCollection.CreateCollectionAsync(); + + do + { + await Task.Delay(DelayInterval); + attemptCount++; + this.Index = await this.Client.GetIndex(this.IndexName); + } while (!this.Index.Status.IsReady && attemptCount <= MaxAttemptCount); + + if (!this.Index.Status.IsReady) + { + throw new InvalidOperationException("'Create index' operation didn't complete in time. Index name: " + this.IndexName); + } + } + + public async Task DisposeAsync() + { + if (this.Client is not null) + { + await this.ClearIndexesAsync(); + this.Client.Dispose(); + } + } + + private async Task AddSampleDataAsync() + { + var fiveSeasons = new PineconeHotel + { + HotelId = "five-seasons", + HotelName = "Five Seasons Hotel", + Description = "Great service any season.", + HotelCode = 7, + HotelRating = 4.5f, + ParkingIncluded = true, + DescriptionEmbedding = new ReadOnlyMemory([7.5f, 71.0f, 71.5f, 72.0f, 72.5f, 73.0f, 73.5f, 74.0f]), + Tags = ["wi-fi", "sauna", "gym", "pool"] + }; + + var vacationInn = new PineconeHotel + { + HotelId = "vacation-inn", + HotelName = "Vacation Inn Hotel", + Description = "On vacation? 
Stay with us.", + HotelCode = 11, + HotelRating = 4.3f, + ParkingIncluded = true, + DescriptionEmbedding = new ReadOnlyMemory([17.5f, 721.0f, 731.5f, 742.0f, 762.5f, 783.0f, 793.5f, 704.0f]), + Tags = ["wi-fi", "breakfast", "gym"] + }; + + var bestEastern = new PineconeHotel + { + HotelId = "best-eastern", + HotelName = "Best Eastern Hotel", + Description = "Best hotel east of New York.", + HotelCode = 42, + HotelRating = 4.7f, + ParkingIncluded = true, + DescriptionEmbedding = new ReadOnlyMemory([47.5f, 421.0f, 741.5f, 744.0f, 742.5f, 483.0f, 743.5f, 744.0f]), + Tags = ["wi-fi", "breakfast", "gym"] + }; + + var stats = await this.Index.DescribeStats(); + var vectorCountBefore = stats.TotalVectorCount; + + // use both Upsert and BatchUpsert methods and also use record collections created directly and using vector store + await this.HotelRecordCollection.UpsertAsync(fiveSeasons); + vectorCountBefore = await this.VerifyVectorCountModifiedAsync(vectorCountBefore, delta: 1); + + await this.HotelRecordCollectionFromVectorStore.UpsertBatchAsync([vacationInn, bestEastern]).ToListAsync(); + vectorCountBefore = await this.VerifyVectorCountModifiedAsync(vectorCountBefore, delta: 2); + + var allTypes1 = new PineconeAllTypes + { + Id = "all-types-1", + BoolProperty = true, + NullableBoolProperty = false, + StringProperty = "string prop 1", + NullableStringProperty = "nullable prop 1", + IntProperty = 1, + NullableIntProperty = 10, + LongProperty = 100L, + NullableLongProperty = 1000L, + FloatProperty = 10.5f, + NullableFloatProperty = 100.5f, + DoubleProperty = 23.75d, + NullableDoubleProperty = 233.75d, + DecimalProperty = 50.75m, + NullableDecimalProperty = 500.75m, + StringArray = ["one", "two"], + NullableStringArray = ["five", "six"], + StringList = ["eleven", "twelve"], + NullableStringList = ["fifteen", "sixteen"], + Collection = ["Foo", "Bar"], + Enumerable = ["another", "and another"], + Embedding = new ReadOnlyMemory([1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f, 8.5f]) 
+ }; + + var allTypes2 = new PineconeAllTypes + { + Id = "all-types-2", + BoolProperty = false, + NullableBoolProperty = null, + StringProperty = "string prop 2", + NullableStringProperty = null, + IntProperty = 2, + NullableIntProperty = null, + LongProperty = 200L, + NullableLongProperty = null, + FloatProperty = 20.5f, + NullableFloatProperty = null, + DoubleProperty = 43.75, + NullableDoubleProperty = null, + DecimalProperty = 250.75M, + NullableDecimalProperty = null, + StringArray = [], + NullableStringArray = null, + StringList = [], + NullableStringList = null, + Collection = [], + Enumerable = [], + Embedding = new ReadOnlyMemory([10.5f, 20.5f, 30.5f, 40.5f, 50.5f, 60.5f, 70.5f, 80.5f]) + }; + + await this.AllTypesRecordCollection.UpsertBatchAsync([allTypes1, allTypes2]).ToListAsync(); + vectorCountBefore = await this.VerifyVectorCountModifiedAsync(vectorCountBefore, delta: 2); + + var custom = new PineconeHotel + { + HotelId = "custom-hotel", + HotelName = "Custom Hotel", + Description = "Everything customizable!", + HotelCode = 17, + HotelRating = 4.25f, + ParkingIncluded = true, + DescriptionEmbedding = new ReadOnlyMemory([147.5f, 1421.0f, 1741.5f, 1744.0f, 1742.5f, 1483.0f, 1743.5f, 1744.0f]), + }; + + await this.HotelRecordCollectionWithCustomNamespace.UpsertAsync(custom); + vectorCountBefore = await this.VerifyVectorCountModifiedAsync(vectorCountBefore, delta: 1); + } + + public async Task VerifyVectorCountModifiedAsync(uint vectorCountBefore, int delta) + { + var attemptCount = 0; + Sdk.IndexStats stats; + + do + { + await Task.Delay(DelayInterval); + attemptCount++; + stats = await this.Index.DescribeStats(); + } while (stats.TotalVectorCount != vectorCountBefore + delta && attemptCount <= MaxAttemptCount); + + if (stats.TotalVectorCount != vectorCountBefore + delta) + { + throw new InvalidOperationException("'Upsert'/'Delete' operation didn't complete in time."); + } + + return stats.TotalVectorCount; + } + + public async Task 
DeleteAndWaitAsync(IEnumerable ids, string? indexNamespace = null) + { + var stats = await this.Index.DescribeStats(); + var vectorCountBefore = stats.Namespaces.Single(x => x.Name == (indexNamespace ?? "")).VectorCount; + var idCount = ids.Count(); + + var attemptCount = 0; + await this.Index.Delete(ids, indexNamespace); + long vectorCount; + do + { + await Task.Delay(DelayInterval); + attemptCount++; + stats = await this.Index.DescribeStats(); + vectorCount = stats.Namespaces.Single(x => x.Name == (indexNamespace ?? "")).VectorCount; + } while (vectorCount > vectorCountBefore - idCount && attemptCount <= MaxAttemptCount); + + if (vectorCount > vectorCountBefore - idCount) + { + throw new InvalidOperationException("'Delete' operation didn't complete in time."); + } + } + + private async Task ClearIndexesAsync() + { + var indexes = await this.Client.ListIndexes(); + var deletions = indexes.Select(x => this.DeleteExistingIndexAndWaitAsync(x.Name)); + + await Task.WhenAll(deletions); + } + + private async Task DeleteExistingIndexAndWaitAsync(string indexName) + { + var exists = true; + try + { + var attemptCount = 0; + await this.Client.DeleteIndex(indexName); + + do + { + await Task.Delay(DelayInterval); + var indexes = (await this.Client.ListIndexes()).Select(x => x.Name).ToArray(); + if (indexes.Length == 0 || !indexes.Contains(indexName)) + { + exists = false; + } + } while (exists && attemptCount <= MaxAttemptCount); + } + catch (HttpRequestException ex) when (ex.Message.Contains("NOT_FOUND")) + { + // index was already deleted + exists = false; + } + + if (exists) + { + throw new InvalidOperationException("'Delete index' operation didn't complete in time. 
Index name: " + indexName); + } + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs new file mode 100644 index 000000000000..2a762fd316cd --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs @@ -0,0 +1,564 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Linq; +using System.Net; +using System.Net.Http; +using System.Threading.Tasks; +using Grpc.Core; +using Microsoft.SemanticKernel.Connectors.Pinecone; +using Microsoft.SemanticKernel.Data; +using Pinecone; +using SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit; +using Xunit; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone; + +[Collection("PineconeVectorStoreTests")] +[PineconeApiKeySetCondition] +public class PineconeVectorStoreRecordCollectionTests(PineconeVectorStoreFixture fixture) : IClassFixture +{ + private PineconeVectorStoreFixture Fixture { get; } = fixture; + + [PineconeFact] + public async Task TryCreateExistingIndexIsNoopAsync() + { + await this.Fixture.HotelRecordCollection.CreateCollectionIfNotExistsAsync(); + } + + [PineconeFact] + public async Task CollectionExistsReturnsTrueForExistingCollectionAsync() + { + var result = await this.Fixture.HotelRecordCollection.CollectionExistsAsync(); + + Assert.True(result); + } + + [PineconeTheory] + [InlineData(true)] + [InlineData(false)] + public async Task BasicGetAsync(bool includeVectors) + { + var fiveSeasons = await this.Fixture.HotelRecordCollection.GetAsync("five-seasons", new GetRecordOptions { IncludeVectors = includeVectors }); + + Assert.NotNull(fiveSeasons); + Assert.Equal("five-seasons", fiveSeasons.HotelId); + Assert.Equal("Five Seasons Hotel", fiveSeasons.HotelName); + Assert.Equal("Great service any season.", fiveSeasons.Description); + 
Assert.Equal(7, fiveSeasons.HotelCode); + Assert.Equal(4.5f, fiveSeasons.HotelRating); + Assert.True(fiveSeasons.ParkingIncluded); + Assert.Contains("wi-fi", fiveSeasons.Tags); + Assert.Contains("sauna", fiveSeasons.Tags); + Assert.Contains("gym", fiveSeasons.Tags); + Assert.Contains("pool", fiveSeasons.Tags); + + if (includeVectors) + { + Assert.Equal(new ReadOnlyMemory([7.5f, 71.0f, 71.5f, 72.0f, 72.5f, 73.0f, 73.5f, 74.0f]), fiveSeasons.DescriptionEmbedding); + } + else + { + Assert.Equal(new ReadOnlyMemory([]), fiveSeasons.DescriptionEmbedding); + } + } + + [PineconeTheory] + [InlineData(true)] + [InlineData(false)] + public async Task BatchGetAsync(bool collectionFromVectorStore) + { + var hotelsCollection = collectionFromVectorStore + ? this.Fixture.HotelRecordCollection + : this.Fixture.HotelRecordCollectionFromVectorStore; + + var hotels = await hotelsCollection.GetBatchAsync(["five-seasons", "vacation-inn", "best-eastern"]).ToListAsync(); + + var fiveSeasons = hotels.Single(x => x.HotelId == "five-seasons"); + var vacationInn = hotels.Single(x => x.HotelId == "vacation-inn"); + var bestEastern = hotels.Single(x => x.HotelId == "best-eastern"); + + Assert.Equal("Five Seasons Hotel", fiveSeasons.HotelName); + Assert.Equal("Great service any season.", fiveSeasons.Description); + Assert.Equal(7, fiveSeasons.HotelCode); + Assert.Equal(4.5f, fiveSeasons.HotelRating); + Assert.True(fiveSeasons.ParkingIncluded); + Assert.Contains("wi-fi", fiveSeasons.Tags); + Assert.Contains("sauna", fiveSeasons.Tags); + Assert.Contains("gym", fiveSeasons.Tags); + Assert.Contains("pool", fiveSeasons.Tags); + + Assert.Equal("Vacation Inn Hotel", vacationInn.HotelName); + Assert.Equal("On vacation? 
Stay with us.", vacationInn.Description); + Assert.Equal(11, vacationInn.HotelCode); + Assert.Equal(4.3f, vacationInn.HotelRating); + Assert.True(vacationInn.ParkingIncluded); + Assert.Contains("wi-fi", vacationInn.Tags); + Assert.Contains("breakfast", vacationInn.Tags); + Assert.Contains("gym", vacationInn.Tags); + + Assert.Equal("Best Eastern Hotel", bestEastern.HotelName); + Assert.Equal("Best hotel east of New York.", bestEastern.Description); + Assert.Equal(42, bestEastern.HotelCode); + Assert.Equal(4.7f, bestEastern.HotelRating); + Assert.True(bestEastern.ParkingIncluded); + Assert.Contains("wi-fi", bestEastern.Tags); + Assert.Contains("breakfast", bestEastern.Tags); + Assert.Contains("gym", bestEastern.Tags); + } + + [PineconeTheory] + [InlineData(true)] + [InlineData(false)] + public async Task AllTypesBatchGetAsync(bool includeVectors) + { + var allTypes = await this.Fixture.AllTypesRecordCollection.GetBatchAsync(["all-types-1", "all-types-2"], new GetRecordOptions { IncludeVectors = includeVectors }).ToListAsync(); + + var allTypes1 = allTypes.Single(x => x.Id == "all-types-1"); + var allTypes2 = allTypes.Single(x => x.Id == "all-types-2"); + + Assert.True(allTypes1.BoolProperty); + Assert.Equal("string prop 1", allTypes1.StringProperty); + Assert.Equal(1, allTypes1.IntProperty); + Assert.Equal(100L, allTypes1.LongProperty); + Assert.Equal(10.5f, allTypes1.FloatProperty); + Assert.Equal(23.75d, allTypes1.DoubleProperty); + Assert.Equal(50.75m, allTypes1.DecimalProperty); + Assert.Contains("one", allTypes1.StringArray); + Assert.Contains("two", allTypes1.StringArray); + Assert.Contains("eleven", allTypes1.StringList); + Assert.Contains("twelve", allTypes1.StringList); + Assert.Contains("Foo", allTypes1.Collection); + Assert.Contains("Bar", allTypes1.Collection); + Assert.Contains("another", allTypes1.Enumerable); + Assert.Contains("and another", allTypes1.Enumerable); + + Assert.False(allTypes2.BoolProperty); + Assert.Equal("string prop 2", 
allTypes2.StringProperty); + Assert.Equal(2, allTypes2.IntProperty); + Assert.Equal(200L, allTypes2.LongProperty); + Assert.Equal(20.5f, allTypes2.FloatProperty); + Assert.Equal(43.75d, allTypes2.DoubleProperty); + Assert.Equal(250.75m, allTypes2.DecimalProperty); + Assert.Empty(allTypes2.StringArray); + Assert.Empty(allTypes2.StringList); + Assert.Empty(allTypes2.Collection); + Assert.Empty(allTypes2.Enumerable); + + if (includeVectors) + { + Assert.True(allTypes1.Embedding.HasValue); + Assert.Equal(new ReadOnlyMemory([1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f, 8.5f]), allTypes1.Embedding.Value); + + Assert.True(allTypes2.Embedding.HasValue); + Assert.Equal(new ReadOnlyMemory([10.5f, 20.5f, 30.5f, 40.5f, 50.5f, 60.5f, 70.5f, 80.5f]), allTypes2.Embedding.Value); + } + else + { + Assert.Null(allTypes1.Embedding); + Assert.Null(allTypes2.Embedding); + } + } + + [PineconeFact] + public async Task BatchGetIncludingNonExistingRecordAsync() + { + var hotels = await this.Fixture.HotelRecordCollection.GetBatchAsync(["vacation-inn", "non-existing"]).ToListAsync(); + + Assert.Single(hotels); + var vacationInn = hotels.Single(x => x.HotelId == "vacation-inn"); + + Assert.Equal("Vacation Inn Hotel", vacationInn.HotelName); + Assert.Equal("On vacation? 
Stay with us.", vacationInn.Description); + Assert.Equal(11, vacationInn.HotelCode); + Assert.Equal(4.3f, vacationInn.HotelRating); + Assert.True(vacationInn.ParkingIncluded); + Assert.Contains("wi-fi", vacationInn.Tags); + Assert.Contains("breakfast", vacationInn.Tags); + Assert.Contains("gym", vacationInn.Tags); + } + + [PineconeFact] + public async Task GetNonExistingRecordAsync() + { + var result = await this.Fixture.HotelRecordCollection.GetAsync("non-existing"); + Assert.Null(result); + } + + [PineconeTheory] + [InlineData(true)] + [InlineData(false)] + public async Task GetFromCustomNamespaceAsync(bool includeVectors) + { + var custom = await this.Fixture.HotelRecordCollectionWithCustomNamespace.GetAsync("custom-hotel", new GetRecordOptions { IncludeVectors = includeVectors }); + + Assert.NotNull(custom); + Assert.Equal("custom-hotel", custom.HotelId); + Assert.Equal("Custom Hotel", custom.HotelName); + if (includeVectors) + { + Assert.Equal(new ReadOnlyMemory([147.5f, 1421.0f, 1741.5f, 1744.0f, 1742.5f, 1483.0f, 1743.5f, 1744.0f]), custom.DescriptionEmbedding); + } + else + { + Assert.Equal(new ReadOnlyMemory([]), custom.DescriptionEmbedding); + } + } + + [PineconeFact] + public async Task TryGetVectorLocatedInDefaultNamespaceButLookInCustomNamespaceAsync() + { + var badFiveSeasons = await this.Fixture.HotelRecordCollectionWithCustomNamespace.GetAsync("five-seasons"); + + Assert.Null(badFiveSeasons); + } + + [PineconeFact] + public async Task TryGetVectorLocatedInCustomNamespaceButLookInDefaultNamespaceAsync() + { + var badCustomHotel = await this.Fixture.HotelRecordCollection.GetAsync("custom-hotel"); + + Assert.Null(badCustomHotel); + } + + [PineconeFact] + public async Task DeleteNonExistingRecordAsync() + { + await this.Fixture.HotelRecordCollection.DeleteAsync("non-existing"); + } + + [PineconeFact] + public async Task TryDeleteExistingVectorLocatedInDefaultNamespaceButUseCustomNamespaceDoesNotDoAnythingAsync() + { + await 
this.Fixture.HotelRecordCollectionWithCustomNamespace.DeleteAsync("five-seasons"); + + var stillThere = await this.Fixture.HotelRecordCollection.GetAsync("five-seasons"); + Assert.NotNull(stillThere); + Assert.Equal("five-seasons", stillThere.HotelId); + } + + [PineconeFact] + public async Task TryDeleteExistingVectorLocatedInCustomNamespaceButUseDefaultNamespaceDoesNotDoAnythingAsync() + { + await this.Fixture.HotelRecordCollection.DeleteAsync("custom-hotel"); + + var stillThere = await this.Fixture.HotelRecordCollectionWithCustomNamespace.GetAsync("custom-hotel"); + Assert.NotNull(stillThere); + Assert.Equal("custom-hotel", stillThere.HotelId); + } + + [PineconeTheory] + [InlineData(true)] + [InlineData(false)] + public async Task InsertGetModifyDeleteVectorAsync(bool collectionFromVectorStore) + { + var langriSha = new PineconeHotel + { + HotelId = "langri-sha", + HotelName = "Langri-Sha Hotel", + Description = "Lorem ipsum", + HotelCode = 100, + HotelRating = 4.2f, + ParkingIncluded = false, + DescriptionEmbedding = new ReadOnlyMemory([1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f]) + }; + + var stats = await this.Fixture.Index.DescribeStats(); + var vectorCountBefore = stats.TotalVectorCount; + + var hotelRecordCollection = collectionFromVectorStore + ? 
this.Fixture.HotelRecordCollectionFromVectorStore + : this.Fixture.HotelRecordCollection; + + // insert + await hotelRecordCollection.UpsertAsync(langriSha); + + vectorCountBefore = await this.Fixture.VerifyVectorCountModifiedAsync(vectorCountBefore, delta: 1); + + var inserted = await hotelRecordCollection.GetAsync("langri-sha", new GetRecordOptions { IncludeVectors = true }); + + Assert.NotNull(inserted); + Assert.Equal(langriSha.HotelName, inserted.HotelName); + Assert.Equal(langriSha.Description, inserted.Description); + Assert.Equal(langriSha.HotelCode, inserted.HotelCode); + Assert.Equal(langriSha.HotelRating, inserted.HotelRating); + Assert.Equal(langriSha.ParkingIncluded, inserted.ParkingIncluded); + Assert.Equal(langriSha.DescriptionEmbedding, inserted.DescriptionEmbedding); + + langriSha.Description += " dolor sit amet"; + langriSha.ParkingIncluded = true; + langriSha.DescriptionEmbedding = new ReadOnlyMemory([11f, 12f, 13f, 14f, 15f, 16f, 17f, 18f]); + + // update + await hotelRecordCollection.UpsertAsync(langriSha); + + // this is not great but no vectors are added so we can't query status for number of vectors like we do for insert/delete + await Task.Delay(2000); + + var updated = await hotelRecordCollection.GetAsync("langri-sha", new GetRecordOptions { IncludeVectors = true }); + + Assert.NotNull(updated); + Assert.Equal(langriSha.HotelName, updated.HotelName); + Assert.Equal(langriSha.Description, updated.Description); + Assert.Equal(langriSha.HotelCode, updated.HotelCode); + Assert.Equal(langriSha.HotelRating, updated.HotelRating); + Assert.Equal(langriSha.ParkingIncluded, updated.ParkingIncluded); + Assert.Equal(langriSha.DescriptionEmbedding, updated.DescriptionEmbedding); + + // delete + await hotelRecordCollection.DeleteAsync("langri-sha"); + + await this.Fixture.VerifyVectorCountModifiedAsync(vectorCountBefore, delta: -1); + } + + [PineconeFact] + public async Task UseCollectionExistsOnNonExistingStoreReturnsFalseAsync() + { + var 
incorrectRecordStore = new PineconeVectorStoreRecordCollection( + this.Fixture.Client, + "incorrect"); + + var result = await incorrectRecordStore.CollectionExistsAsync(); + + Assert.False(result); + } + + [PineconeFact] + public async Task UseNonExistingIndexThrowsAsync() + { + var incorrectRecordStore = new PineconeVectorStoreRecordCollection( + this.Fixture.Client, + "incorrect"); + + var statusCode = (await Assert.ThrowsAsync( + () => incorrectRecordStore.GetAsync("best-eastern"))).StatusCode; + + Assert.Equal(HttpStatusCode.NotFound, statusCode); + } + + [PineconeFact] + public async Task UseRecordStoreWithCustomMapperAsync() + { + var recordStore = new PineconeVectorStoreRecordCollection( + this.Fixture.Client, + this.Fixture.IndexName, + new PineconeVectorStoreRecordCollectionOptions { VectorCustomMapper = new CustomHotelRecordMapper() }); + + var vacationInn = await recordStore.GetAsync("vacation-inn", new GetRecordOptions { IncludeVectors = true }); + + Assert.NotNull(vacationInn); + Assert.Equal("Custom Vacation Inn Hotel", vacationInn.HotelName); + Assert.Equal("On vacation? 
Stay with us.", vacationInn.Description); + Assert.Equal(11, vacationInn.HotelCode); + Assert.Equal(4.3f, vacationInn.HotelRating); + Assert.True(vacationInn.ParkingIncluded); + Assert.Contains("wi-fi", vacationInn.Tags); + Assert.Contains("breakfast", vacationInn.Tags); + Assert.Contains("gym", vacationInn.Tags); + } + + private sealed class CustomHotelRecordMapper : IVectorStoreRecordMapper + { + public Vector MapFromDataToStorageModel(PineconeHotel dataModel) + { + var metadata = new MetadataMap + { + [nameof(PineconeHotel.HotelName)] = dataModel.HotelName, + [nameof(PineconeHotel.Description)] = dataModel.Description, + [nameof(PineconeHotel.HotelCode)] = dataModel.HotelCode, + [nameof(PineconeHotel.HotelRating)] = dataModel.HotelRating, + ["parking_is_included"] = dataModel.ParkingIncluded, + [nameof(PineconeHotel.Tags)] = dataModel.Tags.ToArray(), + }; + + return new Vector + { + Id = dataModel.HotelId, + Values = dataModel.DescriptionEmbedding.ToArray(), + Metadata = metadata, + }; + } + + public PineconeHotel MapFromStorageToDataModel(Vector storageModel, StorageToDataModelMapperOptions options) + { + if (storageModel.Metadata == null) + { + throw new InvalidOperationException("Missing metadata."); + } + + return new PineconeHotel + { + HotelId = storageModel.Id, + HotelName = "Custom " + (string)storageModel.Metadata[nameof(PineconeHotel.HotelName)].Inner!, + Description = (string)storageModel.Metadata[nameof(PineconeHotel.Description)].Inner!, + HotelCode = (int)(double)storageModel.Metadata[nameof(PineconeHotel.HotelCode)].Inner!, + HotelRating = (float)(double)storageModel.Metadata[nameof(PineconeHotel.HotelRating)].Inner!, + ParkingIncluded = (bool)storageModel.Metadata["parking_is_included"].Inner!, + Tags = ((MetadataValue[])storageModel.Metadata[nameof(PineconeHotel.Tags)].Inner!)!.Select(x => (string)x.Inner!).ToList(), + }; + } + } + + #region Negative + + [PineconeFact] + public void UseRecordWithNoEmbeddingThrows() + { + var exception = 
Assert.Throws( + () => new PineconeVectorStoreRecordCollection( + this.Fixture.Client, + "Whatever")); + + Assert.Equal( + $"No vector property found on type {typeof(PineconeRecordNoEmbedding).FullName}.", + exception.Message); + } + +#pragma warning disable CA1812 + private sealed record PineconeRecordNoEmbedding + { + [VectorStoreRecordKey] + public int Id { get; set; } + + [VectorStoreRecordData] + public string? Name { get; set; } + } +#pragma warning restore CA1812 + + [PineconeFact] + public void UseRecordWithMultipleEmbeddingsThrows() + { + var exception = Assert.Throws( + () => new PineconeVectorStoreRecordCollection( + this.Fixture.Client, + "Whatever")); + + Assert.Equal( + $"Multiple vector properties found on type {typeof(PineconeRecordMultipleEmbeddings).FullName} while only one is supported.", + exception.Message); + } + +#pragma warning disable CA1812 + private sealed record PineconeRecordMultipleEmbeddings + { + [VectorStoreRecordKey] + public string Id { get; set; } = null!; + + [VectorStoreRecordVector] + public ReadOnlyMemory Embedding1 { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory Embedding2 { get; set; } + } +#pragma warning restore CA1812 + + [PineconeFact] + public void UseRecordWithUnsupportedKeyTypeThrows() + { + var message = Assert.Throws( + () => new PineconeVectorStoreRecordCollection( + this.Fixture.Client, + "Whatever")).Message; + + Assert.Equal( + $"Key properties must be one of the supported types: {typeof(string).FullName}. Type of the property '{nameof(PineconeRecordUnsupportedKeyType.Id)}' is {typeof(int).FullName}.", + message); + } + +#pragma warning disable CA1812 + private sealed record PineconeRecordUnsupportedKeyType + { + [VectorStoreRecordKey] + public int Id { get; set; } + + [VectorStoreRecordData] + public string? 
Name { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory Embedding { get; set; } + } +#pragma warning restore CA1812 + + [PineconeFact] + public async Task TryAddingVectorWithUnsupportedValuesAsync() + { + var badAllTypes = new PineconeAllTypes + { + Id = "bad", + BoolProperty = true, + DecimalProperty = 1m, + DoubleProperty = 1.5d, + FloatProperty = 2.5f, + IntProperty = 1, + LongProperty = 11L, + NullableStringArray = ["foo", null!, "bar",], + Embedding = new ReadOnlyMemory([1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f]) + }; + + var exception = await Assert.ThrowsAsync( + () => this.Fixture.AllTypesRecordCollection.UpsertAsync(badAllTypes)); + + Assert.Equal("Microsoft.SemanticKernel.Connectors.Pinecone", exception.Source); + Assert.Equal("Pinecone", exception.VectorStoreType); + Assert.Equal("Upsert", exception.OperationName); + Assert.Equal(this.Fixture.IndexName, exception.CollectionName); + + var inner = exception.InnerException as RpcException; + Assert.NotNull(inner); + Assert.Equal(StatusCode.InvalidArgument, inner.StatusCode); + } + + [PineconeFact] + public async Task TryCreateIndexWithIncorrectDimensionFailsAsync() + { + var recordCollection = new PineconeVectorStoreRecordCollection( + this.Fixture.Client, + "negative-dimension"); + + var message = (await Assert.ThrowsAsync(() => recordCollection.CreateCollectionAsync())).Message; + + Assert.Equal("Property Dimensions on VectorStoreRecordVectorProperty 'Embedding' must be set to a positive integer to create a collection.", message); + } + +#pragma warning disable CA1812 + private sealed record PineconeRecordWithIncorrectDimension + { + [VectorStoreRecordKey] + public string Id { get; set; } = null!; + + [VectorStoreRecordData] + public string? 
Name { get; set; } + + [VectorStoreRecordVector(Dimensions: -7)] + public ReadOnlyMemory Embedding { get; set; } + } +#pragma warning restore CA1812 + + [PineconeFact] + public async Task TryCreateIndexWithUnsSupportedMetricFailsAsync() + { + var recordCollection = new PineconeVectorStoreRecordCollection( + this.Fixture.Client, + "bad-metric"); + + var message = (await Assert.ThrowsAsync(() => recordCollection.CreateCollectionAsync())).Message; + + Assert.Equal("Unsupported distance function 'just eyeball it' for VectorStoreRecordVectorProperty 'Embedding'.", message); + } + +#pragma warning disable CA1812 + private sealed record PineconeRecordWithUnsupportedMetric + { + [VectorStoreRecordKey] + public string Id { get; set; } = null!; + + [VectorStoreRecordData] + public string? Name { get; set; } + + [VectorStoreRecordVector(Dimensions: 5, IndexKind: null, DistanceFunction: "just eyeball it")] + public ReadOnlyMemory Embedding { get; set; } + } +#pragma warning restore CA1812 + + #endregion +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreTests.cs new file mode 100644 index 000000000000..d8c6f9452221 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreTests.cs @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.Pinecone; +using Microsoft.SemanticKernel.Data; +using SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit; +using Xunit; +using Sdk = Pinecone; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone; + +[Collection("PineconeVectorStoreTests")] +[PineconeApiKeySetCondition] +public class PineconeVectorStoreTests(PineconeVectorStoreFixture fixture) : IClassFixture +{ + private PineconeVectorStoreFixture Fixture { get; } = fixture; + + [PineconeFact] + public async Task ListCollectionNamesAsync() + { + var collectionNames = await this.Fixture.VectorStore.ListCollectionNamesAsync().ToListAsync(); + + Assert.Equal([this.Fixture.IndexName], collectionNames); + } + + [PineconeFact] + public void CreateCollectionUsingFactory() + { + var vectorStore = new PineconeVectorStore( + this.Fixture.Client, + new PineconeVectorStoreOptions + { + VectorStoreCollectionFactory = new MyVectorStoreRecordCollectionFactory() + }); + + var factoryCollection = vectorStore.GetCollection(this.Fixture.IndexName); + + Assert.NotNull(factoryCollection); + Assert.Equal("factory" + this.Fixture.IndexName, factoryCollection.CollectionName); + } + + private sealed class MyVectorStoreRecordCollectionFactory : IPineconeVectorStoreRecordCollectionFactory + { + public IVectorStoreRecordCollection CreateVectorStoreRecordCollection( + Sdk.PineconeClient pineconeClient, + string name, + VectorStoreRecordDefinition? 
vectorStoreRecordDefinition) where TRecord : class + { + if (typeof(TKey) != typeof(string)) + { + throw new InvalidOperationException("Only string keys are supported."); + } + + return (new PineconeVectorStoreRecordCollection(pineconeClient, "factory" + name) as IVectorStoreRecordCollection)!; + } + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/ITestCondition.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/ITestCondition.cs new file mode 100644 index 000000000000..361e13d60cd0 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/ITestCondition.cs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Threading.Tasks; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit; + +public interface ITestCondition +{ + ValueTask IsMetAsync(); + + string SkipReason { get; } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeApiKeySetConditionAttribute.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeApiKeySetConditionAttribute.cs new file mode 100644 index 000000000000..ef144699fb7c --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeApiKeySetConditionAttribute.cs @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Threading.Tasks; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit; + +[AttributeUsage(AttributeTargets.Method | AttributeTargets.Class)] +public sealed class PineconeApiKeySetConditionAttribute : Attribute, ITestCondition +{ + public ValueTask IsMetAsync() + { + var isMet = PineconeUserSecretsExtensions.ContainsPineconeApiKey(); + + return ValueTask.FromResult(isMet); + } + + public string SkipReason + => $"Pinecone API key was not specified in user secrets. 
Use the following command to set it: dotnet user-secrets set \"{PineconeUserSecretsExtensions.PineconeApiKeyUserSecretEntry}\" \"your_Pinecone_API_key\""; +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactAttribute.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactAttribute.cs new file mode 100644 index 000000000000..d4ebff8869e0 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactAttribute.cs @@ -0,0 +1,11 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Xunit; +using Xunit.Sdk; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit; + +[AttributeUsage(AttributeTargets.Method)] +[XunitTestCaseDiscoverer("SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit.PineconeFactDiscoverer", "IntegrationTests")] +public sealed class PineconeFactAttribute : FactAttribute; diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactDiscoverer.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactDiscoverer.cs new file mode 100644 index 000000000000..c1923ad72a2e --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactDiscoverer.cs @@ -0,0 +1,19 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using Xunit.Abstractions; +using Xunit.Sdk; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit; + +public class PineconeFactDiscoverer(IMessageSink messageSink) : FactDiscoverer(messageSink) +{ + protected override IXunitTestCase CreateTestCase( + ITestFrameworkDiscoveryOptions discoveryOptions, + ITestMethod testMethod, + IAttributeInfo factAttribute) + => new PineconeFactTestCase( + this.DiagnosticMessageSink, + discoveryOptions.MethodDisplayOrDefault(), + discoveryOptions.MethodDisplayOptionsOrDefault(), + testMethod); +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactTestCase.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactTestCase.cs new file mode 100644 index 000000000000..4a27031ff45b --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeFactTestCase.cs @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Threading; +using System.Threading.Tasks; +using Xunit.Abstractions; +using Xunit.Sdk; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit; + +public sealed class PineconeFactTestCase : XunitTestCase +{ + [Obsolete("Called by the de-serializer; should only be called by deriving classes for de-serialization purposes")] + public PineconeFactTestCase() + { + } + + public PineconeFactTestCase( + IMessageSink diagnosticMessageSink, + TestMethodDisplay defaultMethodDisplay, + TestMethodDisplayOptions defaultMethodDisplayOptions, + ITestMethod testMethod, + object[]? 
testMethodArguments = null) + : base(diagnosticMessageSink, defaultMethodDisplay, defaultMethodDisplayOptions, testMethod, testMethodArguments) + { + } + + public override async Task RunAsync( + IMessageSink diagnosticMessageSink, + IMessageBus messageBus, + object[] constructorArguments, + ExceptionAggregator aggregator, + CancellationTokenSource cancellationTokenSource) + => await XunitTestCaseExtensions.TrySkipAsync(this, messageBus) + ? new RunSummary { Total = 1, Skipped = 1 } + : await base.RunAsync( + diagnosticMessageSink, + messageBus, + constructorArguments, + aggregator, + cancellationTokenSource); +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryAttribute.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryAttribute.cs new file mode 100644 index 000000000000..bff77c952c24 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryAttribute.cs @@ -0,0 +1,11 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Xunit; +using Xunit.Sdk; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit; + +[AttributeUsage(AttributeTargets.Method)] +[XunitTestCaseDiscoverer("SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit.PineconeTheoryDiscoverer", "IntegrationTests")] +public sealed class PineconeTheoryAttribute : TheoryAttribute; diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryDiscoverer.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryDiscoverer.cs new file mode 100644 index 000000000000..79a60afd69b8 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryDiscoverer.cs @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System.Collections.Generic; +using Xunit.Abstractions; +using Xunit.Sdk; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit; + +public class PineconeTheoryDiscoverer(IMessageSink messageSink) : TheoryDiscoverer(messageSink) +{ + protected override IEnumerable CreateTestCasesForTheory( + ITestFrameworkDiscoveryOptions discoveryOptions, + ITestMethod testMethod, + IAttributeInfo theoryAttribute) + { + yield return new PineconeTheoryTestCase( + this.DiagnosticMessageSink, + discoveryOptions.MethodDisplayOrDefault(), + discoveryOptions.MethodDisplayOptionsOrDefault(), + testMethod); + } + + protected override IEnumerable CreateTestCasesForDataRow( + ITestFrameworkDiscoveryOptions discoveryOptions, + ITestMethod testMethod, + IAttributeInfo theoryAttribute, + object[] dataRow) + { + yield return new PineconeFactTestCase( + this.DiagnosticMessageSink, + discoveryOptions.MethodDisplayOrDefault(), + discoveryOptions.MethodDisplayOptionsOrDefault(), + testMethod, + dataRow); + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryTestCase.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryTestCase.cs new file mode 100644 index 000000000000..1a9ebff92e1f --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/PineconeTheoryTestCase.cs @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Threading; +using System.Threading.Tasks; +using Xunit.Abstractions; +using Xunit.Sdk; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit; + +public sealed class PineconeTheoryTestCase : XunitTheoryTestCase +{ + [Obsolete("Called by the de-serializer; should only be called by deriving classes for de-serialization purposes")] + public PineconeTheoryTestCase() + { + } + + public PineconeTheoryTestCase( + IMessageSink diagnosticMessageSink, + TestMethodDisplay defaultMethodDisplay, + TestMethodDisplayOptions defaultMethodDisplayOptions, + ITestMethod testMethod) + : base(diagnosticMessageSink, defaultMethodDisplay, defaultMethodDisplayOptions, testMethod) + { + } + + public override async Task RunAsync( + IMessageSink diagnosticMessageSink, + IMessageBus messageBus, + object[] constructorArguments, + ExceptionAggregator aggregator, + CancellationTokenSource cancellationTokenSource) + => await XunitTestCaseExtensions.TrySkipAsync(this, messageBus) + ? new RunSummary { Total = 1, Skipped = 1 } + : await base.RunAsync( + diagnosticMessageSink, + messageBus, + constructorArguments, + aggregator, + cancellationTokenSource); +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/XunitTestCaseExtensions.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/XunitTestCaseExtensions.cs new file mode 100644 index 000000000000..75d22e4e5ae9 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/Xunit/XunitTestCaseExtensions.cs @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using Xunit.Abstractions; +using Xunit.Sdk; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Pinecone.Xunit; + +public static class XunitTestCaseExtensions +{ + private static readonly ConcurrentDictionary> s_typeAttributes = new(); + private static readonly ConcurrentDictionary> s_assemblyAttributes = new(); + + public static async ValueTask TrySkipAsync(XunitTestCase testCase, IMessageBus messageBus) + { + var method = testCase.Method; + var type = testCase.TestMethod.TestClass.Class; + var assembly = type.Assembly; + + var skipReasons = new List(); + var attributes = + s_assemblyAttributes.GetOrAdd( + assembly.Name, + a => assembly.GetCustomAttributes(typeof(ITestCondition)).ToList()) + .Concat( + s_typeAttributes.GetOrAdd( + type.Name, + t => type.GetCustomAttributes(typeof(ITestCondition)).ToList())) + .Concat(method.GetCustomAttributes(typeof(ITestCondition))) + .OfType() + .Select(attributeInfo => (ITestCondition)attributeInfo.Attribute); + + foreach (var attribute in attributes) + { + if (!await attribute.IsMetAsync()) + { + skipReasons.Add(attribute.SkipReason); + } + } + + if (skipReasons.Count > 0) + { + messageBus.QueueMessage( + new TestSkipped(new XunitTest(testCase, testCase.DisplayName), string.Join(Environment.NewLine, skipReasons))); + + return true; + } + + return false; + } +} diff --git a/dotnet/src/IntegrationTests/IntegrationTests.csproj b/dotnet/src/IntegrationTests/IntegrationTests.csproj index 87924fd854e4..55a6ac6d1006 100644 --- a/dotnet/src/IntegrationTests/IntegrationTests.csproj +++ b/dotnet/src/IntegrationTests/IntegrationTests.csproj @@ -44,6 +44,7 @@ + @@ -59,6 +60,7 @@ + diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index 19dfe2b882d1..9048e4439344 100644 --- 
a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -254,6 +254,23 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT /// A value indicating whether versions of all the types should also be supported. /// Thrown if any of the properties are not in the given set of types. public static void VerifyPropertyTypes(List properties, HashSet supportedTypes, string propertyCategoryDescription, bool? supportEnumerable = false) + { + var supportedEnumerableTypes = supportEnumerable == true + ? supportedTypes + : []; + + VerifyPropertyTypes(properties, supportedTypes, propertyCategoryDescription, supportedEnumerableTypes); + } + + /// + /// Verify that the given properties are of the supported types. + /// + /// The properties to check. + /// A set of supported types that the provided properties may have. + /// A description of the category of properties being checked. Used for error messaging. + /// A set of supported types that the provided enumerable properties may use as their element type. + /// Thrown if any of the properties are not in the given set of types. 
+ public static void VerifyPropertyTypes(List properties, HashSet supportedTypes, string propertyCategoryDescription, HashSet supportedEnumerableTypes) { foreach (var property in properties) { @@ -264,39 +281,29 @@ public static void VerifyPropertyTypes(List properties, HashSet 0) { - if (property.PropertyType is IEnumerable) + var typeToCheck = property.PropertyType switch { - typeToCheck = typeof(object); - } - else if (property.PropertyType.IsArray) - { - typeToCheck = property.PropertyType.GetElementType()!; - } - else if (property.PropertyType.IsGenericType && property.PropertyType.GetGenericTypeDefinition() == typeof(IEnumerable<>)) + IEnumerable => typeof(object), + var enumerableType when enumerableType.IsGenericType && enumerableType.GetGenericTypeDefinition() == typeof(IEnumerable<>) => enumerableType.GetGenericArguments()[0], + var arrayType when arrayType.IsArray => arrayType.GetElementType()!, + var interfaceType when interfaceType.GetInterfaces().FirstOrDefault(i => i.IsGenericType && i.GetGenericTypeDefinition() == typeof(IEnumerable<>)) is Type enumerableInterface => + enumerableInterface.GetGenericArguments()[0], + _ => property.PropertyType + }; + + if (!supportedEnumerableTypes.Contains(typeToCheck)) { - typeToCheck = property.PropertyType.GetGenericArguments()[0]; - } - else if (property.PropertyType.GetInterfaces().FirstOrDefault(i => i.IsGenericType && i.GetGenericTypeDefinition() == typeof(IEnumerable<>)) is Type enumerableInterface) - { - typeToCheck = enumerableInterface.GetGenericArguments()[0]; - } - else - { - typeToCheck = property.PropertyType; + var supportedEnumerableElementTypesString = string.Join(", ", supportedEnumerableTypes!.Select(t => t.FullName)); + throw new ArgumentException($"Enumerable {propertyCategoryDescription} properties must have one of the supported element types: {supportedEnumerableElementTypesString}. 
Element type of the property '{property.Name}' is {typeToCheck.FullName}."); } } else { - typeToCheck = property.PropertyType; - } - - if (!supportedTypes.Contains(typeToCheck)) - { + // if we got here, we know the type is not supported var supportedTypesString = string.Join(", ", supportedTypes.Select(t => t.FullName)); - throw new ArgumentException($"{propertyCategoryDescription} properties must be one of the supported types: {supportedTypesString}. Type of {property.Name} is {property.PropertyType.FullName}."); + throw new ArgumentException($"{propertyCategoryDescription} properties must be one of the supported types: {supportedTypesString}. Type of the property '{property.Name}' is {property.PropertyType.FullName}."); } } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs index f5b190417c1b..643a1cfb3233 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs @@ -34,7 +34,7 @@ public VectorStoreRecordVectorAttribute(int Dimensions) /// The number of dimensions that the vector has. /// The kind of index to use. /// The distance function to use when comparing vectors. - public VectorStoreRecordVectorAttribute(int Dimensions, string IndexKind, string DistanceFunction) + public VectorStoreRecordVectorAttribute(int Dimensions, string? IndexKind, string? 
DistanceFunction) { this.Dimensions = Dimensions; this.IndexKind = IndexKind; diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs index 52d181337da3..9163ebb39c87 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs @@ -11,7 +11,7 @@ namespace Microsoft.SemanticKernel.Data; public static class DistanceFunction { /// - /// The cosine (angular) similarty between two vectors. + /// The cosine (angular) similarity between two vectors. /// /// /// Measures only the angle between the two vectors, without taking into account the length of the vectors. @@ -23,7 +23,7 @@ public static class DistanceFunction public const string CosineSimilarity = nameof(CosineSimilarity); /// - /// The cosine (angular) similarty between two vectors. + /// The cosine (angular) similarity between two vectors. /// /// /// CosineDistance = 1 - CosineSimilarity. diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index b804f35a442d..7e4ca5519a1f 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -211,7 +211,7 @@ public void VerifyPropertyTypesFailsForDisallowedTypes() var ex = Assert.Throws(() => VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(int), typeof(float)], "Data")); // Assert. - Assert.Equal("Data properties must be one of the supported types: System.Int32, System.Single. Type of Data is System.String.", ex.Message); + Assert.Equal("Data properties must be one of the supported types: System.Int32, System.Single. 
Type of the property 'Data' is System.String.", ex.Message); } [Fact] From 718dc5b6c679c361edb76d473e07a8ba9ac62654 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Wed, 24 Jul 2024 14:10:22 +0100 Subject: [PATCH 32/48] .Net: RedisJson rename, adr rename and AzureAISearch integration tests disable (#7424) ### Motivation and Context As part of the MemoryStore redesign making some changes to get ready to merge into main. Redis supports two modes when storing vectors. JSON and HashSets. I have an implementation for HashSets but it doesn't have tests yet. I do intend to add tests later and get it merged, but in the meantime, need to make sure that the JSON implementation is named in a way that will make it clear how it differs from the HashSet implementation. ### Description Changes included: 1. Renaming the Redis record collection class to include JSON in the name, since this implementation is specific to the JSON storage option in Redis. 2. Renaming the ADR document to match the latest free number. 3. Disabling AzureAISearch integration tests, since they require infra to be set up and aren't working as part of the build.
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...md => 0050-updated-vector-store-design.md} | 0 ...> RedisJsonVectorStoreRecordCollection.cs} | 18 +++---- ...JsonVectorStoreRecordCollectionOptions.cs} | 4 +- ...cs => RedisJsonVectorStoreRecordMapper.cs} | 6 +-- .../RedisVectorStore.cs | 2 +- .../RedisVectorStoreOptions.cs | 2 +- ...isJsonVectorStoreRecordCollectionTests.cs} | 18 +++---- ... RedisJsonVectorStoreRecordMapperTests.cs} | 8 ++-- .../RedisVectorStoreTests.cs | 2 +- ...ISearchVectorStoreRecordCollectionTests.cs | 2 +- .../AzureAISearchVectorStoreTests.cs | 2 +- ...isJsonVectorStoreRecordCollectionTests.cs} | 48 +++++++++---------- 12 files changed, 56 insertions(+), 56 deletions(-) rename docs/decisions/{0045-updated-vector-store-design.md => 0050-updated-vector-store-design.md} (100%) rename dotnet/src/Connectors/Connectors.Memory.Redis/{RedisVectorStoreRecordCollection.cs => RedisJsonVectorStoreRecordCollection.cs} (95%) rename dotnet/src/Connectors/Connectors.Memory.Redis/{RedisVectorStoreRecordCollectionOptions.cs => RedisJsonVectorStoreRecordCollectionOptions.cs} (93%) rename dotnet/src/Connectors/Connectors.Memory.Redis/{RedisVectorStoreRecordMapper.cs => RedisJsonVectorStoreRecordMapper.cs} (91%) rename dotnet/src/Connectors/Connectors.Redis.UnitTests/{RedisVectorStoreRecordCollectionTests.cs => RedisJsonVectorStoreRecordCollectionTests.cs} (95%) rename dotnet/src/Connectors/Connectors.Redis.UnitTests/{RedisVectorStoreRecordMapperTests.cs => RedisJsonVectorStoreRecordMapperTests.cs} (90%) rename 
dotnet/src/IntegrationTests/Connectors/Memory/Redis/{RedisVectorStoreRecordCollectionTests.cs => RedisJsonVectorStoreRecordCollectionTests.cs} (82%) diff --git a/docs/decisions/0045-updated-vector-store-design.md b/docs/decisions/0050-updated-vector-store-design.md similarity index 100% rename from docs/decisions/0045-updated-vector-store-design.md rename to docs/decisions/0050-updated-vector-store-design.md diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs similarity index 95% rename from dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs rename to dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs index fdb293b429b6..54701a96e7ed 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs @@ -19,11 +19,11 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; /// -/// Service for storing and retrieving vector records, that uses Redis as the underlying storage. +/// Service for storing and retrieving vector records, that uses Redis JSON as the underlying storage. /// /// The data model to use for adding, updating and retrieving data from storage. #pragma warning disable CA1711 // Identifiers should not have incorrect suffix -public sealed class RedisVectorStoreRecordCollection : IVectorStoreRecordCollection +public sealed class RedisJsonVectorStoreRecordCollection : IVectorStoreRecordCollection #pragma warning restore CA1711 // Identifiers should not have incorrect suffix where TRecord : class { @@ -48,11 +48,11 @@ public sealed class RedisVectorStoreRecordCollection : IVectorStoreReco /// The Redis database to read/write records from. private readonly IDatabase _database; - /// The name of the collection that this will access. 
+ /// The name of the collection that this will access. private readonly string _collectionName; /// Optional configuration options for this class. - private readonly RedisVectorStoreRecordCollectionOptions _options; + private readonly RedisJsonVectorStoreRecordCollectionOptions _options; /// A definition of the current storage model. private readonly VectorStoreRecordDefinition _vectorStoreRecordDefinition; @@ -73,13 +73,13 @@ public sealed class RedisVectorStoreRecordCollection : IVectorStoreReco private readonly JsonSerializerOptions _jsonSerializerOptions; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The Redis database to read/write records from. - /// The name of the collection that this will access. + /// The name of the collection that this will access. /// Optional configuration options for this class. /// Throw when parameters are invalid. - public RedisVectorStoreRecordCollection(IDatabase database, string collectionName, RedisVectorStoreRecordCollectionOptions? options = null) + public RedisJsonVectorStoreRecordCollection(IDatabase database, string collectionName, RedisJsonVectorStoreRecordCollectionOptions? options = null) { // Verify. Verify.NotNull(database); @@ -88,7 +88,7 @@ public RedisVectorStoreRecordCollection(IDatabase database, string collectionNam // Assign. this._database = database; this._collectionName = collectionName; - this._options = options ?? new RedisVectorStoreRecordCollectionOptions(); + this._options = options ?? new RedisJsonVectorStoreRecordCollectionOptions(); this._jsonSerializerOptions = this._options.JsonSerializerOptions ?? JsonSerializerOptions.Default; this._vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? 
VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true); @@ -122,7 +122,7 @@ public RedisVectorStoreRecordCollection(IDatabase database, string collectionNam } else { - this._mapper = new RedisVectorStoreRecordMapper(this._keyJsonPropertyName); + this._mapper = new RedisJsonVectorStoreRecordMapper(this._keyJsonPropertyName); } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollectionOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollectionOptions.cs similarity index 93% rename from dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollectionOptions.cs rename to dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollectionOptions.cs index 3e3d647d20e9..382484e9cea9 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordCollectionOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollectionOptions.cs @@ -7,9 +7,9 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; /// -/// Options when creating a . +/// Options when creating a . 
/// -public sealed class RedisVectorStoreRecordCollectionOptions +public sealed class RedisJsonVectorStoreRecordCollectionOptions where TRecord : class { /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordMapper.cs similarity index 91% rename from dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs rename to dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordMapper.cs index bf9c65bdccfa..cb48b61902bc 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordMapper.cs @@ -10,17 +10,17 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; /// Class for mapping between a json node stored in redis, and the consumer data model. /// /// The consumer data model to map to or from. -internal sealed class RedisVectorStoreRecordMapper : IVectorStoreRecordMapper +internal sealed class RedisJsonVectorStoreRecordMapper : IVectorStoreRecordMapper where TConsumerDataModel : class { /// The name of the temporary json property that the key field will be serialized / parsed from. private readonly string _keyFieldJsonPropertyName; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// The name of the key field on the model when serialized to json. 
- public RedisVectorStoreRecordMapper(string keyFieldJsonPropertyName) + public RedisJsonVectorStoreRecordMapper(string keyFieldJsonPropertyName) { Verify.NotNullOrWhiteSpace(keyFieldJsonPropertyName); this._keyFieldJsonPropertyName = keyFieldJsonPropertyName; diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs index 498e4be9bb94..5dfd4aa69d97 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs @@ -53,7 +53,7 @@ public IVectorStoreRecordCollection GetCollection( return this._options.VectorStoreCollectionFactory.CreateVectorStoreRecordCollection(this._database, name, vectorStoreRecordDefinition); } - var directlyCreatedStore = new RedisVectorStoreRecordCollection(this._database, name, new RedisVectorStoreRecordCollectionOptions() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; + var directlyCreatedStore = new RedisJsonVectorStoreRecordCollection(this._database, name, new RedisJsonVectorStoreRecordCollectionOptions() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; return directlyCreatedStore!; } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs index b36747bb29e4..290aa4399af0 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs @@ -8,7 +8,7 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; public sealed class RedisVectorStoreOptions { /// - /// An optional factory to use for constructing instances, if custom options are required. + /// An optional factory to use for constructing instances, if custom options are required. 
/// public IRedisVectorStoreRecordCollectionFactory? VectorStoreCollectionFactory { get; init; } } diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs similarity index 95% rename from dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs rename to dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs index 2ecc113c60f6..4e977163b900 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs @@ -14,9 +14,9 @@ namespace Microsoft.SemanticKernel.Connectors.Redis.UnitTests; /// -/// Contains tests for the class. +/// Contains tests for the class. /// -public class RedisVectorStoreRecordCollectionTests +public class RedisJsonVectorStoreRecordCollectionTests { private const string TestCollectionName = "testcollection"; private const string TestRecordKey1 = "testid1"; @@ -24,7 +24,7 @@ public class RedisVectorStoreRecordCollectionTests private readonly Mock _redisDatabaseMock; - public RedisVectorStoreRecordCollectionTests() + public RedisJsonVectorStoreRecordCollectionTests() { this._redisDatabaseMock = new Mock(MockBehavior.Strict); @@ -46,7 +46,7 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN { SetupExecuteMock(this._redisDatabaseMock, new RedisServerException("Unknown index name")); } - var sut = new RedisVectorStoreRecordCollection( + var sut = new RedisJsonVectorStoreRecordCollection( this._redisDatabaseMock.Object, collectionName); @@ -69,7 +69,7 @@ public async Task CanCreateCollectionAsync() { // Arrange. 
SetupExecuteMock(this._redisDatabaseMock, string.Empty); - var sut = new RedisVectorStoreRecordCollection(this._redisDatabaseMock.Object, TestCollectionName); + var sut = new RedisJsonVectorStoreRecordCollection(this._redisDatabaseMock.Object, TestCollectionName); // Act. await sut.CreateCollectionAsync(); @@ -213,7 +213,7 @@ public async Task CanGetRecordWithCustomMapperAsync() .Returns(CreateModel(TestRecordKey1, true)); // Arrange target with custom mapper. - var sut = new RedisVectorStoreRecordCollection( + var sut = new RedisJsonVectorStoreRecordCollection( this._redisDatabaseMock.Object, TestCollectionName, new() @@ -360,7 +360,7 @@ public async Task CanUpsertRecordWithCustomMapperAsync() .Returns((TestRecordKey1, JsonNode.Parse(jsonNode)!)); // Arrange target with custom mapper. - var sut = new RedisVectorStoreRecordCollection( + var sut = new RedisJsonVectorStoreRecordCollection( this._redisDatabaseMock.Object, TestCollectionName, new() @@ -380,9 +380,9 @@ public async Task CanUpsertRecordWithCustomMapperAsync() Times.Once); } - private RedisVectorStoreRecordCollection CreateRecordCollection(bool useDefinition) + private RedisJsonVectorStoreRecordCollection CreateRecordCollection(bool useDefinition) { - return new RedisVectorStoreRecordCollection( + return new RedisJsonVectorStoreRecordCollection( this._redisDatabaseMock.Object, TestCollectionName, new() diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordMapperTests.cs similarity index 90% rename from dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordMapperTests.cs rename to dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordMapperTests.cs index 4c8e9c5cc792..328ba02e4fb6 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreRecordMapperTests.cs +++ 
b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordMapperTests.cs @@ -10,15 +10,15 @@ namespace SemanticKernel.Connectors.Redis.UnitTests; /// -/// Contains tests for the class. +/// Contains tests for the class. /// -public sealed class RedisVectorStoreRecordMapperTests +public sealed class RedisJsonVectorStoreRecordMapperTests { [Fact] public void MapsAllFieldsFromDataToStorageModel() { // Arrange. - var sut = new RedisVectorStoreRecordMapper("Key"); + var sut = new RedisJsonVectorStoreRecordMapper("Key"); // Act. var actual = sut.MapFromDataToStorageModel(CreateModel("test key")); @@ -37,7 +37,7 @@ public void MapsAllFieldsFromDataToStorageModel() public void MapsAllFieldsFromStorageToDataModel() { // Arrange. - var sut = new RedisVectorStoreRecordMapper("Key"); + var sut = new RedisJsonVectorStoreRecordMapper("Key"); // Act. var actual = sut.MapFromStorageToDataModel(("test key", CreateJsonNode()), new()); diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs index 91ecd526dcfd..fc25d4c8c151 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs @@ -38,7 +38,7 @@ public void GetCollectionReturnsCollection() // Assert. 
Assert.NotNull(actual); - Assert.IsType>>(actual); + Assert.IsType>>(actual); } [Fact] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs index 84d24af2bd1f..9ac37d122285 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -22,7 +22,7 @@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; public sealed class AzureAISearchVectorStoreRecordCollectionTests(ITestOutputHelper output, AzureAISearchVectorStoreFixture fixture) { // If null, all tests will be enabled - private const string SkipReason = null; //"Requires Azure AI Search Service instance up and running"; + private const string SkipReason = "Requires Azure AI Search Service instance up and running"; [Theory(Skip = SkipReason)] [InlineData(true)] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreTests.cs index 1b198da2b2a0..7bda8cb0fff9 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreTests.cs @@ -17,7 +17,7 @@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.AzureAISearch; public class AzureAISearchVectorStoreTests(ITestOutputHelper output, AzureAISearchVectorStoreFixture fixture) { // If null, all tests will be enabled - private const string SkipReason = null; //"Requires Azure AI Search Service instance up and running"; + private const string SkipReason = "Requires Azure AI Search Service instance up and running"; [Fact(Skip = 
SkipReason)] public async Task ItCanGetAListOfExistingCollectionNamesAsync() diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs similarity index 82% rename from dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs rename to dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs index b53321433daf..938bbefd119d 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs @@ -15,12 +15,12 @@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.Redis; /// -/// Contains tests for the class. +/// Contains tests for the class. /// /// Used for logging. /// Redis setup and teardown. [Collection("RedisVectorStoreCollection")] -public sealed class RedisVectorStoreRecordCollectionTests(ITestOutputHelper output, RedisVectorStoreFixture fixture) +public sealed class RedisJsonVectorStoreRecordCollectionTests(ITestOutputHelper output, RedisVectorStoreFixture fixture) { [Theory] [InlineData("hotels", true)] @@ -28,7 +28,7 @@ public sealed class RedisVectorStoreRecordCollectionTests(ITestOutputHelper outp public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) { // Arrange. - var sut = new RedisVectorStoreRecordCollection(fixture.Database, collectionName); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, collectionName); // Act. var actual = await sut.CollectionExistsAsync(); @@ -46,12 +46,12 @@ public async Task ItCanCreateACollectionAsync(bool useRecordDefinition) var collectionNamePostfix = useRecordDefinition ? 
"WithDefinition" : "WithType"; var testCollectionName = $"createtest{collectionNamePostfix}"; - var options = new RedisVectorStoreRecordCollectionOptions + var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorStoreRecordCollection(fixture.Database, testCollectionName, options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, testCollectionName, options); // Act await sut.CreateCollectionAsync(); @@ -76,7 +76,7 @@ public async Task ItCanDeleteCollectionAsync() createParams.AddPrefix(tempCollectionName); await fixture.Database.FT().CreateAsync(tempCollectionName, createParams, schema); - var sut = new RedisVectorStoreRecordCollection(fixture.Database, tempCollectionName); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, tempCollectionName); // Act await sut.DeleteCollectionAsync(); @@ -91,12 +91,12 @@ public async Task ItCanDeleteCollectionAsync() public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition) { // Arrange. - var options = new RedisVectorStoreRecordCollectionOptions + var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); Hotel record = CreateTestHotel("Upsert-1", 1); // Act. @@ -128,12 +128,12 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition public async Task ItCanUpsertManyDocumentsToVectorStoreAsync(bool useRecordDefinition) { // Arrange. 
- var options = new RedisVectorStoreRecordCollectionOptions + var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act. var results = sut.UpsertBatchAsync( @@ -167,12 +167,12 @@ public async Task ItCanUpsertManyDocumentsToVectorStoreAsync(bool useRecordDefin public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool useRecordDefinition) { // Arrange. - var options = new RedisVectorStoreRecordCollectionOptions + var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act. var getResult = await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = includeVectors }); @@ -204,8 +204,8 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool public async Task ItCanGetManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); + var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. 
@@ -227,8 +227,8 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() public async Task ItFailsToGetDocumentsWithInvalidSchemaAsync() { // Arrange. - var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); + var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act & Assert. await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-4-Invalid", new GetRecordOptions { IncludeVectors = true })); @@ -240,12 +240,12 @@ public async Task ItFailsToGetDocumentsWithInvalidSchemaAsync() public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefinition) { // Arrange. - var options = new RedisVectorStoreRecordCollectionOptions + var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); var address = new HotelAddress { City = "Seattle", Country = "USA" }; var record = new Hotel { @@ -271,8 +271,8 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() { // Arrange - var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); + var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); await sut.UpsertAsync(CreateTestHotel("RemoveMany-1", 1)); await sut.UpsertAsync(CreateTestHotel("RemoveMany-2", 2)); await sut.UpsertAsync(CreateTestHotel("RemoveMany-3", 3)); @@ -291,8 +291,8 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() { // Arrange - var options = new RedisVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); + var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act & Assert Assert.Null(await sut.GetAsync("BaseSet-5", new GetRecordOptions { IncludeVectors = true })); @@ -302,12 +302,12 @@ public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() public async Task ItThrowsMappingExceptionForFailedMapperAsync() { // Arrange - var options = new RedisVectorStoreRecordCollectionOptions + var options = new 
RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true, JsonNodeCustomMapper = new FailingMapper() }; - var sut = new RedisVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); From 11dce8f62ac0afe645362c42f883aa9bf504a44e Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 25 Jul 2024 09:58:07 +0100 Subject: [PATCH 33/48] .Net: Fix collection create field naming bug and add tests to cover scenario. (#7428) ### Motivation and Context For some connectors users can use `JsonPropertyName` to indicate that a different name should be used in storage than on the model. In other cases users can use `StoragePropertyName` in the definition. We need to use this information when creating indexes / collections to name the properties in storage correctly, otherwise there will be a mismatch when reading/writing. ### Description 1. Updating create logic to use the right storage or json property names everywhere. 2. Adding tests to specifically cover these scenarios. 
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...VectorStoreCollectionCreateMappingTests.cs | 43 ++++++----- ...earchVectorStoreCollectionCreateMapping.cs | 21 +++--- ...zureAISearchVectorStoreRecordCollection.cs | 48 +++++++++---- .../RedisJsonVectorStoreRecordCollection.cs | 29 +++++--- ...RedisVectorStoreCollectionCreateMapping.cs | 12 ++-- .../QdrantVectorStoreRecordCollectionTests.cs | 36 +++++++--- ...disJsonVectorStoreRecordCollectionTests.cs | 72 +++++++++++++++---- ...VectorStoreCollectionCreateMappingTests.cs | 20 ++++-- ...ISearchVectorStoreRecordCollectionTests.cs | 26 +++++-- .../QdrantVectorStoreRecordCollectionTests.cs | 23 ++++-- ...disJsonVectorStoreRecordCollectionTests.cs | 32 +++++++-- 11 files changed, 270 insertions(+), 92 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs index da7733720551..debafaa60e35 100644 --- a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs @@ -19,13 +19,14 @@ public void MapKeyFieldCreatesSearchableField() { // Arrange var keyProperty = new VectorStoreRecordKeyProperty("testkey"); + var storagePropertyName = "test_key"; // Act - var result = AzureAISearchVectorStoreCollectionCreateMapping.MapKeyField(keyProperty); + 
var result = AzureAISearchVectorStoreCollectionCreateMapping.MapKeyField(keyProperty, storagePropertyName); // Assert Assert.NotNull(result); - Assert.Equal(keyProperty.PropertyName, result.Name); + Assert.Equal(storagePropertyName, result.Name); Assert.True(result.IsKey); Assert.True(result.IsFilterable); } @@ -37,14 +38,15 @@ public void MapStringDataFieldCreatesSearchableField(bool isFilterable) { // Arrange var dataProperty = new VectorStoreRecordDataProperty("testdata") { IsFilterable = isFilterable, PropertyType = typeof(string) }; + var storagePropertyName = "test_data"; // Act - var result = AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty); + var result = AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty, storagePropertyName); // Assert Assert.NotNull(result); Assert.IsType(result); - Assert.Equal(dataProperty.PropertyName, result.Name); + Assert.Equal(storagePropertyName, result.Name); Assert.False(result.IsKey); Assert.Equal(isFilterable, result.IsFilterable); } @@ -56,14 +58,15 @@ public void MapDataFieldCreatesSimpleField(bool isFilterable) { // Arrange var dataProperty = new VectorStoreRecordDataProperty("testdata") { IsFilterable = isFilterable, PropertyType = typeof(int) }; + var storagePropertyName = "test_data"; // Act - var result = AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty); + var result = AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty, storagePropertyName); // Assert Assert.NotNull(result); Assert.IsType(result); - Assert.Equal(dataProperty.PropertyName, result.Name); + Assert.Equal(storagePropertyName, result.Name); Assert.Equal(SearchFieldDataType.Int32, result.Type); Assert.False(result.IsKey); Assert.Equal(isFilterable, result.IsFilterable); @@ -74,9 +77,10 @@ public void MapDataFieldFailsForNullType() { // Arrange var dataProperty = new VectorStoreRecordDataProperty("testdata"); + var storagePropertyName = "test_data"; // Act & 
Assert - Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty)); + Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty, storagePropertyName)); } [Fact] @@ -84,24 +88,25 @@ public void MapVectorFieldCreatesVectorSearchField() { // Arrange var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 10, IndexKind = IndexKind.Flat, DistanceFunction = DistanceFunction.DotProductSimilarity }; + var storagePropertyName = "test_vector"; // Act - var (vectorSearchField, algorithmConfiguration, vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty); + var (vectorSearchField, algorithmConfiguration, vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty, storagePropertyName); // Assert Assert.NotNull(vectorSearchField); Assert.NotNull(algorithmConfiguration); Assert.NotNull(vectorSearchProfile); - Assert.Equal(vectorProperty.PropertyName, vectorSearchField.Name); + Assert.Equal(storagePropertyName, vectorSearchField.Name); Assert.Equal(vectorProperty.Dimensions, vectorSearchField.VectorSearchDimensions); - Assert.Equal("testvectorAlgoConfig", algorithmConfiguration.Name); + Assert.Equal("test_vectorAlgoConfig", algorithmConfiguration.Name); Assert.IsType(algorithmConfiguration); var flatConfig = algorithmConfiguration as ExhaustiveKnnAlgorithmConfiguration; Assert.Equal(VectorSearchAlgorithmMetric.DotProduct, flatConfig!.Parameters.Metric); - Assert.Equal("testvectorProfile", vectorSearchProfile.Name); - Assert.Equal("testvectorAlgoConfig", vectorSearchProfile.AlgorithmConfigurationName); + Assert.Equal("test_vectorProfile", vectorSearchProfile.Name); + Assert.Equal("test_vectorAlgoConfig", vectorSearchProfile.AlgorithmConfigurationName); } [Theory] @@ -111,12 +116,13 @@ public void MapVectorFieldCreatesExpectedAlgoConfigTypes(string indexKind, Type { // Arrange var 
vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 10, IndexKind = indexKind, DistanceFunction = DistanceFunction.DotProductSimilarity }; + var storagePropertyName = "test_vector"; // Act - var (vectorSearchField, algorithmConfiguration, vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty); + var (vectorSearchField, algorithmConfiguration, vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty, storagePropertyName); // Assert - Assert.Equal("testvectorAlgoConfig", algorithmConfiguration.Name); + Assert.Equal("test_vectorAlgoConfig", algorithmConfiguration.Name); Assert.Equal(algoConfigType, algorithmConfiguration.GetType()); } @@ -125,9 +131,10 @@ public void MapVectorFieldDefaultsToHsnwAndCosine() { // Arrange var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 10 }; + var storagePropertyName = "test_vector"; // Act - var (vectorSearchField, algorithmConfiguration, vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty); + var (vectorSearchField, algorithmConfiguration, vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty, storagePropertyName); // Assert Assert.IsType(algorithmConfiguration); @@ -140,9 +147,10 @@ public void MapVectorFieldThrowsForUnsupportedDistanceFunction() { // Arrange var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 10, DistanceFunction = DistanceFunction.ManhattanDistance }; + var storagePropertyName = "test_vector"; // Act - Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty)); + Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty, storagePropertyName)); } [Fact] @@ -150,9 +158,10 @@ public void MapVectorFieldThrowsForMissingDimensionsCount() { // Arrange var 
vectorProperty = new VectorStoreRecordVectorProperty("testvector"); + var storagePropertyName = "test_vector"; // Act - Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty)); + Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty, storagePropertyName)); } [Theory] diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs index 3df9084a133b..d4972b5a62fd 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs @@ -18,23 +18,25 @@ internal static class AzureAISearchVectorStoreCollectionCreateMapping /// Map from a to an Azure AI Search . /// /// The key property definition. + /// The name of the property in storage. /// The for the provided property definition. - public static SearchableField MapKeyField(VectorStoreRecordKeyProperty keyProperty) + public static SearchableField MapKeyField(VectorStoreRecordKeyProperty keyProperty, string storagePropertyName) { - return new SearchableField(keyProperty.PropertyName) { IsKey = true, IsFilterable = true }; + return new SearchableField(storagePropertyName) { IsKey = true, IsFilterable = true }; } /// /// Map from a to an Azure AI Search . /// /// The data property definition. + /// The name of the property in storage. /// The for the provided property definition. /// Throws when the definition is missing required information. 
- public static SimpleField MapDataField(VectorStoreRecordDataProperty dataProperty) + public static SimpleField MapDataField(VectorStoreRecordDataProperty dataProperty, string storagePropertyName) { if (dataProperty.PropertyType == typeof(string)) { - return new SearchableField(dataProperty.PropertyName) { IsFilterable = dataProperty.IsFilterable }; + return new SearchableField(storagePropertyName) { IsFilterable = dataProperty.IsFilterable }; } if (dataProperty.PropertyType is null) @@ -42,16 +44,17 @@ public static SimpleField MapDataField(VectorStoreRecordDataProperty dataPropert throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.PropertyName}' must be set to create a collection."); } - return new SimpleField(dataProperty.PropertyName, AzureAISearchVectorStoreCollectionCreateMapping.GetSDKFieldDataType(dataProperty.PropertyType)) { IsFilterable = dataProperty.IsFilterable }; + return new SimpleField(storagePropertyName, AzureAISearchVectorStoreCollectionCreateMapping.GetSDKFieldDataType(dataProperty.PropertyType)) { IsFilterable = dataProperty.IsFilterable }; } /// /// Map form a to an Azure AI Search and generate the required index configuration. /// /// The vector property definition. + /// The name of the property in storage. /// The and required index configuration. /// Throws when the definition is missing required information, or unsupported options are configured. 
- public static (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfiguration algorithmConfiguration, VectorSearchProfile vectorSearchProfile) MapVectorField(VectorStoreRecordVectorProperty vectorProperty) + public static (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfiguration algorithmConfiguration, VectorSearchProfile vectorSearchProfile) MapVectorField(VectorStoreRecordVectorProperty vectorProperty, string storagePropertyName) { if (vectorProperty.Dimensions is not > 0) { @@ -60,8 +63,8 @@ public static (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfigu // Build a name for the profile and algorithm configuration based on the property name // since we'll just create a separate one for each vector property. - var vectorSearchProfileName = $"{vectorProperty.PropertyName}Profile"; - var algorithmConfigName = $"{vectorProperty.PropertyName}AlgoConfig"; + var vectorSearchProfileName = $"{storagePropertyName}Profile"; + var algorithmConfigName = $"{storagePropertyName}AlgoConfig"; // Read the vector index settings from the property definition and create the right index configuration. 
var indexKind = AzureAISearchVectorStoreCollectionCreateMapping.GetSKIndexKind(vectorProperty); @@ -75,7 +78,7 @@ public static (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfigu }; var vectorSearchProfile = new VectorSearchProfile(vectorSearchProfileName, algorithmConfigName); - return (new VectorSearchField(vectorProperty.PropertyName, vectorProperty.Dimensions.Value, vectorSearchProfileName), algorithmConfiguration, vectorSearchProfile); + return (new VectorSearchField(storagePropertyName, vectorProperty.Dimensions.Value, vectorSearchProfileName), algorithmConfiguration, vectorSearchProfile); } /// diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs index 679b663f87b1..f096db6918ed 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs @@ -81,11 +81,14 @@ public sealed class AzureAISearchVectorStoreRecordCollection : IVectorS /// A definition of the current storage model. private readonly VectorStoreRecordDefinition _vectorStoreRecordDefinition; - /// The name of the key field for the collections that this class is used with. - private readonly string _keyPropertyName; + /// The storage name of the key field for the collections that this class is used with. + private readonly string _keyStoragePropertyName; - /// The names of all non vector fields on the current model. - private readonly List _nonVectorPropertyNames; + /// The storage names of all non vector fields on the current model. + private readonly List _nonVectorStoragePropertyNames = new(); + + /// A dictionary that maps from a property name to the storage name that should be used when serializing it to json for data and vector properties. 
+ private readonly Dictionary _storagePropertyNames = new(); /// /// Initializes a new instance of the class. @@ -119,15 +122,30 @@ public AzureAISearchVectorStoreRecordCollection(SearchIndexClient searchIndexCli properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: true); } - // Validate property types and store for later use. + // Validate property types. var jsonSerializerOptions = this._options.JsonSerializerOptions ?? JsonSerializerOptions.Default; VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); - this._keyPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, properties.keyProperty); - // Build the list of property names from the current model that are either key or data fields. - this._nonVectorPropertyNames = properties.dataProperties.Concat([properties.keyProperty]).Select(x => VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, x)).ToList(); + // Get storage name for key property and store for later use. + this._keyStoragePropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, properties.keyProperty); + this._nonVectorStoragePropertyNames.Add(this._keyStoragePropertyName); + + // Get storage names for data properties and store for later use. + foreach (var property in properties.dataProperties) + { + var jsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(JsonSerializerOptions.Default, property); + this._storagePropertyNames[property.Name] = jsonPropertyName; + this._nonVectorStoragePropertyNames.Add(jsonPropertyName); + } + + // Get storage names for vector properties and store for later use. 
+ foreach (var property in properties.vectorProperties) + { + var jsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(JsonSerializerOptions.Default, property); + this._storagePropertyNames[property.Name] = jsonPropertyName; + } } /// @@ -168,19 +186,21 @@ public Task CreateCollectionAsync(CancellationToken cancellationToken = default) // Key property. if (property is VectorStoreRecordKeyProperty keyProperty) { - searchFields.Add(AzureAISearchVectorStoreCollectionCreateMapping.MapKeyField(keyProperty)); + searchFields.Add(AzureAISearchVectorStoreCollectionCreateMapping.MapKeyField(keyProperty, this._keyStoragePropertyName)); } // Data property. if (property is VectorStoreRecordDataProperty dataProperty) { - searchFields.Add(AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty)); + searchFields.Add(AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty, this._storagePropertyNames[dataProperty.PropertyName])); } // Vector property. if (property is VectorStoreRecordVectorProperty vectorProperty) { - (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfiguration algorithmConfiguration, VectorSearchProfile vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty); + (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfiguration algorithmConfiguration, VectorSearchProfile vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField( + vectorProperty, + this._storagePropertyNames[vectorProperty.PropertyName]); // Add the search field, plus its profile and algorithm configuration to the search config. searchFields.Add(vectorSearchField); @@ -257,7 +277,7 @@ public Task DeleteAsync(string key, DeleteRecordOptions? options = default, Canc // Remove record. 
return this.RunOperationAsync( "DeleteDocuments", - () => this._searchClient.DeleteDocumentsAsync(this._keyPropertyName, [key], new IndexDocumentsOptions(), cancellationToken)); + () => this._searchClient.DeleteDocumentsAsync(this._keyStoragePropertyName, [key], new IndexDocumentsOptions(), cancellationToken)); } /// @@ -268,7 +288,7 @@ public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? opti // Remove records. return this.RunOperationAsync( "DeleteDocuments", - () => this._searchClient.DeleteDocumentsAsync(this._keyPropertyName, keys, new IndexDocumentsOptions(), cancellationToken)); + () => this._searchClient.DeleteDocumentsAsync(this._keyStoragePropertyName, keys, new IndexDocumentsOptions(), cancellationToken)); } /// @@ -385,7 +405,7 @@ private GetDocumentOptions ConvertGetDocumentOptions(GetRecordOptions? options) var innerOptions = new GetDocumentOptions(); if (options?.IncludeVectors is false) { - innerOptions.SelectedFields.AddRange(this._nonVectorPropertyNames); + innerOptions.SelectedFields.AddRange(this._nonVectorStoragePropertyNames); } return innerOptions; diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs index 54701a96e7ed..009e983cdc5d 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs @@ -63,8 +63,11 @@ public sealed class RedisJsonVectorStoreRecordCollection : IVectorStore /// The name of the temporary JSON property that the key property will be serialized / parsed from. private readonly string _keyJsonPropertyName; - /// An array of the names of all the data properties that are part of the Redis payload, i.e. all properties except the key and vector properties. 
- private readonly string[] _dataPropertyNames; + /// An array of the storage names of all the data properties that are part of the Redis payload, i.e. all properties except the key and vector properties. + private readonly string[] _dataStoragePropertyNames; + + /// A dictionary that maps from a property name to the storage name that should be used when serializing it to json for data and vector properties. + private readonly Dictionary _storagePropertyNames = new(); /// The mapper to use when mapping between the consumer data model and the Redis record. private readonly IVectorStoreRecordMapper _mapper; @@ -110,10 +113,20 @@ public RedisJsonVectorStoreRecordCollection(IDatabase database, string collectio this._keyPropertyInfo = properties.keyProperty; this._keyJsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(this._jsonSerializerOptions, this._keyPropertyInfo); - this._dataPropertyNames = properties - .dataProperties - .Select(x => VectorStoreRecordPropertyReader.GetJsonPropertyName(this._jsonSerializerOptions, x)) - .ToArray(); + this._dataStoragePropertyNames = new string[properties.dataProperties.Count]; + var index = 0; + foreach (var property in properties.dataProperties) + { + var storagePropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(this._jsonSerializerOptions, property); + this._storagePropertyNames[property.Name] = storagePropertyName; + this._dataStoragePropertyNames[index++] = storagePropertyName; + } + + foreach (var property in properties.vectorProperties) + { + var storagePropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(this._jsonSerializerOptions, property); + this._storagePropertyNames[property.Name] = storagePropertyName; + } // Assign Mapper. 
if (this._options.JsonNodeCustomMapper is not null) @@ -156,7 +169,7 @@ public async Task CollectionExistsAsync(CancellationToken cancellationToke public Task CreateCollectionAsync(CancellationToken cancellationToken = default) { // Map the record definition to a schema. - var schema = RedisVectorStoreCollectionCreateMapping.MapToSchema(this._vectorStoreRecordDefinition.Properties); + var schema = RedisVectorStoreCollectionCreateMapping.MapToSchema(this._vectorStoreRecordDefinition.Properties, this._storagePropertyNames); // Create the index creation params. // Add the collection name and colon as the index prefix, which means that any record where the key is prefixed with this text will be indexed by this index @@ -201,7 +214,7 @@ public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) .GetAsync(maybePrefixedKey) : this._database .JSON() - .GetAsync(maybePrefixedKey, this._dataPropertyNames)).ConfigureAwait(false); + .GetAsync(maybePrefixedKey, this._dataStoragePropertyNames)).ConfigureAwait(false); // Check if the key was found before trying to parse the result. if (redisResult.IsNull || redisResult is null) diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs index 8cfcc589d89a..209b958273e7 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs @@ -45,9 +45,10 @@ internal static class RedisVectorStoreCollectionCreateMapping /// Map from the given list of items to the Redis . /// /// The property definitions to map from. + /// A dictionary that maps from a property name to the storage name that should be used when serializing it to json for data and vector properties. /// The mapped Redis . 
/// Thrown if there are missing required or unsupported configuration options set. - public static Schema MapToSchema(IEnumerable properties) + public static Schema MapToSchema(IEnumerable properties, Dictionary storagePropertyNames) { var schema = new Schema(); @@ -69,14 +70,16 @@ public static Schema MapToSchema(IEnumerable properti throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.PropertyName}' must be set to create a collection, since the property is filterable."); } + var storageName = storagePropertyNames[dataProperty.PropertyName]; + if (dataProperty.PropertyType == typeof(string)) { - schema.AddTextField(new FieldName($"$.{dataProperty.PropertyName}", dataProperty.PropertyName)); + schema.AddTextField(new FieldName($"$.{storageName}", storageName)); } if (RedisVectorStoreCollectionCreateMapping.s_supportedFilterableNumericDataTypes.Contains(dataProperty.PropertyType)) { - schema.AddNumericField(new FieldName($"$.{dataProperty.PropertyName}", dataProperty.PropertyName)); + schema.AddNumericField(new FieldName($"$.{storageName}", storageName)); } continue; @@ -90,10 +93,11 @@ public static Schema MapToSchema(IEnumerable properti throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive integer to create a collection."); } + var storageName = storagePropertyNames[vectorProperty.PropertyName]; var indexKind = GetSDKIndexKind(vectorProperty); var distanceAlgorithm = GetSDKDistanceAlgorithm(vectorProperty); var dimensions = vectorProperty.Dimensions.Value.ToString(CultureInfo.InvariantCulture); - schema.AddVectorField(new FieldName($"$.{vectorProperty.PropertyName}", vectorProperty.PropertyName), indexKind, new Dictionary() + schema.AddVectorField(new FieldName($"$.{storageName}", storageName), indexKind, new Dictionary() { ["TYPE"] = "FLOAT32", 
["DIM"] = dimensions, diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs index 9db90fb7fe82..d7e533364881 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Text.Json.Serialization; using System.Threading; using System.Threading.Tasks; using Microsoft.SemanticKernel.Data; @@ -90,7 +91,16 @@ public async Task CanCreateCollectionAsync() .Verify( x => x.CreatePayloadIndexAsync( TestCollectionName, - "Data", + "OriginalNameData", + PayloadSchemaType.Text, + this._testCancellationToken), + Times.Once); + + this._qdrantClientMock + .Verify( + x => x.CreatePayloadIndexAsync( + TestCollectionName, + "data_storage_name", PayloadSchemaType.Text, this._testCancellationToken), Times.Once); @@ -153,6 +163,7 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool ha Assert.NotNull(actual); Assert.Equal(testRecordKey, actual.Key); + Assert.Equal("data 1", actual.OriginalNameData); Assert.Equal("data 1", actual.Data); Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); } @@ -187,6 +198,7 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool Assert.NotNull(actual); Assert.Equal(testRecordKey, actual.Key); + Assert.Equal("data 1", actual.OriginalNameData); Assert.Equal("data 1", actual.Data); Assert.Null(actual.Vector); } @@ -263,6 +275,7 @@ public async Task CanGetRecordWithCustomMapperAsync() // Assert Assert.NotNull(actual); Assert.Equal(UlongTestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.OriginalNameData); Assert.Equal("data 1", actual.Data); Assert.Equal(new float[] { 1, 2, 3, 4 }, 
actual.Vector!.Value.ToArray()); @@ -460,7 +473,7 @@ public async Task CanUpsertRecordWithCustomMapperAsync() var pointStruct = new PointStruct { Id = new() { Num = UlongTestRecordKey1 }, - Payload = { ["Data"] = "data 1" }, + Payload = { ["OriginalNameData"] = "data 1", ["data_storage_name"] = "data 1" }, Vectors = new[] { 1f, 2f, 3f, 4f } }; @@ -571,10 +584,10 @@ private static RetrievedPoint CreateRetrievedPoint(bool hasNamedVectors, T if (hasNamedVectors) { var namedVectors = new NamedVectors(); - namedVectors.Vectors.Add("Vector", new[] { 1f, 2f, 3f, 4f }); + namedVectors.Vectors.Add("vector_storage_name", new[] { 1f, 2f, 3f, 4f }); point = new RetrievedPoint() { - Payload = { ["Data"] = "data 1" }, + Payload = { ["OriginalNameData"] = "data 1", ["data_storage_name"] = "data 1" }, Vectors = new Vectors { Vectors_ = namedVectors } }; } @@ -582,7 +595,7 @@ private static RetrievedPoint CreateRetrievedPoint(bool hasNamedVectors, T { point = new RetrievedPoint() { - Payload = { ["Data"] = "data 1" }, + Payload = { ["OriginalNameData"] = "data 1", ["data_storage_name"] = "data 1" }, Vectors = new[] { 1f, 2f, 3f, 4f } }; } @@ -618,6 +631,7 @@ private static SinglePropsModel CreateModel(T key, bool withVectors) return new SinglePropsModel { Key = key, + OriginalNameData = "data 1", Data = "data 1", Vector = withVectors ? 
new float[] { 1, 2, 3, 4 } : null, NotAnnotated = null, @@ -629,8 +643,9 @@ private static SinglePropsModel CreateModel(T key, bool withVectors) Properties = [ new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data"), - new VectorStoreRecordVectorProperty("Vector") + new VectorStoreRecordDataProperty("OriginalNameData") { IsFilterable = true }, + new VectorStoreRecordDataProperty("Data") { IsFilterable = true, StoragePropertyName = "data_storage_name" }, + new VectorStoreRecordVectorProperty("Vector") { StoragePropertyName = "vector_storage_name" } ] }; @@ -640,9 +655,14 @@ public sealed class SinglePropsModel public required T Key { get; set; } [VectorStoreRecordData(IsFilterable = true)] + public string OriginalNameData { get; set; } = string.Empty; + + [JsonPropertyName("ignored_data_json_name")] + [VectorStoreRecordData(IsFilterable = true, StoragePropertyName = "data_storage_name")] public string Data { get; set; } = string.Empty; - [VectorStoreRecordVector(4)] + [JsonPropertyName("ignored_vector_json_name")] + [VectorStoreRecordVector(4, StoragePropertyName = "vector_storage_name")] public ReadOnlyMemory? Vector { get; set; } public string? NotAnnotated { get; set; } diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs index 4e977163b900..d9d1a6616091 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.Linq; using System.Text.Json.Nodes; +using System.Text.Json.Serialization; using System.Threading.Tasks; using Microsoft.SemanticKernel.Data; using Moq; @@ -75,7 +76,34 @@ public async Task CanCreateCollectionAsync() await sut.CreateCollectionAsync(); // Assert. 
- var expectedArgs = new object[] { "testcollection", "ON", "JSON", "PREFIX", 1, "testcollection:", "SCHEMA", "$.Vector", "AS", "Vector", "VECTOR", "HNSW", 6, "TYPE", "FLOAT32", "DIM", "4", "DISTANCE_METRIC", "COSINE" }; + var expectedArgs = new object[] { + "testcollection", + "ON", + "JSON", + "PREFIX", + 1, + "testcollection:", + "SCHEMA", + "$.OriginalNameData", + "AS", + "OriginalNameData", + "TEXT", + "$.data_json_name", + "AS", + "data_json_name", + "TEXT", + "$.vector_json_name", + "AS", + "vector_json_name", + "VECTOR", + "HNSW", + 6, + "TYPE", + "FLOAT32", + "DIM", + "4", + "DISTANCE_METRIC", + "COSINE" }; this._redisDatabaseMock .Verify( x => x.ExecuteAsync( @@ -110,7 +138,7 @@ public async Task CanDeleteCollectionAsync() public async Task CanGetRecordWithVectorsAsync(bool useDefinition) { // Arrange - var redisResultString = """{ "Data": "data 1", "Vector": [1, 2, 3, 4] }"""; + var redisResultString = """{ "OriginalNameData": "data 1", "data_json_name": "data 1", "vector_json_name": [1, 2, 3, 4] }"""; SetupExecuteMock(this._redisDatabaseMock, redisResultString); var sut = this.CreateRecordCollection(useDefinition); @@ -130,6 +158,7 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition) Assert.NotNull(actual); Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.OriginalNameData); Assert.Equal("data 1", actual.Data); Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); } @@ -140,7 +169,7 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition) public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) { // Arrange - var redisResultString = """{ "Data": "data 1" }"""; + var redisResultString = """{ "OriginalNameData": "data 1", "data_json_name": "data 1" }"""; SetupExecuteMock(this._redisDatabaseMock, redisResultString); var sut = this.CreateRecordCollection(useDefinition); @@ -150,7 +179,7 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) new() { 
IncludeVectors = false }); // Assert - var expectedArgs = new object[] { TestRecordKey1, "Data" }; + var expectedArgs = new object[] { TestRecordKey1, "OriginalNameData", "data_json_name" }; this._redisDatabaseMock .Verify( x => x.ExecuteAsync( @@ -160,6 +189,7 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) Assert.NotNull(actual); Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.OriginalNameData); Assert.Equal("data 1", actual.Data); Assert.False(actual.Vector.HasValue); } @@ -170,8 +200,8 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) { // Arrange - var redisResultString1 = """{ "Data": "data 1", "Vector": [1, 2, 3, 4] }"""; - var redisResultString2 = """{ "Data": "data 2", "Vector": [5, 6, 7, 8] }"""; + var redisResultString1 = """{ "OriginalNameData": "data 1", "data_json_name": "data 1", "vector_json_name": [1, 2, 3, 4] }"""; + var redisResultString2 = """{ "OriginalNameData": "data 2", "data_json_name": "data 2", "vector_json_name": [5, 6, 7, 8] }"""; SetupExecuteMock(this._redisDatabaseMock, [redisResultString1, redisResultString2]); var sut = this.CreateRecordCollection(useDefinition); @@ -192,16 +222,20 @@ public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) Assert.NotNull(actual); Assert.Equal(2, actual.Count); Assert.Equal(TestRecordKey1, actual[0].Key); + Assert.Equal("data 1", actual[0].OriginalNameData); Assert.Equal("data 1", actual[0].Data); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual[0].Vector!.Value.ToArray()); Assert.Equal(TestRecordKey2, actual[1].Key); + Assert.Equal("data 2", actual[1].OriginalNameData); Assert.Equal("data 2", actual[1].Data); + Assert.Equal(new float[] { 5, 6, 7, 8 }, actual[1].Vector!.Value.ToArray()); } [Fact] public async Task CanGetRecordWithCustomMapperAsync() { // Arrange. 
- var redisResultString = """{ "Data": "data 1", "Vector": [1, 2, 3, 4] }"""; + var redisResultString = """{ "OriginalNameData": "data 1", "data_json_name": "data 1", "vector_json_name": [1, 2, 3, 4] }"""; SetupExecuteMock(this._redisDatabaseMock, redisResultString); // Arrange mapper mock from JsonNode to data model. @@ -229,6 +263,7 @@ public async Task CanGetRecordWithCustomMapperAsync() // Assert Assert.NotNull(actual); Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.OriginalNameData); Assert.Equal("data 1", actual.Data); Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); @@ -306,7 +341,7 @@ public async Task CanUpsertRecordAsync(bool useDefinition) // Assert // TODO: Fix issue where NotAnnotated is being included in the JSON. - var expectedArgs = new object[] { TestRecordKey1, "$", """{"Data":"data 1","Vector":[1,2,3,4],"NotAnnotated":null}""" }; + var expectedArgs = new object[] { TestRecordKey1, "$", """{"OriginalNameData":"data 1","data_json_name":"data 1","vector_json_name":[1,2,3,4],"NotAnnotated":null}""" }; this._redisDatabaseMock .Verify( x => x.ExecuteAsync( @@ -337,7 +372,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition) Assert.Equal(TestRecordKey2, actual[1]); // TODO: Fix issue where NotAnnotated is being included in the JSON. 
- var expectedArgs = new object[] { TestRecordKey1, "$", """{"Data":"data 1","Vector":[1,2,3,4],"NotAnnotated":null}""", TestRecordKey2, "$", """{"Data":"data 1","Vector":[1,2,3,4],"NotAnnotated":null}""" }; + var expectedArgs = new object[] { TestRecordKey1, "$", """{"OriginalNameData":"data 1","data_json_name":"data 1","vector_json_name":[1,2,3,4],"NotAnnotated":null}""", TestRecordKey2, "$", """{"OriginalNameData":"data 1","data_json_name":"data 1","vector_json_name":[1,2,3,4],"NotAnnotated":null}""" }; this._redisDatabaseMock .Verify( x => x.ExecuteAsync( @@ -354,7 +389,7 @@ public async Task CanUpsertRecordWithCustomMapperAsync() // Arrange mapper mock from data model to JsonNode. var mapperMock = new Mock>(MockBehavior.Strict); - var jsonNode = """{"Data":"data 1","Vector":[1,2,3,4],"NotAnnotated":null}"""; + var jsonNode = """{"OriginalNameData": "data 1", "data_json_name":"data 1","vector_json_name":[1,2,3,4],"NotAnnotated":null}"""; mapperMock .Setup(x => x.MapFromDataToStorageModel(It.IsAny())) .Returns((TestRecordKey1, JsonNode.Parse(jsonNode)!)); @@ -422,6 +457,10 @@ private static void SetupExecuteMock(Mock redisDatabaseMock, string r x => x.ExecuteAsync( It.IsAny(), It.IsAny())) + .Callback((string command, object[] args) => + { + Console.WriteLine(args); + }) .ReturnsAsync(RedisResult.Create(new RedisValue(redisResultString))); } @@ -430,6 +469,7 @@ private static SinglePropsModel CreateModel(string key, bool withVectors) return new SinglePropsModel { Key = key, + OriginalNameData = "data 1", Data = "data 1", Vector = withVectors ? 
new float[] { 1, 2, 3, 4 } : null, NotAnnotated = null, @@ -441,7 +481,8 @@ private static SinglePropsModel CreateModel(string key, bool withVectors) Properties = [ new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data"), + new VectorStoreRecordDataProperty("OriginalNameData"), + new VectorStoreRecordDataProperty("Data") { StoragePropertyName = "ignored_data_json_name" }, new VectorStoreRecordVectorProperty("Vector") ] }; @@ -451,10 +492,15 @@ public sealed class SinglePropsModel [VectorStoreRecordKey] public string Key { get; set; } = string.Empty; - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] + public string OriginalNameData { get; set; } = string.Empty; + + [JsonPropertyName("data_json_name")] + [VectorStoreRecordData(IsFilterable = true, StoragePropertyName = "ignored_data_json_name")] public string Data { get; set; } = string.Empty; - [VectorStoreRecordVector(4)] + [JsonPropertyName("vector_json_name")] + [VectorStoreRecordVector(4, StoragePropertyName = "ignored_vector_storage_name")] public ReadOnlyMemory? Vector { get; set; } public string? 
NotAnnotated { get; set; } diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs index 277a8f57a983..f487656b43e0 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs @@ -32,8 +32,18 @@ public void MapToSchemaCreatesSchema() new VectorStoreRecordVectorProperty("VectorSpecificIndexingOptions") { Dimensions = 20, IndexKind = IndexKind.Flat, DistanceFunction = DistanceFunction.EuclideanDistance }, }; + var storagePropertyNames = new Dictionary() + { + { "FilterableString", "FilterableString" }, + { "FilterableInt", "FilterableInt" }, + { "FilterableNullableInt", "FilterableNullableInt" }, + { "NonFilterableString", "NonFilterableString" }, + { "VectorDefaultIndexingOptions", "VectorDefaultIndexingOptions" }, + { "VectorSpecificIndexingOptions", "vector_specific_indexing_options" }, + }; + // Act. - var schema = RedisVectorStoreCollectionCreateMapping.MapToSchema(properties); + var schema = RedisVectorStoreCollectionCreateMapping.MapToSchema(properties, storagePropertyNames); // Assert. 
Assert.NotNull(schema); @@ -50,7 +60,7 @@ public void MapToSchemaCreatesSchema() VerifyFieldName(schema.Fields[2].FieldName, new List { "$.FilterableNullableInt", "AS", "FilterableNullableInt" }); VerifyFieldName(schema.Fields[3].FieldName, new List { "$.VectorDefaultIndexingOptions", "AS", "VectorDefaultIndexingOptions" }); - VerifyFieldName(schema.Fields[4].FieldName, new List { "$.VectorSpecificIndexingOptions", "AS", "VectorSpecificIndexingOptions" }); + VerifyFieldName(schema.Fields[4].FieldName, new List { "$.vector_specific_indexing_options", "AS", "vector_specific_indexing_options" }); Assert.Equal("10", ((VectorField)schema.Fields[3]).Attributes!["DIM"]); Assert.Equal("FLOAT32", ((VectorField)schema.Fields[3]).Attributes!["TYPE"]); @@ -66,9 +76,10 @@ public void MapToSchemaThrowsOnMissingPropertyType() { // Arrange. var properties = new VectorStoreRecordProperty[] { new VectorStoreRecordDataProperty("FilterableString") { IsFilterable = true } }; + var storagePropertyNames = new Dictionary() { { "FilterableString", "FilterableString" } }; // Act and assert. - Assert.Throws(() => RedisVectorStoreCollectionCreateMapping.MapToSchema(properties)); + Assert.Throws(() => RedisVectorStoreCollectionCreateMapping.MapToSchema(properties, storagePropertyNames)); } [Theory] @@ -78,9 +89,10 @@ public void MapToSchemaThrowsOnInvalidVectorDimensions(int? dimensions) { // Arrange. var properties = new VectorStoreRecordProperty[] { new VectorStoreRecordVectorProperty("VectorProperty") { Dimensions = dimensions } }; + var storagePropertyNames = new Dictionary() { { "VectorProperty", "VectorProperty" } }; // Act and assert. 
- Assert.Throws(() => RedisVectorStoreCollectionCreateMapping.MapToSchema(properties)); + Assert.Throws(() => RedisVectorStoreCollectionCreateMapping.MapToSchema(properties, storagePropertyNames)); } [Fact] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs index 9ac37d122285..7f810dc87fbd 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -43,9 +43,10 @@ public async Task CollectionExistsReturnsCollectionStateAsync(bool expectedExist [Theory(Skip = SkipReason)] [InlineData(true)] [InlineData(false)] - public async Task ItCanCreateACollectionAsync(bool useRecordDefinition) + public async Task ItCanCreateACollectionUpsertAndGetAsync(bool useRecordDefinition) { // Arrange + var hotel = CreateTestHotel("Upsert-1"); var testCollectionName = $"{fixture.TestIndexName}-createtest"; var options = new AzureAISearchVectorStoreRecordCollectionOptions { @@ -57,14 +58,31 @@ public async Task ItCanCreateACollectionAsync(bool useRecordDefinition) // Act await sut.CreateCollectionAsync(); + var upsertResult = await sut.UpsertAsync(hotel); + var getResult = await sut.GetAsync("Upsert-1"); // Assert - var existResult = await sut.CollectionExistsAsync(); - Assert.True(existResult); + var collectionExistResult = await sut.CollectionExistsAsync(); + Assert.True(collectionExistResult); await sut.DeleteCollectionAsync(); + Assert.NotNull(upsertResult); + Assert.Equal("Upsert-1", upsertResult); + + Assert.NotNull(getResult); + Assert.Equal(hotel.HotelName, getResult.HotelName); + Assert.Equal(hotel.Description, getResult.Description); + Assert.NotNull(getResult.DescriptionEmbedding); + 
Assert.Equal(hotel.DescriptionEmbedding?.ToArray(), getResult.DescriptionEmbedding?.ToArray()); + Assert.Equal(hotel.Tags, getResult.Tags); + Assert.Equal(hotel.ParkingIncluded, getResult.ParkingIncluded); + Assert.Equal(hotel.LastRenovationDate, getResult.LastRenovationDate); + Assert.Equal(hotel.Rating, getResult.Rating); + // Output - output.WriteLine(existResult.ToString()); + output.WriteLine(collectionExistResult.ToString()); + output.WriteLine(upsertResult); + output.WriteLine(getResult.ToString()); } [Fact(Skip = SkipReason)] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs index 906d9b41b957..7e2e9b1f7d78 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs @@ -41,7 +41,7 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN [InlineData(true, false)] [InlineData(false, true)] [InlineData(false, false)] - public async Task ItCanCreateACollectionAsync(bool hasNamedVectors, bool useRecordDefinition) + public async Task ItCanCreateACollectionUpsertAndGetAsync(bool hasNamedVectors, bool useRecordDefinition) { // Arrange var collectionNamePostfix1 = useRecordDefinition ? 
"WithDefinition" : "WithType"; @@ -55,16 +55,31 @@ public async Task ItCanCreateACollectionAsync(bool hasNamedVectors, bool useReco }; var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, testCollectionName, options); + var record = this.CreateTestHotel(30); + // Act await sut.CreateCollectionAsync(); + var upsertResult = await sut.UpsertAsync(record); + var getResult = await sut.GetAsync(30, new GetRecordOptions { IncludeVectors = true }); // Assert - var existResult = await sut.CollectionExistsAsync(); - Assert.True(existResult); + var collectionExistResult = await sut.CollectionExistsAsync(); + Assert.True(collectionExistResult); await sut.DeleteCollectionAsync(); + Assert.Equal(30ul, upsertResult); + Assert.Equal(record.HotelId, getResult?.HotelId); + Assert.Equal(record.HotelName, getResult?.HotelName); + Assert.Equal(record.HotelCode, getResult?.HotelCode); + Assert.Equal(record.HotelRating, getResult?.HotelRating); + Assert.Equal(record.ParkingIncluded, getResult?.ParkingIncluded); + Assert.Equal(record.Tags.ToArray(), getResult?.Tags.ToArray()); + Assert.Equal(record.Description, getResult?.Description); + // Output - output.WriteLine(existResult.ToString()); + output.WriteLine(collectionExistResult.ToString()); + output.WriteLine(upsertResult.ToString(CultureInfo.InvariantCulture)); + output.WriteLine(getResult?.ToString()); } [Fact] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs index 938bbefd119d..20885ea076de 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs @@ -40,9 +40,10 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN [Theory] [InlineData(true)] [InlineData(false)] - public async 
Task ItCanCreateACollectionAsync(bool useRecordDefinition) + public async Task ItCanCreateACollectionUpsertAndGetAsync(bool useRecordDefinition) { // Arrange + var record = CreateTestHotel("Upsert-1", 1); var collectionNamePostfix = useRecordDefinition ? "WithDefinition" : "WithType"; var testCollectionName = $"createtest{collectionNamePostfix}"; @@ -55,14 +56,31 @@ public async Task ItCanCreateACollectionAsync(bool useRecordDefinition) // Act await sut.CreateCollectionAsync(); + var upsertResult = await sut.UpsertAsync(record); + var getResult = await sut.GetAsync("Upsert-1", new GetRecordOptions { IncludeVectors = true }); // Assert - var existResult = await sut.CollectionExistsAsync(); - Assert.True(existResult); + var collectionExistResult = await sut.CollectionExistsAsync(); + Assert.True(collectionExistResult); await sut.DeleteCollectionAsync(); + Assert.Equal("Upsert-1", upsertResult); + Assert.Equal(record.HotelId, getResult?.HotelId); + Assert.Equal(record.HotelName, getResult?.HotelName); + Assert.Equal(record.HotelCode, getResult?.HotelCode); + Assert.Equal(record.Tags, getResult?.Tags); + Assert.Equal(record.ParkingIncluded, getResult?.ParkingIncluded); + Assert.Equal(record.LastRenovationDate, getResult?.LastRenovationDate); + Assert.Equal(record.Rating, getResult?.Rating); + Assert.Equal(record.Address.Country, getResult?.Address.Country); + Assert.Equal(record.Address.City, getResult?.Address.City); + Assert.Equal(record.Description, getResult?.Description); + Assert.Equal(record.DescriptionEmbedding?.ToArray(), getResult?.DescriptionEmbedding?.ToArray()); + // Output - output.WriteLine(existResult.ToString()); + output.WriteLine(collectionExistResult.ToString()); + output.WriteLine(upsertResult); + output.WriteLine(getResult?.ToString()); } [Fact] @@ -97,14 +115,14 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null }; var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); - Hotel record = CreateTestHotel("Upsert-1", 1); + Hotel record = CreateTestHotel("Upsert-2", 2); // Act. var upsertResult = await sut.UpsertAsync(record); // Assert. - var getResult = await sut.GetAsync("Upsert-1", new GetRecordOptions { IncludeVectors = true }); - Assert.Equal("Upsert-1", upsertResult); + var getResult = await sut.GetAsync("Upsert-2", new GetRecordOptions { IncludeVectors = true }); + Assert.Equal("Upsert-2", upsertResult); Assert.Equal(record.HotelId, getResult?.HotelId); Assert.Equal(record.HotelName, getResult?.HotelName); Assert.Equal(record.HotelCode, getResult?.HotelCode); From 92220f2f2dd63cd514b89753089d9877612c1429 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 25 Jul 2024 14:48:41 +0100 Subject: [PATCH 34/48] .Net: Add support for redis hashsets. (#7445) ### Motivation and Context Redis supports two modes when storing vectors, one with hash sets and one with JSON. Both the commands used and the way in which data is communicated to redis are different between the two modes, so we have a different collection implementation and a different mapper implementation. ### Description This PR adds support for also using the HashSets mode in Redis, in addition to the JSON mode that is already supported, with unit tests and integration tests.
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...RedisHashSetVectorStoreRecordCollection.cs | 384 +++++++++++++ ...shSetVectorStoreRecordCollectionOptions.cs | 38 ++ .../RedisHashSetVectorStoreRecordMapper.cs | 173 ++++++ .../RedisStorageType.cs | 19 + .../RedisVectorStore.cs | 12 +- .../RedisVectorStoreOptions.cs | 5 + ...HashSetVectorStoreRecordCollectionTests.cs | 508 ++++++++++++++++++ ...edisHashSetVectorStoreRecordMapperTests.cs | 246 +++++++++ ...disJsonVectorStoreRecordCollectionTests.cs | 4 +- .../RedisVectorStoreTests.cs | 16 +- ...HashSetVectorStoreRecordCollectionTests.cs | 340 ++++++++++++ ...disJsonVectorStoreRecordCollectionTests.cs | 24 +- .../Memory/Redis/RedisVectorStoreFixture.cs | 114 +++- .../Memory/Redis/RedisVectorStoreTests.cs | 5 +- 14 files changed, 1856 insertions(+), 32 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollectionOptions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordMapper.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Redis/RedisStorageType.cs create mode 100644 dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs create mode 100644 dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordMapperTests.cs create mode 100644 
dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisHashSetVectorStoreRecordCollectionTests.cs diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs new file mode 100644 index 000000000000..ea7d15c38e60 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs @@ -0,0 +1,384 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Data; +using NRedisStack.RedisStackCommands; +using NRedisStack.Search; +using NRedisStack.Search.Literals.Enums; +using StackExchange.Redis; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Service for storing and retrieving vector records, that uses Redis HashSets as the underlying storage. +/// +/// The data model to use for adding, updating and retrieving data from storage. +#pragma warning disable CA1711 // Identifiers should not have incorrect suffix +public sealed class RedisHashSetVectorStoreRecordCollection : IVectorStoreRecordCollection +#pragma warning restore CA1711 // Identifiers should not have incorrect suffix + where TRecord : class +{ + /// The name of this database for telemetry purposes. + private const string DatabaseName = "Redis"; + + /// A set of types that a key on the provided model may have. + private static readonly HashSet s_supportedKeyTypes = + [ + typeof(string) + ]; + + /// A set of types that data properties on the provided model may have. 
+ private static readonly HashSet s_supportedDataTypes = + [ + typeof(string), + typeof(int), + typeof(uint), + typeof(long), + typeof(ulong), + typeof(double), + typeof(float), + typeof(bool), + typeof(int?), + typeof(uint?), + typeof(long?), + typeof(ulong?), + typeof(double?), + typeof(float?), + typeof(bool?) + ]; + + /// A set of types that vectors on the provided model may have. + private static readonly HashSet s_supportedVectorTypes = + [ + typeof(ReadOnlyMemory), + typeof(ReadOnlyMemory), + typeof(ReadOnlyMemory?), + typeof(ReadOnlyMemory?) + ]; + + /// The Redis database to read/write records from. + private readonly IDatabase _database; + + /// The name of the collection that this will access. + private readonly string _collectionName; + + /// Optional configuration options for this class. + private readonly RedisHashSetVectorStoreRecordCollectionOptions _options; + + /// A definition of the current storage model. + private readonly VectorStoreRecordDefinition _vectorStoreRecordDefinition; + + /// An array of the names of all the data properties that are part of the Redis payload, i.e. all properties except the key and vector properties. + private readonly RedisValue[] _dataStoragePropertyNames; + + /// A dictionary that maps from a property name to the storage name that should be used when serializing it to json for data and vector properties. + private readonly Dictionary _storagePropertyNames = new(); + + /// The mapper to use when mapping between the consumer data model and the Redis record. + private readonly IVectorStoreRecordMapper _mapper; + + /// + /// Initializes a new instance of the class. + /// + /// The Redis database to read/write records from. + /// The name of the collection that this will access. + /// Optional configuration options for this class. + /// Throw when parameters are invalid. + public RedisHashSetVectorStoreRecordCollection(IDatabase database, string collectionName, RedisHashSetVectorStoreRecordCollectionOptions? 
options = null) + { + // Verify. + Verify.NotNull(database); + Verify.NotNullOrWhiteSpace(collectionName); + + // Assign. + this._database = database; + this._collectionName = collectionName; + this._options = options ?? new RedisHashSetVectorStoreRecordCollectionOptions(); + this._vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true); + + // Enumerate public properties using configuration or attributes. + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; + if (this._options.VectorStoreRecordDefinition is not null) + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: true); + } + else + { + properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: true); + } + + // Validate property types and store for later use. + VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); + + this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties, this._options.VectorStoreRecordDefinition); + this._dataStoragePropertyNames = properties + .dataProperties + .Select(x => this._storagePropertyNames[x.Name]) + .Select(RedisValue.Unbox) + .ToArray(); + + // Assign Mapper. 
+ if (this._options.HashEntriesCustomMapper is not null) + { + this._mapper = this._options.HashEntriesCustomMapper; + } + else + { + this._mapper = new RedisHashSetVectorStoreRecordMapper(properties.keyProperty, properties.dataProperties, properties.vectorProperties, this._storagePropertyNames); + } + } + + /// + public string CollectionName => this._collectionName; + + /// + public async Task CollectionExistsAsync(CancellationToken cancellationToken = default) + { + try + { + await this._database.FT().InfoAsync(this._collectionName).ConfigureAwait(false); + return true; + } + catch (RedisServerException ex) when (ex.Message.Contains("Unknown index name")) + { + return false; + } + catch (RedisConnectionException ex) + { + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + CollectionName = this._collectionName, + OperationName = "FT.INFO" + }; + } + } + + /// + public Task CreateCollectionAsync(CancellationToken cancellationToken = default) + { + // Map the record definition to a schema. + var schema = RedisVectorStoreCollectionCreateMapping.MapToSchema(this._vectorStoreRecordDefinition.Properties, this._storagePropertyNames); + + // Create the index creation params. + // Add the collection name and colon as the index prefix, which means that any record where the key is prefixed with this text will be indexed by this index + var createParams = new FTCreateParams() + .AddPrefix($"{this._collectionName}:") + .On(IndexDataType.HASH); + + // Create the index. 
+ return this.RunOperationAsync("FT.CREATE", () => this._database.FT().CreateAsync(this._collectionName, createParams, schema)); + } + + /// + public async Task CreateCollectionIfNotExistsAsync(CancellationToken cancellationToken = default) + { + if (!await this.CollectionExistsAsync(cancellationToken).ConfigureAwait(false)) + { + await this.CreateCollectionAsync(cancellationToken).ConfigureAwait(false); + } + } + + /// + public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) + { + return this.RunOperationAsync("FT.DROPINDEX", () => this._database.FT().DropIndexAsync(this._collectionName)); + } + + /// + public async Task GetAsync(string key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + { + Verify.NotNullOrWhiteSpace(key); + + // Create Options + var maybePrefixedKey = this.PrefixKeyIfNeeded(key); + var includeVectors = options?.IncludeVectors ?? false; + var operationName = includeVectors ? "HGETALL" : "HMGET"; + + // Get the Redis value. + HashEntry[] retrievedHashEntries; + if (includeVectors) + { + retrievedHashEntries = await this.RunOperationAsync( + operationName, + () => this._database.HashGetAllAsync(maybePrefixedKey)).ConfigureAwait(false); + } + else + { + var fieldKeys = this._dataStoragePropertyNames; + var retrievedValues = await this.RunOperationAsync( + operationName, + () => this._database.HashGetAsync(maybePrefixedKey, fieldKeys)).ConfigureAwait(false); + retrievedHashEntries = fieldKeys.Zip(retrievedValues, (field, value) => new HashEntry(field, value)).Where(x => x.Value.HasValue).ToArray(); + } + + // Return null if we found nothing. + if (retrievedHashEntries == null || retrievedHashEntries.Length == 0) + { + return null; + } + + // Convert to the caller's data model. 
+ return VectorStoreErrorHandler.RunModelConversion( + DatabaseName, + this._collectionName, + operationName, + () => + { + return this._mapper.MapFromStorageToDataModel((key, retrievedHashEntries), new() { IncludeVectors = includeVectors }); + }); + } + + /// + public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + Verify.NotNull(keys); + + // Get records in parallel. + var tasks = keys.Select(x => this.GetAsync(x, options, cancellationToken)); + var results = await Task.WhenAll(tasks).ConfigureAwait(false); + foreach (var result in results) + { + if (result is not null) + { + yield return result; + } + } + } + + /// + public Task DeleteAsync(string key, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNullOrWhiteSpace(key); + + // Create Options + var maybePrefixedKey = this.PrefixKeyIfNeeded(key); + + // Remove. + return this.RunOperationAsync( + "DEL", + () => this._database + .KeyDeleteAsync(maybePrefixedKey)); + } + + /// + public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(keys); + + // Remove records in parallel. + var tasks = keys.Select(key => this.DeleteAsync(key, options, cancellationToken)); + return Task.WhenAll(tasks); + } + + /// + public async Task UpsertAsync(TRecord record, UpsertRecordOptions? options = default, CancellationToken cancellationToken = default) + { + Verify.NotNull(record); + + // Map. + var redisHashSetRecord = VectorStoreErrorHandler.RunModelConversion( + DatabaseName, + this._collectionName, + "HSET", + () => this._mapper.MapFromDataToStorageModel(record)); + + // Upsert. 
+ var maybePrefixedKey = this.PrefixKeyIfNeeded(redisHashSetRecord.Key); + await this.RunOperationAsync( + "HSET", + () => this._database + .HashSetAsync( + maybePrefixedKey, + redisHashSetRecord.HashEntries)).ConfigureAwait(false); + + return redisHashSetRecord.Key; + } + + /// + public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = default, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + Verify.NotNull(records); + + // Upsert records in parallel. + var tasks = records.Select(x => this.UpsertAsync(x, options, cancellationToken)); + var results = await Task.WhenAll(tasks).ConfigureAwait(false); + foreach (var result in results) + { + if (result is not null) + { + yield return result; + } + } + } + + /// + /// Prefix the key with the collection name if the option is set. + /// + /// The key to prefix. + /// The updated key if updating is required, otherwise the input key. + private string PrefixKeyIfNeeded(string key) + { + if (this._options.PrefixCollectionNameToKeyNames) + { + return $"{this._collectionName}:{key}"; + } + + return key; + } + + /// + /// Run the given operation and wrap any Redis exceptions with VectorStoreOperationException. + /// + /// The response type of the operation. + /// The type of database operation being run. + /// The operation to run. + /// The result of the operation. + private async Task RunOperationAsync(string operationName, Func> operation) + { + try + { + return await operation.Invoke().ConfigureAwait(false); + } + catch (RedisConnectionException ex) + { + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + CollectionName = this._collectionName, + OperationName = operationName + }; + } + } + + /// + /// Run the given operation and wrap any Redis exceptions with VectorStoreOperationException. + /// + /// The type of database operation being run. + /// The operation to run. + /// The result of the operation.
+ private async Task RunOperationAsync(string operationName, Func operation) + { + try + { + await operation.Invoke().ConfigureAwait(false); + } + catch (RedisConnectionException ex) + { + throw new VectorStoreOperationException("Call to vector store failed.", ex) + { + VectorStoreType = DatabaseName, + CollectionName = this._collectionName, + OperationName = operationName + }; + } + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollectionOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollectionOptions.cs new file mode 100644 index 000000000000..7e17859ae0c9 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollectionOptions.cs @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel.Data; +using StackExchange.Redis; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Options when creating a . +/// +public sealed class RedisHashSetVectorStoreRecordCollectionOptions + where TRecord : class +{ + /// + /// Gets or sets a value indicating whether the collection name should be prefixed to the + /// key names before reading or writing to the Redis store. Default is false. + /// + /// + /// For a record to be indexed by a specific Redis index, the key name must be prefixed with the matching prefix configured on the Redis index. + /// You can either pass in keys that are already prefixed, or set this option to true to have the collection name prefixed to the key names automatically. + /// + public bool PrefixCollectionNameToKeyNames { get; init; } = false; + + /// + /// Gets or sets an optional custom mapper to use when converting between the data model and the Redis record. + /// + public IVectorStoreRecordMapper? HashEntriesCustomMapper { get; init; } = null; + + /// + /// Gets or sets an optional record definition that defines the schema of the record type. 
+ /// + /// + /// If not provided, the schema will be inferred from the record model class using reflection. + /// In this case, the record model properties must be annotated with the appropriate attributes to indicate their usage. + /// See , and . + /// + public VectorStoreRecordDefinition? VectorStoreRecordDefinition { get; init; } = null; +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordMapper.cs new file mode 100644 index 000000000000..60b0700edb99 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordMapper.cs @@ -0,0 +1,173 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Runtime.InteropServices; +using System.Text.Json; +using System.Text.Json.Nodes; +using Microsoft.SemanticKernel.Data; +using StackExchange.Redis; + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Class for mapping between a hashset stored in redis, and the consumer data model. +/// +/// The consumer data model to map to or from. +internal sealed class RedisHashSetVectorStoreRecordMapper : IVectorStoreRecordMapper + where TConsumerDataModel : class +{ + /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. + private readonly PropertyInfo _keyPropertyInfo; + + /// The name of the temporary json property that the key field will be serialized / parsed from. + private readonly string _keyFieldJsonPropertyName; + + /// A list of property info objects that point at the data properties in the current model, and allows easy reading and writing of these properties. 
+ private readonly IEnumerable _dataPropertiesInfo; + + /// A list of property info objects that point at the vector properties in the current model, and allows easy reading and writing of these properties. + private readonly IEnumerable _vectorPropertiesInfo; + + /// A dictionary that maps from a property name to the configured name that should be used when storing it. + private readonly Dictionary _storagePropertyNames; + + /// A dictionary that maps from a property name to the configured name that should be used when serializing it to json for data and vector properties. + private readonly Dictionary _jsonPropertyNames = new(); + + /// + /// Initializes a new instance of the class. + /// + /// The property info object that points at the key property for the current model. + /// The property info objects that point at the payload properties in the current model. + /// The property info objects that point at the vector properties in the current model. + /// A dictionary that maps from a property name to the configured name that should be used when storing it. 
+ public RedisHashSetVectorStoreRecordMapper( + PropertyInfo keyPropertyInfo, + IEnumerable dataPropertiesInfo, + IEnumerable vectorPropertiesInfo, + Dictionary storagePropertyNames) + { + Verify.NotNull(keyPropertyInfo); + Verify.NotNull(dataPropertiesInfo); + Verify.NotNull(vectorPropertiesInfo); + Verify.NotNull(storagePropertyNames); + + this._keyPropertyInfo = keyPropertyInfo; + this._dataPropertiesInfo = dataPropertiesInfo; + this._vectorPropertiesInfo = vectorPropertiesInfo; + this._storagePropertyNames = storagePropertyNames; + + this._keyFieldJsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(JsonSerializerOptions.Default, keyPropertyInfo); + foreach (var property in dataPropertiesInfo.Concat(vectorPropertiesInfo)) + { + this._jsonPropertyNames[property.Name] = VectorStoreRecordPropertyReader.GetJsonPropertyName(JsonSerializerOptions.Default, property); + } + } + + /// + public (string Key, HashEntry[] HashEntries) MapFromDataToStorageModel(TConsumerDataModel dataModel) + { + var keyValue = this._keyPropertyInfo.GetValue(dataModel) as string ?? throw new VectorStoreRecordMappingException($"Missing key property {this._keyPropertyInfo.Name} on provided record of type {typeof(TConsumerDataModel).FullName}."); + + var hashEntries = new List(); + foreach (var property in this._dataPropertiesInfo) + { + var storageName = this._storagePropertyNames[property.Name]; + var value = property.GetValue(dataModel); + hashEntries.Add(new HashEntry(storageName, RedisValue.Unbox(value))); + } + + foreach (var property in this._vectorPropertiesInfo) + { + var storageName = this._storagePropertyNames[property.Name]; + var value = property.GetValue(dataModel); + if (value is not null) + { + // Convert the vector to a byte array and store it in the hash entry. + // We only support float and double vectors and we do checking in the + // collection constructor to ensure that the model has no other vector types. 
+ if (value is ReadOnlyMemory rom) + { + hashEntries.Add(new HashEntry(storageName, ConvertVectorToBytes(rom))); + } + else if (value is ReadOnlyMemory rod) + { + hashEntries.Add(new HashEntry(storageName, ConvertVectorToBytes(rod))); + } + } + } + + return (keyValue, hashEntries.ToArray()); + } + + /// + public TConsumerDataModel MapFromStorageToDataModel((string Key, HashEntry[] HashEntries) storageModel, StorageToDataModelMapperOptions options) + { + var jsonObject = new JsonObject(); + + foreach (var property in this._dataPropertiesInfo) + { + var storageName = this._storagePropertyNames[property.Name]; + var jsonName = this._jsonPropertyNames[property.Name]; + var hashEntry = storageModel.HashEntries.FirstOrDefault(x => x.Name == storageName); + if (hashEntry.Name.HasValue) + { + var typeOrNullableType = Nullable.GetUnderlyingType(property.PropertyType) ?? property.PropertyType; + var convertedValue = Convert.ChangeType(hashEntry.Value, typeOrNullableType); + jsonObject.Add(jsonName, JsonValue.Create(convertedValue)); + } + } + + if (options.IncludeVectors) + { + foreach (var property in this._vectorPropertiesInfo) + { + var storageName = this._storagePropertyNames[property.Name]; + var jsonName = this._jsonPropertyNames[property.Name]; + + var hashEntry = storageModel.HashEntries.FirstOrDefault(x => x.Name == storageName); + if (hashEntry.Name.HasValue) + { + if (property.PropertyType == typeof(ReadOnlyMemory) || property.PropertyType == typeof(ReadOnlyMemory?)) + { + var array = MemoryMarshal.Cast((byte[])hashEntry.Value!).ToArray(); + jsonObject.Add(jsonName, JsonValue.Create(array)); + } + else if (property.PropertyType == typeof(ReadOnlyMemory) || property.PropertyType == typeof(ReadOnlyMemory?)) + { + var array = MemoryMarshal.Cast((byte[])hashEntry.Value!).ToArray(); + jsonObject.Add(jsonName, JsonValue.Create(array)); + } + else + { + throw new VectorStoreRecordMappingException($"Invalid vector type '{property.PropertyType.Name}' found on property 
'{property.Name}' on provided record of type '{typeof(TConsumerDataModel).FullName}'. Only float and double vectors are supported."); + } + } + } + } + + // Check that the key field is not already present in the redis value. + if (jsonObject.ContainsKey(this._keyFieldJsonPropertyName)) + { + throw new VectorStoreRecordMappingException($"Invalid data format for document with key '{storageModel.Key}'. Key property '{this._keyFieldJsonPropertyName}' is already present on retrieved object."); + } + + // Since the key is not stored in the redis value, add it back in before deserializing into the data model. + jsonObject.Add(this._keyFieldJsonPropertyName, storageModel.Key); + + return JsonSerializer.Deserialize(jsonObject)!; + } + + private static byte[] ConvertVectorToBytes(ReadOnlyMemory vector) + { + return MemoryMarshal.AsBytes(vector.Span).ToArray(); + } + + private static byte[] ConvertVectorToBytes(ReadOnlyMemory vector) + { + return MemoryMarshal.AsBytes(vector.Span).ToArray(); + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisStorageType.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisStorageType.cs new file mode 100644 index 000000000000..9360fe448998 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisStorageType.cs @@ -0,0 +1,19 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.SemanticKernel.Connectors.Redis; + +/// +/// Indicates the way in which data is stored in redis. +/// +public enum RedisStorageType +{ + /// + /// Data is stored as JSON. + /// + Json, + + /// + /// Data is stored as collections of field-value pairs. 
+ /// + HashSet +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs index 5dfd4aa69d97..98cfc0020dba 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs @@ -53,8 +53,16 @@ public IVectorStoreRecordCollection GetCollection( return this._options.VectorStoreCollectionFactory.CreateVectorStoreRecordCollection(this._database, name, vectorStoreRecordDefinition); } - var directlyCreatedStore = new RedisJsonVectorStoreRecordCollection(this._database, name, new RedisJsonVectorStoreRecordCollectionOptions() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; - return directlyCreatedStore!; + if (this._options.StorageType == RedisStorageType.HashSet) + { + var directlyCreatedStore = new RedisHashSetVectorStoreRecordCollection(this._database, name, new RedisHashSetVectorStoreRecordCollectionOptions() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; + return directlyCreatedStore!; + } + else + { + var directlyCreatedStore = new RedisJsonVectorStoreRecordCollection(this._database, name, new RedisJsonVectorStoreRecordCollectionOptions() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; + return directlyCreatedStore!; + } } /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs index 290aa4399af0..0434b3c633ec 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreOptions.cs @@ -11,4 +11,9 @@ public sealed class RedisVectorStoreOptions /// An optional factory to use for constructing instances, if custom options are required. 
/// public IRedisVectorStoreRecordCollectionFactory? VectorStoreCollectionFactory { get; init; } + + /// + /// Indicates the way in which data should be stored in redis. Default is . + /// + public RedisStorageType? StorageType { get; init; } = RedisStorageType.Json; } diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs new file mode 100644 index 000000000000..152ae5920195 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs @@ -0,0 +1,508 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; +using System.Text.Json.Serialization; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Data; +using Moq; +using NRedisStack; +using StackExchange.Redis; +using Xunit; + +namespace Microsoft.SemanticKernel.Connectors.Redis.UnitTests; + +/// +/// Contains tests for the class. 
+/// +public class RedisHashSetVectorStoreRecordCollectionTests +{ + private const string TestCollectionName = "testcollection"; + private const string TestRecordKey1 = "testid1"; + private const string TestRecordKey2 = "testid2"; + + private readonly Mock _redisDatabaseMock; + + public RedisHashSetVectorStoreRecordCollectionTests() + { + this._redisDatabaseMock = new Mock(MockBehavior.Strict); + + var batchMock = new Mock(); + this._redisDatabaseMock.Setup(x => x.CreateBatch(It.IsAny())).Returns(batchMock.Object); + } + + [Theory] + [InlineData(TestCollectionName, true)] + [InlineData("nonexistentcollection", false)] + public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) + { + // Arrange + if (expectedExists) + { + SetupExecuteMock(this._redisDatabaseMock, ["index_name", collectionName]); + } + else + { + SetupExecuteMock(this._redisDatabaseMock, new RedisServerException("Unknown index name")); + } + var sut = new RedisHashSetVectorStoreRecordCollection( + this._redisDatabaseMock.Object, + collectionName); + + // Act + var actual = await sut.CollectionExistsAsync(); + + // Assert + var expectedArgs = new object[] { collectionName }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "FT.INFO", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + Assert.Equal(expectedExists, actual); + } + + [Fact] + public async Task CanCreateCollectionAsync() + { + // Arrange. + SetupExecuteMock(this._redisDatabaseMock, string.Empty); + var sut = new RedisHashSetVectorStoreRecordCollection(this._redisDatabaseMock.Object, TestCollectionName); + + // Act. + await sut.CreateCollectionAsync(); + + // Assert. 
+ var expectedArgs = new object[] { + "testcollection", + "PREFIX", + 1, + "testcollection:", + "SCHEMA", + "$.OriginalNameData", + "AS", + "OriginalNameData", + "TEXT", + "$.data_storage_name", + "AS", + "data_storage_name", + "TEXT", + "$.vector_storage_name", + "AS", + "vector_storage_name", + "VECTOR", + "HNSW", + 6, + "TYPE", + "FLOAT32", + "DIM", + "4", + "DISTANCE_METRIC", + "COSINE" }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "FT.CREATE", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + } + + [Fact] + public async Task CanDeleteCollectionAsync() + { + // Arrange + SetupExecuteMock(this._redisDatabaseMock, string.Empty); + var sut = this.CreateRecordCollection(false); + + // Act + await sut.DeleteCollectionAsync(); + + // Assert + var expectedArgs = new object[] { TestCollectionName }; + this._redisDatabaseMock + .Verify( + x => x.ExecuteAsync( + "FT.DROPINDEX", + It.Is(x => x.SequenceEqual(expectedArgs))), + Times.Once); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task CanGetRecordWithVectorsAsync(bool useDefinition) + { + // Arrange + var hashEntries = new HashEntry[] + { + new("OriginalNameData", "data 1"), + new("data_storage_name", "data 1"), + new("vector_storage_name", MemoryMarshal.AsBytes(new ReadOnlySpan(new float[] { 1, 2, 3, 4 })).ToArray()) + }; + this._redisDatabaseMock.Setup(x => x.HashGetAllAsync(It.IsAny(), CommandFlags.None)).ReturnsAsync(hashEntries); + var sut = this.CreateRecordCollection(useDefinition); + + // Act + var actual = await sut.GetAsync( + TestRecordKey1, + new() { IncludeVectors = true }); + + // Assert + this._redisDatabaseMock.Verify(x => x.HashGetAllAsync(TestRecordKey1, CommandFlags.None), Times.Once); + + Assert.NotNull(actual); + Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.OriginalNameData); + Assert.Equal("data 1", actual.Data); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + } + + [Theory] + 
[InlineData(true)] + [InlineData(false)] + public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) + { + // Arrange + var redisValues = new RedisValue[] { new("data 1"), new("data 1") }; + this._redisDatabaseMock.Setup(x => x.HashGetAsync(It.IsAny(), It.IsAny(), CommandFlags.None)).ReturnsAsync(redisValues); + var sut = this.CreateRecordCollection(useDefinition); + + // Act + var actual = await sut.GetAsync( + TestRecordKey1, + new() { IncludeVectors = false }); + + // Assert + var fieldNames = new RedisValue[] { "OriginalNameData", "data_storage_name" }; + this._redisDatabaseMock.Verify(x => x.HashGetAsync(TestRecordKey1, fieldNames, CommandFlags.None), Times.Once); + + Assert.NotNull(actual); + Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.OriginalNameData); + Assert.Equal("data 1", actual.Data); + Assert.False(actual.Vector.HasValue); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) + { + // Arrange + var hashEntries1 = new HashEntry[] + { + new("OriginalNameData", "data 1"), + new("data_storage_name", "data 1"), + new("vector_storage_name", MemoryMarshal.AsBytes(new ReadOnlySpan(new float[] { 1, 2, 3, 4 })).ToArray()) + }; + var hashEntries2 = new HashEntry[] + { + new("OriginalNameData", "data 2"), + new("data_storage_name", "data 2"), + new("vector_storage_name", MemoryMarshal.AsBytes(new ReadOnlySpan(new float[] { 5, 6, 7, 8 })).ToArray()) + }; + this._redisDatabaseMock.Setup(x => x.HashGetAllAsync(It.IsAny(), CommandFlags.None)).Returns((RedisKey key, CommandFlags flags) => + { + return key switch + { + RedisKey k when k == TestRecordKey1 => Task.FromResult(hashEntries1), + RedisKey k when k == TestRecordKey2 => Task.FromResult(hashEntries2), + _ => throw new ArgumentException("Unexpected key."), + }; + }); + var sut = this.CreateRecordCollection(useDefinition); + + // Act + var actual = await sut.GetBatchAsync( + [TestRecordKey1, 
TestRecordKey2], + new() { IncludeVectors = true }).ToListAsync(); + + // Assert + this._redisDatabaseMock.Verify(x => x.HashGetAllAsync(TestRecordKey1, CommandFlags.None), Times.Once); + this._redisDatabaseMock.Verify(x => x.HashGetAllAsync(TestRecordKey2, CommandFlags.None), Times.Once); + + Assert.NotNull(actual); + Assert.Equal(2, actual.Count); + Assert.Equal(TestRecordKey1, actual[0].Key); + Assert.Equal("data 1", actual[0].OriginalNameData); + Assert.Equal("data 1", actual[0].Data); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual[0].Vector!.Value.ToArray()); + Assert.Equal(TestRecordKey2, actual[1].Key); + Assert.Equal("data 2", actual[1].OriginalNameData); + Assert.Equal("data 2", actual[1].Data); + Assert.Equal(new float[] { 5, 6, 7, 8 }, actual[1].Vector!.Value.ToArray()); + } + + [Fact] + public async Task CanGetRecordWithCustomMapperAsync() + { + // Arrange. + var hashEntries = new HashEntry[] + { + new("OriginalNameData", "data 1"), + new("data_storage_name", "data 1"), + new("vector_storage_name", MemoryMarshal.AsBytes(new ReadOnlySpan(new float[] { 1, 2, 3, 4 })).ToArray()) + }; + this._redisDatabaseMock.Setup(x => x.HashGetAllAsync(It.IsAny(), CommandFlags.None)).ReturnsAsync(hashEntries); + + // Arrange mapper mock from JsonNode to data model. + var mapperMock = new Mock>(MockBehavior.Strict); + mapperMock.Setup( + x => x.MapFromStorageToDataModel( + It.IsAny<(string key, HashEntry[] hashEntries)>(), + It.IsAny())) + .Returns(CreateModel(TestRecordKey1, true)); + + // Arrange target with custom mapper. 
+ var sut = new RedisHashSetVectorStoreRecordCollection( + this._redisDatabaseMock.Object, + TestCollectionName, + new() + { + HashEntriesCustomMapper = mapperMock.Object + }); + + // Act + var actual = await sut.GetAsync( + TestRecordKey1, + new() { IncludeVectors = true }); + + // Assert + Assert.NotNull(actual); + Assert.Equal(TestRecordKey1, actual.Key); + Assert.Equal("data 1", actual.OriginalNameData); + Assert.Equal("data 1", actual.Data); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + + mapperMock + .Verify( + x => x.MapFromStorageToDataModel( + It.Is<(string key, HashEntry[] hashEntries)>(x => x.key == TestRecordKey1), + It.Is(x => x.IncludeVectors)), + Times.Once); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task CanDeleteRecordAsync(bool useDefinition) + { + // Arrange + this._redisDatabaseMock.Setup(x => x.KeyDeleteAsync(It.IsAny(), CommandFlags.None)).ReturnsAsync(true); + var sut = this.CreateRecordCollection(useDefinition); + + // Act + await sut.DeleteAsync(TestRecordKey1); + + // Assert + this._redisDatabaseMock.Verify(x => x.KeyDeleteAsync(TestRecordKey1, CommandFlags.None), Times.Once); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition) + { + // Arrange + this._redisDatabaseMock.Setup(x => x.KeyDeleteAsync(It.IsAny(), CommandFlags.None)).ReturnsAsync(true); + var sut = this.CreateRecordCollection(useDefinition); + + // Act + await sut.DeleteBatchAsync([TestRecordKey1, TestRecordKey2]); + + // Assert + this._redisDatabaseMock.Verify(x => x.KeyDeleteAsync(TestRecordKey1, CommandFlags.None), Times.Once); + this._redisDatabaseMock.Verify(x => x.KeyDeleteAsync(TestRecordKey2, CommandFlags.None), Times.Once); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task CanUpsertRecordAsync(bool useDefinition) + { + // Arrange + this._redisDatabaseMock.Setup(x => x.HashSetAsync(It.IsAny(), 
It.IsAny(), CommandFlags.None)).Returns(Task.CompletedTask); + var sut = this.CreateRecordCollection(useDefinition); + var model = CreateModel(TestRecordKey1, true); + + // Act + await sut.UpsertAsync(model); + + // Assert + this._redisDatabaseMock.Verify( + x => x.HashSetAsync( + TestRecordKey1, + It.Is(x => x.Length == 3 && x[0].Name == "OriginalNameData" && x[1].Name == "data_storage_name" && x[2].Name == "vector_storage_name"), + CommandFlags.None), + Times.Once); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task CanUpsertManyRecordsAsync(bool useDefinition) + { + // Arrange + this._redisDatabaseMock.Setup(x => x.HashSetAsync(It.IsAny(), It.IsAny(), CommandFlags.None)).Returns(Task.CompletedTask); + var sut = this.CreateRecordCollection(useDefinition); + + var model1 = CreateModel(TestRecordKey1, true); + var model2 = CreateModel(TestRecordKey2, true); + + // Act + var actual = await sut.UpsertBatchAsync([model1, model2]).ToListAsync(); + + // Assert + Assert.NotNull(actual); + Assert.Equal(2, actual.Count); + Assert.Equal(TestRecordKey1, actual[0]); + Assert.Equal(TestRecordKey2, actual[1]); + + this._redisDatabaseMock.Verify( + x => x.HashSetAsync( + TestRecordKey1, + It.Is(x => x.Length == 3 && x[0].Name == "OriginalNameData" && x[1].Name == "data_storage_name" && x[2].Name == "vector_storage_name"), + CommandFlags.None), + Times.Once); + this._redisDatabaseMock.Verify( + x => x.HashSetAsync( + TestRecordKey2, + It.Is(x => x.Length == 3 && x[0].Name == "OriginalNameData" && x[1].Name == "data_storage_name" && x[2].Name == "vector_storage_name"), + CommandFlags.None), + Times.Once); + } + + [Fact] + public async Task CanUpsertRecordWithCustomMapperAsync() + { + // Arrange. + this._redisDatabaseMock.Setup(x => x.HashSetAsync(It.IsAny(), It.IsAny(), CommandFlags.None)).Returns(Task.CompletedTask); + + // Arrange mapper mock from data model to JsonNode. 
+ var mapperMock = new Mock>(MockBehavior.Strict); + var hashEntries = new HashEntry[] + { + new("OriginalNameData", "data 1"), + new("data_storage_name", "data 1"), + new("vector_storage_name", "[1,2,3,4]"), + new("NotAnnotated", RedisValue.Null) + }; + mapperMock + .Setup(x => x.MapFromDataToStorageModel(It.IsAny())) + .Returns((TestRecordKey1, hashEntries)); + + // Arrange target with custom mapper. + var sut = new RedisHashSetVectorStoreRecordCollection( + this._redisDatabaseMock.Object, + TestCollectionName, + new() + { + HashEntriesCustomMapper = mapperMock.Object + }); + + var model = CreateModel(TestRecordKey1, true); + + // Act + await sut.UpsertAsync(model); + + // Assert + mapperMock + .Verify( + x => x.MapFromDataToStorageModel(It.Is(x => x == model)), + Times.Once); + } + + private RedisHashSetVectorStoreRecordCollection CreateRecordCollection(bool useDefinition) + { + return new RedisHashSetVectorStoreRecordCollection( + this._redisDatabaseMock.Object, + TestCollectionName, + new() + { + PrefixCollectionNameToKeyNames = false, + VectorStoreRecordDefinition = useDefinition ? 
this._singlePropsDefinition : null + }); + } + + private static void SetupExecuteMock(Mock redisDatabaseMock, Exception exception) + { + redisDatabaseMock + .Setup( + x => x.ExecuteAsync( + It.IsAny(), + It.IsAny())) + .ThrowsAsync(exception); + } + + private static void SetupExecuteMock(Mock redisDatabaseMock, IEnumerable redisResultStrings) + { + var results = redisResultStrings + .Select(x => RedisResult.Create(new RedisValue(x))) + .ToArray(); + redisDatabaseMock + .Setup( + x => x.ExecuteAsync( + It.IsAny(), + It.IsAny())) + .ReturnsAsync(RedisResult.Create(results)); + } + + private static void SetupExecuteMock(Mock redisDatabaseMock, string redisResultString) + { + redisDatabaseMock + .Setup( + x => x.ExecuteAsync( + It.IsAny(), + It.IsAny())) + .Callback((string command, object[] args) => + { + Console.WriteLine(args); + }) + .ReturnsAsync(RedisResult.Create(new RedisValue(redisResultString))); + } + + private static SinglePropsModel CreateModel(string key, bool withVectors) + { + return new SinglePropsModel + { + Key = key, + OriginalNameData = "data 1", + Data = "data 1", + Vector = withVectors ? 
new float[] { 1, 2, 3, 4 } : null, + NotAnnotated = null, + }; + } + + private readonly VectorStoreRecordDefinition _singlePropsDefinition = new() + { + Properties = + [ + new VectorStoreRecordKeyProperty("Key"), + new VectorStoreRecordDataProperty("OriginalNameData"), + new VectorStoreRecordDataProperty("Data") { StoragePropertyName = "data_storage_name" }, + new VectorStoreRecordVectorProperty("Vector") { StoragePropertyName = "vector_storage_name" } + ] + }; + + public sealed class SinglePropsModel + { + [VectorStoreRecordKey] + public string Key { get; set; } = string.Empty; + + [VectorStoreRecordData(IsFilterable = true)] + public string OriginalNameData { get; set; } = string.Empty; + + [JsonPropertyName("ignored_data_json_name")] + [VectorStoreRecordData(IsFilterable = true, StoragePropertyName = "data_storage_name")] + public string Data { get; set; } = string.Empty; + + [JsonPropertyName("ignored_vector_json_name")] + [VectorStoreRecordVector(4, StoragePropertyName = "vector_storage_name")] + public ReadOnlyMemory? Vector { get; set; } + + public string? NotAnnotated { get; set; } + } +} diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordMapperTests.cs new file mode 100644 index 000000000000..cb1a13de8822 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordMapperTests.cs @@ -0,0 +1,246 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Reflection; +using System.Runtime.InteropServices; +using Microsoft.SemanticKernel.Connectors.Redis; +using Microsoft.SemanticKernel.Data; +using StackExchange.Redis; +using Xunit; + +namespace SemanticKernel.Connectors.Redis.UnitTests; + +/// +/// Contains tests for the class. 
+/// +public sealed class RedisHashSetVectorStoreRecordMapperTests +{ + [Fact] + public void MapsAllFieldsFromDataToStorageModel() + { + // Arrange. + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(AllTypesModel), supportsMultipleVectors: true); + var sut = new RedisHashSetVectorStoreRecordMapper(keyProperty, dataProperties, vectorProperties, s_storagePropertyNames); + + // Act. + var actual = sut.MapFromDataToStorageModel(CreateModel("test key")); + + // Assert. + Assert.NotNull(actual.HashEntries); + Assert.Equal("test key", actual.Key); + + Assert.Equal("storage_string_data", actual.HashEntries[0].Name.ToString()); + Assert.Equal("data 1", actual.HashEntries[0].Value.ToString()); + + Assert.Equal("IntData", actual.HashEntries[1].Name.ToString()); + Assert.Equal(1, (int)actual.HashEntries[1].Value); + + Assert.Equal("UIntData", actual.HashEntries[2].Name.ToString()); + Assert.Equal(2u, (uint)actual.HashEntries[2].Value); + + Assert.Equal("LongData", actual.HashEntries[3].Name.ToString()); + Assert.Equal(3, (long)actual.HashEntries[3].Value); + + Assert.Equal("ULongData", actual.HashEntries[4].Name.ToString()); + Assert.Equal(4ul, (ulong)actual.HashEntries[4].Value); + + Assert.Equal("DoubleData", actual.HashEntries[5].Name.ToString()); + Assert.Equal(5.5d, (double)actual.HashEntries[5].Value); + + Assert.Equal("FloatData", actual.HashEntries[6].Name.ToString()); + Assert.Equal(6.6f, (float)actual.HashEntries[6].Value); + + Assert.Equal("BoolData", actual.HashEntries[7].Name.ToString()); + Assert.True((bool)actual.HashEntries[7].Value); + + Assert.Equal("NullableIntData", actual.HashEntries[8].Name.ToString()); + Assert.Equal(7, (int)actual.HashEntries[8].Value); + + Assert.Equal("NullableUIntData", actual.HashEntries[9].Name.ToString()); + Assert.Equal(8u, (uint)actual.HashEntries[9].Value); + + Assert.Equal("NullableLongData", actual.HashEntries[10].Name.ToString()); + 
Assert.Equal(9, (long)actual.HashEntries[10].Value); + + Assert.Equal("NullableULongData", actual.HashEntries[11].Name.ToString()); + Assert.Equal(10ul, (ulong)actual.HashEntries[11].Value); + + Assert.Equal("NullableDoubleData", actual.HashEntries[12].Name.ToString()); + Assert.Equal(11.1d, (double)actual.HashEntries[12].Value); + + Assert.Equal("NullableFloatData", actual.HashEntries[13].Name.ToString()); + Assert.Equal(12.2f, (float)actual.HashEntries[13].Value); + + Assert.Equal("NullableBoolData", actual.HashEntries[14].Name.ToString()); + Assert.False((bool)actual.HashEntries[14].Value); + + Assert.Equal("FloatVector", actual.HashEntries[15].Name.ToString()); + Assert.Equal(new float[] { 1, 2, 3, 4 }, MemoryMarshal.Cast((byte[])actual.HashEntries[15].Value!).ToArray()); + + Assert.Equal("DoubleVector", actual.HashEntries[16].Name.ToString()); + Assert.Equal(new double[] { 5, 6, 7, 8 }, MemoryMarshal.Cast((byte[])actual.HashEntries[16].Value!).ToArray()); + } + + [Fact] + public void MapsAllFieldsFromStorageToDataModel() + { + // Arrange. + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(AllTypesModel), supportsMultipleVectors: true); + var sut = new RedisHashSetVectorStoreRecordMapper(keyProperty, dataProperties, vectorProperties, s_storagePropertyNames); + + // Act. + var actual = sut.MapFromStorageToDataModel(("test key", CreateHashSet()), new() { IncludeVectors = true }); + + // Assert. 
+ Assert.NotNull(actual); + Assert.Equal("test key", actual.Key); + Assert.Equal("data 1", actual.StringData); + Assert.Equal(1, actual.IntData); + Assert.Equal(2u, actual.UIntData); + Assert.Equal(3, actual.LongData); + Assert.Equal(4ul, actual.ULongData); + Assert.Equal(5.5d, actual.DoubleData); + Assert.Equal(6.6f, actual.FloatData); + Assert.True(actual.BoolData); + Assert.Equal(7, actual.NullableIntData); + Assert.Equal(8u, actual.NullableUIntData); + Assert.Equal(9, actual.NullableLongData); + Assert.Equal(10ul, actual.NullableULongData); + Assert.Equal(11.1d, actual.NullableDoubleData); + Assert.Equal(12.2f, actual.NullableFloatData); + Assert.False(actual.NullableBoolData); + + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.FloatVector!.Value.ToArray()); + Assert.Equal(new double[] { 5, 6, 7, 8 }, actual.DoubleVector!.Value.ToArray()); + } + + private static AllTypesModel CreateModel(string key) + { + return new AllTypesModel + { + Key = key, + StringData = "data 1", + IntData = 1, + UIntData = 2, + LongData = 3, + ULongData = 4, + DoubleData = 5.5d, + FloatData = 6.6f, + BoolData = true, + NullableIntData = 7, + NullableUIntData = 8, + NullableLongData = 9, + NullableULongData = 10, + NullableDoubleData = 11.1d, + NullableFloatData = 12.2f, + NullableBoolData = false, + FloatVector = new float[] { 1, 2, 3, 4 }, + DoubleVector = new double[] { 5, 6, 7, 8 }, + NotAnnotated = "notAnnotated", + }; + } + + private static HashEntry[] CreateHashSet() + { + var hashSet = new HashEntry[17]; + hashSet[0] = new HashEntry("storage_string_data", "data 1"); + hashSet[1] = new HashEntry("IntData", 1); + hashSet[2] = new HashEntry("UIntData", 2); + hashSet[3] = new HashEntry("LongData", 3); + hashSet[4] = new HashEntry("ULongData", 4); + hashSet[5] = new HashEntry("DoubleData", 5.5); + hashSet[6] = new HashEntry("FloatData", 6.6); + hashSet[7] = new HashEntry("BoolData", true); + hashSet[8] = new HashEntry("NullableIntData", 7); + hashSet[9] = new 
HashEntry("NullableUIntData", 8); + hashSet[10] = new HashEntry("NullableLongData", 9); + hashSet[11] = new HashEntry("NullableULongData", 10); + hashSet[12] = new HashEntry("NullableDoubleData", 11.1); + hashSet[13] = new HashEntry("NullableFloatData", 12.2); + hashSet[14] = new HashEntry("NullableBoolData", false); + hashSet[15] = new HashEntry("FloatVector", MemoryMarshal.AsBytes(new ReadOnlySpan(new float[] { 1, 2, 3, 4 })).ToArray()); + hashSet[16] = new HashEntry("DoubleVector", MemoryMarshal.AsBytes(new ReadOnlySpan(new double[] { 5, 6, 7, 8 })).ToArray()); + return hashSet; + } + + private static readonly Dictionary s_storagePropertyNames = new() + { + ["StringData"] = "storage_string_data", + ["IntData"] = "IntData", + ["UIntData"] = "UIntData", + ["LongData"] = "LongData", + ["ULongData"] = "ULongData", + ["DoubleData"] = "DoubleData", + ["FloatData"] = "FloatData", + ["BoolData"] = "BoolData", + ["NullableIntData"] = "NullableIntData", + ["NullableUIntData"] = "NullableUIntData", + ["NullableLongData"] = "NullableLongData", + ["NullableULongData"] = "NullableULongData", + ["NullableDoubleData"] = "NullableDoubleData", + ["NullableFloatData"] = "NullableFloatData", + ["NullableBoolData"] = "NullableBoolData", + ["FloatVector"] = "FloatVector", + ["DoubleVector"] = "DoubleVector", + }; + + private sealed class AllTypesModel + { + [VectorStoreRecordKey] + public string Key { get; set; } = string.Empty; + + [VectorStoreRecordData] + public string StringData { get; set; } = string.Empty; + + [VectorStoreRecordData] + public int IntData { get; set; } + + [VectorStoreRecordData] + public uint UIntData { get; set; } + + [VectorStoreRecordData] + public long LongData { get; set; } + + [VectorStoreRecordData] + public ulong ULongData { get; set; } + + [VectorStoreRecordData] + public double DoubleData { get; set; } + + [VectorStoreRecordData] + public float FloatData { get; set; } + + [VectorStoreRecordData] + public bool BoolData { get; set; } + + 
[VectorStoreRecordData] + public int? NullableIntData { get; set; } + + [VectorStoreRecordData] + public uint? NullableUIntData { get; set; } + + [VectorStoreRecordData] + public long? NullableLongData { get; set; } + + [VectorStoreRecordData] + public ulong? NullableULongData { get; set; } + + [VectorStoreRecordData] + public double? NullableDoubleData { get; set; } + + [VectorStoreRecordData] + public float? NullableFloatData { get; set; } + + [VectorStoreRecordData] + public bool? NullableBoolData { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory? FloatVector { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory? DoubleVector { get; set; } + + public string NotAnnotated { get; set; } = string.Empty; + } +} diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs index d9d1a6616091..1a621975de01 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs @@ -482,7 +482,7 @@ private static SinglePropsModel CreateModel(string key, bool withVectors) [ new VectorStoreRecordKeyProperty("Key"), new VectorStoreRecordDataProperty("OriginalNameData"), - new VectorStoreRecordDataProperty("Data") { StoragePropertyName = "ignored_data_json_name" }, + new VectorStoreRecordDataProperty("Data") { StoragePropertyName = "ignored_data_storage_name" }, new VectorStoreRecordVectorProperty("Vector") ] }; @@ -496,7 +496,7 @@ public sealed class SinglePropsModel public string OriginalNameData { get; set; } = string.Empty; [JsonPropertyName("data_json_name")] - [VectorStoreRecordData(IsFilterable = true, StoragePropertyName = "ignored_data_json_name")] + [VectorStoreRecordData(IsFilterable = true, StoragePropertyName = "ignored_data_storage_name")] public string Data 
{ get; set; } = string.Empty; [JsonPropertyName("vector_json_name")] diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs index fc25d4c8c151..28f8f6cc5bcb 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreTests.cs @@ -28,7 +28,7 @@ public RedisVectorStoreTests() } [Fact] - public void GetCollectionReturnsCollection() + public void GetCollectionReturnsJsonCollection() { // Arrange. var sut = new RedisVectorStore(this._redisDatabaseMock.Object); @@ -41,6 +41,20 @@ public void GetCollectionReturnsCollection() Assert.IsType>>(actual); } + [Fact] + public void GetCollectionReturnsHashSetCollection() + { + // Arrange. + var sut = new RedisVectorStore(this._redisDatabaseMock.Object, new() { StorageType = RedisStorageType.HashSet }); + + // Act. + var actual = sut.GetCollection>(TestCollectionName); + + // Assert. + Assert.NotNull(actual); + Assert.IsType>>(actual); + } + [Fact] public void GetCollectionCallsFactoryIfProvided() { diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisHashSetVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisHashSetVectorStoreRecordCollectionTests.cs new file mode 100644 index 000000000000..b80d85551e6d --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisHashSetVectorStoreRecordCollectionTests.cs @@ -0,0 +1,340 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.Redis; +using Microsoft.SemanticKernel.Data; +using NRedisStack.RedisStackCommands; +using NRedisStack.Search; +using StackExchange.Redis; +using Xunit; +using Xunit.Abstractions; +using static SemanticKernel.IntegrationTests.Connectors.Memory.Redis.RedisVectorStoreFixture; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Redis; + +/// +/// Contains tests for the class. +/// +/// Used for logging. +/// Redis setup and teardown. +[Collection("RedisVectorStoreCollection")] +public sealed class RedisHashSetVectorStoreRecordCollectionTests(ITestOutputHelper output, RedisVectorStoreFixture fixture) +{ + private const string TestCollectionName = "hashhotels"; + + [Theory] + [InlineData(TestCollectionName, true)] + [InlineData("nonexistentcollection", false)] + public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) + { + // Arrange. + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, collectionName); + + // Act. + var actual = await sut.CollectionExistsAsync(); + + // Assert. + Assert.Equal(expectedExists, actual); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task ItCanCreateACollectionUpsertAndGetAsync(bool useRecordDefinition) + { + // Arrange + var record = CreateTestHotel("Upsert-1", 1); + var collectionNamePostfix = useRecordDefinition ? "WithDefinition" : "WithType"; + var testCollectionName = $"hashsetcreatetest{collectionNamePostfix}"; + + var options = new RedisHashSetVectorStoreRecordCollectionOptions + { + PrefixCollectionNameToKeyNames = true, + VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.BasicVectorStoreRecordDefinition : null + }; + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, testCollectionName, options); + + // Act + await sut.CreateCollectionAsync(); + var upsertResult = await sut.UpsertAsync(record); + var getResult = await sut.GetAsync("Upsert-1", new GetRecordOptions { IncludeVectors = true }); + + // Assert + var collectionExistResult = await sut.CollectionExistsAsync(); + Assert.True(collectionExistResult); + await sut.DeleteCollectionAsync(); + + Assert.Equal("Upsert-1", upsertResult); + Assert.Equal(record.HotelId, getResult?.HotelId); + Assert.Equal(record.HotelName, getResult?.HotelName); + Assert.Equal(record.HotelCode, getResult?.HotelCode); + Assert.Equal(record.ParkingIncluded, getResult?.ParkingIncluded); + Assert.Equal(record.Rating, getResult?.Rating); + Assert.Equal(record.Description, getResult?.Description); + Assert.Equal(record.DescriptionEmbedding?.ToArray(), getResult?.DescriptionEmbedding?.ToArray()); + + // Output + output.WriteLine(collectionExistResult.ToString()); + output.WriteLine(upsertResult); + output.WriteLine(getResult?.ToString()); + } + + [Fact] + public async Task ItCanDeleteCollectionAsync() + { + // Arrange + var tempCollectionName = "temp-test"; + var schema = new Schema(); + schema.AddTextField("HotelName"); + var createParams = new FTCreateParams(); + createParams.AddPrefix(tempCollectionName); + await fixture.Database.FT().CreateAsync(tempCollectionName, createParams, schema); + + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, tempCollectionName); + + // Act + await sut.DeleteCollectionAsync(); + + // Assert + Assert.False(await sut.CollectionExistsAsync()); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition) + { + // Arrange. 
+ var options = new RedisHashSetVectorStoreRecordCollectionOptions + { + PrefixCollectionNameToKeyNames = true, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.BasicVectorStoreRecordDefinition : null + }; + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); + var record = CreateTestHotel("Upsert-2", 2); + + // Act. + var upsertResult = await sut.UpsertAsync(record); + + // Assert. + var getResult = await sut.GetAsync("Upsert-2", new GetRecordOptions { IncludeVectors = true }); + Assert.Equal("Upsert-2", upsertResult); + Assert.Equal(record.HotelId, getResult?.HotelId); + Assert.Equal(record.HotelName, getResult?.HotelName); + Assert.Equal(record.HotelCode, getResult?.HotelCode); + Assert.Equal(record.ParkingIncluded, getResult?.ParkingIncluded); + Assert.Equal(record.Rating, getResult?.Rating); + Assert.Equal(record.Description, getResult?.Description); + Assert.Equal(record.DescriptionEmbedding?.ToArray(), getResult?.DescriptionEmbedding?.ToArray()); + + // Output. + output.WriteLine(upsertResult); + output.WriteLine(getResult?.ToString()); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task ItCanUpsertManyDocumentsToVectorStoreAsync(bool useRecordDefinition) + { + // Arrange. + var options = new RedisHashSetVectorStoreRecordCollectionOptions + { + PrefixCollectionNameToKeyNames = true, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.BasicVectorStoreRecordDefinition : null + }; + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); + + // Act. + var results = sut.UpsertBatchAsync( + [ + CreateTestHotel("UpsertMany-1", 1), + CreateTestHotel("UpsertMany-2", 2), + CreateTestHotel("UpsertMany-3", 3), + ]); + + // Assert. 
+ Assert.NotNull(results); + var resultsList = await results.ToListAsync(); + + Assert.Equal(3, resultsList.Count); + Assert.Contains("UpsertMany-1", resultsList); + Assert.Contains("UpsertMany-2", resultsList); + Assert.Contains("UpsertMany-3", resultsList); + + // Output + foreach (var result in resultsList) + { + output.WriteLine(result); + } + } + + [Theory] + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool useRecordDefinition) + { + // Arrange. + var options = new RedisHashSetVectorStoreRecordCollectionOptions + { + PrefixCollectionNameToKeyNames = true, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.BasicVectorStoreRecordDefinition : null + }; + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); + + // Act. + var getResult = await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = includeVectors }); + + // Assert. + Assert.Equal("BaseSet-1", getResult?.HotelId); + Assert.Equal("My Hotel 1", getResult?.HotelName); + Assert.Equal(1, getResult?.HotelCode); + Assert.True(getResult?.ParkingIncluded); + Assert.Equal(3.6, getResult?.Rating); + Assert.Equal("This is a great hotel.", getResult?.Description); + if (includeVectors) + { + Assert.Equal(new[] { 30f, 31f, 32f, 33f }, getResult?.DescriptionEmbedding?.ToArray()); + } + else + { + Assert.Null(getResult?.DescriptionEmbedding); + } + + // Output. 
+ output.WriteLine(getResult?.ToString()); + } + + [Fact] + public async Task ItCanGetManyDocumentsFromVectorStoreAsync() + { + // Arrange + var options = new RedisHashSetVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); + + // Act + // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. + var hotels = sut.GetBatchAsync(["BaseSet-1", "BaseSet-5", "BaseSet-2"], new GetRecordOptions { IncludeVectors = true }); + + // Assert + Assert.NotNull(hotels); + var hotelsList = await hotels.ToListAsync(); + Assert.Equal(2, hotelsList.Count); + + // Output + foreach (var hotel in hotelsList) + { + output.WriteLine(hotel?.ToString() ?? "Null"); + } + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefinition) + { + // Arrange. + var options = new RedisHashSetVectorStoreRecordCollectionOptions + { + PrefixCollectionNameToKeyNames = true, + VectorStoreRecordDefinition = useRecordDefinition ? fixture.BasicVectorStoreRecordDefinition : null + }; + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); + var record = new BasicHotel + { + HotelId = "Remove-1", + HotelName = "Remove Test Hotel", + HotelCode = 20, + Description = "This is a great hotel.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f } + }; + + await sut.UpsertAsync(record); + + // Act. + await sut.DeleteAsync("Remove-1"); + // Also delete a non-existing key to test that the operation does not fail for these. + await sut.DeleteAsync("Remove-2"); + + // Assert. 
+ Assert.Null(await sut.GetAsync("Remove-1")); + } + + [Fact] + public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() + { + // Arrange + var options = new RedisHashSetVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); + await sut.UpsertAsync(CreateTestHotel("RemoveMany-1", 1)); + await sut.UpsertAsync(CreateTestHotel("RemoveMany-2", 2)); + await sut.UpsertAsync(CreateTestHotel("RemoveMany-3", 3)); + + // Act + // Also include a non-existing key to test that the operation does not fail for these. + await sut.DeleteBatchAsync(["RemoveMany-1", "RemoveMany-2", "RemoveMany-3", "RemoveMany-4"]); + + // Assert + Assert.Null(await sut.GetAsync("RemoveMany-1", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-2", new GetRecordOptions { IncludeVectors = true })); + Assert.Null(await sut.GetAsync("RemoveMany-3", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact] + public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() + { + // Arrange + var options = new RedisHashSetVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); + + // Act & Assert + Assert.Null(await sut.GetAsync("BaseSet-5", new GetRecordOptions { IncludeVectors = true })); + } + + [Fact] + public async Task ItThrowsMappingExceptionForFailedMapperAsync() + { + // Arrange + var options = new RedisHashSetVectorStoreRecordCollectionOptions + { + PrefixCollectionNameToKeyNames = true, + HashEntriesCustomMapper = new FailingMapper() + }; + var sut = new RedisHashSetVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); + + // Act & Assert + await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); + } + + 
private static BasicHotel CreateTestHotel(string hotelId, int hotelCode) + { + var record = new BasicHotel + { + HotelId = hotelId, + HotelName = $"My Hotel {hotelCode}", + HotelCode = 1, + ParkingIncluded = true, + Rating = 3.6, + Description = "This is a great hotel.", + DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f } + }; + return record; + } + + private sealed class FailingMapper : IVectorStoreRecordMapper + { + public (string Key, HashEntry[] HashEntries) MapFromDataToStorageModel(BasicHotel dataModel) + { + throw new NotImplementedException(); + } + + public BasicHotel MapFromStorageToDataModel((string Key, HashEntry[] HashEntries) storageModel, StorageToDataModelMapperOptions options) + { + throw new NotImplementedException(); + } + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs index 20885ea076de..a24f11dc7f51 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs @@ -22,8 +22,10 @@ namespace SemanticKernel.IntegrationTests.Connectors.Memory.Redis; [Collection("RedisVectorStoreCollection")] public sealed class RedisJsonVectorStoreRecordCollectionTests(ITestOutputHelper output, RedisVectorStoreFixture fixture) { + private const string TestCollectionName = "jsonhotels"; + [Theory] - [InlineData("hotels", true)] + [InlineData(TestCollectionName, true)] [InlineData("nonexistentcollection", false)] public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) { @@ -45,7 +47,7 @@ public async Task ItCanCreateACollectionUpsertAndGetAsync(bool useRecordDefiniti // Arrange var record = CreateTestHotel("Upsert-1", 1); var collectionNamePostfix = useRecordDefinition ? 
"WithDefinition" : "WithType"; - var testCollectionName = $"createtest{collectionNamePostfix}"; + var testCollectionName = $"jsoncreatetest{collectionNamePostfix}"; var options = new RedisJsonVectorStoreRecordCollectionOptions { @@ -114,7 +116,7 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); Hotel record = CreateTestHotel("Upsert-2", 2); // Act. @@ -151,7 +153,7 @@ public async Task ItCanUpsertManyDocumentsToVectorStoreAsync(bool useRecordDefin PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); // Act. var results = sut.UpsertBatchAsync( @@ -190,7 +192,7 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); // Act. 
var getResult = await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = includeVectors }); @@ -223,7 +225,7 @@ public async Task ItCanGetManyDocumentsFromVectorStoreAsync() { // Arrange var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); // Act // Also include one non-existing key to test that the operation does not fail for these and returns only the found ones. @@ -246,7 +248,7 @@ public async Task ItFailsToGetDocumentsWithInvalidSchemaAsync() { // Arrange. var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); // Act & Assert. await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-4-Invalid", new GetRecordOptions { IncludeVectors = true })); @@ -263,7 +265,7 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti PrefixCollectionNameToKeyNames = true, VectorStoreRecordDefinition = useRecordDefinition ? 
fixture.VectorStoreRecordDefinition : null }; - var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); var address = new HotelAddress { City = "Seattle", Country = "USA" }; var record = new Hotel { @@ -290,7 +292,7 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync() { // Arrange var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); await sut.UpsertAsync(CreateTestHotel("RemoveMany-1", 1)); await sut.UpsertAsync(CreateTestHotel("RemoveMany-2", 2)); await sut.UpsertAsync(CreateTestHotel("RemoveMany-3", 3)); @@ -310,7 +312,7 @@ public async Task ItReturnsNullWhenGettingNonExistentRecordAsync() { // Arrange var options = new RedisJsonVectorStoreRecordCollectionOptions { PrefixCollectionNameToKeyNames = true }; - var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); // Act & Assert Assert.Null(await sut.GetAsync("BaseSet-5", new GetRecordOptions { IncludeVectors = true })); @@ -325,7 +327,7 @@ public async Task ItThrowsMappingExceptionForFailedMapperAsync() PrefixCollectionNameToKeyNames = true, JsonNodeCustomMapper = new FailingMapper() }; - var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, "hotels", options); + var sut = new RedisJsonVectorStoreRecordCollection(fixture.Database, TestCollectionName, options); // Act & Assert await Assert.ThrowsAsync(async () => await sut.GetAsync("BaseSet-1", new GetRecordOptions { IncludeVectors = true })); diff --git 
a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs index c79069982824..f42370753f68 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; +using System.Runtime.InteropServices; using System.Text.Json.Serialization; using System.Threading.Tasks; using Docker.DotNet; @@ -9,6 +10,7 @@ using Microsoft.SemanticKernel.Data; using NRedisStack.RedisStackCommands; using NRedisStack.Search; +using NRedisStack.Search.Literals.Enums; using StackExchange.Redis; using Xunit; @@ -43,12 +45,25 @@ public RedisVectorStoreFixture() new VectorStoreRecordDataProperty("Description"), new VectorStoreRecordVectorProperty("DescriptionEmbedding") { Dimensions = 4 }, new VectorStoreRecordDataProperty("Tags"), - new VectorStoreRecordDataProperty("ParkingIncluded"), + new VectorStoreRecordDataProperty("ParkingIncluded") { StoragePropertyName = "parking_is_included" }, new VectorStoreRecordDataProperty("LastRenovationDate"), new VectorStoreRecordDataProperty("Rating"), new VectorStoreRecordDataProperty("Address") } }; + this.BasicVectorStoreRecordDefinition = new VectorStoreRecordDefinition + { + Properties = new List + { + new VectorStoreRecordKeyProperty("HotelId"), + new VectorStoreRecordDataProperty("HotelName") { IsFilterable = true, PropertyType = typeof(string) }, + new VectorStoreRecordDataProperty("HotelCode") { IsFilterable = true, PropertyType = typeof(int) }, + new VectorStoreRecordDataProperty("Description"), + new VectorStoreRecordVectorProperty("DescriptionEmbedding") { Dimensions = 4 }, + new VectorStoreRecordDataProperty("ParkingIncluded") { StoragePropertyName = "parking_is_included" }, + new VectorStoreRecordDataProperty("Rating") + } + }; } /// Gets the redis database connection to 
use for tests. @@ -57,6 +72,9 @@ public RedisVectorStoreFixture() /// Gets the manually created vector store record definition for our test model. public VectorStoreRecordDefinition VectorStoreRecordDefinition { get; private set; } + /// Gets the manually created vector store record definition for our basic test model. + public VectorStoreRecordDefinition BasicVectorStoreRecordDefinition { get; private set; } + /// /// Create / Recreate redis docker container, create an index and add test data. /// @@ -71,24 +89,28 @@ public async Task InitializeAsync() // Create a schema for the vector store. var schema = new Schema(); - schema.AddTextField("HotelName"); - schema.AddNumericField("hotelCode"); - schema.AddTextField("Description"); - schema.AddVectorField("DescriptionEmbedding", Schema.VectorField.VectorAlgo.HNSW, new Dictionary() + schema.AddTextField(new FieldName("$.HotelName", "HotelName")); + schema.AddNumericField(new FieldName("$.HotelCode", "HotelCode")); + schema.AddTextField(new FieldName("$.Description", "Description")); + schema.AddVectorField(new FieldName("$.DescriptionEmbedding", "DescriptionEmbedding"), Schema.VectorField.VectorAlgo.HNSW, new Dictionary() { ["TYPE"] = "FLOAT32", ["DIM"] = "4", ["DISTANCE_METRIC"] = "L2" }); - var createParams = new FTCreateParams(); - createParams.AddPrefix("hotels"); - await this.Database.FT().CreateAsync("hotels", createParams, schema); + var jsonCreateParams = new FTCreateParams().AddPrefix("jsonhotels:").On(IndexDataType.JSON); + await this.Database.FT().CreateAsync("jsonhotels", jsonCreateParams, schema); + + // Create a hashset index. + var hashsetCreateParams = new FTCreateParams().AddPrefix("hashhotels:").On(IndexDataType.HASH); + await this.Database.FT().CreateAsync("hashhotels", hashsetCreateParams, schema); // Create some test data. 
var address = new HotelAddress { City = "Seattle", Country = "USA" }; var embedding = new[] { 30f, 31f, 32f, 33f }; - await this.Database.JSON().SetAsync("hotels:BaseSet-1", "$", new + // Add JSON test data. + await this.Database.JSON().SetAsync("jsonhotels:BaseSet-1", "$", new { HotelName = "My Hotel 1", HotelCode = 1, @@ -100,9 +122,45 @@ public async Task InitializeAsync() Rating = 3.6, Address = address }); - await this.Database.JSON().SetAsync("hotels:BaseSet-2", "$", new { HotelName = "My Hotel 2", HotelCode = 2, Description = "This is a great hotel.", DescriptionEmbedding = embedding, parking_is_included = false }); - await this.Database.JSON().SetAsync("hotels:BaseSet-3", "$", new { HotelName = "My Hotel 3", HotelCode = 3, Description = "This is a great hotel.", DescriptionEmbedding = embedding, parking_is_included = false }); - await this.Database.JSON().SetAsync("hotels:BaseSet-4-Invalid", "$", new { HotelId = "AnotherId", HotelName = "My Invalid Hotel", HotelCode = 4, Description = "This is an invalid hotel.", DescriptionEmbedding = embedding, parking_is_included = false }); + await this.Database.JSON().SetAsync("jsonhotels:BaseSet-2", "$", new { HotelName = "My Hotel 2", HotelCode = 2, Description = "This is a great hotel.", DescriptionEmbedding = embedding, parking_is_included = false }); + await this.Database.JSON().SetAsync("jsonhotels:BaseSet-3", "$", new { HotelName = "My Hotel 3", HotelCode = 3, Description = "This is a great hotel.", DescriptionEmbedding = embedding, parking_is_included = false }); + await this.Database.JSON().SetAsync("jsonhotels:BaseSet-4-Invalid", "$", new { HotelId = "AnotherId", HotelName = "My Invalid Hotel", HotelCode = 4, Description = "This is an invalid hotel.", DescriptionEmbedding = embedding, parking_is_included = false }); + + // Add hashset test data. 
+ await this.Database.HashSetAsync("hashhotels:BaseSet-1", new HashEntry[] + { + new("HotelName", "My Hotel 1"), + new("HotelCode", 1), + new("Description", "This is a great hotel."), + new("DescriptionEmbedding", MemoryMarshal.AsBytes(new ReadOnlySpan(embedding)).ToArray()), + new("parking_is_included", true), + new("Rating", 3.6) + }); + await this.Database.HashSetAsync("hashhotels:BaseSet-2", new HashEntry[] + { + new("HotelName", "My Hotel 2"), + new("HotelCode", 2), + new("Description", "This is a great hotel."), + new("DescriptionEmbedding", MemoryMarshal.AsBytes(new ReadOnlySpan(embedding)).ToArray()), + new("parking_is_included", false), + }); + await this.Database.HashSetAsync("hashhotels:BaseSet-3", new HashEntry[] + { + new("HotelName", "My Hotel 3"), + new("HotelCode", 3), + new("Description", "This is a great hotel."), + new("DescriptionEmbedding", MemoryMarshal.AsBytes(new ReadOnlySpan(embedding)).ToArray()), + new("parking_is_included", false), + }); + await this.Database.HashSetAsync("hashhotels:BaseSet-4-Invalid", new HashEntry[] + { + new("HotelId", "AnotherId"), + new("HotelName", "My Invalid Hotel"), + new("HotelCode", 4), + new("Description", "This is an invalid hotel."), + new("DescriptionEmbedding", MemoryMarshal.AsBytes(new ReadOnlySpan(embedding)).ToArray()), + new("parking_is_included", false), + }); } /// @@ -159,7 +217,7 @@ await client.Containers.StartContainerAsync( } /// - /// A test model for the vector store. + /// A test model for the vector store that has complex properties as supported by JSON redis mode. 
/// public class Hotel { @@ -184,7 +242,7 @@ public class Hotel #pragma warning restore CA1819 // Properties should not return arrays [JsonPropertyName("parking_is_included")] - [VectorStoreRecordData] + [VectorStoreRecordData(StoragePropertyName = "parking_is_included")] public bool ParkingIncluded { get; init; } [VectorStoreRecordData] @@ -205,5 +263,33 @@ public class HotelAddress public string City { get; init; } public string Country { get; init; } } + + /// + /// A test model for the vector store that only uses basic types as supported by HashSets Redis mode. + /// + public class BasicHotel + { + [VectorStoreRecordKey] + public string HotelId { get; init; } + + [VectorStoreRecordData(IsFilterable = true)] + public string HotelName { get; init; } + + [VectorStoreRecordData(IsFilterable = true)] + public int HotelCode { get; init; } + + [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + public string Description { get; init; } + + [VectorStoreRecordVector(4)] + public ReadOnlyMemory? DescriptionEmbedding { get; init; } + + [JsonPropertyName("parking_is_included")] + [VectorStoreRecordData(StoragePropertyName = "parking_is_included")] + public bool ParkingIncluded { get; init; } + + [VectorStoreRecordData] + public double Rating { get; init; } + } } #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. 
diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreTests.cs index cbc1ce74c01c..a6bda9559480 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreTests.cs @@ -26,8 +26,9 @@ public async Task ItCanGetAListOfExistingCollectionNamesAsync() var collectionNames = await sut.ListCollectionNamesAsync().ToListAsync(); // Assert - Assert.Single(collectionNames); - Assert.Contains("hotels", collectionNames); + Assert.Equal(2, collectionNames.Count); + Assert.Contains("jsonhotels", collectionNames); + Assert.Contains("hashhotels", collectionNames); // Output output.WriteLine(string.Join(",", collectionNames)); From da2653636122cf352d81b50b851e408938b296a6 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Fri, 26 Jul 2024 13:55:05 +0100 Subject: [PATCH 35/48] .Net: VectorStore: Add DI extensions for Pinecone and reorder params for others. (#7458) ### Description Adding DI extensions for the new Pinecone VectorStore implementation. Reordering the parameters for all VectorStore DI extensions so that ServiceId is last. This reduces the risk of ambiguous method signatures where we have to have multiple method overloads, since options is now before the serviceId, meaning that any optional string parameters to construct a client will not clash with the optional serviceId. 
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../AzureAISearchKernelBuilderExtensions.cs | 18 +++--- ...zureAISearchServiceCollectionExtensions.cs | 12 ++-- .../PineconeKernelBuilderExtensions.cs | 26 +++++++++ .../PineconeServiceCollectionExtensions.cs | 38 +++++++++++++ .../QdrantKernelBuilderExtensions.cs | 6 +- .../QdrantServiceCollectionExtensions.cs | 4 +- .../RedisKernelBuilderExtensions.cs | 6 +- .../RedisServiceCollectionExtensions.cs | 4 +- .../PineconeKernelBuilderExtensionsTests.cs | 55 +++++++++++++++++++ ...ineconeServiceCollectionExtensionsTests.cs | 54 ++++++++++++++++++ 10 files changed, 198 insertions(+), 25 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeKernelBuilderExtensions.cs create mode 100644 dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs create mode 100644 dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeKernelBuilderExtensionsTests.cs create mode 100644 dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeServiceCollectionExtensionsTests.cs diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchKernelBuilderExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchKernelBuilderExtensions.cs index c1eb8f2a2faf..16d48e60a66d 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchKernelBuilderExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchKernelBuilderExtensions.cs @@ -17,12 +17,12 @@ public 
static class AzureAISearchKernelBuilderExtensions /// Register an Azure AI Search with the specified service ID and where is retrieved from the dependency injection container. /// /// The builder to register the on. - /// An optional service id to use as the service key. /// Optional options to further configure the . + /// An optional service id to use as the service key. /// The kernel builder. - public static IKernelBuilder AddAzureAISearchVectorStore(this IKernelBuilder builder, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + public static IKernelBuilder AddAzureAISearchVectorStore(this IKernelBuilder builder, AzureAISearchVectorStoreOptions? options = default, string? serviceId = default) { - builder.Services.AddAzureAISearchVectorStore(serviceId, options); + builder.Services.AddAzureAISearchVectorStore(options, serviceId); return builder; } @@ -32,12 +32,12 @@ public static IKernelBuilder AddAzureAISearchVectorStore(this IKernelBuilder bui /// The builder to register the on. /// The service endpoint for Azure AI Search. /// The credential to authenticate to Azure AI Search with. - /// An optional service id to use as the service key. /// Optional options to further configure the . + /// An optional service id to use as the service key. /// The kernel builder. - public static IKernelBuilder AddAzureAISearchVectorStore(this IKernelBuilder builder, Uri endpoint, TokenCredential tokenCredential, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + public static IKernelBuilder AddAzureAISearchVectorStore(this IKernelBuilder builder, Uri endpoint, TokenCredential tokenCredential, AzureAISearchVectorStoreOptions? options = default, string? 
serviceId = default) { - builder.Services.AddAzureAISearchVectorStore(endpoint, tokenCredential, serviceId, options); + builder.Services.AddAzureAISearchVectorStore(endpoint, tokenCredential, options, serviceId); return builder; } @@ -47,12 +47,12 @@ public static IKernelBuilder AddAzureAISearchVectorStore(this IKernelBuilder bui /// The builder to register the on. /// The service endpoint for Azure AI Search. /// The credential to authenticate to Azure AI Search with. - /// An optional service id to use as the service key. /// Optional options to further configure the . + /// An optional service id to use as the service key. /// The kernel builder. - public static IKernelBuilder AddAzureAISearchVectorStore(this IKernelBuilder builder, Uri endpoint, AzureKeyCredential credential, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + public static IKernelBuilder AddAzureAISearchVectorStore(this IKernelBuilder builder, Uri endpoint, AzureKeyCredential credential, AzureAISearchVectorStoreOptions? options = default, string? serviceId = default) { - builder.Services.AddAzureAISearchVectorStore(endpoint, credential, serviceId, options); + builder.Services.AddAzureAISearchVectorStore(endpoint, credential, options, serviceId); return builder; } } diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs index 32ad0ed2b1fa..fdb280733a74 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs @@ -18,10 +18,10 @@ public static class AzureAISearchServiceCollectionExtensions /// Register an Azure AI Search with the specified service ID and where is retrieved from the dependency injection container. /// /// The to register the on. 
- /// An optional service id to use as the service key. /// Optional options to further configure the . + /// An optional service id to use as the service key. /// The kernel builder. - public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, AzureAISearchVectorStoreOptions? options = default, string? serviceId = default) { services.AddKeyedTransient( serviceId, @@ -44,10 +44,10 @@ public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollec /// The to register the on. /// The service endpoint for Azure AI Search. /// The credential to authenticate to Azure AI Search with. - /// An optional service id to use as the service key. /// Optional options to further configure the . + /// An optional service id to use as the service key. /// The kernel builder. - public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, Uri endpoint, TokenCredential tokenCredential, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, Uri endpoint, TokenCredential tokenCredential, AzureAISearchVectorStoreOptions? options = default, string? serviceId = default) { Verify.NotNull(endpoint); Verify.NotNull(tokenCredential); @@ -73,10 +73,10 @@ public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollec /// The to register the on. /// The service endpoint for Azure AI Search. /// The credential to authenticate to Azure AI Search with. - /// An optional service id to use as the service key. /// Optional options to further configure the . + /// An optional service id to use as the service key. /// The kernel builder. 
- public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, Uri endpoint, AzureKeyCredential credential, string? serviceId = default, AzureAISearchVectorStoreOptions? options = default) + public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, Uri endpoint, AzureKeyCredential credential, AzureAISearchVectorStoreOptions? options = default, string? serviceId = default) { Verify.NotNull(endpoint); Verify.NotNull(credential); diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeKernelBuilderExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeKernelBuilderExtensions.cs new file mode 100644 index 000000000000..ff6460ae21d6 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeKernelBuilderExtensions.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel.Data; +using Sdk = Pinecone; + +namespace Microsoft.SemanticKernel.Connectors.Pinecone; + +/// +/// Extension methods to register Pinecone instances on the . +/// +public static class PineconeKernelBuilderExtensions +{ + /// + /// Register a Pinecone with the specified service ID and where is retrieved from the dependency injection container. + /// + /// The builder to register the on. + /// The api key for Pinecone. + /// Optional options to further configure the . + /// An optional service id to use as the service key. + /// The kernel builder. + public static IKernelBuilder AddPineconeVectorStore(this IKernelBuilder builder, string? apiKey = default, PineconeVectorStoreOptions? options = default, string? 
serviceId = default) + { + builder.Services.AddPineconeVectorStore(apiKey, options, serviceId); + return builder; + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs new file mode 100644 index 000000000000..0b6013ccc9be --- /dev/null +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.Data; +using Sdk = Pinecone; + +namespace Microsoft.SemanticKernel.Connectors.Pinecone; + +/// +/// Extension methods to register Pinecone instances on an . +/// +public static class PineconeServiceCollectionExtensions +{ + /// + /// Register a Pinecone with the specified service ID and where is retrieved from the dependency injection container. + /// + /// The to register the on. + /// The api key for Pinecone. + /// Optional options to further configure the . + /// An optional service id to use as the service key. + /// The kernel builder. + public static IServiceCollection AddPineconeVectorStore(this IServiceCollection services, string? apiKey = default, PineconeVectorStoreOptions? options = default, string? serviceId = default) + { + services.AddKeyedTransient( + serviceId, + (sp, obj) => + { + var pineconeClient = apiKey == null ? sp.GetRequiredService() : new Sdk.PineconeClient(apiKey); + var selectedOptions = options ?? 
sp.GetService(); + + return new PineconeVectorStore( + pineconeClient, + selectedOptions); + }); + + return services; + } +} diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs index 4d0605cceca8..6afad06c33f5 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs @@ -17,12 +17,12 @@ public static class QdrantKernelBuilderExtensions /// The Qdrant service port. /// A value indicating whether to use HTTPS for communicating with Qdrant. /// The Qdrant service API key. - /// An optional service id to use as the service key. /// Optional options to further configure the . + /// An optional service id to use as the service key. /// The kernel builder. - public static IKernelBuilder AddQdrantVectorStore(this IKernelBuilder builder, string? host = default, int port = 6334, bool https = false, string? apiKey = default, string? serviceId = default, QdrantVectorStoreOptions? options = default) + public static IKernelBuilder AddQdrantVectorStore(this IKernelBuilder builder, string? host = default, int port = 6334, bool https = false, string? apiKey = default, QdrantVectorStoreOptions? options = default, string? 
serviceId = default) { - builder.Services.AddQdrantVectorStore(host, port, https, apiKey, serviceId, options); + builder.Services.AddQdrantVectorStore(host, port, https, apiKey, options, serviceId); return builder; } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs index 3470274e7a02..c8ac4479e15f 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs @@ -19,10 +19,10 @@ public static class QdrantServiceCollectionExtensions /// The Qdrant service port. /// A value indicating whether to use HTTPS for communicating with Qdrant. /// The Qdrant service API key. - /// An optional service id to use as the service key. /// Optional options to further configure the . + /// An optional service id to use as the service key. /// The kernel builder. - public static IServiceCollection AddQdrantVectorStore(this IServiceCollection services, string? host = default, int port = 6334, bool https = false, string? apiKey = default, string? serviceId = default, QdrantVectorStoreOptions? options = default) + public static IServiceCollection AddQdrantVectorStore(this IServiceCollection services, string? host = default, int port = 6334, bool https = false, string? apiKey = default, QdrantVectorStoreOptions? options = default, string? 
serviceId = default) { services.AddKeyedTransient( serviceId, diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs index 1bc6374d6f93..1618d9f7391f 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs @@ -15,12 +15,12 @@ public static class RedisKernelBuilderExtensions /// /// The builder to register the on. /// The Redis connection configuration string. If not provided, an instance will be requested from the dependency injection container. - /// An optional service id to use as the service key. /// Optional options to further configure the . + /// An optional service id to use as the service key. /// The kernel builder. - public static IKernelBuilder AddRedisVectorStore(this IKernelBuilder builder, string? redisConnectionConfiguration = default, string? serviceId = default, RedisVectorStoreOptions? options = default) + public static IKernelBuilder AddRedisVectorStore(this IKernelBuilder builder, string? redisConnectionConfiguration = default, RedisVectorStoreOptions? options = default, string? serviceId = default) { - builder.Services.AddRedisVectorStore(redisConnectionConfiguration, serviceId, options); + builder.Services.AddRedisVectorStore(redisConnectionConfiguration, options, serviceId); return builder; } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs index fb608884af10..3ca6c03e186f 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs @@ -16,10 +16,10 @@ public static class RedisServiceCollectionExtensions /// /// The to register the on. 
/// The Redis connection configuration string. If not provided, an instance will be requested from the dependency injection container. - /// An optional service id to use as the service key. /// Optional options to further configure the . + /// An optional service id to use as the service key. /// The kernel builder. - public static IServiceCollection AddRedisVectorStore(this IServiceCollection services, string? redisConnectionConfiguration = default, string? serviceId = default, RedisVectorStoreOptions? options = default) + public static IServiceCollection AddRedisVectorStore(this IServiceCollection services, string? redisConnectionConfiguration = default, RedisVectorStoreOptions? options = default, string? serviceId = default) { if (redisConnectionConfiguration == null) { diff --git a/dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeKernelBuilderExtensionsTests.cs b/dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeKernelBuilderExtensionsTests.cs new file mode 100644 index 000000000000..67cd1588e0dd --- /dev/null +++ b/dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeKernelBuilderExtensionsTests.cs @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.Pinecone; +using Microsoft.SemanticKernel.Data; +using Xunit; +using Sdk = Pinecone; + +namespace SemanticKernel.Connectors.UnitTests.Pinecone; + +/// +/// Tests for the class. +/// +public class PineconeKernelBuilderExtensionsTests +{ + private readonly IKernelBuilder _kernelBuilder; + + public PineconeKernelBuilderExtensionsTests() + { + this._kernelBuilder = Kernel.CreateBuilder(); + } + + [Fact] + public void AddVectorStoreRegistersClass() + { + // Arrange. + using var client = new Sdk.PineconeClient("fake api key"); + this._kernelBuilder.Services.AddSingleton(client); + + // Act. 
+ this._kernelBuilder.AddPineconeVectorStore(); + + // Assert. + this.AssertVectorStoreCreated(); + } + + [Fact] + public void AddVectorStoreWithApiKeyRegistersClass() + { + // Act. + this._kernelBuilder.AddPineconeVectorStore("fake api key"); + + // Assert. + this.AssertVectorStoreCreated(); + } + + private void AssertVectorStoreCreated() + { + var kernel = this._kernelBuilder.Build(); + var vectorStore = kernel.Services.GetRequiredService(); + Assert.NotNull(vectorStore); + Assert.IsType(vectorStore); + } +} diff --git a/dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeServiceCollectionExtensionsTests.cs b/dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeServiceCollectionExtensionsTests.cs new file mode 100644 index 000000000000..3894e3b65dc5 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeServiceCollectionExtensionsTests.cs @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.Connectors.Pinecone; +using Microsoft.SemanticKernel.Data; +using Xunit; +using Sdk = Pinecone; + +namespace SemanticKernel.Connectors.UnitTests.Pinecone; + +/// +/// Tests for the class. +/// +public class PineconeServiceCollectionExtensionsTests +{ + private readonly IServiceCollection _serviceCollection; + + public PineconeServiceCollectionExtensionsTests() + { + this._serviceCollection = new ServiceCollection(); + } + + [Fact] + public void AddVectorStoreRegistersClass() + { + // Arrange. + using var client = new Sdk.PineconeClient("fake api key"); + this._serviceCollection.AddSingleton(client); + + // Act. + this._serviceCollection.AddPineconeVectorStore(); + + // Assert. + this.AssertVectorStoreCreated(); + } + + [Fact] + public void AddVectorStoreWithApiKeyRegistersClass() + { + // Act. + this._serviceCollection.AddPineconeVectorStore("fake api key"); + + // Assert. 
+ this.AssertVectorStoreCreated(); + } + + private void AssertVectorStoreCreated() + { + var serviceProvider = this._serviceCollection.BuildServiceProvider(); + var vectorStore = serviceProvider.GetRequiredService(); + Assert.NotNull(vectorStore); + Assert.IsType(vectorStore); + } +} From d8fd5e875f0cb8497a2c0277e3445e65ff29e92d Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:03:09 +0100 Subject: [PATCH 36/48] .Net: Removing embedding generation attributes and fixing cloning bug. (#7459) ### Motivation and Context We decided to delay the automatic embedding generation until later, so any attributes related to embedding generation are not required for the time being, but can be added back easily later. ### Description Removed HasEmbedding and EmbeddingPropertyName from record definition and attributes. Fixed a small bug with definition cloning. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../Memory/VectorStore_DataIngestion.cs | 2 +- .../QdrantVectorStoreRecordMapperTests.cs | 2 +- .../RedisJsonVectorStoreRecordMapperTests.cs | 2 +- .../AzureAISearchVectorStoreFixture.cs | 2 +- .../Memory/Qdrant/QdrantVectorStoreFixture.cs | 4 ++-- .../Memory/Redis/RedisVectorStoreFixture.cs | 4 ++-- .../src/Data/VectorStoreRecordPropertyReader.cs | 2 -- .../VectorStoreRecordDataAttribute.cs | 11 ----------- .../VectorStoreRecordDataProperty.cs | 17 +++-------------- .../VectorStoreRecordKeyProperty.cs | 2 +- .../VectorStoreRecordProperty.cs | 6 ++++++ .../VectorStoreRecordVectorProperty.cs | 5 
++++- .../VectorStoreRecordPropertyReaderTests.cs | 9 ++------- 13 files changed, 24 insertions(+), 44 deletions(-) diff --git a/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs index a69c3c3ed743..fc12d98dc75b 100644 --- a/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs +++ b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs @@ -183,7 +183,7 @@ private sealed class Glossary [VectorStoreRecordData] public string Term { get; set; } - [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = nameof(DefinitionEmbedding))] + [VectorStoreRecordData] public string Definition { get; set; } [VectorStoreRecordVector(1536)] diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs index 3a3569a85dab..68cf620be29a 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs @@ -381,7 +381,7 @@ private sealed class MultiPropsModel [VectorStoreRecordKey] public TKey? 
Key { get; set; } = default; - [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "Vector1")] + [VectorStoreRecordData] public string DataString { get; set; } = string.Empty; [JsonPropertyName("data_int_json")] diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordMapperTests.cs index 328ba02e4fb6..4fbb089555f1 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordMapperTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordMapperTests.cs @@ -80,7 +80,7 @@ private sealed class MultiPropsModel [VectorStoreRecordKey] public string Key { get; set; } = string.Empty; - [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "Vector1")] + [VectorStoreRecordData] public string Data1 { get; set; } = string.Empty; [VectorStoreRecordData] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs index e91943b1d47a..c16cc077e33d 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs @@ -216,7 +216,7 @@ public class Hotel public string HotelName { get; set; } [SearchableField(AnalyzerName = LexicalAnalyzerName.Values.EnLucene)] - [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + [VectorStoreRecordData] public string Description { get; set; } [VectorStoreRecordVector(4)] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs index 15136f8d95e6..975b0bfbc87c 100644 --- 
a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs @@ -291,7 +291,7 @@ public record HotelInfo() public List Tags { get; set; } = new List(); /// A data field. - [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + [VectorStoreRecordData] public string Description { get; set; } /// A vector field. @@ -314,7 +314,7 @@ public record HotelInfoWithGuidId() public string? HotelName { get; set; } /// A data field. - [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + [VectorStoreRecordData] public string Description { get; set; } /// A vector field. diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs index f42370753f68..4256f9411636 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs @@ -230,7 +230,7 @@ public class Hotel [VectorStoreRecordData(IsFilterable = true)] public int HotelCode { get; init; } - [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + [VectorStoreRecordData] public string Description { get; init; } [VectorStoreRecordVector(4)] @@ -278,7 +278,7 @@ public class BasicHotel [VectorStoreRecordData(IsFilterable = true)] public int HotelCode { get; init; } - [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "DescriptionEmbedding")] + [VectorStoreRecordData] public string Description { get; init; } [VectorStoreRecordVector(4)] diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index 9048e4439344..1a6a9a99a419 100644 --- 
a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -217,8 +217,6 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT { definitionProperties.Add(new VectorStoreRecordDataProperty(dataProperty.Name) { - HasEmbedding = dataAttribute.HasEmbedding, - EmbeddingPropertyName = dataAttribute.EmbeddingPropertyName, IsFilterable = dataAttribute.IsFilterable, PropertyType = dataProperty.PropertyType, StoragePropertyName = dataAttribute.StoragePropertyName diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs index eb1ead024a2a..6c0f8c733645 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs @@ -16,17 +16,6 @@ namespace Microsoft.SemanticKernel.Data; [AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] public sealed class VectorStoreRecordDataAttribute : Attribute { - /// - /// Gets or sets a value indicating whether this data field has an associated embedding field. - /// - /// Defaults to - public bool HasEmbedding { get; init; } - - /// - /// Gets or sets the name of the property that contains the embedding for this data field. - /// - public string? EmbeddingPropertyName { get; init; } - /// /// Gets or sets a value indicating whether this data property is filterable. 
/// diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs index eca2c72ced59..6db74c97cd41 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs @@ -25,23 +25,12 @@ public VectorStoreRecordDataProperty(string propertyName) /// /// The source to clone public VectorStoreRecordDataProperty(VectorStoreRecordDataProperty source) - : base(source.PropertyName) + : base(source) { - this.HasEmbedding = source.HasEmbedding; - this.EmbeddingPropertyName = source.EmbeddingPropertyName; + this.IsFilterable = source.IsFilterable; + this.PropertyType = source.PropertyType; } - /// - /// Gets or sets a value indicating whether this data property has an associated embedding property. - /// - /// Defaults to - public bool HasEmbedding { get; init; } - - /// - /// Gets or sets the name of the property that contains the embedding for this data property. - /// - public string? EmbeddingPropertyName { get; init; } - /// /// Gets or sets a value indicating whether this data property is filterable. 
/// diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs index 51fcf5a5af3d..080d7ceaa906 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs @@ -24,7 +24,7 @@ public VectorStoreRecordKeyProperty(string propertyName) /// /// The source to clone public VectorStoreRecordKeyProperty(VectorStoreRecordKeyProperty source) - : base(source.PropertyName) + : base(source) { } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs index fc500b1a2936..a74003cbf105 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs @@ -19,6 +19,12 @@ private protected VectorStoreRecordProperty(string propertyName) this.PropertyName = propertyName; } + private protected VectorStoreRecordProperty(VectorStoreRecordProperty source) + { + this.PropertyName = source.PropertyName; + this.StoragePropertyName = source.StoragePropertyName; + } + /// /// Gets or sets the name of the property. 
/// diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs index 9b973f9d3c3e..78966cf0df9f 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs @@ -24,8 +24,11 @@ public VectorStoreRecordVectorProperty(string propertyName) /// /// The source to clone public VectorStoreRecordVectorProperty(VectorStoreRecordVectorProperty source) - : base(source.PropertyName) + : base(source) { + this.Dimensions = source.Dimensions; + this.IndexKind = source.IndexKind; + this.DistanceFunction = source.DistanceFunction; } /// diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index 7e4ca5519a1f..2a4aaf7d570a 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -168,11 +168,6 @@ public void CreateVectorStoreRecordDefinitionFromTypeConvertsAllProps() Assert.True(data1.IsFilterable); Assert.False(data2.IsFilterable); - Assert.True(data1.HasEmbedding); - Assert.False(data2.HasEmbedding); - - Assert.Equal("Vector1", data1.EmbeddingPropertyName); - Assert.Equal(typeof(string), data1.PropertyType); Assert.Equal(typeof(string), data2.PropertyType); @@ -333,7 +328,7 @@ private sealed class MultiPropsModel [VectorStoreRecordKey] public string Key { get; set; } = string.Empty; - [VectorStoreRecordData(HasEmbedding = true, EmbeddingPropertyName = "Vector1", IsFilterable = true)] + [VectorStoreRecordData(IsFilterable = true)] public string Data1 { get; set; } = string.Empty; [VectorStoreRecordData] @@ -354,7 +349,7 @@ private sealed class 
MultiPropsModel Properties = [ new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data1") { HasEmbedding = true, EmbeddingPropertyName = "Vector1", IsFilterable = true }, + new VectorStoreRecordDataProperty("Data1") { IsFilterable = true }, new VectorStoreRecordDataProperty("Data2") { StoragePropertyName = "data_2" }, new VectorStoreRecordVectorProperty("Vector1") { Dimensions = 4, IndexKind = IndexKind.Flat, DistanceFunction = DistanceFunction.DotProductSimilarity }, new VectorStoreRecordVectorProperty("Vector2") From 887fa0c5402f481d55934434589c6ea585e99a05 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:04:23 +0100 Subject: [PATCH 37/48] .Net: Add a few improvements to data ingestion sample. (#7524) ### Motivation and Context Addressing feedback on the data ingestion sample from the bug bash. See: [#7475](https://github.com/microsoft/semantic-kernel/issues/7475) ### Description Remove `.Result` calls that slipped in accidentally. Only start docker containers if the test requires them. Replace `Parallel.ForEachAsync` with `Task.WhenAll`. Add an example without dependency injection. 
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../VectorStoreQdrantContainerFixture.cs | 15 +++- .../VectorStoreRedisContainerFixture.cs | 15 +++- .../Memory/VectorStore_DataIngestion.cs | 81 ++++++++++++++++--- 3 files changed, 93 insertions(+), 18 deletions(-) diff --git a/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs index 3f69902cbcc9..59b5449120e0 100644 --- a/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs +++ b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs @@ -14,10 +14,17 @@ public class VectorStoreQdrantContainerFixture : IAsyncLifetime public async Task InitializeAsync() { - // Connect to docker and start the docker container. - using var dockerClientConfiguration = new DockerClientConfiguration(); - this._dockerClient = dockerClientConfiguration.CreateClient(); - this._qdrantContainerId = await VectorStoreInfra.SetupQdrantContainerAsync(this._dockerClient); + } + + public async Task ManualInitializeAsync() + { + if (this._qdrantContainerId == null) + { + // Connect to docker and start the docker container. 
+ using var dockerClientConfiguration = new DockerClientConfiguration(); + this._dockerClient = dockerClientConfiguration.CreateClient(); + this._qdrantContainerId = await VectorStoreInfra.SetupQdrantContainerAsync(this._dockerClient); + } } public async Task DisposeAsync() diff --git a/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreRedisContainerFixture.cs b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreRedisContainerFixture.cs index 2760e5aaabaa..eb35b7ff555f 100644 --- a/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreRedisContainerFixture.cs +++ b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreRedisContainerFixture.cs @@ -14,10 +14,17 @@ public class VectorStoreRedisContainerFixture : IAsyncLifetime public async Task InitializeAsync() { - // Connect to docker and start the docker container. - using var dockerClientConfiguration = new DockerClientConfiguration(); - this._dockerClient = dockerClientConfiguration.CreateClient(); - this._redisContainerId = await VectorStoreInfra.SetupRedisContainerAsync(this._dockerClient); + } + + public async Task ManualInitializeAsync() + { + if (this._redisContainerId == null) + { + // Connect to docker and start the docker container. 
+ using var dockerClientConfiguration = new DockerClientConfiguration(); + this._dockerClient = dockerClientConfiguration.CreateClient(); + this._redisContainerId = await VectorStoreInfra.SetupRedisContainerAsync(this._dockerClient); + } } public async Task DisposeAsync() diff --git a/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs index fc12d98dc75b..3dd33494431e 100644 --- a/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs +++ b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs @@ -4,10 +4,13 @@ using Memory.VectorStoreFixtures; using Microsoft.Extensions.DependencyInjection; using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Connectors.Qdrant; using Microsoft.SemanticKernel.Connectors.Redis; using Microsoft.SemanticKernel.Data; using Microsoft.SemanticKernel.Embeddings; +using Qdrant.Client; +using StackExchange.Redis; namespace Memory; @@ -22,19 +25,20 @@ namespace Memory; /// 3. Ingest some data into the vector store. /// 4. Read the data back from the vector store. /// -/// To run this sample, you need a local instance of Docker running, since the associated fixtures will try and start Redis and Qdrant containers in the local docker instance. +/// For some databases in this sample (Redis & Qdrant), you need a local instance of Docker running, since the associated fixtures will try and start containers in the local docker instance to run against. /// -public class VectorStore_DataIngestion(ITestOutputHelper output) : BaseTest(output), IClassFixture, IClassFixture +[Collection("Sequential")] +public class VectorStore_DataIngestion(ITestOutputHelper output, VectorStoreRedisContainerFixture redisFixture, VectorStoreQdrantContainerFixture qdrantFixture) : BaseTest(output), IClassFixture, IClassFixture { /// - /// Main entry point for example. + /// Example with dependency injection. 
/// /// The type of database to run the example for. [Theory] [InlineData("Redis")] [InlineData("Qdrant")] [InlineData("Volatile")] - public async Task ExampleAsync(string databaseType) + public async Task ExampleWithDIAsync(string databaseType) { // Use the kernel for DI purposes. var kernelBuilder = Kernel @@ -46,13 +50,15 @@ public async Task ExampleAsync(string databaseType) endpoint: TestConfiguration.AzureOpenAIEmbeddings.Endpoint, apiKey: TestConfiguration.AzureOpenAIEmbeddings.ApiKey); - // Register the chosen vector store with the DI container. + // Register the chosen vector store with the DI container and initialize docker containers via the fixtures where needed. if (databaseType == "Redis") { + await redisFixture.ManualInitializeAsync(); kernelBuilder.AddRedisVectorStore("localhost:6379"); } else if (databaseType == "Qdrant") { + await qdrantFixture.ManualInitializeAsync(); kernelBuilder.AddQdrantVectorStore("localhost"); } else if (databaseType == "Volatile") @@ -81,13 +87,67 @@ public async Task ExampleAsync(string databaseType) } } + /// + /// Example without dependency injection. + /// + /// The type of database to run the example for. + [Theory] + [InlineData("Redis")] + [InlineData("Qdrant")] + [InlineData("Volatile")] + public async Task ExampleWithoutDIAsync(string databaseType) + { + // Create an embedding generation service. + var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService( + TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, + TestConfiguration.AzureOpenAIEmbeddings.Endpoint, + TestConfiguration.AzureOpenAIEmbeddings.ApiKey); + + // Construct the chosen vector store and initialize docker containers via the fixtures where needed. 
+ IVectorStore vectorStore; + if (databaseType == "Redis") + { + await redisFixture.ManualInitializeAsync(); + var database = ConnectionMultiplexer.Connect("localhost:6379").GetDatabase(); + vectorStore = new RedisVectorStore(database); + } + else if (databaseType == "Qdrant") + { + await qdrantFixture.ManualInitializeAsync(); + var qdrantClient = new QdrantClient("localhost"); + vectorStore = new QdrantVectorStore(qdrantClient); + } + else if (databaseType == "Volatile") + { + vectorStore = new VolatileVectorStore(); + } + else + { + throw new ArgumentException("Invalid database type."); + } + + // Create the DataIngestor. + var dataIngestor = new DataIngestor(vectorStore, textEmbeddingGenerationService); + + // Invoke the data ingestor using an appropriate key generator function for each database type. + // Redis and Volatile supports string keys, while Qdrant supports ulong or Guid keys, so we use a different key generator for each key type. + if (databaseType == "Redis" || databaseType == "Volatile") + { + await this.UpsertDataAndReadFromVectorStoreAsync(dataIngestor, () => Guid.NewGuid().ToString()); + } + else if (databaseType == "Qdrant") + { + await this.UpsertDataAndReadFromVectorStoreAsync(dataIngestor, () => Guid.NewGuid()); + } + } + private async Task UpsertDataAndReadFromVectorStoreAsync(DataIngestor dataIngestor, Func uniqueKeyGenerator) { // Ingest some data into the vector store. - var upsertedKeys = dataIngestor.ImportDataAsync(uniqueKeyGenerator).Result; + var upsertedKeys = await dataIngestor.ImportDataAsync(uniqueKeyGenerator); // Get one of the upserted records. - var upsertedRecord = dataIngestor.GetGlossaryAsync(upsertedKeys.First()).Result; + var upsertedRecord = await dataIngestor.GetGlossaryAsync(upsertedKeys.First()); // Write upserted keys and one of the upserted records to the console. 
Console.WriteLine($"Upserted keys: {string.Join(", ", upsertedKeys)}"); @@ -114,10 +174,11 @@ public async Task> ImportDataAsync(Func uniqueKeyG // Create glossary entries and generate embeddings for them. var glossaryEntries = CreateGlossaryEntries(uniqueKeyGenerator).ToList(); - await Parallel.ForEachAsync(glossaryEntries, async (entry, cancellationToken) => + var tasks = glossaryEntries.Select(entry => Task.Run(async () => { - entry.DefinitionEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(entry.Definition, cancellationToken: cancellationToken); - }); + entry.DefinitionEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(entry.Definition); + })); + await Task.WhenAll(tasks); // Upsert the glossary entries into the collection and return their keys. var upsertedKeys = glossaryEntries.Select(x => collection.UpsertAsync(x)); From ef9bc1b282b98c57c8f09b0363d9585e740ee13b Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:08:33 +0100 Subject: [PATCH 38/48] .Net: Fix bug where JsonSerializerOptions is not consistently used. (#7533) ### Motivation and Context Both Redis and AzureAISearch use JSON as their serialization mechanism; however, in various places the field names in storage need to be used by the rest of the code, e.g. 
to determine the names of properties when creating an index ### Description - Adding tests to ensure all scenarios are covered - Fix bugs where there are gaps ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...ISearchVectorStoreRecordCollectionTests.cs | 116 +++++++----- ...zureAISearchVectorStoreRecordCollection.cs | 4 +- .../RedisJsonVectorStoreRecordCollection.cs | 2 +- .../RedisJsonVectorStoreRecordMapper.cs | 13 +- ...disJsonVectorStoreRecordCollectionTests.cs | 178 +++++++++++------- .../RedisJsonVectorStoreRecordMapperTests.cs | 63 +++++-- 6 files changed, 242 insertions(+), 134 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs index 585b38de81a7..689461890df2 100644 --- a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -5,6 +5,7 @@ using System.Linq; using System.Text.Json; using System.Text.Json.Nodes; +using System.Text.Json.Serialization; using System.Threading; using System.Threading.Tasks; using Azure; @@ -61,7 +62,7 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN .ThrowsAsync(new RequestFailedException(404, "Index not found")); } - var sut = new AzureAISearchVectorStoreRecordCollection(this._searchIndexClientMock.Object, collectionName); + 
var sut = new AzureAISearchVectorStoreRecordCollection(this._searchIndexClientMock.Object, collectionName); // Act. var actual = await sut.CollectionExistsAsync(this._testCancellationToken); @@ -71,25 +72,28 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN } [Theory] - [InlineData(true)] - [InlineData(false)] - public async Task CreateCollectionCallsSDKAsync(bool useDefinition) + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CreateCollectionCallsSDKAsync(bool useDefinition, bool useCustomJsonSerializerOptions) { // Arrange. this._searchIndexClientMock .Setup(x => x.CreateIndexAsync(It.IsAny(), this._testCancellationToken)) .ReturnsAsync(Response.FromValue(new SearchIndex(TestCollectionName), Mock.Of())); - var sut = this.CreateRecordCollection(useDefinition); + var sut = this.CreateRecordCollection(useDefinition, useCustomJsonSerializerOptions); // Act. await sut.CreateCollectionAsync(); // Assert. + var expectedFieldNames = useCustomJsonSerializerOptions ? 
new[] { "key", "storage_data1", "data2", "storage_vector1", "vector2" } : new[] { "Key", "storage_data1", "Data2", "storage_vector1", "Vector2" }; this._searchIndexClientMock .Verify( x => x.CreateIndexAsync( - It.Is(si => si.Fields.Count == 3 && si.Name == TestCollectionName && si.VectorSearch.Profiles.Count == 1 && si.VectorSearch.Algorithms.Count == 1), + It.Is(si => si.Fields.Count == 5 && si.Fields.Select(f => f.Name).SequenceEqual(expectedFieldNames) && si.Name == TestCollectionName && si.VectorSearch.Profiles.Count == 2 && si.VectorSearch.Algorithms.Count == 2), this._testCancellationToken), Times.Once); } @@ -139,7 +143,7 @@ public async Task CreateCollectionIfNotExistsSDKAsync(bool useDefinition, bool e this._searchIndexClientMock .Verify( x => x.CreateIndexAsync( - It.Is(si => si.Fields.Count == 3 && si.Name == TestCollectionName && si.VectorSearch.Profiles.Count == 1 && si.VectorSearch.Algorithms.Count == 1), + It.Is(si => si.Fields.Count == 5 && si.Name == TestCollectionName && si.VectorSearch.Profiles.Count == 2 && si.VectorSearch.Algorithms.Count == 2), this._testCancellationToken), Times.Once); } @@ -169,7 +173,7 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition) { // Arrange. this._searchClientMock.Setup( - x => x.GetDocumentAsync( + x => x.GetDocumentAsync( TestRecordKey1, It.Is(x => !x.SelectedFields.Any()), this._testCancellationToken)) @@ -186,26 +190,31 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition) // Assert. 
Assert.NotNull(actual); Assert.Equal(TestRecordKey1, actual.Key); - Assert.Equal("data 1", actual.Data); - Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + Assert.Equal("data 1", actual.Data1); + Assert.Equal("data 2", actual.Data2); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector1!.Value.ToArray()); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector2!.Value.ToArray()); } [Theory] - [InlineData(true)] - [InlineData(false)] - public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) + [InlineData(true, true)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(false, false)] + public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool useCustomJsonSerializerOptions) { // Arrange. var storageObject = JsonSerializer.SerializeToNode(CreateModel(TestRecordKey1, false))!.AsObject(); + var expectedSelectFields = useCustomJsonSerializerOptions ? new[] { "key", "storage_data1", "data2" } : new[] { "Key", "storage_data1", "Data2" }; this._searchClientMock.Setup( - x => x.GetDocumentAsync( + x => x.GetDocumentAsync( TestRecordKey1, - It.Is(x => x.SelectedFields.Contains("Key") && x.SelectedFields.Contains("Data")), + It.Is(x => x.SelectedFields.SequenceEqual(expectedSelectFields)), this._testCancellationToken)) .ReturnsAsync(Response.FromValue(CreateModel(TestRecordKey1, true), Mock.Of())); - var sut = this.CreateRecordCollection(useDefinition); + var sut = this.CreateRecordCollection(useDefinition, useCustomJsonSerializerOptions); // Act. var actual = await sut.GetAsync( @@ -216,7 +225,8 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) // Assert. Assert.NotNull(actual); Assert.Equal(TestRecordKey1, actual.Key); - Assert.Equal("data 1", actual.Data); + Assert.Equal("data 1", actual.Data1); + Assert.Equal("data 2", actual.Data2); } [Theory] @@ -226,7 +236,7 @@ public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) { // Arrange. 
this._searchClientMock.Setup( - x => x.GetDocumentAsync( + x => x.GetDocumentAsync( It.IsAny(), It.IsAny(), this._testCancellationToken)) @@ -265,7 +275,7 @@ public async Task CanGetRecordWithCustomMapperAsync() .ReturnsAsync(Response.FromValue(storageObject, Mock.Of())); // Arrange mapper mock from JsonObject to data model. - var mapperMock = new Mock>(MockBehavior.Strict); + var mapperMock = new Mock>(MockBehavior.Strict); mapperMock.Setup( x => x.MapFromStorageToDataModel( storageObject, @@ -273,7 +283,7 @@ public async Task CanGetRecordWithCustomMapperAsync() .Returns(CreateModel(TestRecordKey1, true)); // Arrange target with custom mapper. - var sut = new AzureAISearchVectorStoreRecordCollection( + var sut = new AzureAISearchVectorStoreRecordCollection( this._searchIndexClientMock.Object, TestCollectionName, new() @@ -287,8 +297,10 @@ public async Task CanGetRecordWithCustomMapperAsync() // Assert. Assert.NotNull(actual); Assert.Equal(TestRecordKey1, actual.Key); - Assert.Equal("data 1", actual.Data); - Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + Assert.Equal("data 1", actual.Data1); + Assert.Equal("data 2", actual.Data2); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector1!.Value.ToArray()); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector2!.Value.ToArray()); } [Theory] @@ -377,7 +389,7 @@ public async Task CanUpsertRecordAsync(bool useDefinition) // Arrange upload. 
this._searchClientMock.Setup( x => x.UploadDocumentsAsync( - It.IsAny>(), + It.IsAny>(), It.IsAny(), this._testCancellationToken)) .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); @@ -397,7 +409,7 @@ public async Task CanUpsertRecordAsync(bool useDefinition) Assert.Equal(TestRecordKey1, actual); this._searchClientMock.Verify( x => x.UploadDocumentsAsync( - It.Is>(x => x.Count() == 1 && x.First().Key == TestRecordKey1), + It.Is>(x => x.Count() == 1 && x.First().Key == TestRecordKey1), It.Is(x => x.ThrowOnAnyError == true), this._testCancellationToken), Times.Once); @@ -422,7 +434,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition) // Arrange upload. this._searchClientMock.Setup( x => x.UploadDocumentsAsync( - It.IsAny>(), + It.IsAny>(), It.IsAny(), this._testCancellationToken)) .ReturnsAsync(Response.FromValue(indexDocumentsResultMock.Object, Mock.Of())); @@ -446,7 +458,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition) this._searchClientMock.Verify( x => x.UploadDocumentsAsync( - It.Is>(x => x.Count() == 2 && x.First().Key == TestRecordKey1 && x.ElementAt(1).Key == TestRecordKey2), + It.Is>(x => x.Count() == 2 && x.First().Key == TestRecordKey1 && x.ElementAt(1).Key == TestRecordKey2), It.Is(x => x.ThrowOnAnyError == true), this._testCancellationToken), Times.Once); @@ -480,13 +492,13 @@ public async Task CanUpsertRecordWithCustomMapperAsync() }); // Arrange mapper mock from data model to JsonObject. - var mapperMock = new Mock>(MockBehavior.Strict); + var mapperMock = new Mock>(MockBehavior.Strict); mapperMock - .Setup(x => x.MapFromDataToStorageModel(It.IsAny())) + .Setup(x => x.MapFromDataToStorageModel(It.IsAny())) .Returns(storageObject); // Arrange target with custom mapper. 
- var sut = new AzureAISearchVectorStoreRecordCollection( + var sut = new AzureAISearchVectorStoreRecordCollection( this._searchIndexClientMock.Object, TestCollectionName, new() @@ -503,52 +515,70 @@ await sut.UpsertAsync( // Assert. mapperMock .Verify( - x => x.MapFromDataToStorageModel(It.Is(x => x.Key == TestRecordKey1)), + x => x.MapFromDataToStorageModel(It.Is(x => x.Key == TestRecordKey1)), Times.Once); } - private AzureAISearchVectorStoreRecordCollection CreateRecordCollection(bool useDefinition) + private AzureAISearchVectorStoreRecordCollection CreateRecordCollection(bool useDefinition, bool useCustomJsonSerializerOptions = false) { - return new AzureAISearchVectorStoreRecordCollection( + return new AzureAISearchVectorStoreRecordCollection( this._searchIndexClientMock.Object, TestCollectionName, new() { - VectorStoreRecordDefinition = useDefinition ? this._singlePropsDefinition : null + VectorStoreRecordDefinition = useDefinition ? this._multiPropsDefinition : null, + JsonSerializerOptions = useCustomJsonSerializerOptions ? this._customJsonSerializerOptions : null }); } - private static SinglePropsModel CreateModel(string key, bool withVectors) + private static MultiPropsModel CreateModel(string key, bool withVectors) { - return new SinglePropsModel + return new MultiPropsModel { Key = key, - Data = "data 1", - Vector = withVectors ? new float[] { 1, 2, 3, 4 } : null, + Data1 = "data 1", + Data2 = "data 2", + Vector1 = withVectors ? new float[] { 1, 2, 3, 4 } : null, + Vector2 = withVectors ? 
new float[] { 1, 2, 3, 4 } : null, NotAnnotated = null, }; } - private readonly VectorStoreRecordDefinition _singlePropsDefinition = new() + private readonly JsonSerializerOptions _customJsonSerializerOptions = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase + }; + + private readonly VectorStoreRecordDefinition _multiPropsDefinition = new() { Properties = [ new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data") { PropertyType = typeof(string) }, - new VectorStoreRecordVectorProperty("Vector") { Dimensions = 4 } + new VectorStoreRecordDataProperty("Data1") { PropertyType = typeof(string) }, + new VectorStoreRecordDataProperty("Data2") { PropertyType = typeof(string) }, + new VectorStoreRecordVectorProperty("Vector1") { Dimensions = 4 }, + new VectorStoreRecordVectorProperty("Vector2") { Dimensions = 4 } ] }; - public sealed class SinglePropsModel + public sealed class MultiPropsModel { [VectorStoreRecordKey] public string Key { get; set; } = string.Empty; + [JsonPropertyName("storage_data1")] + [VectorStoreRecordData] + public string Data1 { get; set; } = string.Empty; + [VectorStoreRecordData] - public string Data { get; set; } = string.Empty; + public string Data2 { get; set; } = string.Empty; + + [JsonPropertyName("storage_vector1")] + [VectorStoreRecordVector(4)] + public ReadOnlyMemory? Vector1 { get; set; } [VectorStoreRecordVector(4)] - public ReadOnlyMemory? Vector { get; set; } + public ReadOnlyMemory? Vector2 { get; set; } public string? 
NotAnnotated { get; set; } } diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs index f096db6918ed..c3db9340fc8c 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs @@ -135,7 +135,7 @@ public AzureAISearchVectorStoreRecordCollection(SearchIndexClient searchIndexCli // Get storage names for data properties and store for later use. foreach (var property in properties.dataProperties) { - var jsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(JsonSerializerOptions.Default, property); + var jsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, property); this._storagePropertyNames[property.Name] = jsonPropertyName; this._nonVectorStoragePropertyNames.Add(jsonPropertyName); } @@ -143,7 +143,7 @@ public AzureAISearchVectorStoreRecordCollection(SearchIndexClient searchIndexCli // Get storage names for vector properties and store for later use. 
foreach (var property in properties.vectorProperties) { - var jsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(JsonSerializerOptions.Default, property); + var jsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, property); this._storagePropertyNames[property.Name] = jsonPropertyName; } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs index 009e983cdc5d..ccb2d8467215 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs @@ -135,7 +135,7 @@ public RedisJsonVectorStoreRecordCollection(IDatabase database, string collectio } else { - this._mapper = new RedisJsonVectorStoreRecordMapper(this._keyJsonPropertyName); + this._mapper = new RedisJsonVectorStoreRecordMapper(this._keyJsonPropertyName, this._jsonSerializerOptions); } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordMapper.cs index cb48b61902bc..3237c50c992e 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordMapper.cs @@ -16,14 +16,21 @@ internal sealed class RedisJsonVectorStoreRecordMapper : IVe /// The name of the temporary json property that the key field will be serialized / parsed from. private readonly string _keyFieldJsonPropertyName; + /// The JSON serializer options to use when converting between the data model and the Redis record. + private readonly JsonSerializerOptions _jsonSerializerOptions; + /// /// Initializes a new instance of the class. /// /// The name of the key field on the model when serialized to json. 
- public RedisJsonVectorStoreRecordMapper(string keyFieldJsonPropertyName) + /// The JSON serializer options to use when converting between the data model and the Redis record. + public RedisJsonVectorStoreRecordMapper(string keyFieldJsonPropertyName, JsonSerializerOptions jsonSerializerOptions) { Verify.NotNullOrWhiteSpace(keyFieldJsonPropertyName); + Verify.NotNull(jsonSerializerOptions); + this._keyFieldJsonPropertyName = keyFieldJsonPropertyName; + this._jsonSerializerOptions = jsonSerializerOptions; } /// @@ -32,7 +39,7 @@ public RedisJsonVectorStoreRecordMapper(string keyFieldJsonPropertyName) // Convert the provided record into a JsonNode object and try to get the key field for it. // Since we already checked that the key field is a string in the constructor, and that it exists on the model, // the only edge case we have to be concerned about is if the key field is null. - var jsonNode = JsonSerializer.SerializeToNode(dataModel); + var jsonNode = JsonSerializer.SerializeToNode(dataModel, this._jsonSerializerOptions); if (jsonNode!.AsObject().TryGetPropertyValue(this._keyFieldJsonPropertyName, out var keyField) && keyField is JsonValue jsonValue) { // Remove the key field from the JSON object since we don't want to store it in the redis payload. @@ -73,6 +80,6 @@ public TConsumerDataModel MapFromStorageToDataModel((string Key, JsonNode Node) // Since the key is not stored in the redis value, add it back in before deserializing into the data model. 
jsonObject.Add(this._keyFieldJsonPropertyName, storageModel.Key); - return JsonSerializer.Deserialize(jsonObject)!; + return JsonSerializer.Deserialize(jsonObject, this._jsonSerializerOptions)!; } } diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs index 1a621975de01..201a7c3fdd02 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Text.Json; using System.Text.Json.Nodes; using System.Text.Json.Serialization; using System.Threading.Tasks; @@ -47,7 +48,7 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN { SetupExecuteMock(this._redisDatabaseMock, new RedisServerException("Unknown index name")); } - var sut = new RedisJsonVectorStoreRecordCollection( + var sut = new RedisJsonVectorStoreRecordCollection( this._redisDatabaseMock.Object, collectionName); @@ -65,12 +66,16 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN Assert.Equal(expectedExists, actual); } - [Fact] - public async Task CanCreateCollectionAsync() + [Theory] + [InlineData(true, true, "data2", "vector2")] + [InlineData(true, false, "Data2", "Vector2")] + [InlineData(false, true, "data2", "vector2")] + [InlineData(false, false, "Data2", "Vector2")] + public async Task CanCreateCollectionAsync(bool useDefinition, bool useCustomJsonSerializerOptions, string expectedData2Name, string expectedVector2Name) { // Arrange. 
SetupExecuteMock(this._redisDatabaseMock, string.Empty); - var sut = new RedisJsonVectorStoreRecordCollection(this._redisDatabaseMock.Object, TestCollectionName); + var sut = this.CreateRecordCollection(useDefinition, useCustomJsonSerializerOptions); // Act. await sut.CreateCollectionAsync(); @@ -84,17 +89,29 @@ public async Task CanCreateCollectionAsync() 1, "testcollection:", "SCHEMA", - "$.OriginalNameData", + "$.data1_json_name", "AS", - "OriginalNameData", + "data1_json_name", "TEXT", - "$.data_json_name", + $"$.{expectedData2Name}", "AS", - "data_json_name", + expectedData2Name, "TEXT", - "$.vector_json_name", + "$.vector1_json_name", + "AS", + "vector1_json_name", + "VECTOR", + "HNSW", + 6, + "TYPE", + "FLOAT32", + "DIM", + "4", + "DISTANCE_METRIC", + "COSINE", + $"$.{expectedVector2Name}", "AS", - "vector_json_name", + expectedVector2Name, "VECTOR", "HNSW", 6, @@ -133,14 +150,15 @@ public async Task CanDeleteCollectionAsync() } [Theory] - [InlineData(true)] - [InlineData(false)] - public async Task CanGetRecordWithVectorsAsync(bool useDefinition) + [InlineData(true, true, """{ "data1_json_name": "data 1", "data2": "data 2", "vector1_json_name": [1, 2, 3, 4], "vector2": [1, 2, 3, 4] }""")] + [InlineData(true, false, """{ "data1_json_name": "data 1", "Data2": "data 2", "vector1_json_name": [1, 2, 3, 4], "Vector2": [1, 2, 3, 4] }""")] + [InlineData(false, true, """{ "data1_json_name": "data 1", "data2": "data 2", "vector1_json_name": [1, 2, 3, 4], "vector2": [1, 2, 3, 4] }""")] + [InlineData(false, false, """{ "data1_json_name": "data 1", "Data2": "data 2", "vector1_json_name": [1, 2, 3, 4], "Vector2": [1, 2, 3, 4] }""")] + public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool useCustomJsonSerializerOptions, string redisResultString) { // Arrange - var redisResultString = """{ "OriginalNameData": "data 1", "data_json_name": "data 1", "vector_json_name": [1, 2, 3, 4] }"""; SetupExecuteMock(this._redisDatabaseMock, redisResultString); - var sut 
= this.CreateRecordCollection(useDefinition); + var sut = this.CreateRecordCollection(useDefinition, useCustomJsonSerializerOptions); // Act var actual = await sut.GetAsync( @@ -158,20 +176,22 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition) Assert.NotNull(actual); Assert.Equal(TestRecordKey1, actual.Key); - Assert.Equal("data 1", actual.OriginalNameData); - Assert.Equal("data 1", actual.Data); - Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + Assert.Equal("data 1", actual.Data1); + Assert.Equal("data 2", actual.Data2); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector1!.Value.ToArray()); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector2!.Value.ToArray()); } [Theory] - [InlineData(true)] - [InlineData(false)] - public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) + [InlineData(true, true, """{ "data1_json_name": "data 1", "data2": "data 2" }""", "data2")] + [InlineData(true, false, """{ "data1_json_name": "data 1", "Data2": "data 2" }""", "Data2")] + [InlineData(false, true, """{ "data1_json_name": "data 1", "data2": "data 2" }""", "data2")] + [InlineData(false, false, """{ "data1_json_name": "data 1", "Data2": "data 2" }""", "Data2")] + public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool useCustomJsonSerializerOptions, string redisResultString, string expectedData2Name) { // Arrange - var redisResultString = """{ "OriginalNameData": "data 1", "data_json_name": "data 1" }"""; SetupExecuteMock(this._redisDatabaseMock, redisResultString); - var sut = this.CreateRecordCollection(useDefinition); + var sut = this.CreateRecordCollection(useDefinition, useCustomJsonSerializerOptions); // Act var actual = await sut.GetAsync( @@ -179,7 +199,7 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) new() { IncludeVectors = false }); // Assert - var expectedArgs = new object[] { TestRecordKey1, "OriginalNameData", "data_json_name" }; + var expectedArgs = 
new object[] { TestRecordKey1, "data1_json_name", expectedData2Name }; this._redisDatabaseMock .Verify( x => x.ExecuteAsync( @@ -189,9 +209,9 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) Assert.NotNull(actual); Assert.Equal(TestRecordKey1, actual.Key); - Assert.Equal("data 1", actual.OriginalNameData); - Assert.Equal("data 1", actual.Data); - Assert.False(actual.Vector.HasValue); + Assert.Equal("data 1", actual.Data1); + Assert.Equal("data 2", actual.Data2); + Assert.False(actual.Vector1.HasValue); } [Theory] @@ -200,8 +220,8 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition) public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) { // Arrange - var redisResultString1 = """{ "OriginalNameData": "data 1", "data_json_name": "data 1", "vector_json_name": [1, 2, 3, 4] }"""; - var redisResultString2 = """{ "OriginalNameData": "data 2", "data_json_name": "data 2", "vector_json_name": [5, 6, 7, 8] }"""; + var redisResultString1 = """{ "data1_json_name": "data 1", "Data2": "data 2", "vector1_json_name": [1, 2, 3, 4], "Vector2": [1, 2, 3, 4] }"""; + var redisResultString2 = """{ "data1_json_name": "data 1", "Data2": "data 2", "vector1_json_name": [5, 6, 7, 8], "Vector2": [1, 2, 3, 4] }"""; SetupExecuteMock(this._redisDatabaseMock, [redisResultString1, redisResultString2]); var sut = this.CreateRecordCollection(useDefinition); @@ -222,24 +242,24 @@ public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) Assert.NotNull(actual); Assert.Equal(2, actual.Count); Assert.Equal(TestRecordKey1, actual[0].Key); - Assert.Equal("data 1", actual[0].OriginalNameData); - Assert.Equal("data 1", actual[0].Data); - Assert.Equal(new float[] { 1, 2, 3, 4 }, actual[0].Vector!.Value.ToArray()); + Assert.Equal("data 1", actual[0].Data1); + Assert.Equal("data 2", actual[0].Data2); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual[0].Vector1!.Value.ToArray()); Assert.Equal(TestRecordKey2, actual[1].Key); - 
Assert.Equal("data 2", actual[1].OriginalNameData); - Assert.Equal("data 2", actual[1].Data); - Assert.Equal(new float[] { 5, 6, 7, 8 }, actual[1].Vector!.Value.ToArray()); + Assert.Equal("data 1", actual[1].Data1); + Assert.Equal("data 2", actual[1].Data2); + Assert.Equal(new float[] { 5, 6, 7, 8 }, actual[1].Vector1!.Value.ToArray()); } [Fact] public async Task CanGetRecordWithCustomMapperAsync() { // Arrange. - var redisResultString = """{ "OriginalNameData": "data 1", "data_json_name": "data 1", "vector_json_name": [1, 2, 3, 4] }"""; + var redisResultString = """{ "data1_json_name": "data 1", "Data2": "data 2", "vector1_json_name": [1, 2, 3, 4], "Vector2": [1, 2, 3, 4] }"""; SetupExecuteMock(this._redisDatabaseMock, redisResultString); // Arrange mapper mock from JsonNode to data model. - var mapperMock = new Mock>(MockBehavior.Strict); + var mapperMock = new Mock>(MockBehavior.Strict); mapperMock.Setup( x => x.MapFromStorageToDataModel( It.IsAny<(string key, JsonNode node)>(), @@ -247,7 +267,7 @@ public async Task CanGetRecordWithCustomMapperAsync() .Returns(CreateModel(TestRecordKey1, true)); // Arrange target with custom mapper. 
- var sut = new RedisJsonVectorStoreRecordCollection( + var sut = new RedisJsonVectorStoreRecordCollection( this._redisDatabaseMock.Object, TestCollectionName, new() @@ -263,9 +283,10 @@ public async Task CanGetRecordWithCustomMapperAsync() // Assert Assert.NotNull(actual); Assert.Equal(TestRecordKey1, actual.Key); - Assert.Equal("data 1", actual.OriginalNameData); - Assert.Equal("data 1", actual.Data); - Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); + Assert.Equal("data 1", actual.Data1); + Assert.Equal("data 2", actual.Data2); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector1!.Value.ToArray()); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector2!.Value.ToArray()); mapperMock .Verify( @@ -327,13 +348,15 @@ public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition) } [Theory] - [InlineData(true)] - [InlineData(false)] - public async Task CanUpsertRecordAsync(bool useDefinition) + [InlineData(true, true, """{"data1_json_name":"data 1","data2":"data 2","vector1_json_name":[1,2,3,4],"vector2":[1,2,3,4],"notAnnotated":null}""")] + [InlineData(true, false, """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4],"NotAnnotated":null}""")] + [InlineData(false, true, """{"data1_json_name":"data 1","data2":"data 2","vector1_json_name":[1,2,3,4],"vector2":[1,2,3,4],"notAnnotated":null}""")] + [InlineData(false, false, """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4],"NotAnnotated":null}""")] + public async Task CanUpsertRecordAsync(bool useDefinition, bool useCustomJsonSerializerOptions, string expectedUpsertedJson) { // Arrange SetupExecuteMock(this._redisDatabaseMock, "OK"); - var sut = this.CreateRecordCollection(useDefinition); + var sut = this.CreateRecordCollection(useDefinition, useCustomJsonSerializerOptions); var model = CreateModel(TestRecordKey1, true); // Act @@ -341,7 +364,7 @@ public async Task 
CanUpsertRecordAsync(bool useDefinition) // Assert // TODO: Fix issue where NotAnnotated is being included in the JSON. - var expectedArgs = new object[] { TestRecordKey1, "$", """{"OriginalNameData":"data 1","data_json_name":"data 1","vector_json_name":[1,2,3,4],"NotAnnotated":null}""" }; + var expectedArgs = new object[] { TestRecordKey1, "$", expectedUpsertedJson }; this._redisDatabaseMock .Verify( x => x.ExecuteAsync( @@ -372,7 +395,7 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition) Assert.Equal(TestRecordKey2, actual[1]); // TODO: Fix issue where NotAnnotated is being included in the JSON. - var expectedArgs = new object[] { TestRecordKey1, "$", """{"OriginalNameData":"data 1","data_json_name":"data 1","vector_json_name":[1,2,3,4],"NotAnnotated":null}""", TestRecordKey2, "$", """{"OriginalNameData":"data 1","data_json_name":"data 1","vector_json_name":[1,2,3,4],"NotAnnotated":null}""" }; + var expectedArgs = new object[] { TestRecordKey1, "$", """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4],"NotAnnotated":null}""", TestRecordKey2, "$", """{"data1_json_name":"data 1","Data2":"data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4],"NotAnnotated":null}""" }; this._redisDatabaseMock .Verify( x => x.ExecuteAsync( @@ -388,14 +411,14 @@ public async Task CanUpsertRecordWithCustomMapperAsync() SetupExecuteMock(this._redisDatabaseMock, "OK"); // Arrange mapper mock from data model to JsonNode. 
- var mapperMock = new Mock>(MockBehavior.Strict); - var jsonNode = """{"OriginalNameData": "data 1", "data_json_name":"data 1","vector_json_name":[1,2,3,4],"NotAnnotated":null}"""; + var mapperMock = new Mock>(MockBehavior.Strict); + var jsonNode = """{"data1_json_name":"data 1","Data2": "data 2","vector1_json_name":[1,2,3,4],"Vector2":[1,2,3,4],"NotAnnotated":null}"""; mapperMock - .Setup(x => x.MapFromDataToStorageModel(It.IsAny())) + .Setup(x => x.MapFromDataToStorageModel(It.IsAny())) .Returns((TestRecordKey1, JsonNode.Parse(jsonNode)!)); // Arrange target with custom mapper. - var sut = new RedisJsonVectorStoreRecordCollection( + var sut = new RedisJsonVectorStoreRecordCollection( this._redisDatabaseMock.Object, TestCollectionName, new() @@ -411,19 +434,20 @@ public async Task CanUpsertRecordWithCustomMapperAsync() // Assert mapperMock .Verify( - x => x.MapFromDataToStorageModel(It.Is(x => x == model)), + x => x.MapFromDataToStorageModel(It.Is(x => x == model)), Times.Once); } - private RedisJsonVectorStoreRecordCollection CreateRecordCollection(bool useDefinition) + private RedisJsonVectorStoreRecordCollection CreateRecordCollection(bool useDefinition, bool useCustomJsonSerializerOptions = false) { - return new RedisJsonVectorStoreRecordCollection( + return new RedisJsonVectorStoreRecordCollection( this._redisDatabaseMock.Object, TestCollectionName, new() { PrefixCollectionNameToKeyNames = false, - VectorStoreRecordDefinition = useDefinition ? this._singlePropsDefinition : null + VectorStoreRecordDefinition = useDefinition ? this._multiPropsDefinition : null, + JsonSerializerOptions = useCustomJsonSerializerOptions ? 
this._customJsonSerializerOptions : null }); } @@ -464,44 +488,54 @@ private static void SetupExecuteMock(Mock redisDatabaseMock, string r .ReturnsAsync(RedisResult.Create(new RedisValue(redisResultString))); } - private static SinglePropsModel CreateModel(string key, bool withVectors) + private static MultiPropsModel CreateModel(string key, bool withVectors) { - return new SinglePropsModel + return new MultiPropsModel { Key = key, - OriginalNameData = "data 1", - Data = "data 1", - Vector = withVectors ? new float[] { 1, 2, 3, 4 } : null, + Data1 = "data 1", + Data2 = "data 2", + Vector1 = withVectors ? new float[] { 1, 2, 3, 4 } : null, + Vector2 = withVectors ? new float[] { 1, 2, 3, 4 } : null, NotAnnotated = null, }; } - private readonly VectorStoreRecordDefinition _singlePropsDefinition = new() + private readonly JsonSerializerOptions _customJsonSerializerOptions = new() + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase + }; + + private readonly VectorStoreRecordDefinition _multiPropsDefinition = new() { Properties = [ new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("OriginalNameData"), - new VectorStoreRecordDataProperty("Data") { StoragePropertyName = "ignored_data_storage_name" }, - new VectorStoreRecordVectorProperty("Vector") + new VectorStoreRecordDataProperty("Data1") { IsFilterable = true, PropertyType = typeof(string), StoragePropertyName = "ignored_data1_storage_name" }, + new VectorStoreRecordDataProperty("Data2") { IsFilterable = true, PropertyType = typeof(string) }, + new VectorStoreRecordVectorProperty("Vector1") { Dimensions = 4, StoragePropertyName = "ignored_vector1_storage_name" }, + new VectorStoreRecordVectorProperty("Vector2") { Dimensions = 4 } ] }; - public sealed class SinglePropsModel + public sealed class MultiPropsModel { [VectorStoreRecordKey] public string Key { get; set; } = string.Empty; + [JsonPropertyName("data1_json_name")] + [VectorStoreRecordData(IsFilterable = true, StoragePropertyName = 
"ignored_data1_storage_name")] + public string Data1 { get; set; } = string.Empty; + [VectorStoreRecordData(IsFilterable = true)] - public string OriginalNameData { get; set; } = string.Empty; + public string Data2 { get; set; } = string.Empty; - [JsonPropertyName("data_json_name")] - [VectorStoreRecordData(IsFilterable = true, StoragePropertyName = "ignored_data_storage_name")] - public string Data { get; set; } = string.Empty; + [JsonPropertyName("vector1_json_name")] + [VectorStoreRecordVector(4, StoragePropertyName = "ignored_vector1_storage_name")] + public ReadOnlyMemory? Vector1 { get; set; } - [JsonPropertyName("vector_json_name")] - [VectorStoreRecordVector(4, StoragePropertyName = "ignored_vector_storage_name")] - public ReadOnlyMemory? Vector { get; set; } + [VectorStoreRecordVector(4)] + public ReadOnlyMemory? Vector2 { get; set; } public string? NotAnnotated { get; set; } } diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordMapperTests.cs index 4fbb089555f1..a7ae97c06355 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordMapperTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordMapperTests.cs @@ -2,6 +2,7 @@ using System; using System.Linq; +using System.Text.Json; using System.Text.Json.Nodes; using Microsoft.SemanticKernel.Connectors.Redis; using Microsoft.SemanticKernel.Data; @@ -18,7 +19,7 @@ public sealed class RedisJsonVectorStoreRecordMapperTests public void MapsAllFieldsFromDataToStorageModel() { // Arrange. - var sut = new RedisJsonVectorStoreRecordMapper("Key"); + var sut = new RedisJsonVectorStoreRecordMapper("Key", JsonSerializerOptions.Default); // Act. 
var actual = sut.MapFromDataToStorageModel(CreateModel("test key")); @@ -33,21 +34,67 @@ public void MapsAllFieldsFromDataToStorageModel() Assert.Equal(new float[] { 5, 6, 7, 8 }, jsonObject?["Vector2"]?.AsArray().GetValues().ToArray()); } + [Fact] + public void MapsAllFieldsFromDataToStorageModelWithCustomSerializerOptions() + { + // Arrange. + var sut = new RedisJsonVectorStoreRecordMapper("key", new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }); + + // Act. + var actual = sut.MapFromDataToStorageModel(CreateModel("test key")); + + // Assert. + Assert.NotNull(actual.Node); + Assert.Equal("test key", actual.Key); + var jsonObject = actual.Node.AsObject(); + Assert.Equal("data 1", jsonObject?["data1"]?.ToString()); + Assert.Equal("data 2", jsonObject?["data2"]?.ToString()); + Assert.Equal(new float[] { 1, 2, 3, 4 }, jsonObject?["vector1"]?.AsArray().GetValues().ToArray()); + Assert.Equal(new float[] { 5, 6, 7, 8 }, jsonObject?["vector2"]?.AsArray().GetValues().ToArray()); + } + [Fact] public void MapsAllFieldsFromStorageToDataModel() { // Arrange. - var sut = new RedisJsonVectorStoreRecordMapper("Key"); + var sut = new RedisJsonVectorStoreRecordMapper("Key", JsonSerializerOptions.Default); // Act. - var actual = sut.MapFromStorageToDataModel(("test key", CreateJsonNode()), new()); + var jsonObject = new JsonObject(); + jsonObject.Add("Data1", "data 1"); + jsonObject.Add("Data2", "data 2"); + jsonObject.Add("Vector1", new JsonArray(new[] { 1, 2, 3, 4 }.Select(x => JsonValue.Create(x)).ToArray())); + jsonObject.Add("Vector2", new JsonArray(new[] { 5, 6, 7, 8 }.Select(x => JsonValue.Create(x)).ToArray())); + var actual = sut.MapFromStorageToDataModel(("test key", jsonObject), new()); // Assert. 
Assert.NotNull(actual); Assert.Equal("test key", actual.Key); Assert.Equal("data 1", actual.Data1); Assert.Equal("data 2", actual.Data2); + Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector1!.Value.ToArray()); + Assert.Equal(new float[] { 5, 6, 7, 8 }, actual.Vector2!.Value.ToArray()); + } + [Fact] + public void MapsAllFieldsFromStorageToDataModelWithCustomSerializerOptions() + { + // Arrange. + var sut = new RedisJsonVectorStoreRecordMapper("key", new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }); + + // Act. + var jsonObject = new JsonObject(); + jsonObject.Add("data1", "data 1"); + jsonObject.Add("data2", "data 2"); + jsonObject.Add("vector1", new JsonArray(new[] { 1, 2, 3, 4 }.Select(x => JsonValue.Create(x)).ToArray())); + jsonObject.Add("vector2", new JsonArray(new[] { 5, 6, 7, 8 }.Select(x => JsonValue.Create(x)).ToArray())); + var actual = sut.MapFromStorageToDataModel(("test key", jsonObject), new()); + + // Assert. + Assert.NotNull(actual); + Assert.Equal("test key", actual.Key); + Assert.Equal("data 1", actual.Data1); + Assert.Equal("data 2", actual.Data2); Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector1!.Value.ToArray()); Assert.Equal(new float[] { 5, 6, 7, 8 }, actual.Vector2!.Value.ToArray()); } @@ -65,16 +112,6 @@ private static MultiPropsModel CreateModel(string key) }; } - private static JsonObject CreateJsonNode() - { - var jsonObject = new JsonObject(); - jsonObject.Add("Data1", "data 1"); - jsonObject.Add("Data2", "data 2"); - jsonObject.Add("Vector1", new JsonArray(new[] { 1, 2, 3, 4 }.Select(x => JsonValue.Create(x)).ToArray())); - jsonObject.Add("Vector2", new JsonArray(new[] { 5, 6, 7, 8 }.Select(x => JsonValue.Create(x)).ToArray())); - return jsonObject; - } - private sealed class MultiPropsModel { [VectorStoreRecordKey] From 04e20ee0a7c51718c78d1d5f581156b3afd76705 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:30:25 +0100 Subject: 
[PATCH 39/48] .Net: Add support for any key type to the VolatileVectorStore. (#7529) ### Motivation and Context Addressing feedback on Volatile VectorStore key types from bugbash. See: [#7468](https://github.com/microsoft/semantic-kernel/issues/7468) ### Description - Adding support for any key type to VolatileVectorStore. - Updated VolatileVectorStore unit tests to test with multiple types of keys. - This also caused a knock-on effect of requiring keys to not be nullable, which is a good constraint to have regardless. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../Memory/VectorStore_DataIngestion.cs | 3 + .../AzureAISearchVectorStore.cs | 4 +- ...earchVectorStoreRecordCollectionFactory.cs | 4 +- ...econeVectorStoreRecordCollectionFactory.cs | 4 +- .../PineconeVectorStore.cs | 4 +- ...drantVectorStoreRecordCollectionFactory.cs | 4 +- .../QdrantVectorStore.cs | 4 +- ...RedisVectorStoreRecordCollectionFactory.cs | 4 +- .../RedisVectorStore.cs | 4 +- .../QdrantVectorStoreRecordCollectionTests.cs | 6 + .../Pinecone/PineconeVectorStoreTests.cs | 4 +- .../Data/IVectorStore.cs | 1 + .../Data/IVectorStoreRecordCollection.cs | 1 + .../Data/VolatileVectorStore.cs | 16 +- .../VolatileVectorStoreRecordCollection.cs | 45 ++--- ...atileVectorStoreRecordCollectionOptions.cs | 2 +- ...olatileVectorStoreRecordCollectionTests.cs | 187 ++++++++++-------- .../Data/VolatileVectorStoreTests.cs | 18 +- 18 files changed, 179 insertions(+), 136 deletions(-) diff --git a/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs 
b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs index 3dd33494431e..411e4ab8f8a4 100644 --- a/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs +++ b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs @@ -142,6 +142,7 @@ public async Task ExampleWithoutDIAsync(string databaseType) } private async Task UpsertDataAndReadFromVectorStoreAsync(DataIngestor dataIngestor, Func uniqueKeyGenerator) + where TKey : notnull { // Ingest some data into the vector store. var upsertedKeys = await dataIngestor.ImportDataAsync(uniqueKeyGenerator); @@ -167,6 +168,7 @@ private sealed class DataIngestor(IVectorStore vectorStore, ITextEmbeddingGenera /// The keys of the upserted glossary entries. /// The type of the keys in the vector store. public async Task> ImportDataAsync(Func uniqueKeyGenerator) + where TKey : notnull { // Get and create collection if it doesn't exist. var collection = vectorStore.GetCollection>("skglossary"); @@ -192,6 +194,7 @@ public async Task> ImportDataAsync(Func uniqueKeyG /// The glossary entry. /// The type of the keys in the vector store. public Task?> GetGlossaryAsync(TKey key) + where TKey : notnull { var collection = vectorStore.GetCollection>("skglossary"); return collection.GetAsync(key, new() { IncludeVectors = true }); diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStore.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStore.cs index aa5cc82d4360..2ca2bf9577f5 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStore.cs @@ -42,7 +42,9 @@ public AzureAISearchVectorStore(SearchIndexClient searchIndexClient, AzureAISear } /// - public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? 
vectorStoreRecordDefinition = null) where TRecord : class + public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition = null) + where TKey : notnull + where TRecord : class { if (typeof(TKey) != typeof(string)) { diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/IAzureAISearchVectorStoreRecordCollectionFactory.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/IAzureAISearchVectorStoreRecordCollectionFactory.cs index ae83ec11b9fc..3e7dc2d82bc9 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/IAzureAISearchVectorStoreRecordCollectionFactory.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/IAzureAISearchVectorStoreRecordCollectionFactory.cs @@ -19,5 +19,7 @@ public interface IAzureAISearchVectorStoreRecordCollectionFactory /// The name of the collection to connect to. /// An optional record definition that defines the schema of the record type. If not present, attributes on will be used. /// The new instance of . - IVectorStoreRecordCollection CreateVectorStoreRecordCollection(SearchIndexClient searchIndexClient, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TRecord : class; + IVectorStoreRecordCollection CreateVectorStoreRecordCollection(SearchIndexClient searchIndexClient, string name, VectorStoreRecordDefinition? 
vectorStoreRecordDefinition) + where TKey : notnull + where TRecord : class; } diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/IPineconeVectorStoreRecordCollectionFactory.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/IPineconeVectorStoreRecordCollectionFactory.cs index cc993159b247..965639e93c8e 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/IPineconeVectorStoreRecordCollectionFactory.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/IPineconeVectorStoreRecordCollectionFactory.cs @@ -19,5 +19,7 @@ public interface IPineconeVectorStoreRecordCollectionFactory /// The name of the collection to connect to. /// An optional record definition that defines the schema of the record type. If not present, attributes on will be used. /// The new instance of . - IVectorStoreRecordCollection CreateVectorStoreRecordCollection(Sdk.PineconeClient pineconeClient, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TRecord : class; + IVectorStoreRecordCollection CreateVectorStoreRecordCollection(Sdk.PineconeClient pineconeClient, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) + where TKey : notnull + where TRecord : class; } diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStore.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStore.cs index 4c2ce5d9ecc9..ec5b6114c801 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStore.cs @@ -39,7 +39,9 @@ public PineconeVectorStore(Sdk.PineconeClient pineconeClient, PineconeVectorStor } /// - public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition = null) where TRecord : class + public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? 
vectorStoreRecordDefinition = null) + where TKey : notnull + where TRecord : class { if (typeof(TKey) != typeof(string)) { diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/IQdrantVectorStoreRecordCollectionFactory.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/IQdrantVectorStoreRecordCollectionFactory.cs index a94e472da3eb..2f93e14dfb82 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/IQdrantVectorStoreRecordCollectionFactory.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/IQdrantVectorStoreRecordCollectionFactory.cs @@ -19,5 +19,7 @@ public interface IQdrantVectorStoreRecordCollectionFactory /// The name of the collection to connect to. /// An optional record definition that defines the schema of the record type. If not present, attributes on will be used. /// The new instance of . - IVectorStoreRecordCollection CreateVectorStoreRecordCollection(QdrantClient qdrantClient, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TRecord : class; + IVectorStoreRecordCollection CreateVectorStoreRecordCollection(QdrantClient qdrantClient, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) + where TKey : notnull + where TRecord : class; } diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStore.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStore.cs index 3ec3e44ee6e4..ef9c9f1593f0 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStore.cs @@ -51,7 +51,9 @@ internal QdrantVectorStore(MockableQdrantClient qdrantClient, QdrantVectorStoreO } /// - public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition = null) where TRecord : class + public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? 
vectorStoreRecordDefinition = null) + where TKey : notnull + where TRecord : class { if (typeof(TKey) != typeof(ulong) && typeof(TKey) != typeof(Guid)) { diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/IRedisVectorStoreRecordCollectionFactory.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/IRedisVectorStoreRecordCollectionFactory.cs index 6ad90b5e61f1..f4eae7661b7a 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/IRedisVectorStoreRecordCollectionFactory.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/IRedisVectorStoreRecordCollectionFactory.cs @@ -19,5 +19,7 @@ public interface IRedisVectorStoreRecordCollectionFactory /// The name of the collection to connect to. /// An optional record definition that defines the schema of the record type. If not present, attributes on will be used. /// The new instance of . - IVectorStoreRecordCollection CreateVectorStoreRecordCollection(IDatabase database, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TRecord : class; + IVectorStoreRecordCollection CreateVectorStoreRecordCollection(IDatabase database, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) + where TKey : notnull + where TRecord : class; } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs index 98cfc0020dba..51a933d36062 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStore.cs @@ -41,7 +41,9 @@ public RedisVectorStore(IDatabase database, RedisVectorStoreOptions? options = d } /// - public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition = null) where TRecord : class + public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? 
vectorStoreRecordDefinition = null) + where TKey : notnull + where TRecord : class { if (typeof(TKey) != typeof(string)) { diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs index d7e533364881..b22f401d00cf 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs @@ -135,6 +135,7 @@ public async Task CanDeleteCollectionAsync() [Theory] [MemberData(nameof(TestOptions))] public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool hasNamedVectors, TKey testRecordKey) + where TKey : notnull { var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); @@ -171,6 +172,7 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition, bool ha [Theory] [MemberData(nameof(TestOptions))] public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool hasNamedVectors, TKey testRecordKey) + where TKey : notnull { // Arrange. var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); @@ -206,6 +208,7 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool [Theory] [MemberData(nameof(MultiRecordTestOptions))] public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, bool hasNamedVectors, TKey[] testRecordKeys) + where TKey : notnull { // Arrange. 
var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); @@ -406,6 +409,7 @@ await sut.DeleteBatchAsync( [Theory] [MemberData(nameof(TestOptions))] public async Task CanUpsertRecordAsync(bool useDefinition, bool hasNamedVectors, TKey testRecordKey) + where TKey : notnull { // Arrange var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); @@ -432,6 +436,7 @@ await sut.UpsertAsync( [Theory] [MemberData(nameof(MultiRecordTestOptions))] public async Task CanUpsertManyRecordsAsync(bool useDefinition, bool hasNamedVectors, TKey[] testRecordKeys) + where TKey : notnull { // Arrange var sut = this.CreateRecordCollection(useDefinition, hasNamedVectors); @@ -614,6 +619,7 @@ private static RetrievedPoint CreateRetrievedPoint(bool hasNamedVectors, T } private IVectorStoreRecordCollection> CreateRecordCollection(bool useDefinition, bool hasNamedVectors) + where T : notnull { var store = new QdrantVectorStoreRecordCollection>( this._qdrantClientMock.Object, diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreTests.cs index d8c6f9452221..8aa50e6fa2fa 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreTests.cs @@ -46,7 +46,9 @@ private sealed class MyVectorStoreRecordCollectionFactory : IPineconeVectorStore public IVectorStoreRecordCollection CreateVectorStoreRecordCollection( Sdk.PineconeClient pineconeClient, string name, - VectorStoreRecordDefinition? vectorStoreRecordDefinition) where TRecord : class + VectorStoreRecordDefinition? 
vectorStoreRecordDefinition) + where TKey : notnull + where TRecord : class { if (typeof(TKey) != typeof(string)) { diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs index bf09077b5b7a..31246a3138d6 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStore.cs @@ -31,6 +31,7 @@ public interface IVectorStore /// /// IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition = null) + where TKey : notnull where TRecord : class; /// diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs index 0ab29fb8ac84..5071412014a8 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/IVectorStoreRecordCollection.cs @@ -16,6 +16,7 @@ namespace Microsoft.SemanticKernel.Data; #pragma warning disable CA1711 // Identifiers should not have incorrect suffix public interface IVectorStoreRecordCollection #pragma warning restore CA1711 // Identifiers should not have incorrect suffix + where TKey : notnull where TRecord : class { /// diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs index d8da4508c386..7175e2896978 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStore.cs @@ -1,6 +1,5 @@ // Copyright (c) Microsoft. All rights reserved. -using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; @@ -16,7 +15,7 @@ namespace Microsoft.SemanticKernel.Data; public sealed class VolatileVectorStore : IVectorStore { /// Internal storage for the record collection. 
- private readonly ConcurrentDictionary> _internalCollection; + private readonly ConcurrentDictionary> _internalCollection; /// /// Initializes a new instance of the class. @@ -30,20 +29,17 @@ public VolatileVectorStore() /// Initializes a new instance of the class. /// /// Allows passing in the dictionary used for storage, for testing purposes. - internal VolatileVectorStore(ConcurrentDictionary> internalCollection) + internal VolatileVectorStore(ConcurrentDictionary> internalCollection) { this._internalCollection = internalCollection; } /// - public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition = null) where TRecord : class + public IVectorStoreRecordCollection GetCollection(string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition = null) + where TKey : notnull + where TRecord : class { - if (typeof(TKey) != typeof(string)) - { - throw new NotSupportedException("Only string keys are supported."); - } - - var collection = new VolatileVectorStoreRecordCollection(this._internalCollection, name, new() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; + var collection = new VolatileVectorStoreRecordCollection(this._internalCollection, name, new() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; return collection!; } diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs index abe55996abd9..e3fcfc310676 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs @@ -1,6 +1,5 @@ // Copyright (c) Microsoft. All rights reserved. 
-using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; @@ -14,35 +13,31 @@ namespace Microsoft.SemanticKernel.Data; /// /// Service for storing and retrieving vector records, that uses an in memory dictionary as the underlying storage. /// +/// The data type of the record key. /// The data model to use for adding, updating and retrieving data from storage. [Experimental("SKEXP0001")] #pragma warning disable CA1711 // Identifiers should not have incorrect suffix -public sealed class VolatileVectorStoreRecordCollection : IVectorStoreRecordCollection +public sealed class VolatileVectorStoreRecordCollection : IVectorStoreRecordCollection #pragma warning restore CA1711 // Identifiers should not have incorrect suffix + where TKey : notnull where TRecord : class { /// Internal storage for the record collection. - private readonly ConcurrentDictionary> _internalCollection; + private readonly ConcurrentDictionary> _internalCollection; /// Optional configuration options for this class. private readonly VolatileVectorStoreRecordCollectionOptions _options; - /// The name of the collection that this will access. + /// The name of the collection that this will access. private readonly string _collectionName; - /// A set of types that a key on the provided model may have. - private static readonly HashSet s_supportedKeyTypes = - [ - typeof(string) - ]; - /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. private readonly PropertyInfo _keyPropertyInfo; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// - /// The name of the collection that this will access. + /// The name of the collection that this will access. /// Optional configuration options for this class. public VolatileVectorStoreRecordCollection(string collectionName, VolatileVectorStoreRecordCollectionOptions? 
options = default) { @@ -65,18 +60,16 @@ public VolatileVectorStoreRecordCollection(string collectionName, VolatileVector properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: true); } - // Validate property types and store for later use. - VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); this._keyPropertyInfo = properties.keyProperty; } /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// Allows passing in the dictionary used for storage, for testing purposes. - /// The name of the collection that this will access. + /// The name of the collection that this will access. /// Optional configuration options for this class. - internal VolatileVectorStoreRecordCollection(ConcurrentDictionary> internalCollection, string collectionName, VolatileVectorStoreRecordCollectionOptions? options = default) + internal VolatileVectorStoreRecordCollection(ConcurrentDictionary> internalCollection, string collectionName, VolatileVectorStoreRecordCollectionOptions? options = default) : this(collectionName, options) { this._internalCollection = internalCollection; @@ -94,7 +87,7 @@ public Task CollectionExistsAsync(CancellationToken cancellationToken = de /// public Task CreateCollectionAsync(CancellationToken cancellationToken = default) { - this._internalCollection.TryAdd(this._collectionName, new ConcurrentDictionary()); + this._internalCollection.TryAdd(this._collectionName, new ConcurrentDictionary()); return Task.CompletedTask; } @@ -115,7 +108,7 @@ public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) } /// - public Task GetAsync(string key, GetRecordOptions? options = null, CancellationToken cancellationToken = default) + public Task GetAsync(TKey key, GetRecordOptions? 
options = null, CancellationToken cancellationToken = default) { var collectionDictionary = this.GetCollectionDictionary(); @@ -128,7 +121,7 @@ public Task DeleteCollectionAsync(CancellationToken cancellationToken = default) } /// - public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, GetRecordOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { foreach (var key in keys) { @@ -142,7 +135,7 @@ public async IAsyncEnumerable GetBatchAsync(IEnumerable keys, G } /// - public Task DeleteAsync(string key, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) + public Task DeleteAsync(TKey key, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) { var collectionDictionary = this.GetCollectionDictionary(); @@ -151,7 +144,7 @@ public Task DeleteAsync(string key, DeleteRecordOptions? options = null, Cancell } /// - public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) + public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? options = null, CancellationToken cancellationToken = default) { var collectionDictionary = this.GetCollectionDictionary(); @@ -164,18 +157,18 @@ public Task DeleteBatchAsync(IEnumerable keys, DeleteRecordOptions? opti } /// - public Task UpsertAsync(TRecord record, UpsertRecordOptions? options = null, CancellationToken cancellationToken = default) + public Task UpsertAsync(TRecord record, UpsertRecordOptions? 
options = null, CancellationToken cancellationToken = default) { var collectionDictionary = this.GetCollectionDictionary(); - var key = this._keyPropertyInfo.GetValue(record) as string; + var key = (TKey)this._keyPropertyInfo.GetValue(record)!; collectionDictionary.AddOrUpdate(key!, record, (key, currentValue) => record); return Task.FromResult(key!); } /// - public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + public async IAsyncEnumerable UpsertBatchAsync(IEnumerable records, UpsertRecordOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { foreach (var record in records) { @@ -187,7 +180,7 @@ public async IAsyncEnumerable UpsertBatchAsync(IEnumerable reco /// Get the collection dictionary from the internal storage, throws if it does not exist. /// /// The retrieved collection dictionary. - private ConcurrentDictionary GetCollectionDictionary() + private ConcurrentDictionary GetCollectionDictionary() { if (!this._internalCollection.TryGetValue(this._collectionName, out var collectionDictionary)) { diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollectionOptions.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollectionOptions.cs index ef825c2e9ec6..8732e7efa486 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollectionOptions.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollectionOptions.cs @@ -5,7 +5,7 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Options when creating a . +/// Options when creating a . 
/// [Experimental("SKEXP0001")] public sealed class VolatileVectorStoreRecordCollectionOptions diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs index 35b3312afc87..e627ae9829de 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs @@ -11,17 +11,19 @@ namespace SemanticKernel.UnitTests.Data; /// -/// Contains tests for the class. +/// Contains tests for the class. /// public class VolatileVectorStoreRecordCollectionTests { private const string TestCollectionName = "testcollection"; private const string TestRecordKey1 = "testid1"; private const string TestRecordKey2 = "testid2"; + private const int TestRecordIntKey1 = 1; + private const int TestRecordIntKey2 = 2; private readonly CancellationToken _testCancellationToken = new(false); - private readonly ConcurrentDictionary> _collectionStore; + private readonly ConcurrentDictionary> _collectionStore; public VolatileVectorStoreRecordCollectionTests() { @@ -34,10 +36,10 @@ public VolatileVectorStoreRecordCollectionTests() public async Task CollectionExistsReturnsCollectionStateAsync(string collectionName, bool expectedExists) { // Arrange - var collection = new ConcurrentDictionary(); + var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = new VolatileVectorStoreRecordCollection( + var sut = new VolatileVectorStoreRecordCollection>( this._collectionStore, collectionName); @@ -52,7 +54,7 @@ public async Task CollectionExistsReturnsCollectionStateAsync(string collectionN public async Task CanCreateCollectionAsync() { // Arrange - var sut = this.CreateRecordCollection(false); + var sut = this.CreateRecordCollection(false); // Act await sut.CreateCollectionAsync(this._testCancellationToken); @@ 
-65,10 +67,10 @@ public async Task CanCreateCollectionAsync() public async Task DeleteCollectionRemovesCollectionFromDictionaryAsync() { // Arrange - var collection = new ConcurrentDictionary(); + var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateRecordCollection(false); + var sut = this.CreateRecordCollection(false); // Act await sut.DeleteCollectionAsync(this._testCancellationToken); @@ -78,21 +80,24 @@ public async Task DeleteCollectionRemovesCollectionFromDictionaryAsync() } [Theory] - [InlineData(true)] - [InlineData(false)] - public async Task CanGetRecordWithVectorsAsync(bool useDefinition) + [InlineData(true, TestRecordKey1)] + [InlineData(true, TestRecordIntKey1)] + [InlineData(false, TestRecordKey1)] + [InlineData(false, TestRecordIntKey1)] + public async Task CanGetRecordWithVectorsAsync(bool useDefinition, TKey testKey) + where TKey : notnull { // Arrange - var record = CreateModel(TestRecordKey1, withVectors: true); - var collection = new ConcurrentDictionary(); - collection.TryAdd(TestRecordKey1, record); + var record = CreateModel(testKey, withVectors: true); + var collection = new ConcurrentDictionary(); + collection.TryAdd(testKey!, record); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateRecordCollection(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act var actual = await sut.GetAsync( - TestRecordKey1, + testKey, new() { IncludeVectors = true @@ -103,29 +108,32 @@ public async Task CanGetRecordWithVectorsAsync(bool useDefinition) var expectedArgs = new object[] { TestRecordKey1 }; Assert.NotNull(actual); - Assert.Equal(TestRecordKey1, actual.Key); - Assert.Equal("data testid1", actual.Data); + Assert.Equal(testKey, actual.Key); + Assert.Equal($"data {testKey}", actual.Data); Assert.Equal(new float[] { 1, 2, 3, 4 }, actual.Vector!.Value.ToArray()); } [Theory] - [InlineData(true)] - [InlineData(false)] - 
public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) + [InlineData(true, TestRecordKey1, TestRecordKey2)] + [InlineData(true, TestRecordIntKey1, TestRecordIntKey2)] + [InlineData(false, TestRecordKey1, TestRecordKey2)] + [InlineData(false, TestRecordIntKey1, TestRecordIntKey2)] + public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition, TKey testKey1, TKey testKey2) + where TKey : notnull { // Arrange - var record1 = CreateModel(TestRecordKey1, withVectors: true); - var record2 = CreateModel(TestRecordKey2, withVectors: true); - var collection = new ConcurrentDictionary(); - collection.TryAdd(TestRecordKey1, record1); - collection.TryAdd(TestRecordKey2, record2); + var record1 = CreateModel(testKey1, withVectors: true); + var record2 = CreateModel(testKey2, withVectors: true); + var collection = new ConcurrentDictionary(); + collection.TryAdd(testKey1!, record1); + collection.TryAdd(testKey2!, record2); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateRecordCollection(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act var actual = await sut.GetBatchAsync( - [TestRecordKey1, TestRecordKey2], + [testKey1, testKey2], new() { IncludeVectors = true @@ -135,73 +143,82 @@ public async Task CanGetManyRecordsWithVectorsAsync(bool useDefinition) // Assert Assert.NotNull(actual); Assert.Equal(2, actual.Count); - Assert.Equal(TestRecordKey1, actual[0].Key); - Assert.Equal("data testid1", actual[0].Data); - Assert.Equal(TestRecordKey2, actual[1].Key); - Assert.Equal("data testid2", actual[1].Data); + Assert.Equal(testKey1, actual[0].Key); + Assert.Equal($"data {testKey1}", actual[0].Data); + Assert.Equal(testKey2, actual[1].Key); + Assert.Equal($"data {testKey2}", actual[1].Data); } [Theory] - [InlineData(true)] - [InlineData(false)] - public async Task CanDeleteRecordAsync(bool useDefinition) + [InlineData(true, TestRecordKey1, TestRecordKey2)] + [InlineData(true, 
TestRecordIntKey1, TestRecordIntKey2)] + [InlineData(false, TestRecordKey1, TestRecordKey2)] + [InlineData(false, TestRecordIntKey1, TestRecordIntKey2)] + public async Task CanDeleteRecordAsync(bool useDefinition, TKey testKey1, TKey testKey2) + where TKey : notnull { // Arrange - var record1 = CreateModel(TestRecordKey1, withVectors: true); - var record2 = CreateModel(TestRecordKey2, withVectors: true); - var collection = new ConcurrentDictionary(); - collection.TryAdd(TestRecordKey1, record1); - collection.TryAdd(TestRecordKey2, record2); + var record1 = CreateModel(testKey1, withVectors: true); + var record2 = CreateModel(testKey2, withVectors: true); + var collection = new ConcurrentDictionary(); + collection.TryAdd(testKey1, record1); + collection.TryAdd(testKey2, record2); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateRecordCollection(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act await sut.DeleteAsync( - TestRecordKey1, + testKey1, cancellationToken: this._testCancellationToken); // Assert - Assert.False(collection.ContainsKey(TestRecordKey1)); - Assert.True(collection.ContainsKey(TestRecordKey2)); + Assert.False(collection.ContainsKey(testKey1)); + Assert.True(collection.ContainsKey(testKey2)); } [Theory] - [InlineData(true)] - [InlineData(false)] - public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition) + [InlineData(true, TestRecordKey1, TestRecordKey2)] + [InlineData(true, TestRecordIntKey1, TestRecordIntKey2)] + [InlineData(false, TestRecordKey1, TestRecordKey2)] + [InlineData(false, TestRecordIntKey1, TestRecordIntKey2)] + public async Task CanDeleteManyRecordsWithVectorsAsync(bool useDefinition, TKey testKey1, TKey testKey2) + where TKey : notnull { // Arrange - var record1 = CreateModel(TestRecordKey1, withVectors: true); - var record2 = CreateModel(TestRecordKey2, withVectors: true); - var collection = new ConcurrentDictionary(); - 
collection.TryAdd(TestRecordKey1, record1); - collection.TryAdd(TestRecordKey2, record2); + var record1 = CreateModel(testKey1, withVectors: true); + var record2 = CreateModel(testKey2, withVectors: true); + var collection = new ConcurrentDictionary(); + collection.TryAdd(testKey1, record1); + collection.TryAdd(testKey2, record2); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateRecordCollection(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act await sut.DeleteBatchAsync( - [TestRecordKey1, TestRecordKey2], + [testKey1, testKey2], cancellationToken: this._testCancellationToken); // Assert - Assert.False(collection.ContainsKey(TestRecordKey1)); - Assert.False(collection.ContainsKey(TestRecordKey2)); + Assert.False(collection.ContainsKey(testKey1)); + Assert.False(collection.ContainsKey(testKey2)); } [Theory] - [InlineData(true)] - [InlineData(false)] - public async Task CanUpsertRecordAsync(bool useDefinition) + [InlineData(true, TestRecordKey1)] + [InlineData(true, TestRecordIntKey1)] + [InlineData(false, TestRecordKey1)] + [InlineData(false, TestRecordIntKey1)] + public async Task CanUpsertRecordAsync(bool useDefinition, TKey testKey1) + where TKey : notnull { // Arrange - var record1 = CreateModel(TestRecordKey1, withVectors: true); - var collection = new ConcurrentDictionary(); + var record1 = CreateModel(testKey1, withVectors: true); + var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateRecordCollection(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act var upsertResult = await sut.UpsertAsync( @@ -209,25 +226,28 @@ public async Task CanUpsertRecordAsync(bool useDefinition) cancellationToken: this._testCancellationToken); // Assert - Assert.Equal(TestRecordKey1, upsertResult); - Assert.True(collection.ContainsKey(TestRecordKey1)); - Assert.IsType(collection[TestRecordKey1]); - 
Assert.Equal("data testid1", (collection[TestRecordKey1] as SinglePropsModel)!.Data); + Assert.Equal(testKey1, upsertResult); + Assert.True(collection.ContainsKey(testKey1)); + Assert.IsType>(collection[testKey1]); + Assert.Equal($"data {testKey1}", (collection[testKey1] as SinglePropsModel)!.Data); } [Theory] - [InlineData(true)] - [InlineData(false)] - public async Task CanUpsertManyRecordsAsync(bool useDefinition) + [InlineData(true, TestRecordKey1, TestRecordKey2)] + [InlineData(true, TestRecordIntKey1, TestRecordIntKey2)] + [InlineData(false, TestRecordKey1, TestRecordKey2)] + [InlineData(false, TestRecordIntKey1, TestRecordIntKey2)] + public async Task CanUpsertManyRecordsAsync(bool useDefinition, TKey testKey1, TKey testKey2) + where TKey : notnull { // Arrange - var record1 = CreateModel(TestRecordKey1, withVectors: true); - var record2 = CreateModel(TestRecordKey2, withVectors: true); + var record1 = CreateModel(testKey1, withVectors: true); + var record2 = CreateModel(testKey2, withVectors: true); - var collection = new ConcurrentDictionary(); + var collection = new ConcurrentDictionary(); this._collectionStore.TryAdd(TestCollectionName, collection); - var sut = this.CreateRecordCollection(useDefinition); + var sut = this.CreateRecordCollection(useDefinition); // Act var actual = await sut.UpsertBatchAsync( @@ -237,17 +257,17 @@ public async Task CanUpsertManyRecordsAsync(bool useDefinition) // Assert Assert.NotNull(actual); Assert.Equal(2, actual.Count); - Assert.Equal(TestRecordKey1, actual[0]); - Assert.Equal(TestRecordKey2, actual[1]); + Assert.Equal(testKey1, actual[0]); + Assert.Equal(testKey2, actual[1]); - Assert.True(collection.ContainsKey(TestRecordKey1)); - Assert.IsType(collection[TestRecordKey1]); - Assert.Equal("data testid1", (collection[TestRecordKey1] as SinglePropsModel)!.Data); + Assert.True(collection.ContainsKey(testKey1)); + Assert.IsType>(collection[testKey1]); + Assert.Equal($"data {testKey1}", (collection[testKey1] as 
SinglePropsModel)!.Data); } - private static SinglePropsModel CreateModel(string key, bool withVectors) + private static SinglePropsModel CreateModel(TKey key, bool withVectors) { - return new SinglePropsModel + return new SinglePropsModel { Key = key, Data = "data " + key, @@ -256,9 +276,10 @@ private static SinglePropsModel CreateModel(string key, bool withVectors) }; } - private VolatileVectorStoreRecordCollection CreateRecordCollection(bool useDefinition) + private VolatileVectorStoreRecordCollection> CreateRecordCollection(bool useDefinition) + where TKey : notnull { - return new VolatileVectorStoreRecordCollection( + return new VolatileVectorStoreRecordCollection>( this._collectionStore, TestCollectionName, new() @@ -277,10 +298,10 @@ private VolatileVectorStoreRecordCollection CreateRecordCollec ] }; - public sealed class SinglePropsModel + public sealed class SinglePropsModel { [VectorStoreRecordKey] - public string Key { get; set; } = string.Empty; + public TKey? Key { get; set; } [VectorStoreRecordData] public string Data { get; set; } = string.Empty; diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreTests.cs index 6174f24bec6c..694d2239b224 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreTests.cs @@ -27,26 +27,30 @@ public void GetCollectionReturnsCollection() // Assert. Assert.NotNull(actual); - Assert.IsType>>(actual); + Assert.IsType>>(actual); } [Fact] - public void GetCollectionThrowsForInvalidKeyType() + public void GetCollectionReturnsCollectionWithNonStringKey() { // Arrange. var sut = new VolatileVectorStore(); - // Act & Assert. - Assert.Throws(() => sut.GetCollection>(TestCollectionName)); + // Act. + var actual = sut.GetCollection>(TestCollectionName); + + // Assert. 
+ Assert.NotNull(actual); + Assert.IsType>>(actual); } [Fact] public async Task ListCollectionNamesReadsDictionaryAsync() { // Arrange. - var collectionStore = new ConcurrentDictionary>(); - collectionStore.TryAdd("collection1", new ConcurrentDictionary()); - collectionStore.TryAdd("collection2", new ConcurrentDictionary()); + var collectionStore = new ConcurrentDictionary>(); + collectionStore.TryAdd("collection1", new ConcurrentDictionary()); + collectionStore.TryAdd("collection2", new ConcurrentDictionary()); var sut = new VolatileVectorStore(collectionStore); // Act. From 8a591dd363bde0319a8b68b4050226d3606f06c0 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:52:49 +0100 Subject: [PATCH 40/48] .Net: Restricting property writability and renaming data model name property on VectorStoreRecordProperty (#7545) ### Motivation and Context The writability of properties on VectorStoreRecordProperty wasn't great and the naming of the data model property name wasn't appropriately differentiated from the storage property name. See issue for more info: [#7469](https://github.com/microsoft/semantic-kernel/issues/7469) ### Description - Make properties read-only or init only. 
- Rename PropertyName to DataModelPropertyName to better differentiate it from the StoragePropertyName ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...ISearchVectorStoreCollectionCreateMapping.cs | 8 ++++---- .../AzureAISearchVectorStoreRecordCollection.cs | 4 ++-- ...ineconeVectorStoreCollectionCreateMapping.cs | 4 ++-- .../QdrantVectorStoreCollectionCreateMapping.cs | 8 ++++---- .../QdrantVectorStoreRecordCollection.cs | 4 ++-- .../RedisVectorStoreCollectionCreateMapping.cs | 12 ++++++------ .../src/Data/VectorStoreRecordPropertyReader.cs | 14 +++++++------- .../VectorStoreRecordProperty.cs | 17 ++++++++++------- .../VectorStoreRecordPropertyReaderTests.cs | 10 +++++----- 9 files changed, 42 insertions(+), 39 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs index d4972b5a62fd..106b3fae1c60 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs @@ -41,7 +41,7 @@ public static SimpleField MapDataField(VectorStoreRecordDataProperty dataPropert if (dataProperty.PropertyType is null) { - throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.PropertyName}' must be set to create a collection."); + throw 
new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.DataModelPropertyName}' must be set to create a collection."); } return new SimpleField(storagePropertyName, AzureAISearchVectorStoreCollectionCreateMapping.GetSDKFieldDataType(dataProperty.PropertyType)) { IsFilterable = dataProperty.IsFilterable }; @@ -58,7 +58,7 @@ public static (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfigu { if (vectorProperty.Dimensions is not > 0) { - throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive integer to create a collection."); + throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}' must be set to a positive integer to create a collection."); } // Build a name for the profile and algorithm configuration based on the property name @@ -74,7 +74,7 @@ public static (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfigu { IndexKind.Hnsw => new HnswAlgorithmConfiguration(algorithmConfigName) { Parameters = new HnswParameters { Metric = algorithmMetric } }, IndexKind.Flat => new ExhaustiveKnnAlgorithmConfiguration(algorithmConfigName) { Parameters = new ExhaustiveKnnParameters { Metric = algorithmMetric } }, - _ => throw new InvalidOperationException($"Unsupported index kind '{indexKind}' on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.") + _ => throw new InvalidOperationException($"Unsupported index kind '{indexKind}' on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'.") }; var vectorSearchProfile = new VectorSearchProfile(vectorSearchProfileName, algorithmConfigName); @@ -116,7 +116,7 @@ public static VectorSearchAlgorithmMetric GetSDKDistanceAlgorithm(VectorStoreRec 
DistanceFunction.CosineSimilarity => VectorSearchAlgorithmMetric.Cosine, DistanceFunction.DotProductSimilarity => VectorSearchAlgorithmMetric.DotProduct, DistanceFunction.EuclideanDistance => VectorSearchAlgorithmMetric.Euclidean, - _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.") + _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'.") }; } diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs index c3db9340fc8c..959da9f5e51f 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs @@ -192,7 +192,7 @@ public Task CreateCollectionAsync(CancellationToken cancellationToken = default) // Data property. if (property is VectorStoreRecordDataProperty dataProperty) { - searchFields.Add(AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty, this._storagePropertyNames[dataProperty.PropertyName])); + searchFields.Add(AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty, this._storagePropertyNames[dataProperty.DataModelPropertyName])); } // Vector property. 
@@ -200,7 +200,7 @@ public Task CreateCollectionAsync(CancellationToken cancellationToken = default) { (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfiguration algorithmConfiguration, VectorSearchProfile vectorSearchProfile) = AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField( vectorProperty, - this._storagePropertyNames[vectorProperty.PropertyName]); + this._storagePropertyNames[vectorProperty.DataModelPropertyName]); // Add the search field, plus its profile and algorithm configuration to the search config. searchFields.Add(vectorSearchField); diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs index 754c19f8eaa3..d24219407050 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs @@ -21,7 +21,7 @@ public static (uint Dimension, Metric Metric) MapServerlessIndex(VectorStoreReco { if (vectorProperty!.Dimensions is not > 0) { - throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive integer to create a collection."); + throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}' must be set to a positive integer to create a collection."); } return (Dimension: (uint)vectorProperty.Dimensions, Metric: GetSDKMetricAlgorithm(vectorProperty)); @@ -41,6 +41,6 @@ public static Metric GetSDKMetricAlgorithm(VectorStoreRecordVectorProperty vecto DistanceFunction.DotProductSimilarity => Metric.DotProduct, DistanceFunction.EuclideanDistance => Metric.Euclidean, null => Metric.Cosine, - _ => throw new 
InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.") + _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'.") }; } diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs index d0319463422d..2117d5616de9 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs @@ -57,12 +57,12 @@ public static VectorParams MapSingleVector(VectorStoreRecordVectorProperty vecto { if (vectorProperty!.Dimensions is not > 0) { - throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive integer to create a collection."); + throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}' must be set to a positive integer to create a collection."); } if (vectorProperty!.IndexKind is not null && vectorProperty!.IndexKind != IndexKind.Hnsw) { - throw new InvalidOperationException($"Unsupported index kind '{vectorProperty!.IndexKind}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'."); + throw new InvalidOperationException($"Unsupported index kind '{vectorProperty!.IndexKind}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'."); } return new VectorParams { Size = (ulong)vectorProperty.Dimensions, Distance = 
QdrantVectorStoreCollectionCreateMapping.GetSDKDistanceAlgorithm(vectorProperty) }; @@ -81,7 +81,7 @@ public static VectorParamsMap MapNamedVectors(IEnumerable Distance.Dot, DistanceFunction.EuclideanDistance => Distance.Euclid, DistanceFunction.ManhattanDistance => Distance.Manhattan, - _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.") + _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'.") }; } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs index 0197346a8228..d4f63c719ebf 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs @@ -178,10 +178,10 @@ await this.RunOperationAsync( { if (dataProperty.PropertyType is null) { - throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.PropertyName}' must be set to create a collection, since the property is filterable."); + throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.DataModelPropertyName}' must be set to create a collection, since the property is filterable."); } - var storageFieldName = this._storagePropertyNames[dataProperty.PropertyName]; + var storageFieldName = this._storagePropertyNames[dataProperty.DataModelPropertyName]; var schemaType = QdrantVectorStoreCollectionCreateMapping.s_schemaTypeMap[dataProperty.PropertyType!]; await this.RunOperationAsync( diff --git 
a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs index 209b958273e7..a4d198bb7a92 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs @@ -67,10 +67,10 @@ public static Schema MapToSchema(IEnumerable properti { if (dataProperty.PropertyType is null) { - throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.PropertyName}' must be set to create a collection, since the property is filterable."); + throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.DataModelPropertyName}' must be set to create a collection, since the property is filterable."); } - var storageName = storagePropertyNames[dataProperty.PropertyName]; + var storageName = storagePropertyNames[dataProperty.DataModelPropertyName]; if (dataProperty.PropertyType == typeof(string)) { @@ -90,10 +90,10 @@ public static Schema MapToSchema(IEnumerable properti { if (vectorProperty.Dimensions is not > 0) { - throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}' must be set to a positive integer to create a collection."); + throw new InvalidOperationException($"Property {nameof(vectorProperty.Dimensions)} on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}' must be set to a positive integer to create a collection."); } - var storageName = storagePropertyNames[vectorProperty.PropertyName]; + var storageName = storagePropertyNames[vectorProperty.DataModelPropertyName]; var indexKind = GetSDKIndexKind(vectorProperty); var distanceAlgorithm 
= GetSDKDistanceAlgorithm(vectorProperty); var dimensions = vectorProperty.Dimensions.Value.ToString(CultureInfo.InvariantCulture); @@ -127,7 +127,7 @@ public static Schema.VectorField.VectorAlgo GetSDKIndexKind(VectorStoreRecordVec { IndexKind.Hnsw => Schema.VectorField.VectorAlgo.HNSW, IndexKind.Flat => Schema.VectorField.VectorAlgo.FLAT, - _ => throw new InvalidOperationException($"Unsupported index kind '{vectorProperty.IndexKind}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.") + _ => throw new InvalidOperationException($"Unsupported index kind '{vectorProperty.IndexKind}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'.") }; } @@ -150,7 +150,7 @@ public static string GetSDKDistanceAlgorithm(VectorStoreRecordVectorProperty vec DistanceFunction.CosineSimilarity => "COSINE", DistanceFunction.DotProductSimilarity => "IP", DistanceFunction.EuclideanDistance => "L2", - _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.PropertyName}'.") + _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'.") }; } } diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index 1a6a9a99a419..d08b96f0f095 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -131,19 +131,19 @@ public static (PropertyInfo keyProperty, List dataProperties, List throw new ArgumentException($"Multiple key properties configured for type {type.FullName}."); } - keyProperty = type.GetProperty(keyPropertyInfo.PropertyName); + keyProperty = 
type.GetProperty(keyPropertyInfo.DataModelPropertyName); if (keyProperty == null) { - throw new ArgumentException($"Key property '{keyPropertyInfo.PropertyName}' not found on type {type.FullName}."); + throw new ArgumentException($"Key property '{keyPropertyInfo.DataModelPropertyName}' not found on type {type.FullName}."); } } // Data. else if (property is VectorStoreRecordDataProperty dataPropertyInfo) { - var dataProperty = type.GetProperty(dataPropertyInfo.PropertyName); + var dataProperty = type.GetProperty(dataPropertyInfo.DataModelPropertyName); if (dataProperty == null) { - throw new ArgumentException($"Data property '{dataPropertyInfo.PropertyName}' not found on type {type.FullName}."); + throw new ArgumentException($"Data property '{dataPropertyInfo.DataModelPropertyName}' not found on type {type.FullName}."); } dataProperties.Add(dataProperty); @@ -151,10 +151,10 @@ public static (PropertyInfo keyProperty, List dataProperties, List // Vector. else if (property is VectorStoreRecordVectorProperty vectorPropertyInfo) { - var vectorProperty = type.GetProperty(vectorPropertyInfo.PropertyName); + var vectorProperty = type.GetProperty(vectorPropertyInfo.DataModelPropertyName); if (vectorProperty == null) { - throw new ArgumentException($"Vector property '{vectorPropertyInfo.PropertyName}' not found on type {type.FullName}."); + throw new ArgumentException($"Vector property '{vectorPropertyInfo.DataModelPropertyName}' not found on type {type.FullName}."); } // Add all vector properties if we support multiple vectors. @@ -341,7 +341,7 @@ public static string GetStoragePropertyName(PropertyInfo property, VectorStoreRe if (vectorStoreRecordDefinition is not null) { // First check to see if the developer configured a storage property name on the record definition. 
- if (vectorStoreRecordDefinition.Properties.FirstOrDefault(p => p.PropertyName == property.Name) is VectorStoreRecordProperty recordProperty && recordProperty.StoragePropertyName is not null) + if (vectorStoreRecordDefinition.Properties.FirstOrDefault(p => p.DataModelPropertyName == property.Name) is VectorStoreRecordProperty recordProperty && recordProperty.StoragePropertyName is not null) { return recordProperty.StoragePropertyName; } diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs index a74003cbf105..ccb760d89a00 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs @@ -13,26 +13,29 @@ public abstract class VectorStoreRecordProperty /// /// Initializes a new instance of the class. /// - /// The name of the property. - private protected VectorStoreRecordProperty(string propertyName) + /// The name of the property on the data model. + private protected VectorStoreRecordProperty(string dataModelPropertyName) { - this.PropertyName = propertyName; + this.DataModelPropertyName = dataModelPropertyName; } private protected VectorStoreRecordProperty(VectorStoreRecordProperty source) { - this.PropertyName = source.PropertyName; + this.DataModelPropertyName = source.DataModelPropertyName; this.StoragePropertyName = source.StoragePropertyName; } /// - /// Gets or sets the name of the property. + /// Gets or sets the name of the property on the data model. /// - public string PropertyName { get; set; } + public string DataModelPropertyName { get; private set; } /// /// Gets or sets an optional name to use for the property in storage, if different from the property name. /// E.g. the property name might be "MyProperty" but the storage name might be "my_property". 
+ /// This property will only be respected by implementations that do not support a well known + /// serialization mechanism like JSON, in which case the attributes used by that serialization system will + /// be used. /// - public string? StoragePropertyName { get; set; } + public string? StoragePropertyName { get; init; } } diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index 2a4aaf7d570a..000bca81fd74 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -150,11 +150,11 @@ public void CreateVectorStoreRecordDefinitionFromTypeConvertsAllProps() // Assert. Assert.Equal(5, definition.Properties.Count); - Assert.Equal("Key", definition.Properties[0].PropertyName); - Assert.Equal("Data1", definition.Properties[1].PropertyName); - Assert.Equal("Data2", definition.Properties[2].PropertyName); - Assert.Equal("Vector1", definition.Properties[3].PropertyName); - Assert.Equal("Vector2", definition.Properties[4].PropertyName); + Assert.Equal("Key", definition.Properties[0].DataModelPropertyName); + Assert.Equal("Data1", definition.Properties[1].DataModelPropertyName); + Assert.Equal("Data2", definition.Properties[2].DataModelPropertyName); + Assert.Equal("Vector1", definition.Properties[3].DataModelPropertyName); + Assert.Equal("Vector2", definition.Properties[4].DataModelPropertyName); Assert.IsType(definition.Properties[0]); Assert.IsType(definition.Properties[1]); From df0ea21198502ddddd98eedc2f1fca22983f7d59 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:53:23 +0100 Subject: [PATCH 41/48] .Net: Rename DI extensions that depend on already registered dependencies.
(#7541) ### Motivation and Context There is a concern that it's not clear when using the DI extensions without any parameters that the method will rely on already registered database clients. See [#7467](https://github.com/microsoft/semantic-kernel/issues/7467) ### Description - Splitting each set of DI Extensions clearly between those that construct a client and those that request it from the DI container. - Renaming the ones that request it from the DI container by appending `WithRegisteredClient` ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...zureAISearchServiceCollectionExtensions.cs | 6 ++- .../PineconeKernelBuilderExtensions.cs | 15 +++++- .../PineconeServiceCollectionExtensions.cs | 32 ++++++++++-- .../QdrantKernelBuilderExtensions.cs | 17 ++++++- .../QdrantServiceCollectionExtensions.cs | 33 +++++++++++-- .../RedisKernelBuilderExtensions.cs | 17 ++++++- .../RedisServiceCollectionExtensions.cs | 49 +++++++++++-------- 7 files changed, 134 insertions(+), 35 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs index fdb280733a74..f7dca74cb00b 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs @@ -23,6 +23,8 @@ public static class AzureAISearchServiceCollectionExtensions /// The kernel builder. 
public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, AzureAISearchVectorStoreOptions? options = default, string? serviceId = default) { + // If we are not constructing the SearchIndexClient, add the IVectorStore as transient, since we + // cannot make assumptions about how SearchIndexClient is being managed. services.AddKeyedTransient( serviceId, (sp, obj) => @@ -52,7 +54,7 @@ public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollec Verify.NotNull(endpoint); Verify.NotNull(tokenCredential); - services.AddKeyedTransient( + services.AddKeyedSingleton( serviceId, (sp, obj) => { @@ -81,7 +83,7 @@ public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollec Verify.NotNull(endpoint); Verify.NotNull(credential); - services.AddKeyedTransient( + services.AddKeyedSingleton( serviceId, (sp, obj) => { diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeKernelBuilderExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeKernelBuilderExtensions.cs index ff6460ae21d6..f4c6e643ecc5 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeKernelBuilderExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeKernelBuilderExtensions.cs @@ -14,11 +14,24 @@ public static class PineconeKernelBuilderExtensions /// Register a Pinecone with the specified service ID and where is retrieved from the dependency injection container. /// /// The builder to register the on. + /// Optional options to further configure the . + /// An optional service id to use as the service key. + /// The kernel builder. + public static IKernelBuilder AddPineconeVectorStore(this IKernelBuilder builder, PineconeVectorStoreOptions? options = default, string? 
serviceId = default) + { + builder.Services.AddPineconeVectorStore(options, serviceId); + return builder; + } + + /// + /// Register a Pinecone with the specified service ID and where is constructed using the provided apikey. + /// + /// The builder to register the on. /// The api key for Pinecone. /// Optional options to further configure the . /// An optional service id to use as the service key. /// The kernel builder. - public static IKernelBuilder AddPineconeVectorStore(this IKernelBuilder builder, string? apiKey = default, PineconeVectorStoreOptions? options = default, string? serviceId = default) + public static IKernelBuilder AddPineconeVectorStore(this IKernelBuilder builder, string apiKey, PineconeVectorStoreOptions? options = default, string? serviceId = default) { builder.Services.AddPineconeVectorStore(apiKey, options, serviceId); return builder; diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs index 0b6013ccc9be..eb7b8b15ff78 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs @@ -15,17 +15,43 @@ public static class PineconeServiceCollectionExtensions /// Register a Pinecone with the specified service ID and where is retrieved from the dependency injection container. /// /// The to register the on. - /// The api key for Pinecone. /// Optional options to further configure the . /// An optional service id to use as the service key. /// The kernel builder. - public static IServiceCollection AddPineconeVectorStore(this IServiceCollection services, string? apiKey = default, PineconeVectorStoreOptions? options = default, string? serviceId = default) + public static IServiceCollection AddPineconeVectorStore(this IServiceCollection services, PineconeVectorStoreOptions? 
options = default, string? serviceId = default) { + // If we are not constructing the PineconeClient, add the IVectorStore as transient, since we + // cannot make assumptions about how PineconeClient is being managed. services.AddKeyedTransient( serviceId, (sp, obj) => { - var pineconeClient = apiKey == null ? sp.GetRequiredService() : new Sdk.PineconeClient(apiKey); + var pineconeClient = sp.GetRequiredService(); + var selectedOptions = options ?? sp.GetService(); + + return new PineconeVectorStore( + pineconeClient, + selectedOptions); + }); + + return services; + } + + /// + /// Register a Pinecone with the specified service ID and where is constructed using the provided apikey. + /// + /// The to register the on. + /// The api key for Pinecone. + /// Optional options to further configure the . + /// An optional service id to use as the service key. + /// The kernel builder. + public static IServiceCollection AddPineconeVectorStore(this IServiceCollection services, string apiKey, PineconeVectorStoreOptions? options = default, string? serviceId = default) + { + services.AddKeyedSingleton( + serviceId, + (sp, obj) => + { + var pineconeClient = new Sdk.PineconeClient(apiKey); var selectedOptions = options ?? sp.GetService(); return new PineconeVectorStore( diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs index 6afad06c33f5..213aef587653 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantKernelBuilderExtensions.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. 
using Microsoft.SemanticKernel.Data; +using Qdrant.Client; namespace Microsoft.SemanticKernel.Connectors.Qdrant; @@ -10,7 +11,19 @@ namespace Microsoft.SemanticKernel.Connectors.Qdrant; public static class QdrantKernelBuilderExtensions { /// - /// Register a Qdrant with the specified service ID. + /// Register a Qdrant with the specified service ID and where is retrieved from the dependency injection container. + /// + /// The builder to register the on. + /// Optional options to further configure the . + /// An optional service id to use as the service key. + /// The kernel builder. + public static IKernelBuilder AddQdrantVectorStore(this IKernelBuilder builder, QdrantVectorStoreOptions? options = default, string? serviceId = default) + { + builder.Services.AddQdrantVectorStore(options, serviceId); + return builder; + } + /// + /// Register a Qdrant with the specified service ID and where is constructed using the provided parameters. /// /// The builder to register the on. /// The Qdrant service host name. @@ -20,7 +33,7 @@ public static class QdrantKernelBuilderExtensions /// Optional options to further configure the . /// An optional service id to use as the service key. /// The kernel builder. - public static IKernelBuilder AddQdrantVectorStore(this IKernelBuilder builder, string? host = default, int port = 6334, bool https = false, string? apiKey = default, QdrantVectorStoreOptions? options = default, string? serviceId = default) + public static IKernelBuilder AddQdrantVectorStore(this IKernelBuilder builder, string host, int port = 6334, bool https = false, string? apiKey = default, QdrantVectorStoreOptions? options = default, string? 
serviceId = default) { builder.Services.AddQdrantVectorStore(host, port, https, apiKey, options, serviceId); return builder; diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs index c8ac4479e15f..1b006146aba6 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs @@ -12,7 +12,32 @@ namespace Microsoft.SemanticKernel.Connectors.Qdrant; public static class QdrantServiceCollectionExtensions { /// - /// Register a Qdrant with the specified service ID. + /// Register a Qdrant with the specified service ID and where is retrieved from the dependency injection container. + /// + /// The to register the on. + /// Optional options to further configure the . + /// An optional service id to use as the service key. + /// The kernel builder. + public static IServiceCollection AddQdrantVectorStore(this IServiceCollection services, QdrantVectorStoreOptions? options = default, string? serviceId = default) + { + // If we are not constructing the QdrantClient, add the IVectorStore as transient, since we + // cannot make assumptions about how QdrantClient is being managed. + services.AddKeyedTransient( + serviceId, + (sp, obj) => + { + var qdrantClient = sp.GetRequiredService(); + var selectedOptions = options ?? sp.GetService(); + + return new QdrantVectorStore( + qdrantClient, + selectedOptions); + }); + + return services; + } + /// + /// Register a Qdrant with the specified service ID and where is constructed using the provided parameters. /// /// The to register the on. /// The Qdrant service host name. @@ -22,13 +47,13 @@ public static class QdrantServiceCollectionExtensions /// Optional options to further configure the . /// An optional service id to use as the service key. /// The kernel builder. 
- public static IServiceCollection AddQdrantVectorStore(this IServiceCollection services, string? host = default, int port = 6334, bool https = false, string? apiKey = default, QdrantVectorStoreOptions? options = default, string? serviceId = default) + public static IServiceCollection AddQdrantVectorStore(this IServiceCollection services, string host, int port = 6334, bool https = false, string? apiKey = default, QdrantVectorStoreOptions? options = default, string? serviceId = default) { - services.AddKeyedTransient( + services.AddKeyedSingleton( serviceId, (sp, obj) => { - var qdrantClient = host == null ? sp.GetRequiredService() : new QdrantClient(host, port, https, apiKey); + var qdrantClient = new QdrantClient(host, port, https, apiKey); var selectedOptions = options ?? sp.GetService(); return new QdrantVectorStore( diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs index 1618d9f7391f..2b20b4d87de2 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisKernelBuilderExtensions.cs @@ -11,14 +11,27 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; public static class RedisKernelBuilderExtensions { /// - /// Register a Redis with the specified service ID. + /// Register a Redis with the specified service ID and where the Redis is retrieved from the dependency injection container. + /// + /// The builder to register the on. + /// Optional options to further configure the . + /// An optional service id to use as the service key. + /// The kernel builder. + public static IKernelBuilder AddRedisVectorStore(this IKernelBuilder builder, RedisVectorStoreOptions? options = default, string? 
serviceId = default) + { + builder.Services.AddRedisVectorStore(options, serviceId); + return builder; + } + + /// + /// Register a Redis with the specified service ID and where the Redis is constructed using the provided . /// /// The builder to register the on. /// The Redis connection configuration string. If not provided, an instance will be requested from the dependency injection container. /// Optional options to further configure the . /// An optional service id to use as the service key. /// The kernel builder. - public static IKernelBuilder AddRedisVectorStore(this IKernelBuilder builder, string? redisConnectionConfiguration = default, RedisVectorStoreOptions? options = default, string? serviceId = default) + public static IKernelBuilder AddRedisVectorStore(this IKernelBuilder builder, string redisConnectionConfiguration, RedisVectorStoreOptions? options = default, string? serviceId = default) { builder.Services.AddRedisVectorStore(redisConnectionConfiguration, options, serviceId); return builder; diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs index 3ca6c03e186f..a2a0c93d5492 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs @@ -12,34 +12,41 @@ namespace Microsoft.SemanticKernel.Connectors.Redis; public static class RedisServiceCollectionExtensions { /// - /// Register a Redis with the specified service ID. + /// Register a Redis with the specified service ID and where the Redis is retrieved from the dependency injection container. /// /// The to register the on. - /// The Redis connection configuration string. If not provided, an instance will be requested from the dependency injection container. /// Optional options to further configure the . 
/// An optional service id to use as the service key. /// The kernel builder. - public static IServiceCollection AddRedisVectorStore(this IServiceCollection services, string? redisConnectionConfiguration = default, RedisVectorStoreOptions? options = default, string? serviceId = default) + public static IServiceCollection AddRedisVectorStore(this IServiceCollection services, RedisVectorStoreOptions? options = default, string? serviceId = default) { - if (redisConnectionConfiguration == null) - { - // If we are not constructing the ConnectionMultiplexer, add the IVectorStore as transient, since we - // cannot make assumptions about how IDatabase is being managed. - services.AddKeyedTransient( - serviceId, - (sp, obj) => - { - var database = sp.GetRequiredService(); - var selectedOptions = options ?? sp.GetService(); - - return new RedisVectorStore( - database, - selectedOptions); - }); - - return services; - } + // If we are not constructing the ConnectionMultiplexer, add the IVectorStore as transient, since we + // cannot make assumptions about how IDatabase is being managed. + services.AddKeyedTransient( + serviceId, + (sp, obj) => + { + var database = sp.GetRequiredService(); + var selectedOptions = options ?? sp.GetService(); + return new RedisVectorStore( + database, + selectedOptions); + }); + + return services; + } + + /// + /// Register a Redis with the specified service ID and where the Redis is constructed using the provided . + /// + /// The to register the on. + /// The Redis connection configuration string, used to construct the Redis connection multiplexer for the vector store. + /// Optional options to further configure the . + /// An optional service id to use as the service key. + /// The kernel builder. + public static IServiceCollection AddRedisVectorStore(this IServiceCollection services, string redisConnectionConfiguration, RedisVectorStoreOptions? options = default, string? 
serviceId = default) + { // If we are constructing the ConnectionMultiplexer, add the IVectorStore as singleton, since we are managing the lifetime // of the ConnectionMultiplexer, and the recommendation from StackExchange.Redis is to share the ConnectionMultiplexer. services.AddKeyedSingleton( From 74963d6fe95b41159fa72a5c37d9fb810d21055e Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Wed, 31 Jul 2024 17:51:48 +0100 Subject: [PATCH 42/48] .Net: Removing reflection type checks where possible when using definitions. (#7575) ### Motivation and Context When developers supply a property definition object, the expectation is that this should be used instead of using reflection on the data model to determine the list of properties. This is especially useful when a developer wishes to supply a definition that doesn't look like the data model at all and wants to use a custom mapper to map between the storage and data models. Most default mappers will continue to use reflection and do checks on the data model, since that is their primary mechanism of mapping. See: [#7473](https://github.com/microsoft/semantic-kernel/issues/7473) ### Description - Require the data type of each property on the property definition objects. - Remove checks where we verify that the properties defined in the definition matches those on the data model. - Change validation logic to always use the definition where possible. 
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...VectorStoreCollectionCreateMappingTests.cs | 27 +- ...ISearchVectorStoreRecordCollectionTests.cs | 47 +++- ...earchVectorStoreCollectionCreateMapping.cs | 5 - ...zureAISearchVectorStoreRecordCollection.cs | 41 +-- .../PineconeVectorStoreRecordCollection.cs | 14 +- .../PineconeVectorStoreRecordMapper.cs | 38 ++- .../QdrantVectorStoreRecordCollection.cs | 26 +- .../QdrantVectorStoreRecordMapper.cs | 34 ++- ...RedisHashSetVectorStoreRecordCollection.cs | 22 +- .../RedisHashSetVectorStoreRecordMapper.cs | 14 +- .../RedisJsonVectorStoreRecordCollection.cs | 47 +--- ...RedisVectorStoreCollectionCreateMapping.cs | 5 - ...VectorStoreCollectionCreateMappingTests.cs | 12 +- .../QdrantVectorStoreRecordCollectionTests.cs | 49 +++- .../QdrantVectorStoreRecordMapperTests.cs | 60 ++-- ...HashSetVectorStoreRecordCollectionTests.cs | 34 ++- ...edisHashSetVectorStoreRecordMapperTests.cs | 32 ++- ...disJsonVectorStoreRecordCollectionTests.cs | 36 ++- ...VectorStoreCollectionCreateMappingTests.cs | 31 +-- ...ineconeVectorStoreRecordCollectionTests.cs | 57 ++++ .../AzureAISearchVectorStoreFixture.cs | 16 +- .../Pinecone/PineconeVectorStoreFixture.cs | 60 ++-- .../Memory/Qdrant/QdrantVectorStoreFixture.cs | 24 +- .../Memory/Redis/RedisVectorStoreFixture.cs | 34 +-- .../Data/VectorStoreRecordPropertyReader.cs | 262 +++++++++++++----- .../VectorStoreRecordDataProperty.cs | 11 +- .../VectorStoreRecordKeyProperty.cs | 6 +- .../VectorStoreRecordProperty.cs | 14 +- .../VectorStoreRecordVectorProperty.cs | 6 +- 
.../VolatileVectorStoreRecordCollection.cs | 17 +- .../VectorStoreRecordPropertyReaderTests.cs | 125 +++++++-- ...olatileVectorStoreRecordCollectionTests.cs | 6 +- 32 files changed, 771 insertions(+), 441 deletions(-) create mode 100644 dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs index debafaa60e35..cc43e08b7d64 100644 --- a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs @@ -18,7 +18,7 @@ public class AzureAISearchVectorStoreCollectionCreateMappingTests public void MapKeyFieldCreatesSearchableField() { // Arrange - var keyProperty = new VectorStoreRecordKeyProperty("testkey"); + var keyProperty = new VectorStoreRecordKeyProperty("testkey", typeof(string)); var storagePropertyName = "test_key"; // Act @@ -37,7 +37,7 @@ public void MapKeyFieldCreatesSearchableField() public void MapStringDataFieldCreatesSearchableField(bool isFilterable) { // Arrange - var dataProperty = new VectorStoreRecordDataProperty("testdata") { IsFilterable = isFilterable, PropertyType = typeof(string) }; + var dataProperty = new VectorStoreRecordDataProperty("testdata", typeof(string)) { IsFilterable = isFilterable }; var storagePropertyName = "test_data"; // Act @@ -57,7 +57,7 @@ public void MapStringDataFieldCreatesSearchableField(bool isFilterable) public void MapDataFieldCreatesSimpleField(bool isFilterable) { // Arrange - var dataProperty = new VectorStoreRecordDataProperty("testdata") { IsFilterable = isFilterable, PropertyType = typeof(int) }; + var dataProperty = new VectorStoreRecordDataProperty("testdata", 
typeof(int)) { IsFilterable = isFilterable }; var storagePropertyName = "test_data"; // Act @@ -72,22 +72,11 @@ public void MapDataFieldCreatesSimpleField(bool isFilterable) Assert.Equal(isFilterable, result.IsFilterable); } - [Fact] - public void MapDataFieldFailsForNullType() - { - // Arrange - var dataProperty = new VectorStoreRecordDataProperty("testdata"); - var storagePropertyName = "test_data"; - - // Act & Assert - Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty, storagePropertyName)); - } - [Fact] public void MapVectorFieldCreatesVectorSearchField() { // Arrange - var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 10, IndexKind = IndexKind.Flat, DistanceFunction = DistanceFunction.DotProductSimilarity }; + var vectorProperty = new VectorStoreRecordVectorProperty("testvector", typeof(ReadOnlyMemory)) { Dimensions = 10, IndexKind = IndexKind.Flat, DistanceFunction = DistanceFunction.DotProductSimilarity }; var storagePropertyName = "test_vector"; // Act @@ -115,7 +104,7 @@ public void MapVectorFieldCreatesVectorSearchField() public void MapVectorFieldCreatesExpectedAlgoConfigTypes(string indexKind, Type algoConfigType) { // Arrange - var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 10, IndexKind = indexKind, DistanceFunction = DistanceFunction.DotProductSimilarity }; + var vectorProperty = new VectorStoreRecordVectorProperty("testvector", typeof(ReadOnlyMemory)) { Dimensions = 10, IndexKind = indexKind, DistanceFunction = DistanceFunction.DotProductSimilarity }; var storagePropertyName = "test_vector"; // Act @@ -130,7 +119,7 @@ public void MapVectorFieldCreatesExpectedAlgoConfigTypes(string indexKind, Type public void MapVectorFieldDefaultsToHsnwAndCosine() { // Arrange - var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 10 }; + var vectorProperty = new VectorStoreRecordVectorProperty("testvector", 
typeof(ReadOnlyMemory)) { Dimensions = 10 }; var storagePropertyName = "test_vector"; // Act @@ -146,7 +135,7 @@ public void MapVectorFieldDefaultsToHsnwAndCosine() public void MapVectorFieldThrowsForUnsupportedDistanceFunction() { // Arrange - var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 10, DistanceFunction = DistanceFunction.ManhattanDistance }; + var vectorProperty = new VectorStoreRecordVectorProperty("testvector", typeof(ReadOnlyMemory)) { Dimensions = 10, DistanceFunction = DistanceFunction.ManhattanDistance }; var storagePropertyName = "test_vector"; // Act @@ -157,7 +146,7 @@ public void MapVectorFieldThrowsForUnsupportedDistanceFunction() public void MapVectorFieldThrowsForMissingDimensionsCount() { // Arrange - var vectorProperty = new VectorStoreRecordVectorProperty("testvector"); + var vectorProperty = new VectorStoreRecordVectorProperty("testvector", typeof(ReadOnlyMemory)); var storagePropertyName = "test_vector"; // Act diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs index 689461890df2..c303613248f0 100644 --- a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreRecordCollectionTests.cs @@ -206,11 +206,11 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool useCu // Arrange. var storageObject = JsonSerializer.SerializeToNode(CreateModel(TestRecordKey1, false))!.AsObject(); - var expectedSelectFields = useCustomJsonSerializerOptions ? new[] { "key", "storage_data1", "data2" } : new[] { "Key", "storage_data1", "Data2" }; + var expectedSelectFields = useCustomJsonSerializerOptions ? 
new[] { "storage_data1", "data2", "key" } : new[] { "storage_data1", "Data2", "Key" }; this._searchClientMock.Setup( x => x.GetDocumentAsync( TestRecordKey1, - It.Is(x => x.SelectedFields.SequenceEqual(expectedSelectFields)), + It.IsAny(), this._testCancellationToken)) .ReturnsAsync(Response.FromValue(CreateModel(TestRecordKey1, true), Mock.Of())); @@ -227,6 +227,13 @@ public async Task CanGetRecordWithoutVectorsAsync(bool useDefinition, bool useCu Assert.Equal(TestRecordKey1, actual.Key); Assert.Equal("data 1", actual.Data1); Assert.Equal("data 2", actual.Data2); + + this._searchClientMock.Verify( + x => x.GetDocumentAsync( + TestRecordKey1, + It.Is(x => x.SelectedFields.SequenceEqual(expectedSelectFields)), + this._testCancellationToken), + Times.Once); } [Theory] @@ -519,6 +526,32 @@ await sut.UpsertAsync( Times.Once); } + /// + /// Tests that the collection can be created even if the definition and the type do not match. + /// In this case, the expectation is that a custom mapper will be provided to map between the + /// schema as defined by the definition and the different data model. + /// + [Fact] + public void CanCreateCollectionWithMismatchedDefinitionAndType() + { + // Arrange. + var definition = new VectorStoreRecordDefinition() + { + Properties = new List + { + new VectorStoreRecordKeyProperty("Id", typeof(string)), + new VectorStoreRecordDataProperty("Text", typeof(string)), + new VectorStoreRecordVectorProperty("Embedding", typeof(ReadOnlyMemory)) { Dimensions = 4 }, + } + }; + + // Act. 
+ var sut = new AzureAISearchVectorStoreRecordCollection( + this._searchIndexClientMock.Object, + TestCollectionName, + new() { VectorStoreRecordDefinition = definition, JsonObjectCustomMapper = Mock.Of>() }); + } + private AzureAISearchVectorStoreRecordCollection CreateRecordCollection(bool useDefinition, bool useCustomJsonSerializerOptions = false) { return new AzureAISearchVectorStoreRecordCollection( @@ -553,11 +586,11 @@ private static MultiPropsModel CreateModel(string key, bool withVectors) { Properties = [ - new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data1") { PropertyType = typeof(string) }, - new VectorStoreRecordDataProperty("Data2") { PropertyType = typeof(string) }, - new VectorStoreRecordVectorProperty("Vector1") { Dimensions = 4 }, - new VectorStoreRecordVectorProperty("Vector2") { Dimensions = 4 } + new VectorStoreRecordKeyProperty("Key", typeof(string)), + new VectorStoreRecordDataProperty("Data1", typeof(string)), + new VectorStoreRecordDataProperty("Data2", typeof(string)), + new VectorStoreRecordVectorProperty("Vector1", typeof(ReadOnlyMemory)) { Dimensions = 4 }, + new VectorStoreRecordVectorProperty("Vector2", typeof(ReadOnlyMemory)) { Dimensions = 4 } ] }; diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs index 106b3fae1c60..7b832c667111 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs @@ -39,11 +39,6 @@ public static SimpleField MapDataField(VectorStoreRecordDataProperty dataPropert return new SearchableField(storagePropertyName) { IsFilterable = dataProperty.IsFilterable }; } - if (dataProperty.PropertyType is null) - { - throw new 
InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.DataModelPropertyName}' must be set to create a collection."); - } - return new SimpleField(storagePropertyName, AzureAISearchVectorStoreCollectionCreateMapping.GetSDKFieldDataType(dataProperty.PropertyType)) { IsFilterable = dataProperty.IsFilterable }; } diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs index 959da9f5e51f..7a5fa4b02ed8 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs @@ -3,7 +3,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Reflection; using System.Runtime.CompilerServices; using System.Text.Json; using System.Text.Json.Nodes; @@ -110,42 +109,22 @@ public AzureAISearchVectorStoreRecordCollection(SearchIndexClient searchIndexCli this._options = options ?? new AzureAISearchVectorStoreRecordCollectionOptions(); this._searchClient = this._searchIndexClient.GetSearchClient(collectionName); this._vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true); - - // Enumerate public properties using configuration or attributes. 
- (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; - if (this._options.VectorStoreRecordDefinition is not null) - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: true); - } - else - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: true); - } + var jsonSerializerOptions = this._options.JsonSerializerOptions ?? JsonSerializerOptions.Default; // Validate property types. - var jsonSerializerOptions = this._options.JsonSerializerOptions ?? JsonSerializerOptions.Default; + var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify(typeof(TRecord).Name, this._vectorStoreRecordDefinition, supportsMultipleVectors: true, requiresAtLeastOneVector: false); VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); - // Get storage name for key property and store for later use. - this._keyStoragePropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, properties.keyProperty); - this._nonVectorStoragePropertyNames.Add(this._keyStoragePropertyName); - - // Get storage names for data properties and store for later use. - foreach (var property in properties.dataProperties) - { - var jsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, property); - this._storagePropertyNames[property.Name] = jsonPropertyName; - this._nonVectorStoragePropertyNames.Add(jsonPropertyName); - } - - // Get storage names for vector properties and store for later use. 
- foreach (var property in properties.vectorProperties) - { - var jsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(jsonSerializerOptions, property); - this._storagePropertyNames[property.Name] = jsonPropertyName; - } + // Get storage names and store for later use. + this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToJsonPropertyNameMap(properties, typeof(TRecord), jsonSerializerOptions); + this._keyStoragePropertyName = this._storagePropertyNames[properties.keyProperty.DataModelPropertyName]; + this._nonVectorStoragePropertyNames = properties.dataProperties + .Cast() + .Concat([properties.keyProperty]) + .Select(x => this._storagePropertyNames[x.DataModelPropertyName]) + .ToList(); } /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs index 8a353ed6ea96..323681f629be 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordCollection.cs @@ -3,7 +3,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Reflection; using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; @@ -61,18 +60,7 @@ public PineconeVectorStoreRecordCollection(Sdk.PineconeClient pineconeClient, st if (this._options.VectorCustomMapper is null) { - (PropertyInfo KeyProperty, List DataProperties, List VectorProperties) properties; - if (this._options.VectorStoreRecordDefinition is not null) - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: false); - } - else - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: false); - } - - var storagePropertyNames = 
VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties, this._options.VectorStoreRecordDefinition); - this._mapper = new PineconeVectorStoreRecordMapper(properties.KeyProperty, properties.DataProperties, properties.VectorProperties, storagePropertyNames); + this._mapper = new PineconeVectorStoreRecordMapper(this._vectorStoreRecordDefinition); } else { diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs index 5eb3e6d5e8ca..0c9987fdae7a 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs @@ -65,27 +65,25 @@ internal sealed class PineconeVectorStoreRecordMapper : IVectorStoreRec /// /// Initializes a new instance of the class. /// - /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. - /// A list of property info objects that point at the data properties in the current model, and allows easy reading and writing of these properties. - /// A list of property info objects that point at the vector properties in the current model, and allows easy reading and writing of these properties. - /// A dictionary that maps from a property name to the configured name that should be used when storing it. - public PineconeVectorStoreRecordMapper(PropertyInfo keyProperty, List dataProperties, List vectorProperties, Dictionary storagePropertyNames) + /// The record definition that defines the schema of the record type. 
+ public PineconeVectorStoreRecordMapper( + VectorStoreRecordDefinition vectorStoreRecordDefinition) { - Verify.True(vectorProperties.Count == 1, "There should be exactly one vector property in the data model."); - - VectorStoreRecordPropertyReader.VerifyPropertyTypes([keyProperty], s_supportedKeyTypes, "Key"); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(dataProperties, s_supportedDataTypes, "Data", s_supportedEnumerableDataElementTypes); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(vectorProperties, s_supportedVectorTypes, "Vector"); - - this._keyPropertyInfo = keyProperty; - this._dataPropertiesInfo = dataProperties; - this._vectorPropertyInfo = vectorProperties[0]; - this._storagePropertyNames = storagePropertyNames; - - foreach (var property in dataProperties.Concat(vectorProperties).Concat([keyProperty])) - { - this._jsonPropertyNames[property.Name] = VectorStoreRecordPropertyReader.GetJsonPropertyName(JsonSerializerOptions.Default, property); - } + // Validate property types. + var propertiesInfo = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), vectorStoreRecordDefinition, supportsMultipleVectors: false); + VectorStoreRecordPropertyReader.VerifyPropertyTypes([propertiesInfo.keyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.dataProperties, s_supportedDataTypes, s_supportedEnumerableDataElementTypes, "Data"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.vectorProperties, s_supportedVectorTypes, "Vector"); + + // Assign. + this._keyPropertyInfo = propertiesInfo.keyProperty; + this._dataPropertiesInfo = propertiesInfo.dataProperties; + this._vectorPropertyInfo = propertiesInfo.vectorProperties[0]; + + // Get storage names and store for later use. 
+ var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify(typeof(TRecord).Name, vectorStoreRecordDefinition, supportsMultipleVectors: false, requiresAtLeastOneVector: true); + this._jsonPropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToJsonPropertyNameMap(properties, typeof(TRecord), JsonSerializerOptions.Default); + this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties); } /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs index d4f63c719ebf..5d3f26f94d08 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs @@ -3,7 +3,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Reflection; using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; @@ -90,22 +89,12 @@ internal QdrantVectorStoreRecordCollection(MockableQdrantClient qdrantClient, st this._options = options ?? new QdrantVectorStoreRecordCollectionOptions(); this._vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true); - // Enumerate public properties using configuration or attributes. 
- (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; - if (this._options.VectorStoreRecordDefinition is not null) - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: this._options.HasNamedVectors); - } - else - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: this._options.HasNamedVectors); - } - - // Validate key property types. + // Validate property types. + var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify(typeof(TRecord).Name, this._vectorStoreRecordDefinition, supportsMultipleVectors: this._options.HasNamedVectors, requiresAtLeastOneVector: !this._options.HasNamedVectors); VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); // Build a map of property names to storage names. - this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties, this._options.VectorStoreRecordDefinition); + this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties); // Assign Mapper. if (this._options.PointStructCustomMapper is not null) @@ -117,10 +106,8 @@ internal QdrantVectorStoreRecordCollection(MockableQdrantClient qdrantClient, st { // Default Mapper. 
this._mapper = new QdrantVectorStoreRecordMapper( + this._vectorStoreRecordDefinition, this._options.HasNamedVectors, - properties.keyProperty, - properties.dataProperties, - properties.vectorProperties, this._storagePropertyNames); } } @@ -176,11 +163,6 @@ await this.RunOperationAsync( var dataProperties = this._vectorStoreRecordDefinition.Properties.Where(x => x is VectorStoreRecordDataProperty).Select(x => (VectorStoreRecordDataProperty)x).Where(x => x.IsFilterable); foreach (var dataProperty in dataProperties) { - if (dataProperty.PropertyType is null) - { - throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.DataModelPropertyName}' must be set to create a collection, since the property is filterable."); - } - var storageFieldName = this._storagePropertyNames[dataProperty.DataModelPropertyName]; var schemaType = QdrantVectorStoreCollectionCreateMapping.s_schemaTypeMap[dataProperty.PropertyType!]; diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs index b40dacbb47e7..88dfbc0a7679 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -69,27 +69,31 @@ internal sealed class QdrantVectorStoreRecordMapper : IVectorStoreRecor /// /// Initializes a new instance of the class. /// + /// The record definition that defines the schema of the record type. /// A value indicating whether the vectors in the store are named, or whether there is just a single unnamed vector per qdrant point. - /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. 
- /// A list of property info objects that point at the data properties in the current model, and allows easy reading and writing of these properties. - /// A list of property info objects that point at the vector properties in the current model, and allows easy reading and writing of these properties. /// A dictionary that maps from a property name to the configured name that should be used when storing it. - public QdrantVectorStoreRecordMapper(bool hasNamedVectors, PropertyInfo keyProperty, List dataProperties, List vectorProperties, Dictionary storagePropertyNames) + public QdrantVectorStoreRecordMapper( + VectorStoreRecordDefinition vectorStoreRecordDefinition, + bool hasNamedVectors, + Dictionary storagePropertyNames) { + Verify.NotNull(vectorStoreRecordDefinition); + Verify.NotNull(storagePropertyNames); + + // Validate property types. + var propertiesInfo = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), vectorStoreRecordDefinition, supportsMultipleVectors: hasNamedVectors); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.dataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.vectorProperties, s_supportedVectorTypes, "Vector"); + + // Assign. this._hasNamedVectors = hasNamedVectors; - this._keyPropertyInfo = keyProperty; - this._dataPropertiesInfo = dataProperties; - this._vectorPropertiesInfo = vectorProperties; + this._keyPropertyInfo = propertiesInfo.keyProperty; + this._dataPropertiesInfo = propertiesInfo.dataProperties; + this._vectorPropertiesInfo = propertiesInfo.vectorProperties; this._storagePropertyNames = storagePropertyNames; - // Validate property types and store for later use. 
- VectorStoreRecordPropertyReader.VerifyPropertyTypes(dataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(vectorProperties, s_supportedVectorTypes, "Vector"); - - foreach (var property in dataProperties.Concat(vectorProperties).Concat([keyProperty])) - { - this._jsonPropertyNames[property.Name] = VectorStoreRecordPropertyReader.GetJsonPropertyName(JsonSerializerOptions.Default, property); - } + // Get json storage names and store for later use. + this._jsonPropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToJsonPropertyNameMap(propertiesInfo, typeof(TRecord), JsonSerializerOptions.Default); } /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs index ea7d15c38e60..84c15d498375 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs @@ -3,7 +3,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Reflection; using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; @@ -102,26 +101,17 @@ public RedisHashSetVectorStoreRecordCollection(IDatabase database, string collec this._options = options ?? new RedisHashSetVectorStoreRecordCollectionOptions(); this._vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true); - // Enumerate public properties using configuration or attributes. 
- (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; - if (this._options.VectorStoreRecordDefinition is not null) - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: true); - } - else - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: true); - } - - // Validate property types and store for later use. + // Validate property types. + var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify(typeof(TRecord).Name, this._vectorStoreRecordDefinition, supportsMultipleVectors: true, requiresAtLeastOneVector: false); VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data"); VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); - this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties, this._options.VectorStoreRecordDefinition); + // Lookup storage property names. 
+ this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties); this._dataStoragePropertyNames = properties .dataProperties - .Select(x => this._storagePropertyNames[x.Name]) + .Select(x => this._storagePropertyNames[x.DataModelPropertyName]) .Select(RedisValue.Unbox) .ToArray(); @@ -132,7 +122,7 @@ public RedisHashSetVectorStoreRecordCollection(IDatabase database, string collec } else { - this._mapper = new RedisHashSetVectorStoreRecordMapper(properties.keyProperty, properties.dataProperties, properties.vectorProperties, this._storagePropertyNames); + this._mapper = new RedisHashSetVectorStoreRecordMapper(this._vectorStoreRecordDefinition, this._storagePropertyNames); } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordMapper.cs index 60b0700edb99..ef31bf09f475 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordMapper.cs @@ -40,21 +40,17 @@ internal sealed class RedisHashSetVectorStoreRecordMapper : /// /// Initializes a new instance of the class. /// - /// The property info object that points at the key property for the current model. - /// The property info objects that point at the payload properties in the current model. - /// The property info objects that point at the vector properties in the current model. + /// The record definition that defines the schema of the record type. /// A dictionary that maps from a property name to the configured name that should be used when storing it. 
public RedisHashSetVectorStoreRecordMapper( - PropertyInfo keyPropertyInfo, - IEnumerable dataPropertiesInfo, - IEnumerable vectorPropertiesInfo, + VectorStoreRecordDefinition vectorStoreRecordDefinition, Dictionary storagePropertyNames) { - Verify.NotNull(keyPropertyInfo); - Verify.NotNull(dataPropertiesInfo); - Verify.NotNull(vectorPropertiesInfo); + Verify.NotNull(vectorStoreRecordDefinition); Verify.NotNull(storagePropertyNames); + (PropertyInfo keyPropertyInfo, List dataPropertiesInfo, List vectorPropertiesInfo) = VectorStoreRecordPropertyReader.FindProperties(typeof(TConsumerDataModel), vectorStoreRecordDefinition, supportsMultipleVectors: true); + this._keyPropertyInfo = keyPropertyInfo; this._dataPropertiesInfo = dataPropertiesInfo; this._vectorPropertiesInfo = vectorPropertiesInfo; diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs index ccb2d8467215..7cb36092c223 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs @@ -3,7 +3,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Reflection; using System.Runtime.CompilerServices; using System.Text.Json; using System.Text.Json.Nodes; @@ -57,12 +56,6 @@ public sealed class RedisJsonVectorStoreRecordCollection : IVectorStore /// A definition of the current storage model. private readonly VectorStoreRecordDefinition _vectorStoreRecordDefinition; - /// A property info object that points at the key property for the current model, allowing easy reading and writing of this property. - private readonly PropertyInfo _keyPropertyInfo; - - /// The name of the temporary JSON property that the key property will be serialized / parsed from. 
- private readonly string _keyJsonPropertyName; - /// An array of the storage names of all the data properties that are part of the Redis payload, i.e. all properties except the key and vector properties. private readonly string[] _dataStoragePropertyNames; @@ -95,38 +88,20 @@ public RedisJsonVectorStoreRecordCollection(IDatabase database, string collectio this._jsonSerializerOptions = this._options.JsonSerializerOptions ?? JsonSerializerOptions.Default; this._vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true); - // Enumerate public properties using configuration or attributes. - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; - if (this._options.VectorStoreRecordDefinition is not null) - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: true); - } - else - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: true); - } - - // Validate property types and store for later use. + // Validate property types. 
+ var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify(typeof(TRecord).Name, this._vectorStoreRecordDefinition, supportsMultipleVectors: true, requiresAtLeastOneVector: false); VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); - this._keyPropertyInfo = properties.keyProperty; - this._keyJsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(this._jsonSerializerOptions, this._keyPropertyInfo); - - this._dataStoragePropertyNames = new string[properties.dataProperties.Count]; - var index = 0; - foreach (var property in properties.dataProperties) - { - var storagePropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(this._jsonSerializerOptions, property); - this._storagePropertyNames[property.Name] = storagePropertyName; - this._dataStoragePropertyNames[index++] = storagePropertyName; - } + // Lookup json storage property names. + var keyJsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(properties.keyProperty, typeof(TRecord), this._jsonSerializerOptions); - foreach (var property in properties.vectorProperties) - { - var storagePropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(this._jsonSerializerOptions, property); - this._storagePropertyNames[property.Name] = storagePropertyName; - } + // Lookup storage property names. + this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToJsonPropertyNameMap(properties, typeof(TRecord), this._jsonSerializerOptions); + this._dataStoragePropertyNames = properties + .dataProperties + .Select(x => this._storagePropertyNames[x.DataModelPropertyName]) + .ToArray(); // Assign Mapper. 
if (this._options.JsonNodeCustomMapper is not null) @@ -135,7 +110,7 @@ public RedisJsonVectorStoreRecordCollection(IDatabase database, string collectio } else { - this._mapper = new RedisJsonVectorStoreRecordMapper(this._keyJsonPropertyName, this._jsonSerializerOptions); + this._mapper = new RedisJsonVectorStoreRecordMapper(keyJsonPropertyName, this._jsonSerializerOptions); } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs index a4d198bb7a92..ae0ec760d3e7 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs @@ -65,11 +65,6 @@ public static Schema MapToSchema(IEnumerable properti // Data property. if (property is VectorStoreRecordDataProperty dataProperty && dataProperty.IsFilterable) { - if (dataProperty.PropertyType is null) - { - throw new InvalidOperationException($"Property {nameof(dataProperty.PropertyType)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.DataModelPropertyName}' must be set to create a collection, since the property is filterable."); - } - var storageName = storagePropertyNames[dataProperty.DataModelPropertyName]; if (dataProperty.PropertyType == typeof(string)) diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreCollectionCreateMappingTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreCollectionCreateMappingTests.cs index 6a7b9c705b8a..37cd1d8af53f 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreCollectionCreateMappingTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreCollectionCreateMappingTests.cs @@ -17,7 +17,7 @@ public class QdrantVectorStoreCollectionCreateMappingTests public void MapSingleVectorCreatesVectorParams() { // 
Arrange. - var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 4, DistanceFunction = DistanceFunction.DotProductSimilarity }; + var vectorProperty = new VectorStoreRecordVectorProperty("testvector", typeof(ReadOnlyMemory)) { Dimensions = 4, DistanceFunction = DistanceFunction.DotProductSimilarity }; // Act. var actual = QdrantVectorStoreCollectionCreateMapping.MapSingleVector(vectorProperty); @@ -32,7 +32,7 @@ public void MapSingleVectorCreatesVectorParams() public void MapSingleVectorDefaultsToCosine() { // Arrange. - var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 4 }; + var vectorProperty = new VectorStoreRecordVectorProperty("testvector", typeof(ReadOnlyMemory)) { Dimensions = 4 }; // Act. var actual = QdrantVectorStoreCollectionCreateMapping.MapSingleVector(vectorProperty); @@ -45,7 +45,7 @@ public void MapSingleVectorDefaultsToCosine() public void MapSingleVectorThrowsForUnsupportedDistanceFunction() { // Arrange. - var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = 4, DistanceFunction = DistanceFunction.CosineDistance }; + var vectorProperty = new VectorStoreRecordVectorProperty("testvector", typeof(ReadOnlyMemory)) { Dimensions = 4, DistanceFunction = DistanceFunction.CosineDistance }; // Act and assert. Assert.Throws(() => QdrantVectorStoreCollectionCreateMapping.MapSingleVector(vectorProperty)); @@ -57,7 +57,7 @@ public void MapSingleVectorThrowsForUnsupportedDistanceFunction() public void MapSingleVectorThrowsIfDimensionsIsInvalid(int? dimensions) { // Arrange. - var vectorProperty = new VectorStoreRecordVectorProperty("testvector") { Dimensions = dimensions }; + var vectorProperty = new VectorStoreRecordVectorProperty("testvector", typeof(ReadOnlyMemory)) { Dimensions = dimensions }; // Act and assert. 
Assert.Throws(() => QdrantVectorStoreCollectionCreateMapping.MapSingleVector(vectorProperty)); @@ -69,8 +69,8 @@ public void MapNamedVectorsCreatesVectorParamsMap() // Arrange. var vectorProperties = new VectorStoreRecordVectorProperty[] { - new("testvector1") { Dimensions = 10, DistanceFunction = DistanceFunction.EuclideanDistance }, - new("testvector2") { Dimensions = 20 } + new("testvector1", typeof(ReadOnlyMemory)) { Dimensions = 10, DistanceFunction = DistanceFunction.EuclideanDistance }, + new("testvector2", typeof(ReadOnlyMemory)) { Dimensions = 20 } }; var storagePropertyNames = new Dictionary diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs index b22f401d00cf..f2508d59d03a 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs @@ -513,6 +513,32 @@ await sut.UpsertAsync( Times.Once); } + /// + /// Tests that the collection can be created even if the definition and the type do not match. + /// In this case, the expectation is that a custom mapper will be provided to map between the + /// schema as defined by the definition and the different data model. + /// + [Fact] + public void CanCreateCollectionWithMismatchedDefinitionAndType() + { + // Arrange. + var definition = new VectorStoreRecordDefinition() + { + Properties = new List + { + new VectorStoreRecordKeyProperty("Id", typeof(ulong)), + new VectorStoreRecordDataProperty("Text", typeof(string)), + new VectorStoreRecordVectorProperty("Embedding", typeof(ReadOnlyMemory)) { Dimensions = 4 }, + } + }; + + // Act. 
+ var sut = new QdrantVectorStoreRecordCollection>( + this._qdrantClientMock.Object, + TestCollectionName, + new() { VectorStoreRecordDefinition = definition, PointStructCustomMapper = Mock.Of, PointStruct>>() }); + } + private void SetupRetrieveMock(List retrievedPoints) { this._qdrantClientMock @@ -626,7 +652,7 @@ private IVectorStoreRecordCollection> CreateRecordCollect TestCollectionName, new() { - VectorStoreRecordDefinition = useDefinition ? this._singlePropsDefinition : null, + VectorStoreRecordDefinition = useDefinition ? CreateSinglePropsDefinition(typeof(T)) : null, HasNamedVectors = hasNamedVectors }) as IVectorStoreRecordCollection>; return store!; @@ -644,16 +670,19 @@ private static SinglePropsModel CreateModel(T key, bool withVectors) }; } - private readonly VectorStoreRecordDefinition _singlePropsDefinition = new() + private static VectorStoreRecordDefinition CreateSinglePropsDefinition(Type keyType) { - Properties = - [ - new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("OriginalNameData") { IsFilterable = true }, - new VectorStoreRecordDataProperty("Data") { IsFilterable = true, StoragePropertyName = "data_storage_name" }, - new VectorStoreRecordVectorProperty("Vector") { StoragePropertyName = "vector_storage_name" } - ] - }; + return new() + { + Properties = + [ + new VectorStoreRecordKeyProperty("Key", keyType), + new VectorStoreRecordDataProperty("OriginalNameData", typeof(string)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("Data", typeof(string)) { IsFilterable = true, StoragePropertyName = "data_storage_name" }, + new VectorStoreRecordVectorProperty("Vector", typeof(ReadOnlyMemory)) { StoragePropertyName = "vector_storage_name" } + ] + }; + } public sealed class SinglePropsModel { diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs index 
68cf620be29a..68ff1d46a86b 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordMapperTests.cs @@ -3,7 +3,6 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Reflection; using System.Text.Json.Serialization; using Microsoft.SemanticKernel.Connectors.Qdrant; using Microsoft.SemanticKernel.Data; @@ -23,8 +22,8 @@ public class QdrantVectorStoreRecordMapperTests public void MapsSinglePropsFromDataToStorageModelWithUlong(bool hasNamedVectors) { // Arrange. - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), supportsMultipleVectors: hasNamedVectors); - var sut = new QdrantVectorStoreRecordMapper>(hasNamedVectors, keyProperty, dataProperties, vectorProperties, s_singlePropsModelStorageNamesMap); + var definition = CreateSinglePropsVectorStoreRecordDefinition(typeof(ulong)); + var sut = new QdrantVectorStoreRecordMapper>(definition, hasNamedVectors, s_singlePropsModelStorageNamesMap); // Act. var actual = sut.MapFromDataToStorageModel(CreateSinglePropsModel(5ul)); @@ -51,8 +50,8 @@ public void MapsSinglePropsFromDataToStorageModelWithUlong(bool hasNamedVectors) public void MapsSinglePropsFromDataToStorageModelWithGuid(bool hasNamedVectors) { // Arrange. - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), supportsMultipleVectors: hasNamedVectors); - var sut = new QdrantVectorStoreRecordMapper>(hasNamedVectors, keyProperty, dataProperties, vectorProperties, s_singlePropsModelStorageNamesMap); + var definition = CreateSinglePropsVectorStoreRecordDefinition(typeof(Guid)); + var sut = new QdrantVectorStoreRecordMapper>(definition, hasNamedVectors, s_singlePropsModelStorageNamesMap); // Act. 
var actual = sut.MapFromDataToStorageModel(CreateSinglePropsModel(Guid.Parse("11111111-1111-1111-1111-111111111111"))); @@ -72,8 +71,8 @@ public void MapsSinglePropsFromDataToStorageModelWithGuid(bool hasNamedVectors) public void MapsSinglePropsFromStorageToDataModelWithUlong(bool hasNamedVectors, bool includeVectors) { // Arrange. - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), supportsMultipleVectors: hasNamedVectors); - var sut = new QdrantVectorStoreRecordMapper>(hasNamedVectors, keyProperty, dataProperties, vectorProperties, s_singlePropsModelStorageNamesMap); + var definition = CreateSinglePropsVectorStoreRecordDefinition(typeof(ulong)); + var sut = new QdrantVectorStoreRecordMapper>(definition, hasNamedVectors, s_singlePropsModelStorageNamesMap); // Act. var actual = sut.MapFromStorageToDataModel(CreateSinglePropsPointStruct(5, hasNamedVectors), new() { IncludeVectors = includeVectors }); @@ -101,8 +100,8 @@ public void MapsSinglePropsFromStorageToDataModelWithUlong(bool hasNamedVectors, public void MapsSinglePropsFromStorageToDataModelWithGuid(bool hasNamedVectors, bool includeVectors) { // Arrange. - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), supportsMultipleVectors: hasNamedVectors); - var sut = new QdrantVectorStoreRecordMapper>(hasNamedVectors, keyProperty, dataProperties, vectorProperties, s_singlePropsModelStorageNamesMap); + var definition = CreateSinglePropsVectorStoreRecordDefinition(typeof(Guid)); + var sut = new QdrantVectorStoreRecordMapper>(definition, hasNamedVectors, s_singlePropsModelStorageNamesMap); // Act. 
var actual = sut.MapFromStorageToDataModel(CreateSinglePropsPointStruct(Guid.Parse("11111111-1111-1111-1111-111111111111"), hasNamedVectors), new() { IncludeVectors = includeVectors }); @@ -126,8 +125,8 @@ public void MapsSinglePropsFromStorageToDataModelWithGuid(bool hasNamedVectors, public void MapsMultiPropsFromDataToStorageModelWithUlong() { // Arrange. - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), supportsMultipleVectors: true); - var sut = new QdrantVectorStoreRecordMapper>(true, keyProperty, dataProperties, vectorProperties, s_multiPropsModelStorageNamesMap); + var definition = CreateMultiPropsVectorStoreRecordDefinition(typeof(ulong)); + var sut = new QdrantVectorStoreRecordMapper>(definition, true, s_multiPropsModelStorageNamesMap); // Act. var actual = sut.MapFromDataToStorageModel(CreateMultiPropsModel(5ul)); @@ -151,8 +150,8 @@ public void MapsMultiPropsFromDataToStorageModelWithUlong() public void MapsMultiPropsFromDataToStorageModelWithGuid() { // Arrange. - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), supportsMultipleVectors: true); - var sut = new QdrantVectorStoreRecordMapper>(true, keyProperty, dataProperties, vectorProperties, s_multiPropsModelStorageNamesMap); + var definition = CreateMultiPropsVectorStoreRecordDefinition(typeof(Guid)); + var sut = new QdrantVectorStoreRecordMapper>(definition, true, s_multiPropsModelStorageNamesMap); // Act. var actual = sut.MapFromDataToStorageModel(CreateMultiPropsModel(Guid.Parse("11111111-1111-1111-1111-111111111111"))); @@ -178,8 +177,8 @@ public void MapsMultiPropsFromDataToStorageModelWithGuid() public void MapsMultiPropsFromStorageToDataModelWithUlong(bool includeVectors) { // Arrange. 
- (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), supportsMultipleVectors: true); - var sut = new QdrantVectorStoreRecordMapper>(true, keyProperty, dataProperties, vectorProperties, s_multiPropsModelStorageNamesMap); + var definition = CreateMultiPropsVectorStoreRecordDefinition(typeof(ulong)); + var sut = new QdrantVectorStoreRecordMapper>(definition, true, s_multiPropsModelStorageNamesMap); // Act. var actual = sut.MapFromStorageToDataModel(CreateMultiPropsPointStruct(5), new() { IncludeVectors = includeVectors }); @@ -213,8 +212,8 @@ public void MapsMultiPropsFromStorageToDataModelWithUlong(bool includeVectors) public void MapsMultiPropsFromStorageToDataModelWithGuid(bool includeVectors) { // Arrange. - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), supportsMultipleVectors: true); - var sut = new QdrantVectorStoreRecordMapper>(true, keyProperty, dataProperties, vectorProperties, s_multiPropsModelStorageNamesMap); + var definition = CreateMultiPropsVectorStoreRecordDefinition(typeof(Guid)); + var sut = new QdrantVectorStoreRecordMapper>(definition, true, s_multiPropsModelStorageNamesMap); // Act. 
var actual = sut.MapFromStorageToDataModel(CreateMultiPropsPointStruct(Guid.Parse("11111111-1111-1111-1111-111111111111")), new() { IncludeVectors = includeVectors }); @@ -348,6 +347,16 @@ private static void AddDataToMultiPropsPointStruct(PointStruct pointStruct) { "Vector", "vector" }, }; + private static VectorStoreRecordDefinition CreateSinglePropsVectorStoreRecordDefinition(Type keyType) => new() + { + Properties = new List + { + new VectorStoreRecordKeyProperty("Key", keyType), + new VectorStoreRecordDataProperty("Data", typeof(string)), + new VectorStoreRecordVectorProperty("Vector", typeof(ReadOnlyMemory)), + }, + }; + private sealed class SinglePropsModel { [VectorStoreRecordKey] @@ -376,6 +385,23 @@ private sealed class SinglePropsModel { "Vector2", "vector2" }, }; + private static VectorStoreRecordDefinition CreateMultiPropsVectorStoreRecordDefinition(Type keyType) => new() + { + Properties = new List + { + new VectorStoreRecordKeyProperty("Key", keyType), + new VectorStoreRecordDataProperty("DataString", typeof(string)), + new VectorStoreRecordDataProperty("DataInt", typeof(int)), + new VectorStoreRecordDataProperty("DataLong", typeof(long)), + new VectorStoreRecordDataProperty("DataFloat", typeof(float)), + new VectorStoreRecordDataProperty("DataDouble", typeof(double)), + new VectorStoreRecordDataProperty("DataBool", typeof(bool)), + new VectorStoreRecordDataProperty("DataArrayInt", typeof(List)), + new VectorStoreRecordVectorProperty("Vector1", typeof(ReadOnlyMemory)), + new VectorStoreRecordVectorProperty("Vector2", typeof(ReadOnlyMemory)), + }, + }; + private sealed class MultiPropsModel { [VectorStoreRecordKey] diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs index 152ae5920195..112b81ce01c7 100644 --- 
a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs @@ -415,6 +415,32 @@ public async Task CanUpsertRecordWithCustomMapperAsync() Times.Once); } + /// + /// Tests that the collection can be created even if the definition and the type do not match. + /// In this case, the expectation is that a custom mapper will be provided to map between the + /// schema as defined by the definition and the different data model. + /// + [Fact] + public void CanCreateCollectionWithMismatchedDefinitionAndType() + { + // Arrange. + var definition = new VectorStoreRecordDefinition() + { + Properties = new List + { + new VectorStoreRecordKeyProperty("Id", typeof(string)), + new VectorStoreRecordDataProperty("Text", typeof(string)), + new VectorStoreRecordVectorProperty("Embedding", typeof(ReadOnlyMemory)) { Dimensions = 4 }, + } + }; + + // Act. + var sut = new RedisHashSetVectorStoreRecordCollection( + this._redisDatabaseMock.Object, + TestCollectionName, + new() { VectorStoreRecordDefinition = definition, HashEntriesCustomMapper = Mock.Of>() }); + } + private RedisHashSetVectorStoreRecordCollection CreateRecordCollection(bool useDefinition) { return new RedisHashSetVectorStoreRecordCollection( @@ -480,10 +506,10 @@ private static SinglePropsModel CreateModel(string key, bool withVectors) { Properties = [ - new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("OriginalNameData"), - new VectorStoreRecordDataProperty("Data") { StoragePropertyName = "data_storage_name" }, - new VectorStoreRecordVectorProperty("Vector") { StoragePropertyName = "vector_storage_name" } + new VectorStoreRecordKeyProperty("Key", typeof(string)), + new VectorStoreRecordDataProperty("OriginalNameData", typeof(string)), + new VectorStoreRecordDataProperty("Data", typeof(string)) { StoragePropertyName = "data_storage_name" }, + new 
VectorStoreRecordVectorProperty("Vector", typeof(ReadOnlyMemory)) { StoragePropertyName = "vector_storage_name" } ] }; diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordMapperTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordMapperTests.cs index cb1a13de8822..fd7a56d8765c 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordMapperTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordMapperTests.cs @@ -2,7 +2,6 @@ using System; using System.Collections.Generic; -using System.Reflection; using System.Runtime.InteropServices; using Microsoft.SemanticKernel.Connectors.Redis; using Microsoft.SemanticKernel.Data; @@ -20,8 +19,7 @@ public sealed class RedisHashSetVectorStoreRecordMapperTests public void MapsAllFieldsFromDataToStorageModel() { // Arrange. - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(AllTypesModel), supportsMultipleVectors: true); - var sut = new RedisHashSetVectorStoreRecordMapper(keyProperty, dataProperties, vectorProperties, s_storagePropertyNames); + var sut = new RedisHashSetVectorStoreRecordMapper(s_vectorStoreRecordDefinition, s_storagePropertyNames); // Act. var actual = sut.MapFromDataToStorageModel(CreateModel("test key")); @@ -86,8 +84,7 @@ public void MapsAllFieldsFromDataToStorageModel() public void MapsAllFieldsFromStorageToDataModel() { // Arrange. - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) = VectorStoreRecordPropertyReader.FindProperties(typeof(AllTypesModel), supportsMultipleVectors: true); - var sut = new RedisHashSetVectorStoreRecordMapper(keyProperty, dataProperties, vectorProperties, s_storagePropertyNames); + var sut = new RedisHashSetVectorStoreRecordMapper(s_vectorStoreRecordDefinition, s_storagePropertyNames); // Act. 
var actual = sut.MapFromStorageToDataModel(("test key", CreateHashSet()), new() { IncludeVectors = true }); @@ -185,6 +182,31 @@ private static HashEntry[] CreateHashSet() ["DoubleVector"] = "DoubleVector", }; + private static readonly VectorStoreRecordDefinition s_vectorStoreRecordDefinition = new() + { + Properties = new List() + { + new VectorStoreRecordKeyProperty("Key", typeof(string)), + new VectorStoreRecordDataProperty("StringData", typeof(string)), + new VectorStoreRecordDataProperty("IntData", typeof(int)), + new VectorStoreRecordDataProperty("UIntData", typeof(uint)), + new VectorStoreRecordDataProperty("LongData", typeof(long)), + new VectorStoreRecordDataProperty("ULongData", typeof(ulong)), + new VectorStoreRecordDataProperty("DoubleData", typeof(double)), + new VectorStoreRecordDataProperty("FloatData", typeof(float)), + new VectorStoreRecordDataProperty("BoolData", typeof(bool)), + new VectorStoreRecordDataProperty("NullableIntData", typeof(int?)), + new VectorStoreRecordDataProperty("NullableUIntData", typeof(uint?)), + new VectorStoreRecordDataProperty("NullableLongData", typeof(long?)), + new VectorStoreRecordDataProperty("NullableULongData", typeof(ulong?)), + new VectorStoreRecordDataProperty("NullableDoubleData", typeof(double?)), + new VectorStoreRecordDataProperty("NullableFloatData", typeof(float?)), + new VectorStoreRecordDataProperty("NullableBoolData", typeof(bool?)), + new VectorStoreRecordVectorProperty("FloatVector", typeof(float)), + new VectorStoreRecordVectorProperty("DoubleVector", typeof(double)), + } + }; + private sealed class AllTypesModel { [VectorStoreRecordKey] diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs index 201a7c3fdd02..64d16b2fc244 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs +++ 
b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs @@ -438,6 +438,32 @@ public async Task CanUpsertRecordWithCustomMapperAsync() Times.Once); } + /// + /// Tests that the collection can be created even if the definition and the type do not match. + /// In this case, the expectation is that a custom mapper will be provided to map between the + /// schema as defined by the definition and the different data model. + /// + [Fact] + public void CanCreateCollectionWithMismatchedDefinitionAndType() + { + // Arrange. + var definition = new VectorStoreRecordDefinition() + { + Properties = new List + { + new VectorStoreRecordKeyProperty("Id", typeof(string)), + new VectorStoreRecordDataProperty("Text", typeof(string)), + new VectorStoreRecordVectorProperty("Embedding", typeof(ReadOnlyMemory)) { Dimensions = 4 }, + } + }; + + // Act. + var sut = new RedisJsonVectorStoreRecordCollection( + this._redisDatabaseMock.Object, + TestCollectionName, + new() { VectorStoreRecordDefinition = definition, JsonNodeCustomMapper = Mock.Of>() }); + } + private RedisJsonVectorStoreRecordCollection CreateRecordCollection(bool useDefinition, bool useCustomJsonSerializerOptions = false) { return new RedisJsonVectorStoreRecordCollection( @@ -510,11 +536,11 @@ private static MultiPropsModel CreateModel(string key, bool withVectors) { Properties = [ - new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data1") { IsFilterable = true, PropertyType = typeof(string), StoragePropertyName = "ignored_data1_storage_name" }, - new VectorStoreRecordDataProperty("Data2") { IsFilterable = true, PropertyType = typeof(string) }, - new VectorStoreRecordVectorProperty("Vector1") { Dimensions = 4, StoragePropertyName = "ignored_vector1_storage_name" }, - new VectorStoreRecordVectorProperty("Vector2") { Dimensions = 4 } + new VectorStoreRecordKeyProperty("Key", typeof(string)), + new VectorStoreRecordDataProperty("Data1", typeof(string)) { 
IsFilterable = true, StoragePropertyName = "ignored_data1_storage_name" }, + new VectorStoreRecordDataProperty("Data2", typeof(string)) { IsFilterable = true }, + new VectorStoreRecordVectorProperty("Vector1", typeof(ReadOnlyMemory)) { Dimensions = 4, StoragePropertyName = "ignored_vector1_storage_name" }, + new VectorStoreRecordVectorProperty("Vector2", typeof(ReadOnlyMemory)) { Dimensions = 4 } ] }; diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs index f487656b43e0..eae7028f24ac 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs @@ -20,16 +20,16 @@ public void MapToSchemaCreatesSchema() // Arrange. var properties = new VectorStoreRecordProperty[] { - new VectorStoreRecordKeyProperty("Key"), + new VectorStoreRecordKeyProperty("Key", typeof(string)), - new VectorStoreRecordDataProperty("FilterableString") { PropertyType = typeof(string), IsFilterable = true }, - new VectorStoreRecordDataProperty("FilterableInt") { PropertyType = typeof(int), IsFilterable = true }, - new VectorStoreRecordDataProperty("FilterableNullableInt") { PropertyType = typeof(int?), IsFilterable = true }, + new VectorStoreRecordDataProperty("FilterableString", typeof(string)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("FilterableInt", typeof(int)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("FilterableNullableInt", typeof(int)) { IsFilterable = true }, - new VectorStoreRecordDataProperty("NonFilterableString") { PropertyType = typeof(string) }, + new VectorStoreRecordDataProperty("NonFilterableString", typeof(string)), - new VectorStoreRecordVectorProperty("VectorDefaultIndexingOptions") { Dimensions = 10 }, - new 
VectorStoreRecordVectorProperty("VectorSpecificIndexingOptions") { Dimensions = 20, IndexKind = IndexKind.Flat, DistanceFunction = DistanceFunction.EuclideanDistance }, + new VectorStoreRecordVectorProperty("VectorDefaultIndexingOptions", typeof(ReadOnlyMemory)) { Dimensions = 10 }, + new VectorStoreRecordVectorProperty("VectorSpecificIndexingOptions", typeof(ReadOnlyMemory)) { Dimensions = 20, IndexKind = IndexKind.Flat, DistanceFunction = DistanceFunction.EuclideanDistance }, }; var storagePropertyNames = new Dictionary() @@ -71,24 +71,13 @@ public void MapToSchemaCreatesSchema() Assert.Equal("L2", ((VectorField)schema.Fields[4]).Attributes!["DISTANCE_METRIC"]); } - [Fact] - public void MapToSchemaThrowsOnMissingPropertyType() - { - // Arrange. - var properties = new VectorStoreRecordProperty[] { new VectorStoreRecordDataProperty("FilterableString") { IsFilterable = true } }; - var storagePropertyNames = new Dictionary() { { "FilterableString", "FilterableString" } }; - - // Act and assert. - Assert.Throws(() => RedisVectorStoreCollectionCreateMapping.MapToSchema(properties, storagePropertyNames)); - } - [Theory] [InlineData(null)] [InlineData(0)] public void MapToSchemaThrowsOnInvalidVectorDimensions(int? dimensions) { // Arrange. - var properties = new VectorStoreRecordProperty[] { new VectorStoreRecordVectorProperty("VectorProperty") { Dimensions = dimensions } }; + var properties = new VectorStoreRecordProperty[] { new VectorStoreRecordVectorProperty("VectorProperty", typeof(ReadOnlyMemory)) { Dimensions = dimensions } }; var storagePropertyNames = new Dictionary() { { "VectorProperty", "VectorProperty" } }; // Act and assert. @@ -99,7 +88,7 @@ public void MapToSchemaThrowsOnInvalidVectorDimensions(int? dimensions) public void GetSDKIndexKindThrowsOnUnsupportedIndexKind() { // Arrange. 
- var vectorProperty = new VectorStoreRecordVectorProperty("VectorProperty") { IndexKind = "Unsupported" }; + var vectorProperty = new VectorStoreRecordVectorProperty("VectorProperty", typeof(ReadOnlyMemory)) { IndexKind = "Unsupported" }; // Act and assert. Assert.Throws(() => RedisVectorStoreCollectionCreateMapping.GetSDKIndexKind(vectorProperty)); @@ -109,7 +98,7 @@ public void GetSDKIndexKindThrowsOnUnsupportedIndexKind() public void GetSDKDistanceAlgorithmThrowsOnUnsupportedDistanceFunction() { // Arrange. - var vectorProperty = new VectorStoreRecordVectorProperty("VectorProperty") { DistanceFunction = "Unsupported" }; + var vectorProperty = new VectorStoreRecordVectorProperty("VectorProperty", typeof(ReadOnlyMemory)) { DistanceFunction = "Unsupported" }; // Act and assert. Assert.Throws(() => RedisVectorStoreCollectionCreateMapping.GetSDKDistanceAlgorithm(vectorProperty)); diff --git a/dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs new file mode 100644 index 000000000000..d8e10c71491d --- /dev/null +++ b/dotnet/src/Connectors/Connectors.UnitTests/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using Microsoft.SemanticKernel.Connectors.Pinecone; +using Microsoft.SemanticKernel.Data; +using Moq; +using Xunit; +using Sdk = Pinecone; + +namespace SemanticKernel.Connectors.UnitTests.Pinecone; + +/// +/// Contains tests for the class. +/// +public class PineconeVectorStoreRecordCollectionTests +{ + private const string TestCollectionName = "testcollection"; + + /// + /// Tests that the collection can be created even if the definition and the type do not match. 
+ /// In this case, the expectation is that a custom mapper will be provided to map between the + /// schema as defined by the definition and the different data model. + /// + [Fact] + public void CanCreateCollectionWithMismatchedDefinitionAndType() + { + // Arrange. + var definition = new VectorStoreRecordDefinition() + { + Properties = new List + { + new VectorStoreRecordKeyProperty("Id", typeof(string)), + new VectorStoreRecordDataProperty("Text", typeof(string)), + new VectorStoreRecordVectorProperty("Embedding", typeof(ReadOnlyMemory)) { Dimensions = 4 }, + } + }; + using var pineconeClient = new Sdk.PineconeClient("fake api key"); + + // Act. + var sut = new PineconeVectorStoreRecordCollection( + pineconeClient, + TestCollectionName, + new() { VectorStoreRecordDefinition = definition, VectorCustomMapper = Mock.Of>() }); + } + + public sealed class SinglePropsModel + { + public string Key { get; set; } = string.Empty; + + public string OriginalNameData { get; set; } = string.Empty; + + public string Data { get; set; } = string.Empty; + + public ReadOnlyMemory? 
Vector { get; set; } + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs index c16cc077e33d..23a83606af1d 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs @@ -53,14 +53,14 @@ public AzureAISearchVectorStoreFixture() { Properties = new List { - new VectorStoreRecordKeyProperty("HotelId"), - new VectorStoreRecordDataProperty("HotelName") { PropertyType = typeof(string) }, - new VectorStoreRecordDataProperty("Description") { PropertyType = typeof(string) }, - new VectorStoreRecordVectorProperty("DescriptionEmbedding") { Dimensions = 4 }, - new VectorStoreRecordDataProperty("Tags") { PropertyType = typeof(string[]) }, - new VectorStoreRecordDataProperty("ParkingIncluded") { PropertyType = typeof(bool?) }, - new VectorStoreRecordDataProperty("LastRenovationDate") { PropertyType = typeof(DateTimeOffset?) }, - new VectorStoreRecordDataProperty("Rating") { PropertyType = typeof(float?) 
} + new VectorStoreRecordKeyProperty("HotelId", typeof(string)), + new VectorStoreRecordDataProperty("HotelName", typeof(string)), + new VectorStoreRecordDataProperty("Description", typeof(string)), + new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4 }, + new VectorStoreRecordDataProperty("Tags", typeof(string[])), + new VectorStoreRecordDataProperty("ParkingIncluded", typeof(bool?)), + new VectorStoreRecordDataProperty("LastRenovationDate", typeof(DateTimeOffset?)), + new VectorStoreRecordDataProperty("Rating", typeof(float?)) } }; } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreFixture.cs index dbae7d21bf56..28559cb0d19f 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreFixture.cs @@ -42,14 +42,14 @@ public virtual async Task InitializeAsync() { Properties = [ - new VectorStoreRecordKeyProperty(nameof(PineconeHotel.HotelId)), - new VectorStoreRecordDataProperty(nameof(PineconeHotel.HotelName)), - new VectorStoreRecordDataProperty(nameof(PineconeHotel.HotelCode)), - new VectorStoreRecordDataProperty(nameof(PineconeHotel.ParkingIncluded)) { StoragePropertyName = "parking_is_included" }, - new VectorStoreRecordDataProperty(nameof(PineconeHotel.HotelRating)), - new VectorStoreRecordDataProperty(nameof(PineconeHotel.Tags)), - new VectorStoreRecordDataProperty(nameof(PineconeHotel.Description)), - new VectorStoreRecordVectorProperty(nameof(PineconeHotel.DescriptionEmbedding)) { Dimensions = 8, DistanceFunction = DistanceFunction.DotProductSimilarity } + new VectorStoreRecordKeyProperty(nameof(PineconeHotel.HotelId), typeof(string)), + new VectorStoreRecordDataProperty(nameof(PineconeHotel.HotelName), typeof(string)), + new 
VectorStoreRecordDataProperty(nameof(PineconeHotel.HotelCode), typeof(int)), + new VectorStoreRecordDataProperty(nameof(PineconeHotel.ParkingIncluded), typeof(bool)) { StoragePropertyName = "parking_is_included" }, + new VectorStoreRecordDataProperty(nameof(PineconeHotel.HotelRating), typeof(float)), + new VectorStoreRecordDataProperty(nameof(PineconeHotel.Tags), typeof(List)), + new VectorStoreRecordDataProperty(nameof(PineconeHotel.Description), typeof(string)), + new VectorStoreRecordVectorProperty(nameof(PineconeHotel.DescriptionEmbedding), typeof(ReadOnlyMemory)) { Dimensions = 8, DistanceFunction = DistanceFunction.DotProductSimilarity } ] }; @@ -57,28 +57,28 @@ public virtual async Task InitializeAsync() { Properties = [ - new VectorStoreRecordKeyProperty(nameof(PineconeAllTypes.Id)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.BoolProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableBoolProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.StringProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableStringProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.IntProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableIntProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.LongProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableLongProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.FloatProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableFloatProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.DoubleProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableDoubleProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.DecimalProperty)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableDecimalProperty)), - new 
VectorStoreRecordDataProperty(nameof(PineconeAllTypes.StringArray)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableStringArray)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.StringList)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableStringList)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.Collection)), - new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.Enumerable)), - new VectorStoreRecordVectorProperty(nameof(PineconeAllTypes.Embedding)) { Dimensions = 8, DistanceFunction = DistanceFunction.DotProductSimilarity } + new VectorStoreRecordKeyProperty(nameof(PineconeAllTypes.Id), typeof(string)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.BoolProperty), typeof(bool)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableBoolProperty), typeof(bool?)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.StringProperty), typeof(string)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableStringProperty), typeof(string)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.IntProperty), typeof(int)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableIntProperty), typeof(int?)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.LongProperty), typeof(long)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableLongProperty), typeof(long?)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.FloatProperty), typeof(float)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableFloatProperty), typeof(float?)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.DoubleProperty), typeof(double)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableDoubleProperty), typeof(double?)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.DecimalProperty), typeof(decimal)), + new 
VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableDecimalProperty), typeof(decimal?)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.StringArray), typeof(string[])), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableStringArray), typeof(string[])), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.StringList), typeof(List)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.NullableStringList), typeof(List)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.Collection), typeof(IReadOnlyCollection)), + new VectorStoreRecordDataProperty(nameof(PineconeAllTypes.Enumerable), typeof(IEnumerable)), + new VectorStoreRecordVectorProperty(nameof(PineconeAllTypes.Embedding), typeof(ReadOnlyMemory?)) { Dimensions = 8, DistanceFunction = DistanceFunction.DotProductSimilarity } ] }; diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs index 975b0bfbc87c..1ed9825d4aa9 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs @@ -34,24 +34,24 @@ public QdrantVectorStoreFixture() { Properties = new List { - new VectorStoreRecordKeyProperty("HotelId"), - new VectorStoreRecordDataProperty("HotelName") { IsFilterable = true, PropertyType = typeof(string) }, - new VectorStoreRecordDataProperty("HotelCode") { IsFilterable = true, PropertyType = typeof(int) }, - new VectorStoreRecordDataProperty("ParkingIncluded") { IsFilterable = true, PropertyType = typeof(bool), StoragePropertyName = "parking_is_included" }, - new VectorStoreRecordDataProperty("HotelRating") { IsFilterable = true, PropertyType = typeof(float) }, - new VectorStoreRecordDataProperty("Tags"), - new VectorStoreRecordDataProperty("Description"), - new VectorStoreRecordVectorProperty("DescriptionEmbedding") 
{ Dimensions = 4, DistanceFunction = DistanceFunction.ManhattanDistance } + new VectorStoreRecordKeyProperty("HotelId", typeof(ulong)), + new VectorStoreRecordDataProperty("HotelName", typeof(string)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("HotelCode", typeof(int)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("ParkingIncluded", typeof(bool)) { IsFilterable = true, StoragePropertyName = "parking_is_included" }, + new VectorStoreRecordDataProperty("HotelRating", typeof(float)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("Tags", typeof(List)), + new VectorStoreRecordDataProperty("Description", typeof(string)), + new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4, DistanceFunction = DistanceFunction.ManhattanDistance } } }; this.HotelWithGuidIdVectorStoreRecordDefinition = new VectorStoreRecordDefinition { Properties = new List { - new VectorStoreRecordKeyProperty("HotelId"), - new VectorStoreRecordDataProperty("HotelName"), - new VectorStoreRecordDataProperty("Description"), - new VectorStoreRecordVectorProperty("DescriptionEmbedding") + new VectorStoreRecordKeyProperty("HotelId", typeof(Guid)), + new VectorStoreRecordDataProperty("HotelName", typeof(string)), + new VectorStoreRecordDataProperty("Description", typeof(string)), + new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) } }; } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs index 4256f9411636..4d8987b483d6 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs @@ -39,29 +39,29 @@ public RedisVectorStoreFixture() { Properties = new List { - new VectorStoreRecordKeyProperty("HotelId"), - new 
VectorStoreRecordDataProperty("HotelName") { IsFilterable = true, PropertyType = typeof(string) }, - new VectorStoreRecordDataProperty("HotelCode") { IsFilterable = true, PropertyType = typeof(int) }, - new VectorStoreRecordDataProperty("Description"), - new VectorStoreRecordVectorProperty("DescriptionEmbedding") { Dimensions = 4 }, - new VectorStoreRecordDataProperty("Tags"), - new VectorStoreRecordDataProperty("ParkingIncluded") { StoragePropertyName = "parking_is_included" }, - new VectorStoreRecordDataProperty("LastRenovationDate"), - new VectorStoreRecordDataProperty("Rating"), - new VectorStoreRecordDataProperty("Address") + new VectorStoreRecordKeyProperty("HotelId", typeof(string)), + new VectorStoreRecordDataProperty("HotelName", typeof(string)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("HotelCode", typeof(int)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("Description", typeof(string)), + new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4 }, + new VectorStoreRecordDataProperty("Tags", typeof(string[])), + new VectorStoreRecordDataProperty("ParkingIncluded", typeof(bool)) { StoragePropertyName = "parking_is_included" }, + new VectorStoreRecordDataProperty("LastRenovationDate", typeof(DateTimeOffset)), + new VectorStoreRecordDataProperty("Rating", typeof(double)), + new VectorStoreRecordDataProperty("Address", typeof(HotelAddress)) } }; this.BasicVectorStoreRecordDefinition = new VectorStoreRecordDefinition { Properties = new List { - new VectorStoreRecordKeyProperty("HotelId"), - new VectorStoreRecordDataProperty("HotelName") { IsFilterable = true, PropertyType = typeof(string) }, - new VectorStoreRecordDataProperty("HotelCode") { IsFilterable = true, PropertyType = typeof(int) }, - new VectorStoreRecordDataProperty("Description"), - new VectorStoreRecordVectorProperty("DescriptionEmbedding") { Dimensions = 4 }, - new VectorStoreRecordDataProperty("ParkingIncluded") { 
StoragePropertyName = "parking_is_included" }, - new VectorStoreRecordDataProperty("Rating") + new VectorStoreRecordKeyProperty("HotelId", typeof(string)), + new VectorStoreRecordDataProperty("HotelName", typeof(string)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("HotelCode", typeof(int)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("Description", typeof(string)), + new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4 }, + new VectorStoreRecordDataProperty("ParkingIncluded", typeof(bool)) { StoragePropertyName = "parking_is_included" }, + new VectorStoreRecordDataProperty("Rating", typeof(double)), } }; } diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index d08b96f0f095..35547a5983a0 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -24,6 +24,50 @@ internal static class VectorStoreRecordPropertyReader /// Cache of property enumerations so that we don't incur reflection costs with each invocation. private static readonly ConcurrentDictionary dataProperties, List vectorProperties)> s_multipleVectorsPropertiesCache = new(); + /// + /// Split the given into key, data and vector properties and verify that we have the expected numbers of each type. + /// + /// The name of the type that the definition relates to. + /// The to split. + /// A value indicating whether multiple vectors are supported. + /// A value indicating whether we need at least one vector. + /// The properties on the split into key, data and vector groupings. + /// Thrown if there are any validation failures with the provided . 
+ public static (VectorStoreRecordKeyProperty keyProperty, List dataProperties, List vectorProperties) SplitDefinitionAndVerify( + string typeName, + VectorStoreRecordDefinition definition, + bool supportsMultipleVectors, + bool requiresAtLeastOneVector) + { + var keyProperties = definition.Properties.OfType().ToList(); + + if (keyProperties.Count > 1) + { + throw new ArgumentException($"Multiple key properties found on type {typeName} or the provided {nameof(VectorStoreRecordDefinition)}."); + } + + var keyProperty = keyProperties.FirstOrDefault(); + var dataProperties = definition.Properties.OfType().ToList(); + var vectorProperties = definition.Properties.OfType().ToList(); + + if (keyProperty is null) + { + throw new ArgumentException($"No key property found on type {typeName} or the provided {nameof(VectorStoreRecordDefinition)}."); + } + + if (requiresAtLeastOneVector && vectorProperties.Count == 0) + { + throw new ArgumentException($"No vector property found on type {typeName} or the provided {nameof(VectorStoreRecordDefinition)}."); + } + + if (!supportsMultipleVectors && vectorProperties.Count > 1) + { + throw new ArgumentException($"Multiple vector properties found on type {typeName} or the provided {nameof(VectorStoreRecordDefinition)} while only one is supported."); + } + + return (keyProperty, dataProperties, vectorProperties); + } + /// /// Find the properties with , and attributes /// and verify that they exist and that we have the expected numbers of each type. @@ -207,7 +251,7 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT // Key property. 
var keyAttribute = properties.keyProperty.GetCustomAttribute(); - definitionProperties.Add(new VectorStoreRecordKeyProperty(properties.keyProperty.Name) { StoragePropertyName = keyAttribute!.StoragePropertyName }); + definitionProperties.Add(new VectorStoreRecordKeyProperty(properties.keyProperty.Name, properties.keyProperty.PropertyType) { StoragePropertyName = keyAttribute!.StoragePropertyName }); // Data properties. foreach (var dataProperty in properties.dataProperties) @@ -215,10 +259,9 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT var dataAttribute = dataProperty.GetCustomAttribute(); if (dataAttribute is not null) { - definitionProperties.Add(new VectorStoreRecordDataProperty(dataProperty.Name) + definitionProperties.Add(new VectorStoreRecordDataProperty(dataProperty.Name, dataProperty.PropertyType) { IsFilterable = dataAttribute.IsFilterable, - PropertyType = dataProperty.PropertyType, StoragePropertyName = dataAttribute.StoragePropertyName }); } @@ -230,7 +273,7 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT var vectorAttribute = vectorProperty.GetCustomAttribute(); if (vectorAttribute is not null) { - definitionProperties.Add(new VectorStoreRecordVectorProperty(vectorProperty.Name) + definitionProperties.Add(new VectorStoreRecordVectorProperty(vectorProperty.Name, vectorProperty.PropertyType) { Dimensions = vectorAttribute.Dimensions, IndexKind = vectorAttribute.IndexKind, @@ -257,7 +300,23 @@ public static void VerifyPropertyTypes(List properties, HashSet + /// Verify that the given properties are of the supported types. + /// + /// The properties to check. + /// A set of supported types that the provided properties may have. + /// A set of supported types that the provided enumerable properties may use as their element type. + /// A description of the category of properties being checked. Used for error messaging. 
+ /// Thrown if any of the properties are not in the given set of types. + public static void VerifyPropertyTypes(List properties, HashSet supportedTypes, HashSet supportedEnumerableTypes, string propertyCategoryDescription) + { + foreach (var property in properties) + { + VerifyPropertyType(property.Name, property.PropertyType, supportedTypes, supportedEnumerableTypes, propertyCategoryDescription); + } } /// @@ -266,44 +325,106 @@ public static void VerifyPropertyTypes(List properties, HashSetThe properties to check. /// A set of supported types that the provided properties may have. /// A description of the category of properties being checked. Used for error messaging. + /// A value indicating whether versions of all the types should also be supported. + /// Thrown if any of the properties are not in the given set of types. + public static void VerifyPropertyTypes(IEnumerable properties, HashSet supportedTypes, string propertyCategoryDescription, bool? supportEnumerable = false) + { + var supportedEnumerableTypes = supportEnumerable == true + ? supportedTypes + : []; + + VerifyPropertyTypes(properties, supportedTypes, supportedEnumerableTypes, propertyCategoryDescription); + } + + /// + /// Verify that the given properties are of the supported types. + /// + /// The properties to check. + /// A set of supported types that the provided properties may have. /// A set of supported types that the provided enumerable properties may use as their element type. + /// A description of the category of properties being checked. Used for error messaging. /// Thrown if any of the properties are not in the given set of types. 
- public static void VerifyPropertyTypes(List properties, HashSet supportedTypes, string propertyCategoryDescription, HashSet supportedEnumerableTypes) + public static void VerifyPropertyTypes(IEnumerable properties, HashSet supportedTypes, HashSet supportedEnumerableTypes, string propertyCategoryDescription) { foreach (var property in properties) { - // Add shortcut before testing all the more expensive scenarios. - if (supportedTypes.Contains(property.PropertyType)) - { - continue; - } + VerifyPropertyType(property.DataModelPropertyName, property.PropertyType, supportedTypes, supportedEnumerableTypes, propertyCategoryDescription); + } + } - // Check all collection scenarios and get stored type. - if (typeof(IEnumerable).IsAssignableFrom(property.PropertyType) && supportedEnumerableTypes.Count > 0) + /// + /// Verify that the given property is of the supported types. + /// + /// The name of the property being checked. Used for error messaging. + /// The type of the property being checked. + /// A set of supported types that the provided property may have. + /// A set of supported types that the provided property may use as its element type if it's enumerable. + /// A description of the category of property being checked. Used for error messaging. + /// Thrown if the property is not in the given set of types. + public static void VerifyPropertyType(string propertyName, Type propertyType, HashSet supportedTypes, HashSet supportedEnumerableTypes, string propertyCategoryDescription) + { + // Add shortcut before testing all the more expensive scenarios. + if (supportedTypes.Contains(propertyType)) + { + return; + } + + // Check all collection scenarios and get stored type. 
+ if (supportedEnumerableTypes.Count > 0 && typeof(IEnumerable).IsAssignableFrom(propertyType)) + { + var typeToCheck = propertyType switch { - var typeToCheck = property.PropertyType switch - { - IEnumerable => typeof(object), - var enumerableType when enumerableType.IsGenericType && enumerableType.GetGenericTypeDefinition() == typeof(IEnumerable<>) => enumerableType.GetGenericArguments()[0], - var arrayType when arrayType.IsArray => arrayType.GetElementType()!, - var interfaceType when interfaceType.GetInterfaces().FirstOrDefault(i => i.IsGenericType && i.GetGenericTypeDefinition() == typeof(IEnumerable<>)) is Type enumerableInterface => - enumerableInterface.GetGenericArguments()[0], - _ => property.PropertyType - }; - - if (!supportedEnumerableTypes.Contains(typeToCheck)) - { - var supportedEnumerableElementTypesString = string.Join(", ", supportedEnumerableTypes!.Select(t => t.FullName)); - throw new ArgumentException($"Enumerable {propertyCategoryDescription} properties must have one of the supported element types: {supportedEnumerableElementTypesString}. 
Element type of the property '{property.Name}' is {typeToCheck.FullName}."); - } + IEnumerable => typeof(object), + var enumerableType when enumerableType.IsGenericType && enumerableType.GetGenericTypeDefinition() == typeof(IEnumerable<>) => enumerableType.GetGenericArguments()[0], + var arrayType when arrayType.IsArray => arrayType.GetElementType()!, + var interfaceType when interfaceType.GetInterfaces().FirstOrDefault(i => i.IsGenericType && i.GetGenericTypeDefinition() == typeof(IEnumerable<>)) is Type enumerableInterface => + enumerableInterface.GetGenericArguments()[0], + _ => propertyType + }; + + if (!supportedEnumerableTypes.Contains(typeToCheck)) + { + var supportedEnumerableElementTypesString = string.Join(", ", supportedEnumerableTypes!.Select(t => t.FullName)); + throw new ArgumentException($"Enumerable {propertyCategoryDescription} properties must have one of the supported element types: {supportedEnumerableElementTypesString}. Element type of the property '{propertyName}' is {typeToCheck.FullName}."); } - else + } + else + { + // if we got here, we know the type is not supported + var supportedTypesString = string.Join(", ", supportedTypes.Select(t => t.FullName)); + throw new ArgumentException($"{propertyCategoryDescription} properties must be one of the supported types: {supportedTypesString}. Type of the property '{propertyName}' is {propertyType.FullName}."); + } + } + + /// + /// Get the JSON property name of a property by using the if available, otherwise + /// using the if available, otherwise falling back to the property name. + /// The provided may not actually contain the property, e.g. when the user has a data model that + /// doesn't resemble the stored data and where they are using a custom mapper. + /// + /// The property to retrieve a storage name for. + /// The data model type that the property belongs to. + /// The options used for JSON serialization. + /// The JSON storage property name. 
+ public static string GetJsonPropertyName(VectorStoreRecordProperty property, Type dataModel, JsonSerializerOptions options) + { + var propertyInfo = dataModel.GetProperty(property.DataModelPropertyName); + + if (propertyInfo != null) + { + var jsonPropertyNameAttribute = propertyInfo.GetCustomAttribute(); + if (jsonPropertyNameAttribute is not null) { - // if we got here, we know the type is not supported - var supportedTypesString = string.Join(", ", supportedTypes.Select(t => t.FullName)); - throw new ArgumentException($"{propertyCategoryDescription} properties must be one of the supported types: {supportedTypesString}. Type of the property '{property.Name}' is {property.PropertyType.FullName}."); + return jsonPropertyNameAttribute.Name; } } + + if (options.PropertyNamingPolicy is not null) + { + return options.PropertyNamingPolicy.ConvertName(property.DataModelPropertyName); + } + + return property.DataModelPropertyName; } /// @@ -330,64 +451,79 @@ public static string GetJsonPropertyName(JsonSerializerOptions options, Property } /// - /// Get the storage name of a property by first checking the , if one is available, - /// otherwise falling back to the attributes on the property and finally, the property name. + /// Build a map of property names to the names under which they should be saved in storage if using JSON serialization. /// - /// The property to retrieve a storage name for. - /// The property configuration, if available. - /// The storage name for the property. - public static string GetStoragePropertyName(PropertyInfo property, VectorStoreRecordDefinition? vectorStoreRecordDefinition) + /// The properties to build the map for. + /// The data model type that the property belongs to. + /// The options used for JSON serialization. + /// The map from property names to the names under which they should be saved in storage if using JSON serialization. 
+ public static Dictionary BuildPropertyNameToJsonPropertyNameMap( + (VectorStoreRecordKeyProperty keyProperty, List dataProperties, List vectorProperties) properties, + Type dataModel, + JsonSerializerOptions options) { - if (vectorStoreRecordDefinition is not null) - { - // First check to see if the developer configured a storage property name on the record definition. - if (vectorStoreRecordDefinition.Properties.FirstOrDefault(p => p.DataModelPropertyName == property.Name) is VectorStoreRecordProperty recordProperty && recordProperty.StoragePropertyName is not null) - { - return recordProperty.StoragePropertyName; - } + var jsonPropertyNameMap = new Dictionary(); + jsonPropertyNameMap.Add(properties.keyProperty.DataModelPropertyName, GetJsonPropertyName(properties.keyProperty, dataModel, options)); - // Otherwise, return just the property name. - return property.Name; + foreach (var dataProperty in properties.dataProperties) + { + jsonPropertyNameMap.Add(dataProperty.DataModelPropertyName, GetJsonPropertyName(dataProperty, dataModel, options)); } - // If no definition was supplied, check the attributes. - else if (property.GetCustomAttribute() is VectorStoreRecordDataAttribute dataAttribute) + + foreach (var vectorProperty in properties.vectorProperties) { - return dataAttribute.StoragePropertyName ?? property.Name; + jsonPropertyNameMap.Add(vectorProperty.DataModelPropertyName, GetJsonPropertyName(vectorProperty, dataModel, options)); } - else if (property.GetCustomAttribute() is VectorStoreRecordVectorAttribute vectorAttribute) + + return jsonPropertyNameMap; + } + + /// + /// Build a map of property names to the names under which they should be saved in storage if using JSON serialization. + /// + /// The properties to build the map for. + /// The data model type that the property belongs to. + /// The options used for JSON serialization. + /// The map from property names to the names under which they should be saved in storage if using JSON serialization. 
+ public static Dictionary BuildPropertyNameToJsonPropertyNameMap( + (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties, + Type dataModel, + JsonSerializerOptions options) + { + var jsonPropertyNameMap = new Dictionary(); + jsonPropertyNameMap.Add(properties.keyProperty.Name, GetJsonPropertyName(options, properties.keyProperty)); + + foreach (var dataProperty in properties.dataProperties) { - return vectorAttribute.StoragePropertyName ?? property.Name; + jsonPropertyNameMap.Add(dataProperty.Name, GetJsonPropertyName(options, dataProperty)); } - else if (property.GetCustomAttribute() is VectorStoreRecordKeyAttribute keyAttribute) + + foreach (var vectorProperty in properties.vectorProperties) { - return keyAttribute.StoragePropertyName ?? property.Name; + jsonPropertyNameMap.Add(vectorProperty.Name, GetJsonPropertyName(options, vectorProperty)); } - // Otherwise, return just the property name. - return property.Name; + return jsonPropertyNameMap; } /// /// Build a map of property names to the names under which they should be saved in storage, for the given properties. /// /// The properties to build the map for. - /// The property configuration, if available. /// The map from property names to the names under which they should be saved in storage. - public static Dictionary BuildPropertyNameToStorageNameMap( - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties, - VectorStoreRecordDefinition? vectorStoreRecordDefinition) + public static Dictionary BuildPropertyNameToStorageNameMap((VectorStoreRecordKeyProperty keyProperty, List dataProperties, List vectorProperties) properties) { var storagePropertyNameMap = new Dictionary(); - storagePropertyNameMap.Add(properties.keyProperty.Name, GetStoragePropertyName(properties.keyProperty, vectorStoreRecordDefinition)); + storagePropertyNameMap.Add(properties.keyProperty.DataModelPropertyName, properties.keyProperty.StoragePropertyName ?? 
properties.keyProperty.DataModelPropertyName); foreach (var dataProperty in properties.dataProperties) { - storagePropertyNameMap.Add(dataProperty.Name, GetStoragePropertyName(dataProperty, vectorStoreRecordDefinition)); + storagePropertyNameMap.Add(dataProperty.DataModelPropertyName, dataProperty.StoragePropertyName ?? dataProperty.DataModelPropertyName); } foreach (var vectorProperty in properties.vectorProperties) { - storagePropertyNameMap.Add(vectorProperty.Name, GetStoragePropertyName(vectorProperty, vectorStoreRecordDefinition)); + storagePropertyNameMap.Add(vectorProperty.DataModelPropertyName, vectorProperty.StoragePropertyName ?? vectorProperty.DataModelPropertyName); } return storagePropertyNameMap; diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs index 6db74c97cd41..2fce9a28a412 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs @@ -15,8 +15,9 @@ public sealed class VectorStoreRecordDataProperty : VectorStoreRecordProperty /// Initializes a new instance of the class. /// /// The name of the property. - public VectorStoreRecordDataProperty(string propertyName) - : base(propertyName) + /// The type of the property. + public VectorStoreRecordDataProperty(string propertyName, Type propertyType) + : base(propertyName, propertyType) { } @@ -28,16 +29,10 @@ public VectorStoreRecordDataProperty(VectorStoreRecordDataProperty source) : base(source) { this.IsFilterable = source.IsFilterable; - this.PropertyType = source.PropertyType; } /// /// Gets or sets a value indicating whether this data property is filterable. /// public bool IsFilterable { get; init; } - - /// - /// Gets or sets the type of the data property. - /// - public Type? 
PropertyType { get; init; } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs index 080d7ceaa906..d95dc11ab072 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using System; using System.Diagnostics.CodeAnalysis; namespace Microsoft.SemanticKernel.Data; @@ -14,8 +15,9 @@ public sealed class VectorStoreRecordKeyProperty : VectorStoreRecordProperty /// Initializes a new instance of the class. /// /// The name of the property. - public VectorStoreRecordKeyProperty(string propertyName) - : base(propertyName) + /// The type of the property. + public VectorStoreRecordKeyProperty(string propertyName, Type propertyType) + : base(propertyName, propertyType) { } diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs index ccb760d89a00..951b17afabaa 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using System; using System.Diagnostics.CodeAnalysis; namespace Microsoft.SemanticKernel.Data; @@ -14,15 +15,21 @@ public abstract class VectorStoreRecordProperty /// Initializes a new instance of the class. /// /// The name of the property on the data model. - private protected VectorStoreRecordProperty(string dataModelPropertyName) + /// The type of the property. 
+ private protected VectorStoreRecordProperty(string dataModelPropertyName, Type propertyType) { + Verify.NotNullOrWhiteSpace(dataModelPropertyName); + Verify.NotNull(propertyType); + this.DataModelPropertyName = dataModelPropertyName; + this.PropertyType = propertyType; } private protected VectorStoreRecordProperty(VectorStoreRecordProperty source) { this.DataModelPropertyName = source.DataModelPropertyName; this.StoragePropertyName = source.StoragePropertyName; + this.PropertyType = source.PropertyType; } /// @@ -38,4 +45,9 @@ private protected VectorStoreRecordProperty(VectorStoreRecordProperty source) /// be used. /// public string? StoragePropertyName { get; init; } + + /// + /// Gets or sets the type of the property. + /// + public Type PropertyType { get; private set; } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs index 78966cf0df9f..11be572d293e 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using System; using System.Diagnostics.CodeAnalysis; namespace Microsoft.SemanticKernel.Data; @@ -14,8 +15,9 @@ public sealed class VectorStoreRecordVectorProperty : VectorStoreRecordProperty /// Initializes a new instance of the class. /// /// The name of the property. - public VectorStoreRecordVectorProperty(string propertyName) - : base(propertyName) + /// The type of the property. 
+ public VectorStoreRecordVectorProperty(string propertyName, Type propertyType) + : base(propertyName, propertyType) { } diff --git a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs index e3fcfc310676..decfa8ef20ea 100644 --- a/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs +++ b/dotnet/src/SemanticKernel.Core/Data/VolatileVectorStoreRecordCollection.cs @@ -1,8 +1,10 @@ // Copyright (c) Microsoft. All rights reserved. +using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; +using System.Linq; using System.Reflection; using System.Runtime.CompilerServices; using System.Threading; @@ -48,19 +50,16 @@ public VolatileVectorStoreRecordCollection(string collectionName, VolatileVector this._collectionName = collectionName; this._internalCollection = new(); this._options = options ?? new VolatileVectorStoreRecordCollectionOptions(); + var vectorStoreRecordDefinition = this._options.VectorStoreRecordDefinition ?? VectorStoreRecordPropertyReader.CreateVectorStoreRecordDefinitionFromType(typeof(TRecord), true); - // Enumerate public properties using configuration or attributes. - (PropertyInfo keyProperty, List dataProperties, List vectorProperties) properties; - if (this._options.VectorStoreRecordDefinition is not null) + // Get the key property info. 
+ var keyProperty = vectorStoreRecordDefinition.Properties.OfType().FirstOrDefault(); + if (keyProperty is null) { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), this._options.VectorStoreRecordDefinition, supportsMultipleVectors: true); - } - else - { - properties = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), supportsMultipleVectors: true); + throw new ArgumentException($"No Key property found on {typeof(TRecord).Name} or provided via {nameof(VectorStoreRecordDefinition)}"); } - this._keyPropertyInfo = properties.keyProperty; + this._keyPropertyInfo = typeof(TRecord).GetProperty(keyProperty.DataModelPropertyName) ?? throw new ArgumentException($"Key property {keyProperty.DataModelPropertyName} not found on {typeof(TRecord).Name}"); } /// diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index 000bca81fd74..101ac0ae9f40 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -13,6 +13,44 @@ namespace SemanticKernel.UnitTests.Data; public class VectorStoreRecordPropertyReaderTests { + [Fact] + public void SplitDefinitionsAndVerifyReturnsProperties() + { + // Act. + var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify("testType", this._multiPropsDefinition, true, true); + + // Assert. 
+ Assert.Equal("Key", properties.keyProperty.DataModelPropertyName); + Assert.Equal(2, properties.dataProperties.Count); + Assert.Equal(2, properties.vectorProperties.Count); + Assert.Equal("Data1", properties.dataProperties[0].DataModelPropertyName); + Assert.Equal("Data2", properties.dataProperties[1].DataModelPropertyName); + Assert.Equal("Vector1", properties.vectorProperties[0].DataModelPropertyName); + Assert.Equal("Vector2", properties.vectorProperties[1].DataModelPropertyName); + } + + [Theory] + [InlineData(false, true, "MultiProps")] + [InlineData(true, true, "NoKey")] + [InlineData(true, true, "MultiKeys")] + [InlineData(false, true, "NoVector")] + [InlineData(true, true, "NoVector")] + public void SplitDefinitionsAndVerifyThrowsForInvalidModel(bool supportsMultipleVectors, bool requiresAtLeastOneVector, string definitionName) + { + // Arrange. + var definition = definitionName switch + { + "MultiProps" => this._multiPropsDefinition, + "NoKey" => this._noKeyDefinition, + "MultiKeys" => this._multiKeysDefinition, + "NoVector" => this._noVectorDefinition, + _ => throw new ArgumentException("Invalid definition.") + }; + + // Act & Assert. 
+ Assert.Throws(() => VectorStoreRecordPropertyReader.SplitDefinitionAndVerify("testType", definition, supportsMultipleVectors, requiresAtLeastOneVector)); + } + [Theory] [InlineData(true, false)] [InlineData(false, false)] @@ -127,9 +165,9 @@ public void FindPropertiesThrowsOnNoVectorPropertyWithSingleVectorSupport(bool u [InlineData("Vector", "MissingVector")] public void FindPropertiesUsingConfigThrowsForNotFoundProperties(string propertyType, string propertyName) { - var missingKeyDefinition = new VectorStoreRecordDefinition { Properties = [new VectorStoreRecordKeyProperty(propertyName)] }; - var missingDataDefinition = new VectorStoreRecordDefinition { Properties = [new VectorStoreRecordDataProperty(propertyName)] }; - var missingVectorDefinition = new VectorStoreRecordDefinition { Properties = [new VectorStoreRecordVectorProperty(propertyName)] }; + var missingKeyDefinition = new VectorStoreRecordDefinition { Properties = [new VectorStoreRecordKeyProperty(propertyName, typeof(string))] }; + var missingDataDefinition = new VectorStoreRecordDefinition { Properties = [new VectorStoreRecordDataProperty(propertyName, typeof(string))] }; + var missingVectorDefinition = new VectorStoreRecordDefinition { Properties = [new VectorStoreRecordVectorProperty(propertyName, typeof(ReadOnlyMemory))] }; var definition = propertyType switch { @@ -184,6 +222,7 @@ public void VerifyPropertyTypesPassForAllowedTypes() // Act. VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(string)], "Data"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(this._singlePropsDefinition.Properties.OfType(), [typeof(string)], "Data"); } [Fact] @@ -194,6 +233,7 @@ public void VerifyPropertyTypesPassForAllowedEnumerableTypes() // Act. 
VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(string)], "Data", supportEnumerable: true); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(this._enumerablePropsDefinition.Properties.OfType(), [typeof(string)], "Data", supportEnumerable: true); } [Fact] @@ -203,20 +243,22 @@ public void VerifyPropertyTypesFailsForDisallowedTypes() var properties = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), true); // Act. - var ex = Assert.Throws(() => VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(int), typeof(float)], "Data")); + var ex1 = Assert.Throws(() => VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(int), typeof(float)], "Data")); + var ex2 = Assert.Throws(() => VectorStoreRecordPropertyReader.VerifyPropertyTypes(this._singlePropsDefinition.Properties.OfType(), [typeof(int), typeof(float)], "Data")); // Assert. - Assert.Equal("Data properties must be one of the supported types: System.Int32, System.Single. Type of the property 'Data' is System.String.", ex.Message); + Assert.Equal("Data properties must be one of the supported types: System.Int32, System.Single. Type of the property 'Data' is System.String.", ex1.Message); + Assert.Equal("Data properties must be one of the supported types: System.Int32, System.Single. Type of the property 'Data' is System.String.", ex2.Message); } [Fact] - public void VerifyStoragePropertyNameMapChecksAttributeAndFallsBackToPropertyName() + public void VerifyStoragePropertyNameMapChecksStorageNameAndFallsBackToPropertyName() { // Arrange. - var properties = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), true); + var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify("testType", this._multiPropsDefinition, true, true); // Act. 
- var storageNameMap = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties, this._multiPropsDefinition); + var storageNameMap = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties); // Assert. Assert.Equal(5, storageNameMap.Count); @@ -227,7 +269,7 @@ public void VerifyStoragePropertyNameMapChecksAttributeAndFallsBackToPropertyNam Assert.Equal("Vector1", storageNameMap["Vector1"]); Assert.Equal("Vector2", storageNameMap["Vector2"]); - // From storage property name on vector store record property attribute. + // From storage property name on vector store record data property. Assert.Equal("data_2", storageNameMap["Data2"]); } @@ -259,6 +301,37 @@ public void VerifyGetJsonPropertyNameChecksJsonOptionsAndJsonAttributesAndFallsB Assert.Equal("vector-2", jsonNameMap["Vector2"]); } + [Fact] + public void VerifyBuildPropertyNameToJsonPropertyNameMapChecksJsonAttributesAndJsonOptionsAndFallsbackToPropertyNames() + { + // Arrange. + var options = new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseUpper }; + var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify("testType", this._multiPropsDefinition, true, true); + var propertiesInfo = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), true); + + // Act. + var jsonNameMap1 = VectorStoreRecordPropertyReader.BuildPropertyNameToJsonPropertyNameMap(properties, typeof(MultiPropsModel), options); + var jsonNameMap2 = VectorStoreRecordPropertyReader.BuildPropertyNameToJsonPropertyNameMap(propertiesInfo, typeof(MultiPropsModel), options); + + void assertJsonNameMap(Dictionary jsonNameMap) + { + Assert.Equal(5, jsonNameMap.Count); + + // From JsonNamingPolicy. + Assert.Equal("KEY", jsonNameMap["Key"]); + Assert.Equal("DATA1", jsonNameMap["Data1"]); + Assert.Equal("DATA2", jsonNameMap["Data2"]); + Assert.Equal("VECTOR1", jsonNameMap["Vector1"]); + + // From JsonPropertyName attribute. 
+ Assert.Equal("vector-2", jsonNameMap["Vector2"]); + }; + + // Assert. + assertJsonNameMap(jsonNameMap1); + assertJsonNameMap(jsonNameMap2); + } + #pragma warning disable CA1812 // Invalid unused classes error, since I am using these for testing purposes above. private sealed class NoKeyModel @@ -277,7 +350,7 @@ private sealed class NoVectorModel { Properties = [ - new VectorStoreRecordKeyProperty("Key") + new VectorStoreRecordKeyProperty("Key", typeof(string)) ] }; @@ -294,8 +367,8 @@ private sealed class MultiKeysModel { Properties = [ - new VectorStoreRecordKeyProperty("Key1"), - new VectorStoreRecordKeyProperty("Key2") + new VectorStoreRecordKeyProperty("Key1", typeof(string)), + new VectorStoreRecordKeyProperty("Key2", typeof(string)) ] }; @@ -317,9 +390,9 @@ private sealed class SinglePropsModel { Properties = [ - new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data"), - new VectorStoreRecordVectorProperty("Vector") + new VectorStoreRecordKeyProperty("Key", typeof(string)), + new VectorStoreRecordDataProperty("Data", typeof(string)), + new VectorStoreRecordVectorProperty("Vector", typeof(ReadOnlyMemory)) ] }; @@ -348,11 +421,11 @@ private sealed class MultiPropsModel { Properties = [ - new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data1") { IsFilterable = true }, - new VectorStoreRecordDataProperty("Data2") { StoragePropertyName = "data_2" }, - new VectorStoreRecordVectorProperty("Vector1") { Dimensions = 4, IndexKind = IndexKind.Flat, DistanceFunction = DistanceFunction.DotProductSimilarity }, - new VectorStoreRecordVectorProperty("Vector2") + new VectorStoreRecordKeyProperty("Key", typeof(string)), + new VectorStoreRecordDataProperty("Data1", typeof(string)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("Data2", typeof(string)) { StoragePropertyName = "data_2" }, + new VectorStoreRecordVectorProperty("Vector1", typeof(ReadOnlyMemory)) { Dimensions = 4, IndexKind = IndexKind.Flat, 
DistanceFunction = DistanceFunction.DotProductSimilarity }, + new VectorStoreRecordVectorProperty("Vector2", typeof(ReadOnlyMemory)) ] }; @@ -376,5 +449,17 @@ private sealed class EnumerablePropsModel public string NotAnnotated { get; set; } = string.Empty; } + private readonly VectorStoreRecordDefinition _enumerablePropsDefinition = new() + { + Properties = + [ + new VectorStoreRecordKeyProperty("Key", typeof(string)), + new VectorStoreRecordDataProperty("EnumerableData", typeof(IEnumerable)), + new VectorStoreRecordDataProperty("ArrayData", typeof(string[])), + new VectorStoreRecordDataProperty("ListData", typeof(List)), + new VectorStoreRecordVectorProperty("Vector", typeof(ReadOnlyMemory)) + ] + }; + #pragma warning restore CA1812 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. } diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs index e627ae9829de..c70382481fbc 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VolatileVectorStoreRecordCollectionTests.cs @@ -292,9 +292,9 @@ private VolatileVectorStoreRecordCollection> Create { Properties = [ - new VectorStoreRecordKeyProperty("Key"), - new VectorStoreRecordDataProperty("Data"), - new VectorStoreRecordVectorProperty("Vector") + new VectorStoreRecordKeyProperty("Key", typeof(string)), + new VectorStoreRecordDataProperty("Data", typeof(string)), + new VectorStoreRecordVectorProperty("Vector", typeof(ReadOnlyMemory)) ] }; From 65cd240c70d95effe9166acd971308b699d907b9 Mon Sep 17 00:00:00 2001 From: Dmytro Struk <13853051+dmytrostruk@users.noreply.github.com> Date: Thu, 1 Aug 2024 06:59:01 -0700 Subject: [PATCH 43/48] .Net: [Feature branch] Small formatting improvements (#7588) ### Motivation and Context As per 
https://github.com/dotnet/runtime/issues/27939#issuecomment-531420515, small formatting improvements to use PascalCase for tuple return variables. In this case, the usage of such variables will be similar to C# property naming conventions (e.g. `properties.KeyProperty` instead of `properties.keyProperty`) ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...zureAISearchVectorStoreRecordCollection.cs | 12 ++--- .../PineconeVectorStoreRecordMapper.cs | 12 ++--- .../QdrantVectorStoreRecordCollection.cs | 2 +- .../QdrantVectorStoreRecordMapper.cs | 10 ++-- ...RedisHashSetVectorStoreRecordCollection.cs | 8 +-- .../RedisJsonVectorStoreRecordCollection.cs | 8 +-- .../Data/VectorStoreRecordPropertyReader.cs | 14 +++--- .../VectorStoreRecordPropertyReaderTests.cs | 50 +++++++++---------- 8 files changed, 58 insertions(+), 58 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs index 7a5fa4b02ed8..21018b39c223 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreRecordCollection.cs @@ -113,16 +113,16 @@ public AzureAISearchVectorStoreRecordCollection(SearchIndexClient searchIndexCli // Validate property types. 
var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify(typeof(TRecord).Name, this._vectorStoreRecordDefinition, supportsMultipleVectors: true, requiresAtLeastOneVector: false); - VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.KeyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.DataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.VectorProperties, s_supportedVectorTypes, "Vector"); // Get storage names and store for later use. this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToJsonPropertyNameMap(properties, typeof(TRecord), jsonSerializerOptions); - this._keyStoragePropertyName = this._storagePropertyNames[properties.keyProperty.DataModelPropertyName]; - this._nonVectorStoragePropertyNames = properties.dataProperties + this._keyStoragePropertyName = this._storagePropertyNames[properties.KeyProperty.DataModelPropertyName]; + this._nonVectorStoragePropertyNames = properties.DataProperties .Cast() - .Concat([properties.keyProperty]) + .Concat([properties.KeyProperty]) .Select(x => this._storagePropertyNames[x.DataModelPropertyName]) .ToList(); } diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs index 0c9987fdae7a..da1d95ad6de9 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs +++ 
b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs @@ -71,14 +71,14 @@ public PineconeVectorStoreRecordMapper( { // Validate property types. var propertiesInfo = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), vectorStoreRecordDefinition, supportsMultipleVectors: false); - VectorStoreRecordPropertyReader.VerifyPropertyTypes([propertiesInfo.keyProperty], s_supportedKeyTypes, "Key"); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.dataProperties, s_supportedDataTypes, s_supportedEnumerableDataElementTypes, "Data"); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.vectorProperties, s_supportedVectorTypes, "Vector"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes([propertiesInfo.KeyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.DataProperties, s_supportedDataTypes, s_supportedEnumerableDataElementTypes, "Data"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.VectorProperties, s_supportedVectorTypes, "Vector"); // Assign. - this._keyPropertyInfo = propertiesInfo.keyProperty; - this._dataPropertiesInfo = propertiesInfo.dataProperties; - this._vectorPropertyInfo = propertiesInfo.vectorProperties[0]; + this._keyPropertyInfo = propertiesInfo.KeyProperty; + this._dataPropertiesInfo = propertiesInfo.DataProperties; + this._vectorPropertyInfo = propertiesInfo.VectorProperties[0]; // Get storage names and store for later use. 
var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify(typeof(TRecord).Name, vectorStoreRecordDefinition, supportsMultipleVectors: false, requiresAtLeastOneVector: true); diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs index 5d3f26f94d08..f269761c1a8d 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs @@ -91,7 +91,7 @@ internal QdrantVectorStoreRecordCollection(MockableQdrantClient qdrantClient, st // Validate property types. var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify(typeof(TRecord).Name, this._vectorStoreRecordDefinition, supportsMultipleVectors: this._options.HasNamedVectors, requiresAtLeastOneVector: !this._options.HasNamedVectors); - VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.KeyProperty], s_supportedKeyTypes, "Key"); // Build a map of property names to storage names. this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties); diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs index 88dfbc0a7679..2c4238982391 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs @@ -82,14 +82,14 @@ public QdrantVectorStoreRecordMapper( // Validate property types. 
var propertiesInfo = VectorStoreRecordPropertyReader.FindProperties(typeof(TRecord), vectorStoreRecordDefinition, supportsMultipleVectors: hasNamedVectors); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.dataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.vectorProperties, s_supportedVectorTypes, "Vector"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.DataProperties, s_supportedDataTypes, "Data", supportEnumerable: true); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(propertiesInfo.VectorProperties, s_supportedVectorTypes, "Vector"); // Assign. this._hasNamedVectors = hasNamedVectors; - this._keyPropertyInfo = propertiesInfo.keyProperty; - this._dataPropertiesInfo = propertiesInfo.dataProperties; - this._vectorPropertiesInfo = propertiesInfo.vectorProperties; + this._keyPropertyInfo = propertiesInfo.KeyProperty; + this._dataPropertiesInfo = propertiesInfo.DataProperties; + this._vectorPropertiesInfo = propertiesInfo.VectorProperties; this._storagePropertyNames = storagePropertyNames; // Get json storage names and store for later use. diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs index 84c15d498375..e68edb98870e 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordCollection.cs @@ -103,14 +103,14 @@ public RedisHashSetVectorStoreRecordCollection(IDatabase database, string collec // Validate property types. 
var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify(typeof(TRecord).Name, this._vectorStoreRecordDefinition, supportsMultipleVectors: true, requiresAtLeastOneVector: false); - VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, s_supportedDataTypes, "Data"); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.KeyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.DataProperties, s_supportedDataTypes, "Data"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.VectorProperties, s_supportedVectorTypes, "Vector"); // Lookup storage property names. this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToStorageNameMap(properties); this._dataStoragePropertyNames = properties - .dataProperties + .DataProperties .Select(x => this._storagePropertyNames[x.DataModelPropertyName]) .Select(RedisValue.Unbox) .ToArray(); diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs index 7cb36092c223..44a6bc41d195 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisJsonVectorStoreRecordCollection.cs @@ -90,16 +90,16 @@ public RedisJsonVectorStoreRecordCollection(IDatabase database, string collectio // Validate property types. 
var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify(typeof(TRecord).Name, this._vectorStoreRecordDefinition, supportsMultipleVectors: true, requiresAtLeastOneVector: false); - VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.keyProperty], s_supportedKeyTypes, "Key"); - VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.vectorProperties, s_supportedVectorTypes, "Vector"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes([properties.KeyProperty], s_supportedKeyTypes, "Key"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.VectorProperties, s_supportedVectorTypes, "Vector"); // Lookup json storage property names. - var keyJsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(properties.keyProperty, typeof(TRecord), this._jsonSerializerOptions); + var keyJsonPropertyName = VectorStoreRecordPropertyReader.GetJsonPropertyName(properties.KeyProperty, typeof(TRecord), this._jsonSerializerOptions); // Lookup storage property names. this._storagePropertyNames = VectorStoreRecordPropertyReader.BuildPropertyNameToJsonPropertyNameMap(properties, typeof(TRecord), this._jsonSerializerOptions); this._dataStoragePropertyNames = properties - .dataProperties + .DataProperties .Select(x => this._storagePropertyNames[x.DataModelPropertyName]) .ToArray(); diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index 35547a5983a0..e35967d5afa0 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -33,7 +33,7 @@ internal static class VectorStoreRecordPropertyReader /// A value indicating whether we need at least one vector. /// The properties on the split into key, data and vector groupings. /// Thrown if there are any validation failures with the provided . 
- public static (VectorStoreRecordKeyProperty keyProperty, List dataProperties, List vectorProperties) SplitDefinitionAndVerify( + public static (VectorStoreRecordKeyProperty KeyProperty, List DataProperties, List VectorProperties) SplitDefinitionAndVerify( string typeName, VectorStoreRecordDefinition definition, bool supportsMultipleVectors, @@ -76,7 +76,7 @@ public static (VectorStoreRecordKeyProperty keyProperty, ListThe data model to find the properties on. /// A value indicating whether multiple vector properties are supported instead of just one. /// The categorized properties. - public static (PropertyInfo keyProperty, List dataProperties, List vectorProperties) FindProperties(Type type, bool supportsMultipleVectors) + public static (PropertyInfo KeyProperty, List DataProperties, List VectorProperties) FindProperties(Type type, bool supportsMultipleVectors) { var cache = supportsMultipleVectors ? s_multipleVectorsPropertiesCache : s_singleVectorPropertiesCache; @@ -158,7 +158,7 @@ public static (PropertyInfo keyProperty, List dataProperties, List /// The property configuration. /// A value indicating whether multiple vector properties are supported instead of just one. /// The categorized properties. - public static (PropertyInfo keyProperty, List dataProperties, List vectorProperties) FindProperties(Type type, VectorStoreRecordDefinition vectorStoreRecordDefinition, bool supportsMultipleVectors) + public static (PropertyInfo KeyProperty, List DataProperties, List VectorProperties) FindProperties(Type type, VectorStoreRecordDefinition vectorStoreRecordDefinition, bool supportsMultipleVectors) { PropertyInfo? keyProperty = null; List dataProperties = new(); @@ -250,11 +250,11 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT var definitionProperties = new List(); // Key property. 
- var keyAttribute = properties.keyProperty.GetCustomAttribute(); - definitionProperties.Add(new VectorStoreRecordKeyProperty(properties.keyProperty.Name, properties.keyProperty.PropertyType) { StoragePropertyName = keyAttribute!.StoragePropertyName }); + var keyAttribute = properties.KeyProperty.GetCustomAttribute(); + definitionProperties.Add(new VectorStoreRecordKeyProperty(properties.KeyProperty.Name, properties.KeyProperty.PropertyType) { StoragePropertyName = keyAttribute!.StoragePropertyName }); // Data properties. - foreach (var dataProperty in properties.dataProperties) + foreach (var dataProperty in properties.DataProperties) { var dataAttribute = dataProperty.GetCustomAttribute(); if (dataAttribute is not null) @@ -268,7 +268,7 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT } // Vector properties. - foreach (var vectorProperty in properties.vectorProperties) + foreach (var vectorProperty in properties.VectorProperties) { var vectorAttribute = vectorProperty.GetCustomAttribute(); if (vectorAttribute is not null) diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index 101ac0ae9f40..80e2c0f3e448 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -20,13 +20,13 @@ public void SplitDefinitionsAndVerifyReturnsProperties() var properties = VectorStoreRecordPropertyReader.SplitDefinitionAndVerify("testType", this._multiPropsDefinition, true, true); // Assert. 
- Assert.Equal("Key", properties.keyProperty.DataModelPropertyName); - Assert.Equal(2, properties.dataProperties.Count); - Assert.Equal(2, properties.vectorProperties.Count); - Assert.Equal("Data1", properties.dataProperties[0].DataModelPropertyName); - Assert.Equal("Data2", properties.dataProperties[1].DataModelPropertyName); - Assert.Equal("Vector1", properties.vectorProperties[0].DataModelPropertyName); - Assert.Equal("Vector2", properties.vectorProperties[1].DataModelPropertyName); + Assert.Equal("Key", properties.KeyProperty.DataModelPropertyName); + Assert.Equal(2, properties.DataProperties.Count); + Assert.Equal(2, properties.VectorProperties.Count); + Assert.Equal("Data1", properties.DataProperties[0].DataModelPropertyName); + Assert.Equal("Data2", properties.DataProperties[1].DataModelPropertyName); + Assert.Equal("Vector1", properties.VectorProperties[0].DataModelPropertyName); + Assert.Equal("Vector2", properties.VectorProperties[1].DataModelPropertyName); } [Theory] @@ -64,11 +64,11 @@ public void FindPropertiesCanFindAllPropertiesOnSinglePropsModel(bool supportsMu VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), supportsMultipleVectors); // Assert. - Assert.Equal("Key", properties.keyProperty.Name); - Assert.Single(properties.dataProperties); - Assert.Single(properties.vectorProperties); - Assert.Equal("Data", properties.dataProperties[0].Name); - Assert.Equal("Vector", properties.vectorProperties[0].Name); + Assert.Equal("Key", properties.KeyProperty.Name); + Assert.Single(properties.DataProperties); + Assert.Single(properties.VectorProperties); + Assert.Equal("Data", properties.DataProperties[0].Name); + Assert.Equal("Vector", properties.VectorProperties[0].Name); } [Theory] @@ -82,13 +82,13 @@ public void FindPropertiesCanFindAllPropertiesOnMultiPropsModel(bool useConfig) VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), true); // Assert. 
- Assert.Equal("Key", properties.keyProperty.Name); - Assert.Equal(2, properties.dataProperties.Count); - Assert.Equal(2, properties.vectorProperties.Count); - Assert.Equal("Data1", properties.dataProperties[0].Name); - Assert.Equal("Data2", properties.dataProperties[1].Name); - Assert.Equal("Vector1", properties.vectorProperties[0].Name); - Assert.Equal("Vector2", properties.vectorProperties[1].Name); + Assert.Equal("Key", properties.KeyProperty.Name); + Assert.Equal(2, properties.DataProperties.Count); + Assert.Equal(2, properties.VectorProperties.Count); + Assert.Equal("Data1", properties.DataProperties[0].Name); + Assert.Equal("Data2", properties.DataProperties[1].Name); + Assert.Equal("Vector1", properties.VectorProperties[0].Name); + Assert.Equal("Vector2", properties.VectorProperties[1].Name); } [Theory] @@ -221,7 +221,7 @@ public void VerifyPropertyTypesPassForAllowedTypes() var properties = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), true); // Act. - VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(string)], "Data"); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.DataProperties, [typeof(string)], "Data"); VectorStoreRecordPropertyReader.VerifyPropertyTypes(this._singlePropsDefinition.Properties.OfType(), [typeof(string)], "Data"); } @@ -232,7 +232,7 @@ public void VerifyPropertyTypesPassForAllowedEnumerableTypes() var properties = VectorStoreRecordPropertyReader.FindProperties(typeof(EnumerablePropsModel), true); // Act. 
- VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(string)], "Data", supportEnumerable: true); + VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.DataProperties, [typeof(string)], "Data", supportEnumerable: true); VectorStoreRecordPropertyReader.VerifyPropertyTypes(this._enumerablePropsDefinition.Properties.OfType(), [typeof(string)], "Data", supportEnumerable: true); } @@ -243,7 +243,7 @@ public void VerifyPropertyTypesFailsForDisallowedTypes() var properties = VectorStoreRecordPropertyReader.FindProperties(typeof(SinglePropsModel), true); // Act. - var ex1 = Assert.Throws(() => VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.dataProperties, [typeof(int), typeof(float)], "Data")); + var ex1 = Assert.Throws(() => VectorStoreRecordPropertyReader.VerifyPropertyTypes(properties.DataProperties, [typeof(int), typeof(float)], "Data")); var ex2 = Assert.Throws(() => VectorStoreRecordPropertyReader.VerifyPropertyTypes(this._singlePropsDefinition.Properties.OfType(), [typeof(int), typeof(float)], "Data")); // Assert. @@ -279,9 +279,9 @@ public void VerifyGetJsonPropertyNameChecksJsonOptionsAndJsonAttributesAndFallsB // Arrange. var options = new JsonSerializerOptions() { PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseUpper }; var properties = VectorStoreRecordPropertyReader.FindProperties(typeof(MultiPropsModel), true); - var allProperties = (new PropertyInfo[] { properties.keyProperty }) - .Concat(properties.dataProperties) - .Concat(properties.vectorProperties); + var allProperties = (new PropertyInfo[] { properties.KeyProperty }) + .Concat(properties.DataProperties) + .Concat(properties.VectorProperties); // Act. var jsonNameMap = allProperties From 2fec094d218ff6e19db54cb00af91d33105fb92b Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Thu, 1 Aug 2024 15:37:38 +0100 Subject: [PATCH 44/48] .Net: Make full text search configurable and improve support for tagging. 
(#7593) ### Motivation and Context As part of the new VectorStore abstractions, it's possible to create collections with indexing. Users may want to explicitly configure which fields are full text searchable, since enabling this can add a lot of cost. See [#7560](https://github.com/microsoft/semantic-kernel/issues/7560) ### Description - Adding option to make a property full text searchable - Adding implementations for AzureAISearch, Redis and Qdrant to support this option. - Updating Tagging support by enabling it where possible for enumerable string properties that are marked as filterable. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- ...VectorStoreCollectionCreateMappingTests.cs | 37 ++++++++- ...earchVectorStoreCollectionCreateMapping.cs | 15 ++-- ...econeVectorStoreCollectionCreateMapping.cs | 2 +- ...drantVectorStoreCollectionCreateMapping.cs | 6 +- .../QdrantVectorStoreRecordCollection.cs | 28 ++++++- ...RedisVectorStoreCollectionCreateMapping.cs | 75 +++++++++++++++++-- .../QdrantVectorStoreRecordCollectionTests.cs | 15 +++- ...HashSetVectorStoreRecordCollectionTests.cs | 4 +- ...disJsonVectorStoreRecordCollectionTests.cs | 4 +- ...VectorStoreCollectionCreateMappingTests.cs | 46 ++++++++---- .../AzureAISearchVectorStoreFixture.cs | 16 ++-- ...ineconeVectorStoreRecordCollectionTests.cs | 2 +- .../Memory/Qdrant/QdrantVectorStoreFixture.cs | 12 +-- ...disJsonVectorStoreRecordCollectionTests.cs | 4 + .../Memory/Redis/RedisVectorStoreFixture.cs | 17 +++-- .../Data/VectorStoreRecordPropertyReader.cs | 1 + 
.../VectorStoreRecordDataAttribute.cs | 11 +++ .../VectorStoreRecordVectorAttribute.cs | 10 +++ .../VectorStoreRecordDataProperty.cs | 12 +++ .../VectorStoreRecordVectorProperty.cs | 10 +++ .../VectorStoreRecordPropertyReaderTests.cs | 7 +- 21 files changed, 265 insertions(+), 69 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs index cc43e08b7d64..075880775324 100644 --- a/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAISearch.UnitTests/AzureAISearchVectorStoreCollectionCreateMappingTests.cs @@ -34,7 +34,7 @@ public void MapKeyFieldCreatesSearchableField() [Theory] [InlineData(true)] [InlineData(false)] - public void MapStringDataFieldCreatesSearchableField(bool isFilterable) + public void MapFilterableStringDataFieldCreatesSimpleField(bool isFilterable) { // Arrange var dataProperty = new VectorStoreRecordDataProperty("testdata", typeof(string)) { IsFilterable = isFilterable }; @@ -43,6 +43,26 @@ public void MapStringDataFieldCreatesSearchableField(bool isFilterable) // Act var result = AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty, storagePropertyName); + // Assert + Assert.NotNull(result); + Assert.IsType(result); + Assert.Equal(storagePropertyName, result.Name); + Assert.False(result.IsKey); + Assert.Equal(isFilterable, result.IsFilterable); + } + + [Theory] + [InlineData(true)] + [InlineData(false)] + public void MapFullTextSearchableStringDataFieldCreatesSearchableField(bool isFilterable) + { + // Arrange + var dataProperty = new VectorStoreRecordDataProperty("testdata", typeof(string)) { IsFilterable = isFilterable, IsFullTextSearchable = true }; + var storagePropertyName = "test_data"; + + // Act + var result = 
AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty, storagePropertyName); + // Assert Assert.NotNull(result); Assert.IsType(result); @@ -51,6 +71,17 @@ public void MapStringDataFieldCreatesSearchableField(bool isFilterable) Assert.Equal(isFilterable, result.IsFilterable); } + [Fact] + public void MapFullTextSearchableStringDataFieldThrowsForInvalidType() + { + // Arrange + var dataProperty = new VectorStoreRecordDataProperty("testdata", typeof(int)) { IsFullTextSearchable = true }; + var storagePropertyName = "test_data"; + + // Act & Assert + Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapDataField(dataProperty, storagePropertyName)); + } + [Theory] [InlineData(true)] [InlineData(false)] @@ -138,7 +169,7 @@ public void MapVectorFieldThrowsForUnsupportedDistanceFunction() var vectorProperty = new VectorStoreRecordVectorProperty("testvector", typeof(ReadOnlyMemory)) { Dimensions = 10, DistanceFunction = DistanceFunction.ManhattanDistance }; var storagePropertyName = "test_vector"; - // Act + // Act & Assert Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty, storagePropertyName)); } @@ -149,7 +180,7 @@ public void MapVectorFieldThrowsForMissingDimensionsCount() var vectorProperty = new VectorStoreRecordVectorProperty("testvector", typeof(ReadOnlyMemory)); var storagePropertyName = "test_vector"; - // Act + // Act & Assert Assert.Throws(() => AzureAISearchVectorStoreCollectionCreateMapping.MapVectorField(vectorProperty, storagePropertyName)); } diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs index 7b832c667111..2ee086d69d53 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs +++ 
b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchVectorStoreCollectionCreateMapping.cs @@ -34,8 +34,13 @@ public static SearchableField MapKeyField(VectorStoreRecordKeyProperty keyProper /// Throws when the definition is missing required information. public static SimpleField MapDataField(VectorStoreRecordDataProperty dataProperty, string storagePropertyName) { - if (dataProperty.PropertyType == typeof(string)) + if (dataProperty.IsFullTextSearchable) { + if (dataProperty.PropertyType != typeof(string)) + { + throw new InvalidOperationException($"Property {nameof(dataProperty.IsFullTextSearchable)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.DataModelPropertyName}' is set to true, but the property type is not a string. The Azure AI Search VectorStore supports {nameof(dataProperty.IsFullTextSearchable)} on string properties only."); + } + return new SearchableField(storagePropertyName) { IsFilterable = dataProperty.IsFilterable }; } @@ -69,7 +74,7 @@ public static (VectorSearchField vectorSearchField, VectorSearchAlgorithmConfigu { IndexKind.Hnsw => new HnswAlgorithmConfiguration(algorithmConfigName) { Parameters = new HnswParameters { Metric = algorithmMetric } }, IndexKind.Flat => new ExhaustiveKnnAlgorithmConfiguration(algorithmConfigName) { Parameters = new ExhaustiveKnnParameters { Metric = algorithmMetric } }, - _ => throw new InvalidOperationException($"Unsupported index kind '{indexKind}' on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'.") + _ => throw new InvalidOperationException($"Index kind '{indexKind}' on {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}' is not supported by the Azure AI Search VectorStore.") }; var vectorSearchProfile = new VectorSearchProfile(vectorSearchProfileName, algorithmConfigName); @@ -111,7 +116,7 @@ public static VectorSearchAlgorithmMetric GetSDKDistanceAlgorithm(VectorStoreRec DistanceFunction.CosineSimilarity => 
VectorSearchAlgorithmMetric.Cosine, DistanceFunction.DotProductSimilarity => VectorSearchAlgorithmMetric.DotProduct, DistanceFunction.EuclideanDistance => VectorSearchAlgorithmMetric.Euclidean, - _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'.") + _ => throw new InvalidOperationException($"Distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}' is not supported by the Azure AI Search VectorStore.") }; } @@ -134,7 +139,7 @@ public static SearchFieldDataType GetSDKFieldDataType(Type propertyType) Type dateTimeType when dateTimeType == typeof(DateTime) || dateTimeType == typeof(DateTime?) => SearchFieldDataType.DateTimeOffset, Type dateTimeOffsetType when dateTimeOffsetType == typeof(DateTimeOffset) || dateTimeOffsetType == typeof(DateTimeOffset?) => SearchFieldDataType.DateTimeOffset, Type collectionType when typeof(IEnumerable).IsAssignableFrom(collectionType) => SearchFieldDataType.Collection(GetSDKFieldDataType(GetEnumerableType(propertyType))), - _ => throw new InvalidOperationException($"Unsupported data type '{propertyType}' for {nameof(VectorStoreRecordDataProperty)}.") + _ => throw new InvalidOperationException($"Data type '{propertyType}' for {nameof(VectorStoreRecordDataProperty)} is not supported by the Azure AI Search VectorStore.") }; } @@ -165,6 +170,6 @@ public static Type GetEnumerableType(Type type) return enumerableInterface.GetGenericArguments()[0]; } - throw new InvalidOperationException($"Unsupported data type '{type}' for {nameof(VectorStoreRecordDataProperty)}."); + throw new InvalidOperationException($"Data type '{type}' for {nameof(VectorStoreRecordDataProperty)} is not supported by the Azure AI Search VectorStore."); } } diff --git 
a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs index d24219407050..0a50cf2ac399 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreCollectionCreateMapping.cs @@ -41,6 +41,6 @@ public static Metric GetSDKMetricAlgorithm(VectorStoreRecordVectorProperty vecto DistanceFunction.DotProductSimilarity => Metric.DotProduct, DistanceFunction.EuclideanDistance => Metric.Euclidean, null => Metric.Cosine, - _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'.") + _ => throw new InvalidOperationException($"Distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}' is not supported by the Pinecone VectorStore.") }; } diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs index 2117d5616de9..e637ae2e06ab 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreCollectionCreateMapping.cs @@ -39,7 +39,7 @@ internal static class QdrantVectorStoreCollectionCreateMapping { typeof(double?), PayloadSchemaType.Float }, { typeof(decimal?), PayloadSchemaType.Float }, - { typeof(string), PayloadSchemaType.Text }, + { typeof(string), PayloadSchemaType.Keyword }, { typeof(DateTime), PayloadSchemaType.Datetime }, { typeof(bool), PayloadSchemaType.Bool }, @@ -62,7 +62,7 @@ public static VectorParams MapSingleVector(VectorStoreRecordVectorProperty 
vecto if (vectorProperty!.IndexKind is not null && vectorProperty!.IndexKind != IndexKind.Hnsw) { - throw new InvalidOperationException($"Unsupported index kind '{vectorProperty!.IndexKind}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'."); + throw new InvalidOperationException($"Index kind '{vectorProperty!.IndexKind}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}' is not supported by the Qdrant VectorStore."); } return new VectorParams { Size = (ulong)vectorProperty.Dimensions, Distance = QdrantVectorStoreCollectionCreateMapping.GetSDKDistanceAlgorithm(vectorProperty) }; @@ -112,7 +112,7 @@ public static Distance GetSDKDistanceAlgorithm(VectorStoreRecordVectorProperty v DistanceFunction.DotProductSimilarity => Distance.Dot, DistanceFunction.EuclideanDistance => Distance.Euclid, DistanceFunction.ManhattanDistance => Distance.Manhattan, - _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'.") + _ => throw new InvalidOperationException($"Distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}' is not supported by the Qdrant VectorStore.") }; } } diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs index f269761c1a8d..a49c530b2cdb 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs @@ -128,8 +128,8 @@ public async Task CreateCollectionAsync(CancellationToken cancellationToken = de { if (!this._options.HasNamedVectors) { - // If we are not using named vectors, we can only have one vector property. 
We can assume we have at least one, since this is already verified in the constructor. - var singleVectorProperty = this._vectorStoreRecordDefinition.Properties.First(x => x is VectorStoreRecordVectorProperty vectorProperty) as VectorStoreRecordVectorProperty; + // If we are not using named vectors, we can only have one vector property. We can assume we have exactly one, since this is already verified in the constructor. + var singleVectorProperty = this._vectorStoreRecordDefinition.Properties.OfType().First(); // Map the single vector property to the qdrant config. var vectorParams = QdrantVectorStoreCollectionCreateMapping.MapSingleVector(singleVectorProperty!); @@ -145,7 +145,7 @@ await this.RunOperationAsync( else { // Since we are using named vectors, iterate over all vector properties. - var vectorProperties = this._vectorStoreRecordDefinition.Properties.Where(x => x is VectorStoreRecordVectorProperty).Select(x => (VectorStoreRecordVectorProperty)x); + var vectorProperties = this._vectorStoreRecordDefinition.Properties.OfType(); // Map the named vectors to the qdrant config. var vectorParamsMap = QdrantVectorStoreCollectionCreateMapping.MapNamedVectors(vectorProperties, this._storagePropertyNames); @@ -160,7 +160,7 @@ await this.RunOperationAsync( } // Add indexes for each of the data properties that require filtering. - var dataProperties = this._vectorStoreRecordDefinition.Properties.Where(x => x is VectorStoreRecordDataProperty).Select(x => (VectorStoreRecordDataProperty)x).Where(x => x.IsFilterable); + var dataProperties = this._vectorStoreRecordDefinition.Properties.OfType().Where(x => x.IsFilterable); foreach (var dataProperty in dataProperties) { var storageFieldName = this._storagePropertyNames[dataProperty.DataModelPropertyName]; @@ -174,6 +174,26 @@ await this.RunOperationAsync( schemaType, cancellationToken: cancellationToken)).ConfigureAwait(false); } + + // Add indexes for each of the data properties that require full text search. 
+ dataProperties = this._vectorStoreRecordDefinition.Properties.OfType().Where(x => x.IsFullTextSearchable); + foreach (var dataProperty in dataProperties) + { + if (dataProperty.PropertyType != typeof(string)) + { + throw new InvalidOperationException($"Property {nameof(dataProperty.IsFullTextSearchable)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.DataModelPropertyName}' is set to true, but the property type is not a string. The Qdrant VectorStore supports {nameof(dataProperty.IsFullTextSearchable)} on string properties only."); + } + + var storageFieldName = this._storagePropertyNames[dataProperty.DataModelPropertyName]; + + await this.RunOperationAsync( + "CreatePayloadIndex", + () => this._qdrantClient.CreatePayloadIndexAsync( + this._collectionName, + storageFieldName, + PayloadSchemaType.Text, + cancellationToken: cancellationToken)).ConfigureAwait(false); + } } /// diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs index ae0ec760d3e7..2bdb6a67b5ef 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisVectorStoreCollectionCreateMapping.cs @@ -1,8 +1,10 @@ // Copyright (c) Microsoft. All rights reserved. using System; +using System.Collections; using System.Collections.Generic; using System.Globalization; +using System.Linq; using Microsoft.SemanticKernel.Data; using NRedisStack.Search; @@ -63,18 +65,47 @@ public static Schema MapToSchema(IEnumerable properti } // Data property. 
- if (property is VectorStoreRecordDataProperty dataProperty && dataProperty.IsFilterable) + if (property is VectorStoreRecordDataProperty dataProperty && (dataProperty.IsFilterable || dataProperty.IsFullTextSearchable)) { var storageName = storagePropertyNames[dataProperty.DataModelPropertyName]; - if (dataProperty.PropertyType == typeof(string)) + if (dataProperty.IsFilterable && dataProperty.IsFullTextSearchable) { - schema.AddTextField(new FieldName($"$.{storageName}", storageName)); + throw new InvalidOperationException($"Property '{dataProperty.DataModelPropertyName}' has both {nameof(VectorStoreRecordDataProperty.IsFilterable)} and {nameof(VectorStoreRecordDataProperty.IsFullTextSearchable)} set to true, and this is not supported by the Redis VectorStore."); } - if (RedisVectorStoreCollectionCreateMapping.s_supportedFilterableNumericDataTypes.Contains(dataProperty.PropertyType)) + // Add full text search field index. + if (dataProperty.IsFullTextSearchable) { - schema.AddNumericField(new FieldName($"$.{storageName}", storageName)); + if (dataProperty.PropertyType == typeof(string) || (typeof(IEnumerable).IsAssignableFrom(dataProperty.PropertyType) && GetEnumerableType(dataProperty.PropertyType) == typeof(string))) + { + schema.AddTextField(new FieldName($"$.{storageName}", storageName)); + } + else + { + throw new InvalidOperationException($"Property {nameof(dataProperty.IsFullTextSearchable)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.DataModelPropertyName}' is set to true, but the property type is not a string or IEnumerable. The Redis VectorStore supports {nameof(dataProperty.IsFullTextSearchable)} on string or IEnumerable properties only."); + } + } + + // Add filter field index. 
+ if (dataProperty.IsFilterable) + { + if (dataProperty.PropertyType == typeof(string)) + { + schema.AddTagField(new FieldName($"$.{storageName}", storageName)); + } + else if (typeof(IEnumerable).IsAssignableFrom(dataProperty.PropertyType) && GetEnumerableType(dataProperty.PropertyType) == typeof(string)) + { + schema.AddTagField(new FieldName($"$.{storageName}.*", storageName)); + } + else if (RedisVectorStoreCollectionCreateMapping.s_supportedFilterableNumericDataTypes.Contains(dataProperty.PropertyType)) + { + schema.AddNumericField(new FieldName($"$.{storageName}", storageName)); + } + else + { + throw new InvalidOperationException($"Property '{dataProperty.DataModelPropertyName}' is marked as {nameof(VectorStoreRecordDataProperty.IsFilterable)}, but the property type '{dataProperty.PropertyType}' is not supported. Only string, IEnumerable and numeric properties are supported for filtering by the Redis VectorStore."); + } } continue; @@ -122,7 +153,7 @@ public static Schema.VectorField.VectorAlgo GetSDKIndexKind(VectorStoreRecordVec { IndexKind.Hnsw => Schema.VectorField.VectorAlgo.HNSW, IndexKind.Flat => Schema.VectorField.VectorAlgo.FLAT, - _ => throw new InvalidOperationException($"Unsupported index kind '{vectorProperty.IndexKind}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}'.") + _ => throw new InvalidOperationException($"Index kind '{vectorProperty.IndexKind}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}' is not supported by the Redis VectorStore.") }; } @@ -145,7 +176,37 @@ public static string GetSDKDistanceAlgorithm(VectorStoreRecordVectorProperty vec DistanceFunction.CosineSimilarity => "COSINE", DistanceFunction.DotProductSimilarity => "IP", DistanceFunction.EuclideanDistance => "L2", - _ => throw new InvalidOperationException($"Unsupported distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} 
'{vectorProperty.DataModelPropertyName}'.") + _ => throw new InvalidOperationException($"Distance function '{vectorProperty.DistanceFunction}' for {nameof(VectorStoreRecordVectorProperty)} '{vectorProperty.DataModelPropertyName}' is not supported by the Redis VectorStore.") }; } + + /// <summary> + /// Gets the type of object stored in the given enumerable type. + /// </summary> + /// <param name="type">The enumerable to get the stored type for.</param> + /// <returns>The type of object stored in the given enumerable type.</returns> + /// <exception cref="InvalidOperationException">Thrown when the given type is not enumerable.</exception> + private static Type GetEnumerableType(Type type) + { + if (type == typeof(IEnumerable)) + { + return typeof(object); + } + else if (type.IsArray) + { + return type.GetElementType()!; + } + + if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(IEnumerable<>)) + { + return type.GetGenericArguments()[0]; + } + + if (type.GetInterfaces().FirstOrDefault(i => i.IsGenericType && i.GetGenericTypeDefinition() == typeof(IEnumerable<>)) is Type enumerableInterface) + { + return enumerableInterface.GetGenericArguments()[0]; + } + + throw new InvalidOperationException($"Data type '{type}' for {nameof(VectorStoreRecordDataProperty)} is not supported by the Redis VectorStore."); + } } diff --git a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs index f2508d59d03a..1889ceef5fef 100644 --- a/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Qdrant.UnitTests/QdrantVectorStoreRecordCollectionTests.cs @@ -87,6 +87,15 @@ public async Task CanCreateCollectionAsync() this._testCancellationToken), Times.Once); + this._qdrantClientMock + .Verify( + x => x.CreatePayloadIndexAsync( + TestCollectionName, + "OriginalNameData", + PayloadSchemaType.Keyword, + this._testCancellationToken), + Times.Once); + this._qdrantClientMock .Verify( x =>
x.CreatePayloadIndexAsync( @@ -101,7 +110,7 @@ public async Task CanCreateCollectionAsync() x => x.CreatePayloadIndexAsync( TestCollectionName, "data_storage_name", - PayloadSchemaType.Text, + PayloadSchemaType.Keyword, this._testCancellationToken), Times.Once); } @@ -677,7 +686,7 @@ private static VectorStoreRecordDefinition CreateSinglePropsDefinition(Type keyT Properties = [ new VectorStoreRecordKeyProperty("Key", keyType), - new VectorStoreRecordDataProperty("OriginalNameData", typeof(string)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("OriginalNameData", typeof(string)) { IsFilterable = true, IsFullTextSearchable = true }, new VectorStoreRecordDataProperty("Data", typeof(string)) { IsFilterable = true, StoragePropertyName = "data_storage_name" }, new VectorStoreRecordVectorProperty("Vector", typeof(ReadOnlyMemory)) { StoragePropertyName = "vector_storage_name" } ] @@ -689,7 +698,7 @@ public sealed class SinglePropsModel [VectorStoreRecordKey] public required T Key { get; set; } - [VectorStoreRecordData(IsFilterable = true)] + [VectorStoreRecordData(IsFilterable = true, IsFullTextSearchable = true)] public string OriginalNameData { get; set; } = string.Empty; [JsonPropertyName("ignored_data_json_name")] diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs index 112b81ce01c7..a95179e86346 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisHashSetVectorStoreRecordCollectionTests.cs @@ -85,11 +85,11 @@ public async Task CanCreateCollectionAsync() "$.OriginalNameData", "AS", "OriginalNameData", - "TEXT", + "TAG", "$.data_storage_name", "AS", "data_storage_name", - "TEXT", + "TAG", "$.vector_storage_name", "AS", "vector_storage_name", diff --git 
a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs index 64d16b2fc244..58cda992db4d 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisJsonVectorStoreRecordCollectionTests.cs @@ -92,11 +92,11 @@ public async Task CanCreateCollectionAsync(bool useDefinition, bool useCustomJso "$.data1_json_name", "AS", "data1_json_name", - "TEXT", + "TAG", $"$.{expectedData2Name}", "AS", expectedData2Name, - "TEXT", + "TAG", "$.vector1_json_name", "AS", "vector1_json_name", diff --git a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs index eae7028f24ac..c5bb3b12b2c5 100644 --- a/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs +++ b/dotnet/src/Connectors/Connectors.Redis.UnitTests/RedisVectorStoreCollectionCreateMappingTests.cs @@ -23,6 +23,10 @@ public void MapToSchemaCreatesSchema() new VectorStoreRecordKeyProperty("Key", typeof(string)), new VectorStoreRecordDataProperty("FilterableString", typeof(string)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("FullTextSearchableString", typeof(string)) { IsFullTextSearchable = true }, + new VectorStoreRecordDataProperty("FilterableStringEnumerable", typeof(string[])) { IsFilterable = true }, + new VectorStoreRecordDataProperty("FullTextSearchableStringEnumerable", typeof(string[])) { IsFullTextSearchable = true }, + new VectorStoreRecordDataProperty("FilterableInt", typeof(int)) { IsFilterable = true }, new VectorStoreRecordDataProperty("FilterableNullableInt", typeof(int)) { IsFilterable = true }, @@ -35,6 +39,9 @@ public void MapToSchemaCreatesSchema() var storagePropertyNames 
= new Dictionary() { { "FilterableString", "FilterableString" }, + { "FullTextSearchableString", "FullTextSearchableString" }, + { "FilterableStringEnumerable", "FilterableStringEnumerable" }, + { "FullTextSearchableStringEnumerable", "FullTextSearchableStringEnumerable" }, { "FilterableInt", "FilterableInt" }, { "FilterableNullableInt", "FilterableNullableInt" }, { "NonFilterableString", "NonFilterableString" }, @@ -47,28 +54,35 @@ public void MapToSchemaCreatesSchema() // Assert. Assert.NotNull(schema); - Assert.Equal(5, schema.Fields.Count); + Assert.Equal(8, schema.Fields.Count); - Assert.IsType(schema.Fields[0]); - Assert.IsType(schema.Fields[1]); - Assert.IsType(schema.Fields[2]); - Assert.IsType(schema.Fields[3]); - Assert.IsType(schema.Fields[4]); + Assert.IsType(schema.Fields[0]); + Assert.IsType(schema.Fields[1]); + Assert.IsType(schema.Fields[2]); + Assert.IsType(schema.Fields[3]); + Assert.IsType(schema.Fields[4]); + Assert.IsType(schema.Fields[5]); + Assert.IsType(schema.Fields[6]); + Assert.IsType(schema.Fields[7]); VerifyFieldName(schema.Fields[0].FieldName, new List { "$.FilterableString", "AS", "FilterableString" }); - VerifyFieldName(schema.Fields[1].FieldName, new List { "$.FilterableInt", "AS", "FilterableInt" }); - VerifyFieldName(schema.Fields[2].FieldName, new List { "$.FilterableNullableInt", "AS", "FilterableNullableInt" }); + VerifyFieldName(schema.Fields[1].FieldName, new List { "$.FullTextSearchableString", "AS", "FullTextSearchableString" }); + VerifyFieldName(schema.Fields[2].FieldName, new List { "$.FilterableStringEnumerable.*", "AS", "FilterableStringEnumerable" }); + VerifyFieldName(schema.Fields[3].FieldName, new List { "$.FullTextSearchableStringEnumerable", "AS", "FullTextSearchableStringEnumerable" }); + + VerifyFieldName(schema.Fields[4].FieldName, new List { "$.FilterableInt", "AS", "FilterableInt" }); + VerifyFieldName(schema.Fields[5].FieldName, new List { "$.FilterableNullableInt", "AS", "FilterableNullableInt" }); - 
VerifyFieldName(schema.Fields[3].FieldName, new List { "$.VectorDefaultIndexingOptions", "AS", "VectorDefaultIndexingOptions" }); - VerifyFieldName(schema.Fields[4].FieldName, new List { "$.vector_specific_indexing_options", "AS", "vector_specific_indexing_options" }); + VerifyFieldName(schema.Fields[6].FieldName, new List { "$.VectorDefaultIndexingOptions", "AS", "VectorDefaultIndexingOptions" }); + VerifyFieldName(schema.Fields[7].FieldName, new List { "$.vector_specific_indexing_options", "AS", "vector_specific_indexing_options" }); - Assert.Equal("10", ((VectorField)schema.Fields[3]).Attributes!["DIM"]); - Assert.Equal("FLOAT32", ((VectorField)schema.Fields[3]).Attributes!["TYPE"]); - Assert.Equal("COSINE", ((VectorField)schema.Fields[3]).Attributes!["DISTANCE_METRIC"]); + Assert.Equal("10", ((VectorField)schema.Fields[6]).Attributes!["DIM"]); + Assert.Equal("FLOAT32", ((VectorField)schema.Fields[6]).Attributes!["TYPE"]); + Assert.Equal("COSINE", ((VectorField)schema.Fields[6]).Attributes!["DISTANCE_METRIC"]); - Assert.Equal("20", ((VectorField)schema.Fields[4]).Attributes!["DIM"]); - Assert.Equal("FLOAT32", ((VectorField)schema.Fields[4]).Attributes!["TYPE"]); - Assert.Equal("L2", ((VectorField)schema.Fields[4]).Attributes!["DISTANCE_METRIC"]); + Assert.Equal("20", ((VectorField)schema.Fields[7]).Attributes!["DIM"]); + Assert.Equal("FLOAT32", ((VectorField)schema.Fields[7]).Attributes!["TYPE"]); + Assert.Equal("L2", ((VectorField)schema.Fields[7]).Attributes!["DISTANCE_METRIC"]); } [Theory] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs index 23a83606af1d..19158ce56e4f 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/AzureAISearch/AzureAISearchVectorStoreFixture.cs @@ -54,12 +54,12 @@ public 
AzureAISearchVectorStoreFixture() Properties = new List { new VectorStoreRecordKeyProperty("HotelId", typeof(string)), - new VectorStoreRecordDataProperty("HotelName", typeof(string)), + new VectorStoreRecordDataProperty("HotelName", typeof(string)) { IsFilterable = true, IsFullTextSearchable = true }, new VectorStoreRecordDataProperty("Description", typeof(string)), new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4 }, - new VectorStoreRecordDataProperty("Tags", typeof(string[])), - new VectorStoreRecordDataProperty("ParkingIncluded", typeof(bool?)), - new VectorStoreRecordDataProperty("LastRenovationDate", typeof(DateTimeOffset?)), + new VectorStoreRecordDataProperty("Tags", typeof(string[])) { IsFilterable = true }, + new VectorStoreRecordDataProperty("ParkingIncluded", typeof(bool?)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("LastRenovationDate", typeof(DateTimeOffset?)) { IsFilterable = true }, new VectorStoreRecordDataProperty("Rating", typeof(float?)) } }; @@ -212,7 +212,7 @@ public class Hotel public string HotelId { get; set; } [SearchableField(IsSortable = true)] - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true, IsFullTextSearchable = true)] public string HotelName { get; set; } [SearchableField(AnalyzerName = LexicalAnalyzerName.Values.EnLucene)] @@ -223,18 +223,18 @@ public class Hotel public ReadOnlyMemory? DescriptionEmbedding { get; set; } [SearchableField(IsFilterable = true, IsFacetable = true)] - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] #pragma warning disable CA1819 // Properties should not return arrays public string[] Tags { get; set; } #pragma warning restore CA1819 // Properties should not return arrays [JsonPropertyName("parking_is_included")] [SimpleField(IsFilterable = true, IsSortable = true, IsFacetable = true)] - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] public bool? 
ParkingIncluded { get; set; } [SimpleField(IsFilterable = true, IsSortable = true, IsFacetable = true)] - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] public DateTimeOffset? LastRenovationDate { get; set; } [SimpleField(IsFilterable = true, IsSortable = true, IsFacetable = true)] diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs index 2a762fd316cd..411225101ffc 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs @@ -543,7 +543,7 @@ public async Task TryCreateIndexWithUnsSupportedMetricFailsAsync() var message = (await Assert.ThrowsAsync(() => recordCollection.CreateCollectionAsync())).Message; - Assert.Equal("Unsupported distance function 'just eyeball it' for VectorStoreRecordVectorProperty 'Embedding'.", message); + Assert.Equal("Distance function 'just eyeball it' for VectorStoreRecordVectorProperty 'Embedding' is not supported by the Pinecone VectorStore.", message); } #pragma warning disable CA1812 diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs index 1ed9825d4aa9..d1a314829547 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs @@ -35,7 +35,7 @@ public QdrantVectorStoreFixture() Properties = new List { new VectorStoreRecordKeyProperty("HotelId", typeof(ulong)), - new VectorStoreRecordDataProperty("HotelName", typeof(string)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("HotelName", typeof(string)) { IsFilterable = true, IsFullTextSearchable = 
true }, new VectorStoreRecordDataProperty("HotelCode", typeof(int)) { IsFilterable = true }, new VectorStoreRecordDataProperty("ParkingIncluded", typeof(bool)) { IsFilterable = true, StoragePropertyName = "parking_is_included" }, new VectorStoreRecordDataProperty("HotelRating", typeof(float)) { IsFilterable = true }, @@ -49,9 +49,9 @@ public QdrantVectorStoreFixture() Properties = new List { new VectorStoreRecordKeyProperty("HotelId", typeof(Guid)), - new VectorStoreRecordDataProperty("HotelName", typeof(string)), + new VectorStoreRecordDataProperty("HotelName", typeof(string)) { IsFilterable = true, IsFullTextSearchable = true }, new VectorStoreRecordDataProperty("Description", typeof(string)), - new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) + new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4, DistanceFunction = DistanceFunction.ManhattanDistance } } }; } @@ -272,7 +272,7 @@ public record HotelInfo() public ulong HotelId { get; init; } /// A string metadata field. - [VectorStoreRecordData(IsFilterable = true)] + [VectorStoreRecordData(IsFilterable = true, IsFullTextSearchable = true)] public string? HotelName { get; set; } /// An int metadata field. @@ -310,7 +310,7 @@ public record HotelInfoWithGuidId() public Guid HotelId { get; init; } /// A string metadata field. - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true, IsFullTextSearchable = true)] public string? HotelName { get; set; } /// A data field. @@ -318,7 +318,7 @@ public record HotelInfoWithGuidId() public string Description { get; set; } /// A vector field. - [VectorStoreRecordVector] + [VectorStoreRecordVector(4, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)] public ReadOnlyMemory? 
DescriptionEmbedding { get; set; } } } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs index a24f11dc7f51..9e7a2fde0561 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisJsonVectorStoreRecordCollectionTests.cs @@ -71,6 +71,7 @@ public async Task ItCanCreateACollectionUpsertAndGetAsync(bool useRecordDefiniti Assert.Equal(record.HotelName, getResult?.HotelName); Assert.Equal(record.HotelCode, getResult?.HotelCode); Assert.Equal(record.Tags, getResult?.Tags); + Assert.Equal(record.FTSTags, getResult?.FTSTags); Assert.Equal(record.ParkingIncluded, getResult?.ParkingIncluded); Assert.Equal(record.LastRenovationDate, getResult?.LastRenovationDate); Assert.Equal(record.Rating, getResult?.Rating); @@ -129,6 +130,7 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition Assert.Equal(record.HotelName, getResult?.HotelName); Assert.Equal(record.HotelCode, getResult?.HotelCode); Assert.Equal(record.Tags, getResult?.Tags); + Assert.Equal(record.FTSTags, getResult?.FTSTags); Assert.Equal(record.ParkingIncluded, getResult?.ParkingIncluded); Assert.Equal(record.LastRenovationDate, getResult?.LastRenovationDate); Assert.Equal(record.Rating, getResult?.Rating); @@ -202,6 +204,7 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool includeVectors, bool Assert.Equal("My Hotel 1", getResult?.HotelName); Assert.Equal(1, getResult?.HotelCode); Assert.Equal(new[] { "pool", "air conditioning", "concierge" }, getResult?.Tags); + Assert.Equal(new[] { "pool", "air conditioning", "concierge" }, getResult?.FTSTags); Assert.True(getResult?.ParkingIncluded); Assert.Equal(new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), getResult?.LastRenovationDate); Assert.Equal(3.6, 
getResult?.Rating); @@ -342,6 +345,7 @@ private static Hotel CreateTestHotel(string hotelId, int hotelCode) HotelName = $"My Hotel {hotelCode}", HotelCode = 1, Tags = ["pool", "air conditioning", "concierge"], + FTSTags = ["pool", "air conditioning", "concierge"], ParkingIncluded = true, LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), Rating = 3.6, diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs index 4d8987b483d6..417c74397ad5 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs @@ -42,9 +42,10 @@ public RedisVectorStoreFixture() new VectorStoreRecordKeyProperty("HotelId", typeof(string)), new VectorStoreRecordDataProperty("HotelName", typeof(string)) { IsFilterable = true }, new VectorStoreRecordDataProperty("HotelCode", typeof(int)) { IsFilterable = true }, - new VectorStoreRecordDataProperty("Description", typeof(string)), + new VectorStoreRecordDataProperty("Description", typeof(string)) { IsFullTextSearchable = true }, new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4 }, - new VectorStoreRecordDataProperty("Tags", typeof(string[])), + new VectorStoreRecordDataProperty("Tags", typeof(string[])) { IsFilterable = true }, + new VectorStoreRecordDataProperty("FTSTags", typeof(string[])) { IsFullTextSearchable = true }, new VectorStoreRecordDataProperty("ParkingIncluded", typeof(bool)) { StoragePropertyName = "parking_is_included" }, new VectorStoreRecordDataProperty("LastRenovationDate", typeof(DateTimeOffset)), new VectorStoreRecordDataProperty("Rating", typeof(double)), @@ -58,7 +59,7 @@ public RedisVectorStoreFixture() new VectorStoreRecordKeyProperty("HotelId", typeof(string)), new VectorStoreRecordDataProperty("HotelName", 
typeof(string)) { IsFilterable = true }, new VectorStoreRecordDataProperty("HotelCode", typeof(int)) { IsFilterable = true }, - new VectorStoreRecordDataProperty("Description", typeof(string)), + new VectorStoreRecordDataProperty("Description", typeof(string)) { IsFullTextSearchable = true }, new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4 }, new VectorStoreRecordDataProperty("ParkingIncluded", typeof(bool)) { StoragePropertyName = "parking_is_included" }, new VectorStoreRecordDataProperty("Rating", typeof(double)), @@ -117,6 +118,7 @@ public async Task InitializeAsync() Description = "This is a great hotel.", DescriptionEmbedding = embedding, Tags = new[] { "pool", "air conditioning", "concierge" }, + FTSTags = new[] { "pool", "air conditioning", "concierge" }, parking_is_included = true, LastRenovationDate = new DateTimeOffset(1970, 1, 18, 0, 0, 0, TimeSpan.Zero), Rating = 3.6, @@ -230,15 +232,18 @@ public class Hotel [VectorStoreRecordData(IsFilterable = true)] public int HotelCode { get; init; } - [VectorStoreRecordData] + [VectorStoreRecordData(IsFullTextSearchable = true)] public string Description { get; init; } [VectorStoreRecordVector(4)] public ReadOnlyMemory? 
DescriptionEmbedding { get; init; } #pragma warning disable CA1819 // Properties should not return arrays - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] public string[] Tags { get; init; } + + [VectorStoreRecordData(IsFullTextSearchable = true)] + public string[] FTSTags { get; init; } #pragma warning restore CA1819 // Properties should not return arrays [JsonPropertyName("parking_is_included")] @@ -278,7 +283,7 @@ public class BasicHotel [VectorStoreRecordData(IsFilterable = true)] public int HotelCode { get; init; } - [VectorStoreRecordData] + [VectorStoreRecordData(IsFullTextSearchable = true)] public string Description { get; init; } [VectorStoreRecordVector(4)] diff --git a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs index e35967d5afa0..d4f06071f66b 100644 --- a/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs +++ b/dotnet/src/InternalUtilities/src/Data/VectorStoreRecordPropertyReader.cs @@ -262,6 +262,7 @@ public static VectorStoreRecordDefinition CreateVectorStoreRecordDefinitionFromT definitionProperties.Add(new VectorStoreRecordDataProperty(dataProperty.Name, dataProperty.PropertyType) { IsFilterable = dataAttribute.IsFilterable, + IsFullTextSearchable = dataAttribute.IsFullTextSearchable, StoragePropertyName = dataAttribute.StoragePropertyName }); } diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs index 6c0f8c733645..55a931df26b0 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs @@ -19,8 +19,19 @@ public sealed class VectorStoreRecordDataAttribute : Attribute /// /// Gets or sets a value 
indicating whether this data property is filterable. /// + /// + /// Default is . + /// public bool IsFilterable { get; init; } + /// + /// Gets or sets a value indicating whether this data property is full text searchable. + /// + /// + /// Default is . + /// + public bool IsFullTextSearchable { get; init; } + /// /// Gets or sets an optional name to use for the property in storage, if different from the property name. /// E.g. the property name might be "MyProperty" but the storage name might be "my_property". diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs index 643a1cfb3233..5c7563b96ebc 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs @@ -44,18 +44,28 @@ public VectorStoreRecordVectorAttribute(int Dimensions, string? IndexKind, strin /// /// Gets or sets the number of dimensions that the vector has. /// + /// + /// This property is required when creating collections, but may be omitted if not using that functionality. + /// If not provided when trying to create a collection, create will fail. + /// public int? Dimensions { get; private set; } /// /// Gets the kind of index to use. /// /// + /// + /// Default varies by database type. See the documentation of your chosen database connector for more information. + /// public string? IndexKind { get; private set; } /// /// Gets the distance function to use when comparing vectors. /// /// + /// + /// Default varies by database type. See the documentation of your chosen database connector for more information. + /// public string? 
DistanceFunction { get; private set; } /// diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs index 2fce9a28a412..fc5717c07c09 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs @@ -29,10 +29,22 @@ public VectorStoreRecordDataProperty(VectorStoreRecordDataProperty source) : base(source) { this.IsFilterable = source.IsFilterable; + this.IsFullTextSearchable = source.IsFullTextSearchable; } /// /// Gets or sets a value indicating whether this data property is filterable. /// + /// + /// Default is . + /// public bool IsFilterable { get; init; } + + /// + /// Gets or sets a value indicating whether this data property is full text searchable. + /// + /// + /// Default is . + /// + public bool IsFullTextSearchable { get; init; } } diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs index 11be572d293e..86c13bf62056 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs @@ -36,17 +36,27 @@ public VectorStoreRecordVectorProperty(VectorStoreRecordVectorProperty source) /// /// Gets or sets the number of dimensions that the vector has. /// + /// + /// This property is required when creating collections, but may be omitted if not using that functionality. + /// If not provided when trying to create a collection, create will fail. + /// public int? Dimensions { get; init; } /// /// Gets the kind of index to use. /// /// + /// + /// Default varies by database type. 
See the documentation of your chosen database connector for more information. + /// public string? IndexKind { get; init; } /// /// Gets the distance function to use when comparing vectors. /// /// + /// + /// Default varies by database type. See the documentation of your chosen database connector for more information. + /// public string? DistanceFunction { get; init; } } diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs index 80e2c0f3e448..cfddd8437425 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreRecordPropertyReaderTests.cs @@ -206,6 +206,9 @@ public void CreateVectorStoreRecordDefinitionFromTypeConvertsAllProps() Assert.True(data1.IsFilterable); Assert.False(data2.IsFilterable); + Assert.True(data1.IsFullTextSearchable); + Assert.False(data2.IsFullTextSearchable); + Assert.Equal(typeof(string), data1.PropertyType); Assert.Equal(typeof(string), data2.PropertyType); @@ -401,7 +404,7 @@ private sealed class MultiPropsModel [VectorStoreRecordKey] public string Key { get; set; } = string.Empty; - [VectorStoreRecordData(IsFilterable = true)] + [VectorStoreRecordData(IsFilterable = true, IsFullTextSearchable = true)] public string Data1 { get; set; } = string.Empty; [VectorStoreRecordData] @@ -422,7 +425,7 @@ private sealed class MultiPropsModel Properties = [ new VectorStoreRecordKeyProperty("Key", typeof(string)), - new VectorStoreRecordDataProperty("Data1", typeof(string)) { IsFilterable = true }, + new VectorStoreRecordDataProperty("Data1", typeof(string)) { IsFilterable = true, IsFullTextSearchable = true }, new VectorStoreRecordDataProperty("Data2", typeof(string)) { StoragePropertyName = "data_2" }, new VectorStoreRecordVectorProperty("Vector1", typeof(ReadOnlyMemory)) { Dimensions = 4, IndexKind = IndexKind.Flat, DistanceFunction 
= DistanceFunction.DotProductSimilarity }, new VectorStoreRecordVectorProperty("Vector2", typeof(ReadOnlyMemory)) From 1d3cf943f6fbc2f7fb13616b9f7305b9d0263932 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Fri, 2 Aug 2024 11:32:47 +0100 Subject: [PATCH 45/48] .Net: VectorStore: Adding a simple example and a custom mapper example. (#7598) ### Description Adding two more examples for the VectorStore functionality: - A super simple example, showing just the most basic of functionality, that's easier to get started with. - An example which shows how to use a custom mapper for a case where your storage model and data model don't match or you need to optimize mapping. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../VectorStoreQdrantContainerFixture.cs | 18 ++ .../VectorStore_DataIngestion_CustomMapper.cs | 204 ++++++++++++++++++ ...> VectorStore_DataIngestion_MultiStore.cs} | 2 +- .../VectorStore_DataIngestion_Simple.cs | 113 ++++++++++ dotnet/samples/Concepts/README.md | 3 + 5 files changed, 339 insertions(+), 1 deletion(-) create mode 100644 dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_CustomMapper.cs rename dotnet/samples/Concepts/Memory/{VectorStore_DataIngestion.cs => VectorStore_DataIngestion_MultiStore.cs} (97%) create mode 100644 dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_Simple.cs diff --git a/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs
index 59b5449120e0..820b5d3bf172 100644 --- a/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs +++ b/dotnet/samples/Concepts/Memory/VectorStoreFixtures/VectorStoreQdrantContainerFixture.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using Docker.DotNet; +using Qdrant.Client; namespace Memory.VectorStoreFixtures; @@ -24,6 +25,23 @@ public async Task ManualInitializeAsync() using var dockerClientConfiguration = new DockerClientConfiguration(); this._dockerClient = dockerClientConfiguration.CreateClient(); this._qdrantContainerId = await VectorStoreInfra.SetupQdrantContainerAsync(this._dockerClient); + + // Delay until the Qdrant server is ready. + var qdrantClient = new QdrantClient("localhost"); + var succeeded = false; + var attemptCount = 0; + while (!succeeded && attemptCount++ < 10) + { + try + { + await qdrantClient.ListCollectionsAsync(); + succeeded = true; + } + catch (Exception) + { + await Task.Delay(1000); + } + } } } diff --git a/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_CustomMapper.cs b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_CustomMapper.cs new file mode 100644 index 000000000000..db8e259f4e7a --- /dev/null +++ b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_CustomMapper.cs @@ -0,0 +1,204 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json; +using System.Text.Json.Nodes; +using Memory.VectorStoreFixtures; +using Microsoft.SemanticKernel.Connectors.OpenAI; +using Microsoft.SemanticKernel.Connectors.Redis; +using Microsoft.SemanticKernel.Data; +using Microsoft.SemanticKernel.Embeddings; +using StackExchange.Redis; + +namespace Memory; + +/// +/// An example showing how to ingest data into a vector store using with a custom mapper. +/// In this example, the storage model differs significantly from the data model, so a custom mapper is used to map between the two. 
+/// A is used to define the schema of the storage model, and this means that the connector +/// will not try and infer the schema from the data model. +/// In storage the data is stored as a JSON object that looks similar to this: +/// +/// { +/// "Term": "API", +/// "Definition": "Application Programming Interface. A set of rules and specifications that allow software components to communicate and exchange data.", +/// "DefinitionEmbedding": [ ... ] +/// } +/// +/// However, the data model is a class with a property for key and two dictionaries for the data (Term and Definition) and vector (DefinitionEmbedding). +/// +/// The example shows the following steps: +/// 1. Create an embedding generator. +/// 2. Create a Redis Vector Store using a custom factory for creating collections. +/// When constructing a collection, the factory injects a custom mapper that maps between the data model and the storage model if required. +/// 3. Ingest some data into the vector store. +/// 4. Read the data back from the vector store. +/// +/// You need a local instance of Docker running, since the associated fixture will try and start a Redis container in the local docker instance to run against. +/// +public class VectorStore_DataIngestion_CustomMapper(ITestOutputHelper output, VectorStoreRedisContainerFixture redisFixture) : BaseTest(output), IClassFixture +{ + /// + /// A record definition for the glossary entries that defines the storage schema of the record. 
+ /// + private static readonly VectorStoreRecordDefinition s_glossaryDefinition = new() + { + Properties = new List + { + new VectorStoreRecordKeyProperty("Key", typeof(string)), + new VectorStoreRecordDataProperty("Term", typeof(string)), + new VectorStoreRecordDataProperty("Definition", typeof(string)), + new VectorStoreRecordVectorProperty("DefinitionEmbedding", typeof(ReadOnlyMemory)) { Dimensions = 1536, DistanceFunction = DistanceFunction.DotProductSimilarity } + } + }; + + [Fact] + public async Task ExampleAsync() + { + // Create an embedding generation service. + var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService( + TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, + TestConfiguration.AzureOpenAIEmbeddings.Endpoint, + TestConfiguration.AzureOpenAIEmbeddings.ApiKey); + + // Initiate the docker container and construct the vector store using the custom factory for creating collections. + await redisFixture.ManualInitializeAsync(); + ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost:6379"); + var vectorStore = new RedisVectorStore(redis.GetDatabase(), new() { VectorStoreCollectionFactory = new Factory() }); + + // Get and create collection if it doesn't exist, using the record definition containing the storage model. + var collection = vectorStore.GetCollection("skglossary", s_glossaryDefinition); + await collection.CreateCollectionIfNotExistsAsync(); + + // Create glossary entries and generate embeddings for them. + var glossaryEntries = CreateGlossaryEntries().ToList(); + var tasks = glossaryEntries.Select(entry => Task.Run(async () => + { + entry.Vectors["DefinitionEmbedding"] = await textEmbeddingGenerationService.GenerateEmbeddingAsync((string)entry.Data["Definition"]); + })); + await Task.WhenAll(tasks); + + // Upsert the glossary entries into the collection and return their keys. 
+ var upsertedKeysTasks = glossaryEntries.Select(x => collection.UpsertAsync(x)); + var upsertedKeys = await Task.WhenAll(upsertedKeysTasks); + + // Retrieve one of the upserted records from the collection. + var upsertedRecord = await collection.GetAsync(upsertedKeys.First(), new() { IncludeVectors = true }); + + // Write upserted keys and one of the upserted records to the console. + Console.WriteLine($"Upserted keys: {string.Join(", ", upsertedKeys)}"); + Console.WriteLine($"Upserted record: {JsonSerializer.Serialize(upsertedRecord)}"); + } + + /// + /// A custom mapper that maps between the data model and the storage model. + /// + private sealed class Mapper : IVectorStoreRecordMapper + { + public (string Key, JsonNode Node) MapFromDataToStorageModel(GenericDataModel dataModel) + { + var jsonObject = new JsonObject(); + + jsonObject.Add("Term", dataModel.Data["Term"].ToString()); + jsonObject.Add("Definition", dataModel.Data["Definition"].ToString()); + + var vector = (ReadOnlyMemory)dataModel.Vectors["DefinitionEmbedding"]; + var jsonArray = new JsonArray(vector.ToArray().Select(x => JsonValue.Create(x)).ToArray()); + jsonObject.Add("DefinitionEmbedding", jsonArray); + + return (dataModel.Key, jsonObject); + } + + public GenericDataModel MapFromStorageToDataModel((string Key, JsonNode Node) storageModel, StorageToDataModelMapperOptions options) + { + var dataModel = new GenericDataModel + { + Key = storageModel.Key, + Data = new Dictionary + { + { "Term", (string)storageModel.Node["Term"]! }, + { "Definition", (string)storageModel.Node["Definition"]! 
} + }, + Vectors = new Dictionary + { + { "DefinitionEmbedding", new ReadOnlyMemory(storageModel.Node["DefinitionEmbedding"]!.AsArray().Select(x => (float)x!).ToArray()) } + } + }; + + return dataModel; + } + } + + /// + /// A factory for creating collections in the vector store + /// + private sealed class Factory : IRedisVectorStoreRecordCollectionFactory + { + public IVectorStoreRecordCollection CreateVectorStoreRecordCollection(IDatabase database, string name, VectorStoreRecordDefinition? vectorStoreRecordDefinition) + where TKey : notnull + where TRecord : class + { + // If the record definition is the glossary definition and the record type is the generic data model, inject the custom mapper into the collection options. + if (vectorStoreRecordDefinition == s_glossaryDefinition && typeof(TRecord) == typeof(GenericDataModel)) + { + var customCollection = new RedisJsonVectorStoreRecordCollection(database, name, new() { VectorStoreRecordDefinition = vectorStoreRecordDefinition, JsonNodeCustomMapper = new Mapper() }) as IVectorStoreRecordCollection; + return customCollection!; + } + + // Otherwise, just create a standard collection with the default mapper. + var collection = new RedisJsonVectorStoreRecordCollection(database, name, new() { VectorStoreRecordDefinition = vectorStoreRecordDefinition }) as IVectorStoreRecordCollection; + return collection!; + } + } + + /// + /// Sample generic data model class that can store any data. + /// + private sealed class GenericDataModel + { + public string Key { get; set; } + + public Dictionary Data { get; set; } + + public Dictionary Vectors { get; set; } + } + + /// + /// Create some sample glossary entries using the generic data model. + /// + /// A list of sample glossary entries. + private static IEnumerable CreateGlossaryEntries() + { + yield return new GenericDataModel + { + Key = "1", + Data = new() + { + { "Term", "API" }, + { "Definition", "Application Programming Interface. 
A set of rules and specifications that allow software components to communicate and exchange data." } + }, + Vectors = new() + }; + + yield return new GenericDataModel + { + Key = "2", + Data = new() + { + { "Term", "Connectors" }, + { "Definition", "Connectors allow you to integrate with various services provide AI capabilities, including LLM, AudioToText, TextToAudio, Embedding generation, etc." } + }, + Vectors = new() + }; + + yield return new GenericDataModel + { + Key = "3", + Data = new() + { + { "Term", "RAG" }, + { "Definition", "Retrieval Augmented Generation - a term that refers to the process of retrieving additional data to provide as context to an LLM to use when generating a response (completion) to a user’s question (prompt)." } + }, + Vectors = new() + }; + } +} diff --git a/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_MultiStore.cs similarity index 97% rename from dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs rename to dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_MultiStore.cs index 411e4ab8f8a4..18f0e5b476ca 100644 --- a/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion.cs +++ b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_MultiStore.cs @@ -28,7 +28,7 @@ namespace Memory; /// For some databases in this sample (Redis & Qdrant), you need a local instance of Docker running, since the associated fixtures will try and start containers in the local docker instance to run against. 
/// [Collection("Sequential")] -public class VectorStore_DataIngestion(ITestOutputHelper output, VectorStoreRedisContainerFixture redisFixture, VectorStoreQdrantContainerFixture qdrantFixture) : BaseTest(output), IClassFixture, IClassFixture +public class VectorStore_DataIngestion_MultiStore(ITestOutputHelper output, VectorStoreRedisContainerFixture redisFixture, VectorStoreQdrantContainerFixture qdrantFixture) : BaseTest(output), IClassFixture, IClassFixture { /// /// Example with dependency injection. diff --git a/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_Simple.cs b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_Simple.cs new file mode 100644 index 000000000000..341e5c2bbda2 --- /dev/null +++ b/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_Simple.cs @@ -0,0 +1,113 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json; +using Memory.VectorStoreFixtures; +using Microsoft.SemanticKernel.Connectors.OpenAI; +using Microsoft.SemanticKernel.Connectors.Qdrant; +using Microsoft.SemanticKernel.Data; +using Microsoft.SemanticKernel.Embeddings; +using Qdrant.Client; + +namespace Memory; + +/// +/// A simple example showing how to ingest data into a vector store using . +/// +/// The example shows the following steps: +/// 1. Create an embedding generator. +/// 2. Create a Qdrant Vector Store. +/// 3. Ingest some data into the vector store. +/// 4. Read the data back from the vector store. +/// +/// You need a local instance of Docker running, since the associated fixture will try and start a Qdrant container in the local docker instance to run against. +/// +[Collection("Sequential")] +public class VectorStore_DataIngestion_Simple(ITestOutputHelper output, VectorStoreQdrantContainerFixture qdrantFixture) : BaseTest(output), IClassFixture +{ + [Fact] + public async Task ExampleAsync() + { + // Create an embedding generation service. 
+ var textEmbeddingGenerationService = new AzureOpenAITextEmbeddingGenerationService( + TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, + TestConfiguration.AzureOpenAIEmbeddings.Endpoint, + TestConfiguration.AzureOpenAIEmbeddings.ApiKey); + + // Initiate the docker container and construct the vector store. + await qdrantFixture.ManualInitializeAsync(); + var vectorStore = new QdrantVectorStore(new QdrantClient("localhost")); + + // Get and create collection if it doesn't exist. + var collection = vectorStore.GetCollection("skglossary"); + await collection.CreateCollectionIfNotExistsAsync(); + + // Create glossary entries and generate embeddings for them. + var glossaryEntries = CreateGlossaryEntries().ToList(); + var tasks = glossaryEntries.Select(entry => Task.Run(async () => + { + entry.DefinitionEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(entry.Definition); + })); + await Task.WhenAll(tasks); + + // Upsert the glossary entries into the collection and return their keys. + var upsertedKeysTasks = glossaryEntries.Select(x => collection.UpsertAsync(x)); + var upsertedKeys = await Task.WhenAll(upsertedKeysTasks); + + // Retrieve one of the upserted records from the collection. + var upsertedRecord = await collection.GetAsync(upsertedKeys.First(), new() { IncludeVectors = true }); + + // Write upserted keys and one of the upserted records to the console. + Console.WriteLine($"Upserted keys: {string.Join(", ", upsertedKeys)}"); + Console.WriteLine($"Upserted record: {JsonSerializer.Serialize(upsertedRecord)}"); + } + + /// + /// Sample model class that represents a glossary entry. + /// + /// + /// Note that each property is decorated with an attribute that specifies how the property should be treated by the vector store. + /// This allows us to create a collection in the vector store and upsert and retrieve instances of this class without any further configuration. 
+ /// + private sealed class Glossary + { + [VectorStoreRecordKey] + public ulong Key { get; set; } + + [VectorStoreRecordData] + public string Term { get; set; } + + [VectorStoreRecordData] + public string Definition { get; set; } + + [VectorStoreRecordVector(1536)] + public ReadOnlyMemory DefinitionEmbedding { get; set; } + } + + /// + /// Create some sample glossary entries. + /// + /// A list of sample glossary entries. + private static IEnumerable CreateGlossaryEntries() + { + yield return new Glossary + { + Key = 1, + Term = "API", + Definition = "Application Programming Interface. A set of rules and specifications that allow software components to communicate and exchange data." + }; + + yield return new Glossary + { + Key = 2, + Term = "Connectors", + Definition = "Connectors allow you to integrate with various services provide AI capabilities, including LLM, AudioToText, TextToAudio, Embedding generation, etc." + }; + + yield return new Glossary + { + Key = 3, + Term = "RAG", + Definition = "Retrieval Augmented Generation - a term that refers to the process of retrieving additional data to provide as context to an LLM to use when generating a response (completion) to a user’s question (prompt)." 
+ }; + } +} diff --git a/dotnet/samples/Concepts/README.md b/dotnet/samples/Concepts/README.md index 77427c605193..26eef28982a7 100644 --- a/dotnet/samples/Concepts/README.md +++ b/dotnet/samples/Concepts/README.md @@ -104,6 +104,9 @@ Down below you can find the code snippets that demonstrate the usage of many Sem - [TextMemoryPlugin_GeminiEmbeddingGeneration](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/TextMemoryPlugin_GeminiEmbeddingGeneration.cs) - [TextMemoryPlugin_MultipleMemoryStore](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/TextMemoryPlugin_MultipleMemoryStore.cs) - [TextMemoryPlugin_RecallJsonSerializationWithOptions](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/TextMemoryPlugin_RecallJsonSerializationWithOptions.cs) +- [VectorStore_DataIngestion_Simple: A simple example of how to do data ingestion into a vector store when getting started.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_Simple.cs) +- [VectorStore_DataIngestion_MultiStore: An example of data ingestion that uses the same code to ingest into multiple vector store types.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_MultiStore.cs) +- [VectorStore_DataIngestion_CustomMapper: An example that shows how to use a custom mapper for when your data model and storage model don't match.](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Memory/VectorStore_DataIngestion_CustomMapper.cs) ## Optimization - Examples of different cost and performance optimization techniques From a7bc61f2754969bd198c378f4a92b19343dee0ee Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Fri, 2 Aug 2024 18:01:57 +0100 Subject: [PATCH 46/48] .Net: Adding improvements to xml docs based on suggestions from
team. (#7618) ### Description A few improvements to the VectorStore xml docs based on suggestions from the team. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../RecordAttributes/VectorStoreRecordDataAttribute.cs | 1 + .../Data/RecordAttributes/VectorStoreRecordKeyAttribute.cs | 5 ++++- .../RecordAttributes/VectorStoreRecordVectorAttribute.cs | 5 ++++- .../Data/RecordDefinition/DistanceFunction.cs | 7 ++++++- .../Data/RecordDefinition/IndexKind.cs | 7 ++++++- .../Data/RecordDefinition/VectorStoreRecordDataProperty.cs | 5 ++++- .../Data/RecordDefinition/VectorStoreRecordDefinition.cs | 5 ++++- .../Data/RecordDefinition/VectorStoreRecordKeyProperty.cs | 5 ++++- .../Data/RecordDefinition/VectorStoreRecordProperty.cs | 5 ++++- .../RecordDefinition/VectorStoreRecordVectorProperty.cs | 5 ++++- .../Data/RecordOptions/DeleteRecordOptions.cs | 6 ++++-- .../Data/RecordOptions/GetRecordOptions.cs | 2 +- .../Data/RecordOptions/UpsertRecordOptions.cs | 5 ++++- 13 files changed, 50 insertions(+), 13 deletions(-) diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs index 55a931df26b0..f31b5c38352e 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordDataAttribute.cs @@ -11,6 +11,7 @@ namespace Microsoft.SemanticKernel.Data; /// /// Marking a property as 'data' means that 
the property is not a key, and not a vector, but optionally /// this property may have an associated vector field containing an embedding for this data. +/// The characteristics defined here will influence how the property is treated by the vector store. /// [Experimental("SKEXP0001")] [AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordKeyAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordKeyAttribute.cs index d85a3dff6da2..32376956b853 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordKeyAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordKeyAttribute.cs @@ -6,8 +6,11 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Attribute to mark a property on a record class as the key under which data is stored in a vector store. +/// Attribute to mark a property on a record class as the key under which the record is stored in a vector store. /// +/// +/// The characteristics defined here will influence how the property is treated by the vector store. +/// [Experimental("SKEXP0001")] [AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] public sealed class VectorStoreRecordKeyAttribute : Attribute diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs index 5c7563b96ebc..74a2a0796811 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordAttributes/VectorStoreRecordVectorAttribute.cs @@ -6,8 +6,11 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Attribute to mark a property on a record class as the vector. 
+/// Attribute to mark a property on a record class as a vector. /// +/// +/// The characteristics defined here will influence how the property is treated by the vector store. +/// [Experimental("SKEXP0001")] [AttributeUsage(AttributeTargets.Property, AllowMultiple = false)] public sealed class VectorStoreRecordVectorAttribute : Attribute diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs index 9163ebb39c87..32601243966b 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/DistanceFunction.cs @@ -5,8 +5,13 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Defines the distance functions that can be used to compare vectors. +/// Defines a list of well known distance functions that can be used to compare vectors. /// +/// +/// Not all Vector Store connectors support all distance functions and some connectors may +/// support additional distance functions that are not defined here. See the documentation +/// for each connector for more information on what is supported. +/// [Experimental("SKEXP0001")] public static class DistanceFunction { diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs index 0a59454c42b7..364baaa8e727 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/IndexKind.cs @@ -5,8 +5,13 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Defines the index types that can be used to index vectors. +/// Defines a list of well known index types that can be used to index vectors. 
/// +/// +/// Not all Vector Store connectors support all index types and some connectors may +/// support additional index types that are not defined here. See the documentation +/// for each connector for more information on what is supported. +/// [Experimental("SKEXP0001")] public static class IndexKind { diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs index fc5717c07c09..9dec25aa4ce1 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDataProperty.cs @@ -6,8 +6,11 @@ namespace Microsoft.SemanticKernel.Data; /// -/// A description of a data property on a record for storage in a vector store. +/// Defines a data property on a vector store record. /// +/// +/// The characteristics defined here will influence how the property is treated by the vector store. +/// [Experimental("SKEXP0001")] public sealed class VectorStoreRecordDataProperty : VectorStoreRecordProperty { diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDefinition.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDefinition.cs index 558bfc77b953..455bd5842c47 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDefinition.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordDefinition.cs @@ -6,8 +6,11 @@ namespace Microsoft.SemanticKernel.Data; /// -/// A description of the properties of a record stored in a vector store, plus how the properties are used. +/// A description of the properties of a record stored in a vector store. /// +/// +/// Each property contains additional information about how the property will be treated by the vector store. 
+/// [Experimental("SKEXP0001")] public sealed class VectorStoreRecordDefinition { diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs index d95dc11ab072..6ba9725e2da4 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordKeyProperty.cs @@ -6,8 +6,11 @@ namespace Microsoft.SemanticKernel.Data; /// -/// A description of a key property on a record for storage in a vector store. +/// Defines a key property on a vector store record. /// +/// +/// The characteristics defined here will influence how the property is treated by the vector store. +/// [Experimental("SKEXP0001")] public sealed class VectorStoreRecordKeyProperty : VectorStoreRecordProperty { diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs index 951b17afabaa..400ae7065355 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordProperty.cs @@ -6,8 +6,11 @@ namespace Microsoft.SemanticKernel.Data; /// -/// A description of a property on a record for storage in a vector store. +/// Defines a base property class for properties on a vector store record. /// +/// +/// The characteristics defined here will influence how the property is treated by the vector store. 
+/// [Experimental("SKEXP0001")] public abstract class VectorStoreRecordProperty { diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs index 86c13bf62056..4f4b3a1bce0a 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordDefinition/VectorStoreRecordVectorProperty.cs @@ -6,8 +6,11 @@ namespace Microsoft.SemanticKernel.Data; /// -/// A description of a vector property on a record for storage in a vector store. +/// Defines a vector property on a vector store record. /// +/// +/// The characteristics defined here will influence how the property is treated by the vector store. +/// [Experimental("SKEXP0001")] public sealed class VectorStoreRecordVectorProperty : VectorStoreRecordProperty { diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs index 6de2e07f66c4..4f034d125a6d 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/DeleteRecordOptions.cs @@ -5,9 +5,11 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Optional options when calling . -/// Reserved for future use. +/// Options when calling . /// +/// +/// This class does not currently include any options, but is added for future extensibility of the API. 
+/// [Experimental("SKEXP0001")] public class DeleteRecordOptions { diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs index 18e59ec7b9d9..5330e076acea 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/GetRecordOptions.cs @@ -5,7 +5,7 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Optional options when calling . +/// Options when calling . /// [Experimental("SKEXP0001")] public class GetRecordOptions diff --git a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs index 8655cc25fde5..c1d9cba35b5d 100644 --- a/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/Data/RecordOptions/UpsertRecordOptions.cs @@ -5,9 +5,12 @@ namespace Microsoft.SemanticKernel.Data; /// -/// Optional options when calling . +/// Options when calling . /// Reserved for future use. /// +/// +/// This class does not currently include any options, but is added for future extensibility of the API. +/// [Experimental("SKEXP0001")] public class UpsertRecordOptions { From d9c31e25b15b752c12b79da6da5f9d2a6cd83d4f Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Tue, 6 Aug 2024 10:58:31 +0100 Subject: [PATCH 47/48] .Net: Fix service collection extensions return type typo. (#7897) ### Description Fix service collection extensions return type typo. 
### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone :smile: --- .../AzureAISearchServiceCollectionExtensions.cs | 6 +++--- .../PineconeServiceCollectionExtensions.cs | 4 ++-- .../QdrantServiceCollectionExtensions.cs | 4 ++-- .../RedisServiceCollectionExtensions.cs | 4 ++-- .../SemanticKernel.Core/Data/ServiceCollectionExtensions.cs | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs index f7dca74cb00b..3c55d6ade628 100644 --- a/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.AzureAISearch/AzureAISearchServiceCollectionExtensions.cs @@ -20,7 +20,7 @@ public static class AzureAISearchServiceCollectionExtensions /// The to register the on. /// Optional options to further configure the . /// An optional service id to use as the service key. - /// The kernel builder. + /// The service collection. public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, AzureAISearchVectorStoreOptions? options = default, string? serviceId = default) { // If we are not constructing the SearchIndexClient, add the IVectorStore as transient, since we @@ -48,7 +48,7 @@ public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollec /// The credential to authenticate to Azure AI Search with. 
/// Optional options to further configure the . /// An optional service id to use as the service key. - /// The kernel builder. + /// The service collection. public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, Uri endpoint, TokenCredential tokenCredential, AzureAISearchVectorStoreOptions? options = default, string? serviceId = default) { Verify.NotNull(endpoint); @@ -77,7 +77,7 @@ public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollec /// The credential to authenticate to Azure AI Search with. /// Optional options to further configure the . /// An optional service id to use as the service key. - /// The kernel builder. + /// The service collection. public static IServiceCollection AddAzureAISearchVectorStore(this IServiceCollection services, Uri endpoint, AzureKeyCredential credential, AzureAISearchVectorStoreOptions? options = default, string? serviceId = default) { Verify.NotNull(endpoint); diff --git a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs index eb7b8b15ff78..8473d4fbd79e 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeServiceCollectionExtensions.cs @@ -17,7 +17,7 @@ public static class PineconeServiceCollectionExtensions /// The to register the on. /// Optional options to further configure the . /// An optional service id to use as the service key. - /// The kernel builder. + /// The service collection. public static IServiceCollection AddPineconeVectorStore(this IServiceCollection services, PineconeVectorStoreOptions? options = default, string? 
serviceId = default) { // If we are not constructing the PineconeClient, add the IVectorStore as transient, since we @@ -44,7 +44,7 @@ public static IServiceCollection AddPineconeVectorStore(this IServiceCollection /// The api key for Pinecone. /// Optional options to further configure the . /// An optional service id to use as the service key. - /// The kernel builder. + /// The service collection. public static IServiceCollection AddPineconeVectorStore(this IServiceCollection services, string apiKey, PineconeVectorStoreOptions? options = default, string? serviceId = default) { services.AddKeyedSingleton( diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs index 1b006146aba6..b534b2ea7578 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantServiceCollectionExtensions.cs @@ -17,7 +17,7 @@ public static class QdrantServiceCollectionExtensions /// The to register the on. /// Optional options to further configure the . /// An optional service id to use as the service key. - /// The kernel builder. + /// The service collection. public static IServiceCollection AddQdrantVectorStore(this IServiceCollection services, QdrantVectorStoreOptions? options = default, string? serviceId = default) { // If we are not constructing the QdrantClient, add the IVectorStore as transient, since we @@ -46,7 +46,7 @@ public static IServiceCollection AddQdrantVectorStore(this IServiceCollection se /// The Qdrant service API key. /// Optional options to further configure the . /// An optional service id to use as the service key. - /// The kernel builder. + /// The service collection. public static IServiceCollection AddQdrantVectorStore(this IServiceCollection services, string host, int port = 6334, bool https = false, string? 
apiKey = default, QdrantVectorStoreOptions? options = default, string? serviceId = default) { services.AddKeyedSingleton( diff --git a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs index a2a0c93d5492..5a55b12f8c39 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Redis/RedisServiceCollectionExtensions.cs @@ -17,7 +17,7 @@ public static class RedisServiceCollectionExtensions /// The to register the on. /// Optional options to further configure the . /// An optional service id to use as the service key. - /// The kernel builder. + /// The service collection. public static IServiceCollection AddRedisVectorStore(this IServiceCollection services, RedisVectorStoreOptions? options = default, string? serviceId = default) { // If we are not constructing the ConnectionMultiplexer, add the IVectorStore as transient, since we @@ -44,7 +44,7 @@ public static IServiceCollection AddRedisVectorStore(this IServiceCollection ser /// The Redis connection configuration string. If not provided, an instance will be requested from the dependency injection container. /// Optional options to further configure the . /// An optional service id to use as the service key. - /// The kernel builder. + /// The service collection. public static IServiceCollection AddRedisVectorStore(this IServiceCollection services, string redisConnectionConfiguration, RedisVectorStoreOptions? options = default, string? 
serviceId = default) { // If we are constructing the ConnectionMultiplexer, add the IVectorStore as singleton, since we are managing the lifetime diff --git a/dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs b/dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs index 9d789f8ce93f..83aaf7b57af4 100644 --- a/dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs +++ b/dotnet/src/SemanticKernel.Core/Data/ServiceCollectionExtensions.cs @@ -16,7 +16,7 @@ public static class ServiceCollectionExtensions /// /// The to register the on. /// An optional service id to use as the service key. - /// The kernel builder. + /// The service collection. public static IServiceCollection AddVolatileVectorStore(this IServiceCollection services, string? serviceId = default) { services.AddKeyedSingleton(serviceId); From 46c3c89f5c5dbc355794ac231b509e142f4fb770 Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Tue, 6 Aug 2024 16:50:47 +0100 Subject: [PATCH 48/48] .Net: Increase connection timeout for redis integration tests. (#7901) ### Motivation and Context Integration tests for redis are intermittently failing with connection errors, so increasing the connection timeout. ### Description Increase connection timeout for redis integration tests. 
### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone :smile: --- .../Connectors/Memory/Redis/RedisVectorStoreFixture.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs index 417c74397ad5..3256cae3e79e 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Redis/RedisVectorStoreFixture.cs @@ -85,7 +85,7 @@ public async Task InitializeAsync() this._containerId = await SetupRedisContainerAsync(this._client); // Connect to redis. - ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost:6379"); + ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost:6379,connectTimeout=60000,connectRetry=5"); this.Database = redis.GetDatabase(); // Create a schema for the vector store.