Skip to content

Commit 7b06fcf

Browse files
TheovanKraay and sobychacko
authored and committed
Add Azure CosmosDB vector store support
- Implement core vector store module for CosmosDB integration
- Add Spring Boot auto-configuration capabilities
- Integrate batch processing strategy for optimized operations
- Include comprehensive tests for core and auto-config modules
- Add reference docs for the CosmosDB vector store support
1 parent 2c17577 commit 7b06fcf

File tree

15 files changed

+1526
-0
lines changed

15 files changed

+1526
-0
lines changed

pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
<module>document-readers/pdf-reader</module>
2828
<module>document-readers/tika-reader</module>
2929

30+
<module>vector-stores/spring-ai-azure-cosmos-db-store</module>
3031
<module>vector-stores/spring-ai-azure-store</module>
3132
<module>vector-stores/spring-ai-cassandra-store</module>
3233
<module>vector-stores/spring-ai-chroma-store</module>

spring-ai-core/src/main/java/org/springframework/ai/observation/conventions/VectorStoreProvider.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ public enum VectorStoreProvider {
3434
AZURE("azure"),
3535
CASSANDRA("cassandra"),
3636
CHROMA("chroma"),
37+
COSMOSDB("cosmosdb"),
3738
ELASTICSEARCH("elasticsearch"),
3839
GEMFIRE("gemfire"),
3940
HANA("hana"),
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
= Azure Cosmos DB
2+
3+
This section walks you through setting up `CosmosDBVectorStore` to store document embeddings and perform similarity searches.
4+
5+
== What is Azure Cosmos DB?
6+
7+
link:https://azure.microsoft.com/en-us/services/cosmos-db/[Azure Cosmos DB] is Microsoft's globally distributed cloud-native database service designed for mission-critical applications.
8+
It offers high availability, low latency, and the ability to scale horizontally to meet modern application demands.
9+
It was built from the ground up with global distribution, fine-grained multi-tenancy, and horizontal scalability at its core.
10+
It is a foundational service in Azure, used by most of Microsoft’s mission-critical applications at global scale, including Teams, Skype, Xbox Live, Office 365, Bing, Azure Active Directory, Azure Portal, Microsoft Store, and many others.
11+
It is also used by thousands of external customers, including OpenAI for ChatGPT and other mission-critical AI applications that require elastic scale, turnkey global distribution, low latency, and high availability across the planet.
12+
13+
== What is DiskANN?
14+
15+
DiskANN (Disk-based Approximate Nearest Neighbor Search) is an innovative technology used in Azure Cosmos DB to enhance the performance of vector searches.
16+
It enables efficient and scalable similarity searches across high-dimensional data by indexing embeddings stored in Cosmos DB.
17+
18+
DiskANN provides the following benefits:
19+
20+
* **Efficiency**: By utilizing disk-based structures, DiskANN significantly reduces the time required to find nearest neighbors compared to traditional methods.
21+
* **Scalability**: It can handle large datasets that exceed memory capacity, making it suitable for various applications, including machine learning and AI-driven solutions.
22+
* **Low Latency**: DiskANN minimizes latency during search operations, ensuring that applications can retrieve results quickly even with substantial data volumes.
23+
24+
In Spring AI's Azure Cosmos DB integration, DiskANN indexes are created and used for vector searches to ensure optimal performance for similarity queries.
25+
26+
== Setting up Azure Cosmos DB Vector Store with Auto Configuration
27+
28+
The following code demonstrates how to set up the `CosmosDBVectorStore` with auto-configuration:
29+
30+
```java
31+
package com.example.demo;
32+
33+
import io.micrometer.observation.ObservationRegistry;
34+
import org.slf4j.Logger;
35+
import org.slf4j.LoggerFactory;
36+
import org.springframework.ai.document.Document;
37+
import org.springframework.ai.vectorstore.SearchRequest;
38+
import org.springframework.ai.vectorstore.VectorStore;
39+
import org.springframework.beans.factory.annotation.Autowired;
40+
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
41+
import org.springframework.boot.CommandLineRunner;
42+
import org.springframework.boot.SpringApplication;
43+
import org.springframework.boot.autoconfigure.SpringBootApplication;
44+
import org.springframework.context.annotation.Bean;
45+
import org.springframework.context.annotation.Lazy;
46+
47+
import java.util.List;
48+
import java.util.Map;
49+
import java.util.UUID;
50+
51+
import static org.assertj.core.api.Assertions.assertThat;
52+
53+
@SpringBootApplication
54+
@EnableAutoConfiguration
55+
public class DemoApplication implements CommandLineRunner {
56+
57+
private static final Logger log = LoggerFactory.getLogger(DemoApplication.class);
58+
59+
@Lazy
60+
@Autowired
61+
private VectorStore vectorStore;
62+
63+
public static void main(String[] args) {
64+
SpringApplication.run(DemoApplication.class, args);
65+
}
66+
67+
@Override
68+
public void run(String... args) throws Exception {
69+
Document document1 = new Document(UUID.randomUUID().toString(), "Sample content1", Map.of("key1", "value1"));
70+
Document document2 = new Document(UUID.randomUUID().toString(), "Sample content2", Map.of("key2", "value2"));
71+
vectorStore.add(List.of(document1, document2));
72+
List<Document> results = vectorStore.similaritySearch(SearchRequest.query("Sample content").withTopK(1));
73+
74+
log.info("Search results: {}", results);
75+
76+
// Remove the documents from the vector store
77+
vectorStore.delete(List.of(document1.getId(), document2.getId()));
78+
}
79+
80+
@Bean
81+
public ObservationRegistry observationRegistry() {
82+
return ObservationRegistry.create();
83+
}
84+
}
85+
```
86+
87+
88+
== Auto Configuration
89+
90+
Add the following dependency to your Maven project:
91+
92+
[source,xml]
93+
----
94+
<dependency>
95+
<groupId>org.springframework.ai</groupId>
96+
<artifactId>spring-ai-azure-cosmos-db-store-spring-boot-starter</artifactId>
97+
</dependency>
98+
----
99+
100+
== Configuration Properties
101+
102+
The following configuration properties are available for the Cosmos DB vector store:
103+
104+
|===========================
105+
| Property | Description
106+
| spring.ai.vectorstore.cosmosdb.databaseName | The name of the Cosmos DB database to use.
107+
| spring.ai.vectorstore.cosmosdb.containerName | The name of the Cosmos DB container to use.
108+
| spring.ai.vectorstore.cosmosdb.partitionKeyPath | The path for the partition key.
109+
| spring.ai.vectorstore.cosmosdb.metadataFields | Comma-separated list of metadata fields.
110+
| spring.ai.vectorstore.cosmosdb.vectorStoreThoughput | The throughput for the vector store.
111+
| spring.ai.vectorstore.cosmosdb.vectorDimensions | The number of dimensions for the vectors.
112+
| spring.ai.vectorstore.cosmosdb.endpoint | The endpoint for the Cosmos DB.
113+
| spring.ai.vectorstore.cosmosdb.key | The key for the Cosmos DB.
114+
|===========================
115+
116+
117+
== Complex Searches with Filters
118+
119+
You can perform more complex searches using filters in the Cosmos DB vector store.
120+
Below is a sample demonstrating how to use filters in your search queries.
121+
122+
[source,java]
123+
----
124+
// Metadata for the two sample documents; "country" is the field filtered on below.
Map<String, Object> ukMetadata = new HashMap<>();
ukMetadata.put("country", "UK");
ukMetadata.put("year", 2021);
ukMetadata.put("city", "London");

Map<String, Object> nlMetadata = new HashMap<>();
nlMetadata.put("country", "NL");
nlMetadata.put("year", 2022);
nlMetadata.put("city", "Amsterdam");

Document ukDocument = new Document("1", "A document about the UK", ukMetadata);
Document nlDocument = new Document("2", "A document about the Netherlands", nlMetadata);

vectorStore.add(List.of(ukDocument, nlDocument));

// Restrict the similarity search to documents whose country is UK or NL.
FilterExpressionBuilder builder = new FilterExpressionBuilder();
var countryFilter = builder.in("country", "UK", "NL").build();

SearchRequest request = SearchRequest.query("The World")
    .withTopK(10)
    .withFilterExpression(countryFilter);
List<Document> results = vectorStore.similaritySearch(request);
143+
----
144+
145+
== Setting up Azure Cosmos DB Vector Store without Auto Configuration
146+
147+
The following code demonstrates how to set up the `CosmosDBVectorStore` without relying on auto-configuration:
148+
149+
```java
150+
package com.example.demo;
151+
152+
import com.azure.cosmos.CosmosAsyncClient;
import com.azure.cosmos.CosmosClientBuilder;
import io.micrometer.observation.ObservationRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ai.document.Document;
import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.transformers.TransformersEmbeddingModel;
import org.springframework.ai.vectorstore.CosmosDBVectorStore;
import org.springframework.ai.vectorstore.CosmosDBVectorStoreConfig;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Lazy;

import java.util.List;
import java.util.Map;
import java.util.UUID;
171+
172+
@SpringBootApplication
173+
public class DemoApplication implements CommandLineRunner {
174+
175+
@Lazy
176+
@Autowired
177+
private VectorStore vectorStore;
178+
179+
@Lazy
180+
@Autowired
181+
private EmbeddingModel embeddingModel;
182+
183+
public static void main(String[] args) {
184+
SpringApplication.run(DemoApplication.class, args);
185+
}
186+
187+
@Override
188+
public void run(String... args) throws Exception {
189+
Document document1 = new Document(UUID.randomUUID().toString(), "Sample content1", Map.of("key1", "value1"));
190+
Document document2 = new Document(UUID.randomUUID().toString(), "Sample content2", Map.of("key2", "value2"));
191+
vectorStore.add(List.of(document1, document2));
192+
193+
List<Document> results = vectorStore.similaritySearch(SearchRequest.query("Sample content").withTopK(1));
194+
log.info("Search results: {}", results);
195+
}
196+
197+
@Bean
198+
public ObservationRegistry observationRegistry() {
199+
return ObservationRegistry.create();
200+
}
201+
202+
@Bean
203+
public VectorStore vectorStore(ObservationRegistry observationRegistry) {
204+
CosmosDBVectorStoreConfig config = new CosmosDBVectorStoreConfig();
205+
config.setDatabaseName("spring-ai-sample");
206+
config.setContainerName("container");
207+
config.setMetadataFields("country,city");
208+
config.setVectorStoreThoughput(400);
209+
210+
CosmosAsyncClient cosmosClient = new CosmosClientBuilder()
211+
.endpoint(System.getenv("COSMOSDB_AI_ENDPOINT"))
212+
.userAgentSuffix("SpringAI-CDBNoSQL-VectorStore")
213+
.key(System.getenv("COSMOSDB_AI_KEY"))
214+
.gatewayMode()
215+
.buildAsyncClient();
216+
217+
return new CosmosDBVectorStore(observationRegistry, null, cosmosClient, config, embeddingModel);
218+
}
219+
220+
@Bean
221+
public EmbeddingModel embeddingModel() {
222+
return new TransformersEmbeddingModel();
223+
}
224+
}
225+
```
226+
227+
== Manual Dependency Setup
228+
229+
Add the following dependency in your Maven project:
230+
231+
[source,xml]
232+
----
233+
<dependency>
234+
<groupId>org.springframework.ai</groupId>
235+
<artifactId>spring-ai-azure-cosmos-db-store</artifactId>
236+
</dependency>
237+
----

spring-ai-spring-boot-autoconfigure/pom.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,14 @@
387387
<optional>true</optional>
388388
</dependency>
389389

390+
<!-- Azure Cosmos DB vector store -->
391+
<dependency>
392+
<groupId>org.springframework.ai</groupId>
393+
<artifactId>spring-ai-azure-cosmos-db-store</artifactId>
394+
<version>${project.parent.version}</version>
395+
<optional>true</optional>
396+
</dependency>
397+
390398
<!-- test dependencies -->
391399

392400
<dependency>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* Copyright 2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.autoconfigure.vectorstore.cosmosdb;
18+
19+
import com.azure.cosmos.CosmosClientBuilder;
20+
import org.springframework.ai.embedding.EmbeddingModel;
21+
import org.springframework.ai.vectorstore.CosmosDBVectorStore;
22+
import org.springframework.ai.vectorstore.CosmosDBVectorStoreConfig;
23+
import org.springframework.ai.vectorstore.observation.VectorStoreObservationConvention;
24+
import org.springframework.beans.factory.ObjectProvider;
25+
import org.springframework.boot.autoconfigure.AutoConfiguration;
26+
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
27+
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
28+
import org.springframework.boot.context.properties.EnableConfigurationProperties;
29+
import org.springframework.context.annotation.Bean;
30+
import com.azure.cosmos.CosmosAsyncClient;
31+
import io.micrometer.observation.ObservationRegistry;
32+
33+
/**
34+
* @author Theo van Kraay
35+
* @since 1.0.0
36+
*/
37+
38+
@AutoConfiguration
39+
@ConditionalOnClass({ CosmosDBVectorStore.class, EmbeddingModel.class, CosmosAsyncClient.class })
40+
@EnableConfigurationProperties(CosmosDBVectorStoreProperties.class)
41+
public class CosmosDBVectorStoreAutoConfiguration {
42+
43+
String endpoint;
44+
45+
String key;
46+
47+
@Bean
48+
public CosmosAsyncClient cosmosClient(CosmosDBVectorStoreProperties properties) {
49+
return new CosmosClientBuilder().endpoint(properties.getEndpoint())
50+
.userAgentSuffix("SpringAI-CDBNoSQL-VectorStore")
51+
.key(properties.getKey())
52+
.gatewayMode()
53+
.buildAsyncClient();
54+
}
55+
56+
@Bean
57+
@ConditionalOnMissingBean
58+
public CosmosDBVectorStore cosmosDBVectorStore(ObservationRegistry observationRegistry,
59+
ObjectProvider<VectorStoreObservationConvention> customObservationConvention,
60+
CosmosDBVectorStoreProperties properties, CosmosAsyncClient cosmosAsyncClient,
61+
EmbeddingModel embeddingModel) {
62+
63+
CosmosDBVectorStoreConfig config = new CosmosDBVectorStoreConfig();
64+
config.setDatabaseName(properties.getDatabaseName());
65+
config.setContainerName(properties.getContainerName());
66+
config.setMetadataFields(properties.getMetadataFields());
67+
config.setVectorStoreThoughput(properties.getVectorStoreThoughput());
68+
config.setVectorDimensions(properties.getVectorDimensions());
69+
return new CosmosDBVectorStore(observationRegistry, customObservationConvention.getIfAvailable(),
70+
cosmosAsyncClient, config, embeddingModel);
71+
}
72+
73+
}

0 commit comments

Comments
 (0)