|
| 1 | += Azure Cosmos DB |
| 2 | + |
| 3 | +This section walks you through setting up `CosmosDBVectorStore` to store document embeddings and perform similarity searches. |
| 4 | + |
| 5 | +== What is Azure Cosmos DB? |
| 6 | + |
| 7 | +link:https://azure.microsoft.com/en-us/services/cosmos-db/[Azure Cosmos DB] is Microsoft's globally distributed cloud-native database service designed for mission-critical applications. |
| 8 | +It offers high availability, low latency, and the ability to scale horizontally to meet modern application demands. |
| 9 | +It was built from the ground up with global distribution, fine-grained multi-tenancy, and horizontal scalability at its core. |
| 10 | +It is a foundational service in Azure, used by most of Microsoft’s mission-critical applications at global scale, including Teams, Skype, Xbox Live, Office 365, Bing, Azure Active Directory, Azure Portal, Microsoft Store, and many others. |
| 11 | +It is also used by thousands of external customers, including OpenAI for ChatGPT, and by other mission-critical AI applications that require elastic scale, turnkey global distribution, low latency, and high availability across the planet. |
| 12 | + |
| 13 | +== What is DiskANN? |
| 14 | + |
| 15 | +DiskANN (Disk-based Approximate Nearest Neighbor Search) is an innovative technology used in Azure Cosmos DB to enhance the performance of vector searches. |
| 16 | +It enables efficient and scalable similarity searches across high-dimensional data by indexing embeddings stored in Cosmos DB. |
| 17 | + |
| 18 | +DiskANN provides the following benefits: |
| 19 | + |
| 20 | +* **Efficiency**: By utilizing disk-based structures, DiskANN significantly reduces the time required to find nearest neighbors compared to traditional methods. |
| 21 | +* **Scalability**: It can handle large datasets that exceed memory capacity, making it suitable for various applications, including machine learning and AI-driven solutions. |
| 22 | +* **Low Latency**: DiskANN minimizes latency during search operations, ensuring that applications can retrieve results quickly even with substantial data volumes. |
| 23 | + |
| 24 | +In the context of Spring AI for Azure Cosmos DB, the vector store creates and leverages DiskANN indexes to ensure optimal performance for similarity queries. |
| 25 | + |
| 26 | +== Setting up Azure Cosmos DB Vector Store with Auto Configuration |
| 27 | + |
| 28 | +The following code demonstrates how to set up the `CosmosDBVectorStore` with auto-configuration: |
| 29 | + |
| 30 | +```java |
| 31 | +package com.example.demo; |
| 32 | + |
| 33 | +import io.micrometer.observation.ObservationRegistry; |
| 34 | +import org.slf4j.Logger; |
| 35 | +import org.slf4j.LoggerFactory; |
| 36 | +import org.springframework.ai.document.Document; |
| 37 | +import org.springframework.ai.vectorstore.SearchRequest; |
| 38 | +import org.springframework.ai.vectorstore.VectorStore; |
| 39 | +import org.springframework.beans.factory.annotation.Autowired; |
| 40 | +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; |
| 41 | +import org.springframework.boot.CommandLineRunner; |
| 42 | +import org.springframework.boot.SpringApplication; |
| 43 | +import org.springframework.boot.autoconfigure.SpringBootApplication; |
| 44 | +import org.springframework.context.annotation.Bean; |
| 45 | +import org.springframework.context.annotation.Lazy; |
| 46 | + |
| 47 | +import java.util.List; |
| 48 | +import java.util.Map; |
| 49 | +import java.util.UUID; |
| 50 | + |
| 51 | +import static org.assertj.core.api.Assertions.assertThat; |
| 52 | + |
| 53 | +@SpringBootApplication |
| 54 | +@EnableAutoConfiguration |
| 55 | +public class DemoApplication implements CommandLineRunner { |
| 56 | + |
| 57 | + private static final Logger log = LoggerFactory.getLogger(DemoApplication.class); |
| 58 | + |
| 59 | + @Lazy |
| 60 | + @Autowired |
| 61 | + private VectorStore vectorStore; |
| 62 | + |
| 63 | + public static void main(String[] args) { |
| 64 | + SpringApplication.run(DemoApplication.class, args); |
| 65 | + } |
| 66 | + |
| 67 | + @Override |
| 68 | + public void run(String... args) throws Exception { |
| 69 | + Document document1 = new Document(UUID.randomUUID().toString(), "Sample content1", Map.of("key1", "value1")); |
| 70 | + Document document2 = new Document(UUID.randomUUID().toString(), "Sample content2", Map.of("key2", "value2")); |
| 71 | + vectorStore.add(List.of(document1, document2)); |
| 72 | + List<Document> results = vectorStore.similaritySearch(SearchRequest.query("Sample content").withTopK(1)); |
| 73 | + |
| 74 | + log.info("Search results: {}", results); |
| 75 | + |
| 76 | + // Remove the documents from the vector store |
| 77 | + vectorStore.delete(List.of(document1.getId(), document2.getId())); |
| 78 | + } |
| 79 | + |
| 80 | + @Bean |
| 81 | + public ObservationRegistry observationRegistry() { |
| 82 | + return ObservationRegistry.create(); |
| 83 | + } |
| 84 | +} |
| 85 | +``` |
| 86 | + |
| 87 | + |
| 88 | +== Auto Configuration |
| 89 | + |
| 90 | +Add the following dependency to your Maven project: |
| 91 | + |
| 92 | +[source,xml] |
| 93 | +---- |
| 94 | +<dependency> |
| 95 | + <groupId>org.springframework.ai</groupId> |
| 96 | + <artifactId>spring-ai-azure-cosmos-db-store-spring-boot-starter</artifactId> |
| 97 | +</dependency> |
| 98 | +---- |
| 99 | + |
| 100 | +== Configuration Properties |
| 101 | + |
| 102 | +The following configuration properties are available for the Cosmos DB vector store: |
| 103 | + |
| 104 | +|=========================== |
| 105 | +| Property | Description |
| 106 | +| spring.ai.vectorstore.cosmosdb.databaseName | The name of the Cosmos DB database to use. |
| 107 | +| spring.ai.vectorstore.cosmosdb.containerName | The name of the Cosmos DB container to use. |
| 108 | +| spring.ai.vectorstore.cosmosdb.partitionKeyPath | The path for the partition key. |
| 109 | +| spring.ai.vectorstore.cosmosdb.metadataFields | Comma-separated list of metadata fields. |
| 110 | +| spring.ai.vectorstore.cosmosdb.vectorStoreThoughput | The throughput for the vector store. |
| 111 | +| spring.ai.vectorstore.cosmosdb.vectorDimensions | The number of dimensions for the vectors. |
| 112 | +| spring.ai.vectorstore.cosmosdb.endpoint | The endpoint of the Cosmos DB account. |
| 113 | +| spring.ai.vectorstore.cosmosdb.key | The key for the Cosmos DB account. |
| 114 | +|=========================== |
| 115 | + |
| 116 | + |
| 117 | +== Complex Searches with Filters |
| 118 | + |
| 119 | +You can perform more complex searches using filters in the Cosmos DB vector store. |
| 120 | +Below is a sample demonstrating how to use filters in your search queries. |
| 121 | + |
| 122 | +[source,java] |
| 123 | +---- |
| 124 | +Map<String, Object> metadata1 = new HashMap<>(); |
| 125 | +metadata1.put("country", "UK"); |
| 126 | +metadata1.put("year", 2021); |
| 127 | +metadata1.put("city", "London"); |
| 128 | +
|
| 129 | +Map<String, Object> metadata2 = new HashMap<>(); |
| 130 | +metadata2.put("country", "NL"); |
| 131 | +metadata2.put("year", 2022); |
| 132 | +metadata2.put("city", "Amsterdam"); |
| 133 | +
|
| 134 | +Document document1 = new Document("1", "A document about the UK", metadata1); |
| 135 | +Document document2 = new Document("2", "A document about the Netherlands", metadata2); |
| 136 | +
|
| 137 | +vectorStore.add(List.of(document1, document2)); |
| 138 | +
|
| 139 | +FilterExpressionBuilder builder = new FilterExpressionBuilder(); |
| 140 | +List<Document> results = vectorStore.similaritySearch(SearchRequest.query("The World") |
| 141 | + .withTopK(10) |
| 142 | + .withFilterExpression((builder.in("country", "UK", "NL")).build())); |
| 143 | +---- |
| 144 | + |
| 145 | +== Setting up Azure Cosmos DB Vector Store without Auto Configuration |
| 146 | + |
| 147 | +The following code demonstrates how to set up the `CosmosDBVectorStore` without relying on auto-configuration: |
| 148 | + |
| 149 | +```java |
| 150 | +package com.example.demo; |
| 151 | + |
| 152 | +import com.azure.cosmos.CosmosAsyncClient; |
| 153 | +import com.azure.cosmos.CosmosClientBuilder; |
| 154 | +import io.micrometer.observation.ObservationRegistry; |
| 155 | +import org.springframework.ai.document.Document; |
| 156 | +import org.springframework.ai.embedding.EmbeddingModel; |
| 157 | +import org.springframework.ai.transformers.TransformersEmbeddingModel; |
| 158 | +import org.springframework.ai.vectorstore.CosmosDBVectorStore; |
| 159 | +import org.springframework.ai.vectorstore.CosmosDBVectorStoreConfig; |
| 160 | +import org.springframework.ai.vectorstore.VectorStore; |
| 161 | +import org.springframework.beans.factory.annotation.Autowired; |
| 162 | +import org.springframework.boot.CommandLineRunner; |
| 163 | +import org.springframework.boot.SpringApplication; |
| 164 | +import org.springframework.boot.autoconfigure.SpringBootApplication; |
| 165 | +import org.springframework.context.annotation.Bean; |
| 166 | +import org.springframework.context.annotation.Lazy; |
| 167 | + |
| 168 | +import java.util.List; |
| 169 | +import java.util.Map; |
| 170 | +import java.util.UUID; |
| 171 | + |
| 172 | +@SpringBootApplication |
| 173 | +public class DemoApplication implements CommandLineRunner { |
| 174 | + |
| 175 | + @Lazy |
| 176 | + @Autowired |
| 177 | + private VectorStore vectorStore; |
| 178 | + |
| 179 | + @Lazy |
| 180 | + @Autowired |
| 181 | + private EmbeddingModel embeddingModel; |
| 182 | + |
| 183 | + public static void main(String[] args) { |
| 184 | + SpringApplication.run(DemoApplication.class, args); |
| 185 | + } |
| 186 | + |
| 187 | + @Override |
| 188 | + public void run(String... args) throws Exception { |
| 189 | + Document document1 = new Document(UUID.randomUUID().toString(), "Sample content1", Map.of("key1", "value1")); |
| 190 | + Document document2 = new Document(UUID.randomUUID().toString(), "Sample content2", Map.of("key2", "value2")); |
| 191 | + vectorStore.add(List.of(document1, document2)); |
| 192 | + |
| 193 | + List<Document> results = vectorStore.similaritySearch(SearchRequest.query("Sample content").withTopK(1)); |
| 194 | + log.info("Search results: {}", results); |
| 195 | + } |
| 196 | + |
| 197 | + @Bean |
| 198 | + public ObservationRegistry observationRegistry() { |
| 199 | + return ObservationRegistry.create(); |
| 200 | + } |
| 201 | + |
| 202 | + @Bean |
| 203 | + public VectorStore vectorStore(ObservationRegistry observationRegistry) { |
| 204 | + CosmosDBVectorStoreConfig config = new CosmosDBVectorStoreConfig(); |
| 205 | + config.setDatabaseName("spring-ai-sample"); |
| 206 | + config.setContainerName("container"); |
| 207 | + config.setMetadataFields("country,city"); |
| 208 | + config.setVectorStoreThoughput(400); |
| 209 | + |
| 210 | + CosmosAsyncClient cosmosClient = new CosmosClientBuilder() |
| 211 | + .endpoint(System.getenv("COSMOSDB_AI_ENDPOINT")) |
| 212 | + .userAgentSuffix("SpringAI-CDBNoSQL-VectorStore") |
| 213 | + .key(System.getenv("COSMOSDB_AI_KEY")) |
| 214 | + .gatewayMode() |
| 215 | + .buildAsyncClient(); |
| 216 | + |
| 217 | + return new CosmosDBVectorStore(observationRegistry, null, cosmosClient, config, embeddingModel); |
| 218 | + } |
| 219 | + |
| 220 | + @Bean |
| 221 | + public EmbeddingModel embeddingModel() { |
| 222 | + return new TransformersEmbeddingModel(); |
| 223 | + } |
| 224 | +} |
| 225 | +``` |
| 226 | + |
| 227 | +== Manual Dependency Setup |
| 228 | + |
| 229 | +Add the following dependency to your Maven project: |
| 230 | + |
| 231 | +[source,xml] |
| 232 | +---- |
| 233 | +<dependency> |
| 234 | + <groupId>org.springframework.ai</groupId> |
| 235 | + <artifactId>spring-ai-azure-cosmos-db-store</artifactId> |
| 236 | +</dependency> |
| 237 | +---- |
0 commit comments