Skip to content

Commit a84256a

Browse files
Validate documents for text/media check when adding to vector store (#3687)
- Since non-text documents aren't supported, we need validation that throws an exception if any non-text document is encountered - Add tests - Add a test to QdrantVectorStoreIT with the use case provided by @devMtn30 - Add a simple vector store test for the validation Fixes #3609 Auto-cherry-pick to 1.0.x Signed-off-by: Ilayaperumal Gopinathan <ilayaperumal.gopinathan@broadcom.com>
1 parent 3f4b5d4 commit a84256a

File tree

3 files changed

+48
-1
lines changed

3 files changed

+48
-1
lines changed

spring-ai-vector-store/src/main/java/org/springframework/ai/vectorstore/observation/AbstractObservationVectorStore.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public AbstractObservationVectorStore(AbstractVectorStoreBuilder<?> builder) {
7474
*/
7575
@Override
7676
public void add(List<Document> documents) {
77-
77+
validateNonTextDocuments(documents);
7878
VectorStoreObservationContext observationContext = this
7979
.createObservationContextBuilder(VectorStoreObservationContext.Operation.ADD.value())
8080
.build();
@@ -85,6 +85,17 @@ public void add(List<Document> documents) {
8585
.observe(() -> this.doAdd(documents));
8686
}
8787

88+
/**
 * Rejects a batch that contains a document for which {@code isText()} is false.
 * A {@code null} list is accepted without any check, and {@code null} elements
 * inside the list are skipped rather than rejected. The loop stops at the first
 * offending document.
 * @param documents the documents about to be added; may be {@code null}
 * @throws IllegalArgumentException if any non-null document reports
 * {@code isText() == false}
 */
private void validateNonTextDocuments(List<Document> documents) {
89+
if (documents == null)
90+
return;
91+
for (Document document : documents) {
92+
if (document != null && !document.isText()) {
93+
throw new IllegalArgumentException(
94+
"Only text documents are supported for now. One of the documents contains non-text content.");
95+
}
96+
}
97+
}
98+
8899
@Override
89100
public void delete(List<String> deleteDocIds) {
90101

spring-ai-vector-store/src/test/java/org/springframework/ai/vectorstore/SimpleVectorStoreTests.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,18 @@
3232
import org.junit.jupiter.api.io.CleanupMode;
3333
import org.junit.jupiter.api.io.TempDir;
3434

35+
import org.springframework.ai.content.Media;
3536
import org.springframework.ai.document.Document;
3637
import org.springframework.ai.embedding.EmbeddingModel;
38+
import org.springframework.core.io.ByteArrayResource;
3739
import org.springframework.core.io.Resource;
40+
import org.springframework.util.MimeType;
3841

3942
import static org.assertj.core.api.Assertions.assertThat;
4043
import static org.assertj.core.api.Assertions.assertThatThrownBy;
4144
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
45+
import static org.junit.jupiter.api.Assertions.assertEquals;
46+
import static org.junit.jupiter.api.Assertions.assertThrows;
4247
import static org.mockito.ArgumentMatchers.any;
4348
import static org.mockito.Mockito.mock;
4449
import static org.mockito.Mockito.when;
@@ -259,4 +264,15 @@ void shouldHandleNullVectors() {
259264
.hasMessage("Vectors must not be null");
260265
}
261266

267+
/**
 * Builds a document from a one-byte {@code image/png} media item plus metadata
 * (no text is set on the builder) and checks that {@code add} throws an
 * {@link IllegalArgumentException} carrying the exact validation message.
 */
@Test
268+
void shouldFailNonTextDocuments() {
269+
Media media = new Media(MimeType.valueOf("image/png"), new ByteArrayResource(new byte[] { 0x00 }));
270+
271+
Document imgDoc = Document.builder().media(media).metadata(Map.of("fileName", "pixel.png")).build();
272+
273+
Exception exception = assertThrows(IllegalArgumentException.class, () -> this.vectorStore.add(List.of(imgDoc)));
274+
assertEquals("Only text documents are supported for now. One of the documents contains non-text content.",
275+
exception.getMessage());
276+
}
277+
262278
}

vector-stores/spring-ai-qdrant-store/src/test/java/org/springframework/ai/vectorstore/qdrant/QdrantVectorStoreIT.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.testcontainers.junit.jupiter.Testcontainers;
3737
import org.testcontainers.qdrant.QdrantContainer;
3838

39+
import org.springframework.ai.content.Media;
3940
import org.springframework.ai.document.Document;
4041
import org.springframework.ai.document.DocumentMetadata;
4142
import org.springframework.ai.embedding.EmbeddingModel;
@@ -48,8 +49,12 @@
4849
import org.springframework.boot.SpringBootConfiguration;
4950
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
5051
import org.springframework.context.annotation.Bean;
52+
import org.springframework.core.io.ByteArrayResource;
53+
import org.springframework.util.MimeType;
5154

5255
import static org.assertj.core.api.Assertions.assertThat;
56+
import static org.junit.jupiter.api.Assertions.assertEquals;
57+
import static org.junit.jupiter.api.Assertions.assertThrows;
5358

5459
/**
5560
* @author Anush Shetty
@@ -58,6 +63,7 @@
5863
* @author Thomas Vitale
5964
* @author Soby Chacko
6065
* @author Jonghoon Park
66+
* @author Kim San
6167
* @since 0.8.1
6268
*/
6369
@Testcontainers
@@ -333,6 +339,20 @@ void shouldConvertLongToString() {
333339
});
334340
}
335341

342+
/**
 * Runs inside the application context: obtains the {@code QdrantVectorStore}
 * bean, builds a document from a one-byte {@code image/png} media item plus
 * metadata (no text is set on the builder), and checks that {@code add} throws
 * an {@link IllegalArgumentException} carrying the exact validation message.
 */
@Test
343+
void testNonTextDocuments() {
344+
this.contextRunner.run(context -> {
345+
QdrantVectorStore vectorStore = context.getBean(QdrantVectorStore.class);
346+
Media media = new Media(MimeType.valueOf("image/png"), new ByteArrayResource(new byte[] { 0x00 }));
347+
348+
Document imgDoc = Document.builder().media(media).metadata(Map.of("fileName", "pixel.png")).build();
349+
350+
Exception exception = assertThrows(IllegalArgumentException.class, () -> vectorStore.add(List.of(imgDoc)));
351+
assertEquals("Only text documents are supported for now. One of the documents contains non-text content.",
352+
exception.getMessage());
353+
});
354+
}
355+
336356
@SpringBootConfiguration
337357
public static class TestApplication {
338358

0 commit comments

Comments
 (0)