Skip to content

Commit fa8f246

Browse files
sobychacko authored and ilayaperumalg committed
GH-3160: Handle element types correctly in CassandraFilterExpressionConverter.doValue
Fixes: #3160. When using a filter expression with the IN operator on a collection field in CassandraVectorStore.similaritySearch, a ClassCastException was thrown because the code attempted to format the individual collection elements using the collection's codec instead of the element type's codec. This fix modifies doValue to detect when it is formatting an element inside a collection type and to use the appropriate element type codec. Although Cassandra does not support using the IN operator directly on collection columns, this fix ensures that we generate syntactically correct CQL rather than throwing a Java exception. The change specifically addresses ListType collections by using the element type codec for individual elements within the list. Signed-off-by: Soby Chacko <soby.chacko@broadcom.com>
1 parent 30add80 commit fa8f246

File tree

2 files changed

+67
-1
lines changed

2 files changed

+67
-1
lines changed

vector-stores/spring-ai-cassandra-store/src/main/java/org/springframework/ai/vectorstore/cassandra/CassandraFilterExpressionConverter.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
import java.util.stream.Collectors;
2424

2525
import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata;
26+
import com.datastax.oss.driver.api.core.type.DataType;
2627
import com.datastax.oss.driver.api.core.type.DataTypes;
28+
import com.datastax.oss.driver.api.core.type.ListType;
2729
import com.datastax.oss.driver.api.core.type.codec.registry.CodecRegistry;
2830
import com.datastax.oss.driver.shaded.guava.common.base.Preconditions;
2931

@@ -118,10 +120,19 @@ private void doListValue(ColumnMetadata column, Object v, StringBuilder context)
118120
}
119121

120122
private void doValue(ColumnMetadata column, Object v, StringBuilder context) {
123+
124+
DataType dataType = column.getType();
125+
126+
// Check if we're handling an element inside a collection for an IN clause
127+
if ((dataType instanceof ListType) && !(v instanceof Collection)) {
128+
// Extract the element type from the collection type
129+
dataType = ((ListType) dataType).getElementType();
130+
}
131+
121132
if (DataTypes.SMALLINT.equals(column.getType())) {
122133
v = ((Number) v).shortValue();
123134
}
124-
context.append(CodecRegistry.DEFAULT.codecFor(column.getType()).format(v));
135+
context.append(CodecRegistry.DEFAULT.codecFor(dataType).format(v));
125136
}
126137

127138
private Optional<ColumnMetadata> getColumn(String name) {

vector-stores/spring-ai-cassandra-store/src/test/java/org/springframework/ai/vectorstore/cassandra/CassandraVectorStoreIT.java

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,61 @@ void getNativeClientTest() {
522522
});
523523
}
524524

525+
@Test
526+
void searchWithCollectionFilter() {
527+
this.contextRunner.run(context -> {
528+
try (CassandraVectorStore store = createTestStore(context,
529+
new SchemaColumn("currencies", DataTypes.listOf(DataTypes.TEXT), SchemaColumnTags.INDEXED))) {
530+
531+
// Create test documents with different currency lists
532+
var btcDocument = new Document("BTC_doc", "Bitcoin document", Map.of("currencies", List.of("BTC")));
533+
var ethDocument = new Document("ETH_doc", "Ethereum document", Map.of("currencies", List.of("ETH")));
534+
var multiCurrencyDocument = new Document("MULTI_doc", "Multi-currency document",
535+
Map.of("currencies", List.of("BTC", "ETH", "SOL")));
536+
537+
store.add(List.of(btcDocument, ethDocument, multiCurrencyDocument));
538+
539+
// Verify initial state
540+
List<Document> results = store
541+
.similaritySearch(SearchRequest.builder().query("document").topK(5).build());
542+
assertThat(results).hasSize(3);
543+
544+
try {
545+
// Test filtering with IN operator on a collection field
546+
Filter.Expression filterExpression = new Filter.Expression(Filter.ExpressionType.IN,
547+
new Filter.Key("currencies"), new Filter.Value(List.of("BTC")));
548+
549+
// Search using programmatic filter
550+
store.similaritySearch(SearchRequest.builder()
551+
.query("document")
552+
.topK(5)
553+
.similarityThresholdAll()
554+
.filterExpression(filterExpression)
555+
.build());
556+
557+
// If we get here without an exception, it means Cassandra
558+
// unexpectedly accepted the query,
559+
// which is surprising since Cassandra doesn't support the IN operator
560+
// on collection columns.
561+
// This would indicate a potential change in Cassandra's behavior.
562+
Assertions.fail("Expected InvalidQueryException from Cassandra");
563+
}
564+
catch (InvalidQueryException e) {
565+
// This is the expected outcome: Cassandra rejects the query with a
566+
// specific error
567+
// indicating that collection columns cannot be used with IN
568+
// operators, which is
569+
// a documented limitation of Cassandra's query language. Support for
570+
// collection
571+
// filtering via CONTAINS would be needed for this type of query to
572+
// work.
573+
assertThat(e.getMessage()).contains("Collection column 'currencies'");
574+
assertThat(e.getMessage()).contains("cannot be restricted by a 'IN' relation");
575+
}
576+
}
577+
});
578+
}
579+
525580
@Test
526581
void throwsExceptionOnInvalidIndexNameWithSchemaValidation() {
527582
this.contextRunner.run(context -> {

0 commit comments

Comments
 (0)