Skip to content

Commit daf131b

Browse files
michaelsembwever authored and tzolov committed
Fix column creation when adding additional normal and embedding columns, and make the index name unique (for when there are multiple vector indexes in the same keyspace).
Also change the stream to a for-loop when converting List<Double> to Float[], for performance.
1 parent a50969e commit daf131b

File tree

6 files changed

+40
-8
lines changed

6 files changed

+40
-8
lines changed

spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/cassandra/CassandraVectorStoreProperties.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public class CassandraVectorStoreProperties {
3333

3434
private String table = CassandraVectorStoreConfig.DEFAULT_TABLE_NAME;
3535

36-
private String indexName = CassandraVectorStoreConfig.DEFAULT_INDEX_NAME;
36+
private String indexName = null;
3737

3838
private String contentColumnName = CassandraVectorStoreConfig.DEFAULT_CONTENT_COLUMN_NAME;
3939

spring-ai-spring-boot-autoconfigure/src/test/java/org/springframework/ai/autoconfigure/vectorstore/cassandra/CassandraVectorStorePropertiesTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ void defaultValues() {
3434
assertThat(props.getTable()).isEqualTo(CassandraVectorStoreConfig.DEFAULT_TABLE_NAME);
3535
assertThat(props.getContentColumnName()).isEqualTo(CassandraVectorStoreConfig.DEFAULT_CONTENT_COLUMN_NAME);
3636
assertThat(props.getEmbeddingColumnName()).isEqualTo(CassandraVectorStoreConfig.DEFAULT_EMBEDDING_COLUMN_NAME);
37-
assertThat(props.getIndexName()).isEqualTo(CassandraVectorStoreConfig.DEFAULT_INDEX_NAME);
37+
assertThat(props.getIndexName()).isNull();
3838
assertThat(props.getDisallowSchemaCreation()).isFalse();
3939
assertThat(props.getFixedThreadPoolExecutorSize())
4040
.isEqualTo(CassandraVectorStoreConfig.DEFAULT_ADD_CONCURRENCY);

vector-stores/spring-ai-cassandra/src/main/java/org/springframework/ai/vectorstore/CassandraVectorStore.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ public Optional<Boolean> delete(List<String> idList) {
206206
@Override
207207
public List<Document> similaritySearch(SearchRequest request) {
208208
Preconditions.checkArgument(request.getTopK() <= 1000);
209-
var embedding = this.embeddingClient.embed(request.getQuery()).stream().map(Double::floatValue).toList();
209+
var embedding = toFloatArray(this.embeddingClient.embed(request.getQuery()));
210210
CqlVector<Float> cqlVector = CqlVector.newInstance(embedding);
211211

212212
String whereClause = "";
@@ -350,4 +350,13 @@ private String getDocumentId(Row row) {
350350
return this.conf.primaryKeyTranslator.apply(primaryKeyValues);
351351
}
352352

353+
private static Float[] toFloatArray(List<Double> embeddingDouble) {
354+
Float[] embeddingFloat = new Float[embeddingDouble.size()];
355+
int i = 0;
356+
for (Double d : embeddingDouble) {
357+
embeddingFloat[i++] = d.floatValue();
358+
}
359+
return embeddingFloat;
360+
}
361+
353362
}

vector-stores/spring-ai-cassandra/src/main/java/org/springframework/ai/vectorstore/CassandraVectorStoreConfig.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,12 @@
4444
import com.datastax.oss.driver.api.querybuilder.schema.CreateTableStart;
4545
import com.datastax.oss.driver.shaded.guava.common.annotations.VisibleForTesting;
4646
import com.datastax.oss.driver.shaded.guava.common.base.Preconditions;
47+
4748
import org.slf4j.Logger;
4849
import org.slf4j.LoggerFactory;
4950

51+
import org.springframework.lang.Nullable;
52+
5053
/**
5154
* Configuration for the Cassandra vector store.
5255
*
@@ -71,7 +74,7 @@ public final class CassandraVectorStoreConfig implements AutoCloseable {
7174

7275
public static final String DEFAULT_ID_NAME = "id";
7376

74-
public static final String DEFAULT_INDEX_NAME = "embedding_index";
77+
public static final String DEFAULT_INDEX_SUFFIX = "idx";
7578

7679
public static final String DEFAULT_CONTENT_COLUMN_NAME = "content";
7780

@@ -186,7 +189,7 @@ public static class Builder {
186189

187190
private List<SchemaColumn> clusteringKeys = List.of();
188191

189-
private String indexName = DEFAULT_INDEX_NAME;
192+
private String indexName = null;
190193

191194
private String contentColumnName = DEFAULT_CONTENT_COLUMN_NAME;
192195

@@ -257,6 +260,8 @@ public Builder withClusteringKeys(List<SchemaColumn> clusteringKeys) {
257260
return this;
258261
}
259262

263+
/** defaults (if null) to '<table_name>_<embedding_column_name>_idx' **/
264+
@Nullable
260265
public Builder withIndexName(String name) {
261266
this.indexName = name;
262267
return this;
@@ -324,6 +329,9 @@ public Builder withPrimaryKeyTranslator(PrimaryKeyTranslator primaryKeyTranslato
324329
}
325330

326331
public CassandraVectorStoreConfig build() {
332+
if (null == this.indexName) {
333+
this.indexName = String.format("%s_%s_%s", this.table, this.embeddingColumnName, DEFAULT_INDEX_SUFFIX);
334+
}
327335
for (SchemaColumn metadata : this.metadataColumns) {
328336

329337
Preconditions.checkArgument(
@@ -530,7 +538,7 @@ private void ensureTableColumnsExist(int vectorDimension) {
530538
// special case for embedding column, bc JAVA-3118, as above
531539
StringBuilder alterTableStmt = new StringBuilder(((BuildableQuery) alterTable).asCql());
532540
if (newColumns.isEmpty() && !addContent) {
533-
alterTableStmt.append(" ADD ");
541+
alterTableStmt.append(" ADD (");
534542
}
535543
else {
536544
alterTableStmt.setLength(alterTableStmt.length() - 1);
@@ -539,7 +547,7 @@ private void ensureTableColumnsExist(int vectorDimension) {
539547
alterTableStmt.append(this.schema.embedding)
540548
.append(" vector<float,")
541549
.append(vectorDimension)
542-
.append(">");
550+
.append(">)");
543551

544552
logger.debug("Executing {}", alterTableStmt.toString());
545553
this.session.execute(alterTableStmt.toString());

vector-stores/spring-ai-cassandra/src/test/java/org/springframework/ai/vectorstore/CassandraRichSchemaVectorStoreIT.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,8 @@ void ensureSchemaNoCreation() {
134134
@Test
135135
void ensureSchemaPartialCreation() {
136136
this.contextRunner.run(context -> {
137-
for (int i = 0; i < 4; ++i) {
137+
int PARTIAL_FILES = 5;
138+
for (int i = 0; i < PARTIAL_FILES; ++i) {
138139
executeCqlFile(context, format("test_wiki_partial_%d_schema.cql", i));
139140
var wrapper = createStore(context, List.of(), false, false);
140141
try {
@@ -148,6 +149,10 @@ void ensureSchemaPartialCreation() {
148149
wrapper.store().close();
149150
}
150151
}
152+
// make sure there's not more files to test
153+
Assertions.assertThrows(IOException.class, () -> {
154+
executeCqlFile(context, format("test_wiki_partial_%d_schema.cql", PARTIAL_FILES));
155+
});
151156
});
152157
}
153158

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
CREATE KEYSPACE IF NOT EXISTS test_wikidata WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};
2+
3+
CREATE TABLE IF NOT EXISTS test_wikidata.articles (
4+
wiki text,
5+
language text,
6+
title text,
7+
chunk_no int,
8+
messages text,
9+
PRIMARY KEY ((wiki, language, title), chunk_no)
10+
);

0 commit comments

Comments
 (0)