1
1
/*
2
- * Copyright 2023-2023 the original author or authors.
2
+ * Copyright 2023-2024 the original author or authors.
3
3
*
4
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
5
* you may not use this file except in compliance with the License.
57
57
* offers functionalities like adding, deleting, and performing similarity searches on
58
58
* documents.
59
59
*
60
- * The store utilizes RedisJSON and RediSearch to handle JSON documents and to index and
60
+ * The store utilizes RedisJSON and RediSearch to handle JSON documents and to index and
61
61
* search vector data. It supports various vector algorithms (e.g., FLAT, HNSW) for
62
62
* efficient similarity searches. Additionally, it allows for custom metadata fields in
63
63
* the documents to be stored alongside the vector and content data.
68
68
* them.
69
69
*
70
70
* @author Julien Ruaux
71
+ * @author Christian Tzolov
71
72
* @see VectorStore
72
73
* @see RedisVectorStoreConfig
73
74
* @see EmbeddingClient
@@ -115,6 +116,20 @@ public static final class RedisVectorStoreConfig {
115
116
116
117
private final List <MetadataField > metadataFields ;
117
118
119
/**
 * Creates a configuration from a freshly created {@link Builder}, i.e. one whose
 * fields still hold their initial values.
 */
private RedisVectorStoreConfig() {
    this(builder());
}
122
+
123
/**
 * Creates a configuration by taking over every setting held by the given builder.
 * The metadata field list is stored by reference; it is not copied here.
 * @param builder the builder carrying the values to use
 */
private RedisVectorStoreConfig(Builder builder) {
    this.uri = builder.uri;
    this.indexName = builder.indexName;
    this.prefix = builder.prefix;
    this.contentFieldName = builder.contentFieldName;
    this.embeddingFieldName = builder.embeddingFieldName;
    this.vectorAlgorithm = builder.vectorAlgorithm;
    this.metadataFields = builder.metadataFields;
}
132
+
118
133
/**
119
134
* Start building a new configuration.
120
135
* @return The entry point for creating a new configuration.
@@ -132,16 +147,6 @@ public static RedisVectorStoreConfig defaultConfig() {
132
147
return builder ().build ();
133
148
}
134
149
135
- private RedisVectorStoreConfig (Builder builder ) {
136
- this .uri = builder .uri ;
137
- this .indexName = builder .indexName ;
138
- this .prefix = builder .prefix ;
139
- this .contentFieldName = builder .contentFieldName ;
140
- this .embeddingFieldName = builder .embeddingFieldName ;
141
- this .vectorAlgorithm = builder .vectorAlgorithm ;
142
- this .metadataFields = builder .metadataFields ;
143
- }
144
-
145
150
public static class Builder {
146
151
147
152
private String uri = DEFAULT_URI ;
@@ -290,22 +295,23 @@ public RedisVectorStore(RedisVectorStoreConfig config, EmbeddingClient embedding
290
295
this .jedis = new JedisPooled (config .uri );
291
296
this .embeddingClient = embeddingClient ;
292
297
this .config = config ;
298
+ this .filterExpressionConverter = new RedisFilterExpressionConverter (this .config .metadataFields );
293
299
}
294
300
295
301
public JedisPooled getJedis () {
296
- return jedis ;
302
+ return this . jedis ;
297
303
}
298
304
299
305
@ Override
300
306
public void add (List <Document > documents ) {
301
- Pipeline pipeline = jedis .pipelined ();
307
+ Pipeline pipeline = this . jedis .pipelined ();
302
308
for (Document document : documents ) {
303
309
var embedding = this .embeddingClient .embed (document );
304
310
document .setEmbedding (embedding );
305
311
306
312
var fields = new HashMap <String , Object >();
307
- fields .put (config .embeddingFieldName , embedding );
308
- fields .put (config .contentFieldName , document .getContent ());
313
+ fields .put (this . config .embeddingFieldName , embedding );
314
+ fields .put (this . config .contentFieldName , document .getContent ());
309
315
fields .putAll (document .getMetadata ());
310
316
pipeline .jsonSetWithEscape (key (document .getId ()), JSON_SET_PATH , fields );
311
317
}
@@ -321,12 +327,12 @@ public void add(List<Document> documents) {
321
327
}
322
328
323
329
private String key (String id ) {
324
- return config .prefix + id ;
330
+ return this . config .prefix + id ;
325
331
}
326
332
327
333
@ Override
328
334
public Optional <Boolean > delete (List <String > idList ) {
329
- Pipeline pipeline = jedis .pipelined ();
335
+ Pipeline pipeline = this . jedis .pipelined ();
330
336
for (String id : idList ) {
331
337
pipeline .jsonDel (key (id ));
332
338
}
@@ -350,21 +356,21 @@ public List<Document> similaritySearch(SearchRequest request) {
350
356
351
357
String filter = nativeExpressionFilter (request );
352
358
353
- String queryString = String .format (QUERY_FORMAT , filter , request .getTopK (), config .embeddingFieldName ,
359
+ String queryString = String .format (QUERY_FORMAT , filter , request .getTopK (), this . config .embeddingFieldName ,
354
360
EMBEDDING_PARAM_NAME , DISTANCE_FIELD_NAME );
355
361
356
362
List <String > returnFields = new ArrayList <>();
357
- config .metadataFields .stream ().map (MetadataField ::name ).forEach (returnFields ::add );
358
- returnFields .add (config .embeddingFieldName );
359
- returnFields .add (config .contentFieldName );
363
+ this . config .metadataFields .stream ().map (MetadataField ::name ).forEach (returnFields ::add );
364
+ returnFields .add (this . config .embeddingFieldName );
365
+ returnFields .add (this . config .contentFieldName );
360
366
returnFields .add (DISTANCE_FIELD_NAME );
361
367
var embedding = toFloatArray (this .embeddingClient .embed (request .getQuery ()));
362
368
Query query = new Query (queryString ).addParam (EMBEDDING_PARAM_NAME , RediSearchUtil .toByteArray (embedding ))
363
369
.returnFields (returnFields .toArray (new String [0 ]))
364
370
.setSortBy (DISTANCE_FIELD_NAME , true )
365
371
.dialect (2 );
366
372
367
- SearchResult result = jedis .ftSearch (config .indexName , query );
373
+ SearchResult result = this . jedis .ftSearch (this . config .indexName , query );
368
374
return result .getDocuments ()
369
375
.stream ()
370
376
.filter (d -> similarityScore (d ) >= request .getSimilarityThreshold ())
@@ -373,9 +379,10 @@ public List<Document> similaritySearch(SearchRequest request) {
373
379
}
374
380
375
381
private Document toDocument (redis .clients .jedis .search .Document doc ) {
376
- var id = doc .getId ().substring (config .prefix .length ());
377
- var content = doc .hasProperty (config .contentFieldName ) ? doc .getString (config .contentFieldName ) : null ;
378
- Map <String , Object > metadata = config .metadataFields .stream ()
382
+ var id = doc .getId ().substring (this .config .prefix .length ());
383
+ var content = doc .hasProperty (this .config .contentFieldName ) ? doc .getString (this .config .contentFieldName )
384
+ : null ;
385
+ Map <String , Object > metadata = this .config .metadataFields .stream ()
379
386
.map (MetadataField ::name )
380
387
.filter (doc ::hasProperty )
381
388
.collect (Collectors .toMap (Function .identity (), doc ::getString ));
@@ -391,44 +398,41 @@ private String nativeExpressionFilter(SearchRequest request) {
391
398
if (request .getFilterExpression () == null ) {
392
399
return "*" ;
393
400
}
394
- return "(" + filterExpressionConverter .convertExpression (request .getFilterExpression ()) + ")" ;
401
+ return "(" + this . filterExpressionConverter .convertExpression (request .getFilterExpression ()) + ")" ;
395
402
}
396
403
397
404
@ Override
398
405
public void afterPropertiesSet () {
399
406
400
407
// If index already exists don't do anything
401
- if (jedis .ftList ().contains (config .indexName )) {
408
+ if (this . jedis .ftList ().contains (this . config .indexName )) {
402
409
return ;
403
410
}
404
411
405
- String response = jedis .ftCreate (config .indexName ,
406
- FTCreateParams .createParams ().on (IndexDataType .JSON ).addPrefix (config .prefix ), schemaFields ());
412
+ String response = this . jedis .ftCreate (this . config .indexName ,
413
+ FTCreateParams .createParams ().on (IndexDataType .JSON ).addPrefix (this . config .prefix ), schemaFields ());
407
414
if (!RESPONSE_OK .test (response )) {
408
415
String message = MessageFormat .format ("Could not create index: {0}" , response );
409
416
throw new RuntimeException (message );
410
417
}
411
-
412
- filterExpressionConverter = new RedisFilterExpressionConverter (config .metadataFields );
413
-
414
418
}
415
419
416
420
private Iterable <SchemaField > schemaFields () {
417
421
Map <String , Object > vectorAttrs = new HashMap <>();
418
- vectorAttrs .put ("DIM" , embeddingClient .dimensions ());
422
+ vectorAttrs .put ("DIM" , this . embeddingClient .dimensions ());
419
423
vectorAttrs .put ("DISTANCE_METRIC" , DEFAULT_DISTANCE_METRIC );
420
424
vectorAttrs .put ("TYPE" , VECTOR_TYPE_FLOAT32 );
421
425
List <SchemaField > fields = new ArrayList <>();
422
- fields .add (TextField .of (jsonPath (config .contentFieldName )).as (config .contentFieldName ).weight (1.0 ));
426
+ fields .add (TextField .of (jsonPath (this . config .contentFieldName )).as (this . config .contentFieldName ).weight (1.0 ));
423
427
fields .add (VectorField .builder ()
424
- .fieldName (jsonPath (config .embeddingFieldName ))
428
+ .fieldName (jsonPath (this . config .embeddingFieldName ))
425
429
.algorithm (vectorAlgorithm ())
426
430
.attributes (vectorAttrs )
427
- .as (config .embeddingFieldName )
431
+ .as (this . config .embeddingFieldName )
428
432
.build ());
429
433
430
- if (!CollectionUtils .isEmpty (config .metadataFields )) {
431
- for (MetadataField field : config .metadataFields ) {
434
+ if (!CollectionUtils .isEmpty (this . config .metadataFields )) {
435
+ for (MetadataField field : this . config .metadataFields ) {
432
436
fields .add (schemaField (field ));
433
437
}
434
438
}
0 commit comments