Skip to content

Commit c3dd57e

Browse files
committed
Fix escaping issue in filterExpression for RedisVectorStore file name filtering
1 parent 5b55d59 commit c3dd57e

File tree

1 file changed

+30
-19
lines changed

1 file changed

+30
-19
lines changed

spring-ai-vector-store/src/main/java/org/springframework/ai/vectorstore/SearchRequest.java

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,14 @@
1717
package org.springframework.ai.vectorstore;
1818

1919
import java.util.Objects;
20+
import java.util.Map;
2021

2122
import org.springframework.ai.document.Document;
2223
import org.springframework.ai.vectorstore.filter.Filter;
2324
import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder;
2425
import org.springframework.ai.vectorstore.filter.FilterExpressionTextParser;
2526
import org.springframework.lang.Nullable;
2627
import org.springframework.util.Assert;
27-
import java.util.regex.Pattern;
28-
import java.util.regex.Matcher;
2928

3029
/**
3130
* Similarity search request. Use the {@link SearchRequest#builder()} to create the
@@ -61,6 +60,8 @@ public class SearchRequest {
6160
@Nullable
6261
private Filter.Expression filterExpression;
6362

63+
private static final Map<Character, String> ESCAPE_TEXT = Map.of('\\', "\\\\", '.', "\\.");
64+
6465
/**
6566
* Copy an existing {@link SearchRequest.Builder} instance.
6667
* @param originalSearchRequest {@link SearchRequest} instance to copy.
@@ -193,7 +194,7 @@ public Builder similarityThresholdAll() {
193194
/**
194195
* Retrieves documents by query embedding similarity and matching the filters.
195196
* Value of 'null' means that no metadata filters will be applied to the search.
196-
*
197+
* <p>
197198
* For example if the {@link Document#getMetadata()} schema is:
198199
*
199200
* <pre>{@code
@@ -205,7 +206,7 @@ public Builder similarityThresholdAll() {
205206
* "isActive": <Boolean>
206207
* &#125;
207208
* }</pre>
208-
*
209+
* <p>
209210
* you can constrain the search result to only UK countries with isActive=true and
210211
* year equal to or greater than 2020. You can build such a metadata filter
211212
* programmatically like this:
@@ -217,10 +218,10 @@ public Builder similarityThresholdAll() {
217218
* new Expression(GTE, new Key("year"), new Value(2020)),
218219
* new Expression(EQ, new Key("isActive"), new Value(true))));
219220
* }</pre>
220-
*
221+
* <p>
221222
* The {@link Filter.Expression} is portable across all vector stores.<br/>
222-
*
223-
*
223+
* <p>
224+
* <p>
224225
* The {@link FilterExpressionBuilder} is a DSL creating expressions
225226
* programmatically:
226227
*
@@ -232,7 +233,7 @@ public Builder similarityThresholdAll() {
232233
* b.gte("year", 2020),
233234
* b.eq("isActive", true)));
234235
* }</pre>
235-
*
236+
* <p>
236237
* The {@link FilterExpressionTextParser} converts textual, SQL like filter
237238
* expression language into {@link Filter.Expression}:
238239
*
@@ -262,21 +263,21 @@ public Builder filterExpression(@Nullable Filter.Expression expression) {
262263
* "isActive": <Boolean>
263264
* &#125;
264265
* }</pre>
265-
*
266+
* <p>
266267
* then you can constrain the search result with metadata filter expressions like:
267268
*
268269
* <pre>{@code
269270
* country == 'UK' && year >= 2020 && isActive == true
270271
* Or
271272
* country == 'BG' && (city NOT IN ['Sofia', 'Plovdiv'] || price < 134.34)
272273
* }</pre>
273-
*
274+
* <p>
274275
* This ensures that the response contains only embeddings that match the
275276
* specified filter criteria. <br/>
276-
*
277+
* <p>
277278
* The declarative, SQL like, filter syntax is portable across all vector stores
278279
* supporting the filter search feature.<br/>
279-
*
280+
* <p>
280281
* The {@link FilterExpressionTextParser} is used to convert the text filter
281282
* expression into {@link Filter.Expression}.
282283
* @param textExpression declarative, portable, SQL like, metadata filter syntax.
@@ -290,14 +291,24 @@ public Builder filterExpression(@Nullable String textExpression) {
290291
}
291292

292293
private String escapeTextExpression(String expression) {
293-
Pattern pattern = Pattern.compile("'([^']*)'");
294-
Matcher matcher = pattern.matcher(expression);
295-
StringBuffer sb = new StringBuffer();
296-
while (matcher.find()) {
297-
String content = matcher.group(1).replace("\\", "\\\\").replace(".", "\\.");
298-
matcher.appendReplacement(sb, "'" + content + "'");
294+
StringBuilder sb = new StringBuilder(expression.length() + 8);
295+
boolean inQuote = false;
296+
297+
for (int i = 0; i < expression.length(); i++) {
298+
char ch = expression.charAt(i);
299+
300+
if (ch == '\'') {
301+
inQuote = !inQuote;
302+
sb.append(ch);
303+
}
304+
else if (inQuote) {
305+
sb.append(ESCAPE_TEXT.getOrDefault(ch, String.valueOf(ch)));
306+
}
307+
else {
308+
sb.append(ch);
309+
}
299310
}
300-
matcher.appendTail(sb);
311+
301312
return sb.toString();
302313
}
303314

0 commit comments

Comments
 (0)