Skip to content

Fixed support for VectorStore search filters with multiple $like/$ilike/$in/$nin on same column #217

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 34 additions & 31 deletions langchain_postgres/v2/async_vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,8 @@ async def aadd_embeddings(
values_stmt = "VALUES (:langchain_id, :content, :embedding"

if not embedding and can_inline_embed:
values_stmt = f"VALUES (:langchain_id, :content, {self.embedding_service.embed_query_inline(content)}" # type: ignore
# type: ignore
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move the comment back to the original location in order for the lint to pass?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Thank you

values_stmt = f"VALUES (:langchain_id, :content, {self.embedding_service.embed_query_inline(content)}"

if self.hybrid_search_config and self.hybrid_search_config.tsv_column:
lang = (
Expand Down Expand Up @@ -1095,46 +1096,48 @@ def _handle_field_filter(
operator = "$eq"
filter_value = value

suffix_id = str(uuid.uuid4()).split("-")[0]
if operator in COMPARISONS_TO_NATIVE:
# Then we implement an equality filter
# native is trusted input
native = COMPARISONS_TO_NATIVE[operator]
id = str(uuid.uuid4()).split("-")[0]
return f"{field} {native} :{field}_{id}", {f"{field}_{id}": filter_value}
param_name = f"{field}_{suffix_id}"
return f"{field} {native} :{param_name}", {f"{param_name}": filter_value}
elif operator == "$between":
# Use AND with two comparisons
low, high = filter_value

return f"({field} BETWEEN :{field}_low AND :{field}_high)", {
f"{field}_low": low,
f"{field}_high": high,
low_param_name = f"{field}_low_{suffix_id}"
high_param_name = f"{field}_high_{suffix_id}"
return f"({field} BETWEEN :{low_param_name} AND :{high_param_name})", {
f"{low_param_name}": low,
f"{high_param_name}": high,
}
elif operator in {"$in", "$nin", "$like", "$ilike"}:
elif operator in {"$in", "$nin"}:
# We'll do force coercion to text
if operator in {"$in", "$nin"}:
for val in filter_value:
if not isinstance(val, (str, int, float)):
raise NotImplementedError(
f"Unsupported type: {type(val)} for value: {val}"
)

if isinstance(val, bool): # b/c bool is an instance of int
raise NotImplementedError(
f"Unsupported type: {type(val)} for value: {val}"
)

if operator in {"$in"}:
return f"{field} = ANY(:{field}_in)", {f"{field}_in": filter_value}
elif operator in {"$nin"}:
return f"{field} <> ALL (:{field}_nin)", {f"{field}_nin": filter_value}
elif operator in {"$like"}:
return f"({field} LIKE :{field}_like)", {f"{field}_like": filter_value}
elif operator in {"$ilike"}:
return f"({field} ILIKE :{field}_ilike)", {
f"{field}_ilike": filter_value
for val in filter_value:
if not isinstance(val, (str, int, float)):
raise NotImplementedError(
f"Unsupported type: {type(val)} for value: {val}"
)

if isinstance(val, bool): # b/c bool is an instance of int
raise NotImplementedError(
f"Unsupported type: {type(val)} for value: {val}"
)
param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}"
if operator == "$in":
return f"{field} = ANY(:{param_name})", {f"{param_name}": filter_value}
else: # i.e. $nin
return f"{field} <> ALL (:{param_name})", {
f"{param_name}": filter_value
}
else:
raise NotImplementedError()

elif operator in {"$like", "$ilike"}:
param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}"
if operator == "$like":
return f"({field} LIKE :{param_name})", {f"{param_name}": filter_value}
else: # i.e. $ilike
return f"({field} ILIKE :{param_name})", {f"{param_name}": filter_value}
elif operator == "$exists":
if not isinstance(filter_value, bool):
raise ValueError(
Expand Down
7 changes: 6 additions & 1 deletion tests/unit_tests/fixtures/metadata_filtering_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@
{"name": {"$nin": ["Smart Fitness Tracker", "Stainless Steel Water Bottle"]}},
["WH001", "EC002"],
),
## with numeric fields
# with numeric fields
(
{"available_quantity": {"$nin": [50, 0, 10]}},
["FT004"],
Expand All @@ -225,6 +225,11 @@
{"name": {"$like": "%less%"}}, # adam and jane
["WH001", "WB003"],
),
# Test combination of $like and $or
(
{"$or": [{"code": {"$like": "WH00%"}}, {"code": {"$like": "EC00%"}}]},
["WH001", "EC002"],
),
# These involve the special operator $exists
(
{"tags": {"$exists": False}},
Expand Down
Loading