From bd53ecc129be3550be9778a430d71a793726b0a9 Mon Sep 17 00:00:00 2001 From: "AD101\\z004nm6m" Date: Fri, 13 Jun 2025 15:45:40 +0000 Subject: [PATCH 1/4] added new test case --- tests/unit_tests/fixtures/metadata_filtering_data.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/unit_tests/fixtures/metadata_filtering_data.py b/tests/unit_tests/fixtures/metadata_filtering_data.py index 62a114d1..d115aa19 100644 --- a/tests/unit_tests/fixtures/metadata_filtering_data.py +++ b/tests/unit_tests/fixtures/metadata_filtering_data.py @@ -210,7 +210,7 @@ {"name": {"$nin": ["Smart Fitness Tracker", "Stainless Steel Water Bottle"]}}, ["WH001", "EC002"], ), - ## with numeric fields + # with numeric fields ( {"available_quantity": {"$nin": [50, 0, 10]}}, ["FT004"], @@ -225,6 +225,11 @@ {"name": {"$like": "%less%"}}, # adam and jane ["WH001", "WB003"], ), + # Test combination of $like and $and + ( + {"$or": [{"code": {"$like": "WH001"}}, {"code": {"$like": "EC002"}}]}, + ["WH001", "EC002"], + ), # These involve the special operator $exists ( {"tags": {"$exists": False}}, From 997a3100f4b2e619a7dbe8c46c982745c0a264e7 Mon Sep 17 00:00:00 2001 From: "AD101\\z004nm6m" Date: Fri, 13 Jun 2025 15:46:58 +0000 Subject: [PATCH 2/4] Fixed async_vectorstore filter function --- langchain_postgres/v2/async_vectorstore.py | 65 +++++++++++----------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/langchain_postgres/v2/async_vectorstore.py b/langchain_postgres/v2/async_vectorstore.py index f07082c9..85e50216 100644 --- a/langchain_postgres/v2/async_vectorstore.py +++ b/langchain_postgres/v2/async_vectorstore.py @@ -303,7 +303,8 @@ async def aadd_embeddings( values_stmt = "VALUES (:langchain_id, :content, :embedding" if not embedding and can_inline_embed: - values_stmt = f"VALUES (:langchain_id, :content, {self.embedding_service.embed_query_inline(content)}" # type: ignore + # type: ignore + values_stmt = f"VALUES (:langchain_id, :content, 
{self.embedding_service.embed_query_inline(content)}" if self.hybrid_search_config and self.hybrid_search_config.tsv_column: lang = ( @@ -1095,46 +1096,48 @@ def _handle_field_filter( operator = "$eq" filter_value = value + suffix_id = str(uuid.uuid4()).split("-")[0] if operator in COMPARISONS_TO_NATIVE: # Then we implement an equality filter # native is trusted input native = COMPARISONS_TO_NATIVE[operator] - id = str(uuid.uuid4()).split("-")[0] - return f"{field} {native} :{field}_{id}", {f"{field}_{id}": filter_value} + param_name = f"{field}_{suffix_id}" + return f"{field} {native} :{param_name}", {f"{param_name}": filter_value} elif operator == "$between": # Use AND with two comparisons low, high = filter_value - - return f"({field} BETWEEN :{field}_low AND :{field}_high)", { - f"{field}_low": low, - f"{field}_high": high, + low_param_name = f"{field}_low_{suffix_id}" + high_param_name = f"{field}_high_{suffix_id}" + return f"({field} BETWEEN :{low_param_name} AND :{high_param_name})", { + f"{low_param_name}": low, + f"{high_param_name}": high, } - elif operator in {"$in", "$nin", "$like", "$ilike"}: + elif operator in {"$in", "$nin"}: # We'll do force coercion to text - if operator in {"$in", "$nin"}: - for val in filter_value: - if not isinstance(val, (str, int, float)): - raise NotImplementedError( - f"Unsupported type: {type(val)} for value: {val}" - ) - - if isinstance(val, bool): # b/c bool is an instance of int - raise NotImplementedError( - f"Unsupported type: {type(val)} for value: {val}" - ) - - if operator in {"$in"}: - return f"{field} = ANY(:{field}_in)", {f"{field}_in": filter_value} - elif operator in {"$nin"}: - return f"{field} <> ALL (:{field}_nin)", {f"{field}_nin": filter_value} - elif operator in {"$like"}: - return f"({field} LIKE :{field}_like)", {f"{field}_like": filter_value} - elif operator in {"$ilike"}: - return f"({field} ILIKE :{field}_ilike)", { - f"{field}_ilike": filter_value + for val in filter_value: + if not isinstance(val, 
(str, int, float)): + raise NotImplementedError( + f"Unsupported type: {type(val)} for value: {val}" + ) + + if isinstance(val, bool): # b/c bool is an instance of int + raise NotImplementedError( + f"Unsupported type: {type(val)} for value: {val}" + ) + param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}" + if operator == "$in": + return f"{field} = ANY(:{param_name})", {f"{param_name}": filter_value} + else: # i.e. $nin + return f"{field} <> ALL (:{param_name})", { + f"{param_name}": filter_value } - else: - raise NotImplementedError() + + elif operator in {"$like", "$ilike"}: + param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}" + if operator == "$like": + return f"({field} LIKE :{param_name})", {f"{param_name}": filter_value} + else: # i.e. $ilike + return f"({field} ILIKE :{param_name})", {f"{param_name}": filter_value} elif operator == "$exists": if not isinstance(filter_value, bool): raise ValueError( From 2e292f6fb10fe80ab49f996731599b3f89ff8bc5 Mon Sep 17 00:00:00 2001 From: "AD101\\z004nm6m" Date: Fri, 13 Jun 2025 15:54:22 +0000 Subject: [PATCH 3/4] Added wildcards to the testcase value --- tests/unit_tests/fixtures/metadata_filtering_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/fixtures/metadata_filtering_data.py b/tests/unit_tests/fixtures/metadata_filtering_data.py index d115aa19..8df8c01c 100644 --- a/tests/unit_tests/fixtures/metadata_filtering_data.py +++ b/tests/unit_tests/fixtures/metadata_filtering_data.py @@ -227,7 +227,7 @@ ), # Test combination of $like and $and ( - {"$or": [{"code": {"$like": "WH001"}}, {"code": {"$like": "EC002"}}]}, + {"$or": [{"code": {"$like": "WH00%"}}, {"code": {"$like": "EC00%"}}]}, ["WH001", "EC002"], ), # These involve the special operator $exists From 6bb304b7e437c90cd0db1297b87aaa79bba1650c Mon Sep 17 00:00:00 2001 From: "AD101\\z004nm6m" Date: Fri, 13 Jun 2025 20:12:30 +0000 Subject: [PATCH 4/4] moved comment to original position --- 
langchain_postgres/v2/async_vectorstore.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/langchain_postgres/v2/async_vectorstore.py b/langchain_postgres/v2/async_vectorstore.py index 85e50216..fd0bfd75 100644 --- a/langchain_postgres/v2/async_vectorstore.py +++ b/langchain_postgres/v2/async_vectorstore.py @@ -303,8 +303,7 @@ async def aadd_embeddings( values_stmt = "VALUES (:langchain_id, :content, :embedding" if not embedding and can_inline_embed: - # type: ignore - values_stmt = f"VALUES (:langchain_id, :content, {self.embedding_service.embed_query_inline(content)}" + values_stmt = f"VALUES (:langchain_id, :content, {self.embedding_service.embed_query_inline(content)}" # type: ignore if self.hybrid_search_config and self.hybrid_search_config.tsv_column: lang = (