diff --git a/CHANGELOG.md b/CHANGELOG.md index 2abf20061..fcec45486 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,11 @@ - Improved log output readability in Retrievers and GraphRAG and added embedded vector to retriever result metadata for debugging. +### Fixed + +- Fixed a bug where the `$nin` operator for metadata pre-filtering in retrievers would create an invalid Cypher query. + + ## 1.6.1 ### Added diff --git a/examples/customize/retrievers/use_pre_filters.py b/examples/customize/retrievers/use_pre_filters.py index cc8c5e2fb..6e381cf3e 100644 --- a/examples/customize/retrievers/use_pre_filters.py +++ b/examples/customize/retrievers/use_pre_filters.py @@ -4,26 +4,29 @@ from neo4j_graphrag.embeddings import OpenAIEmbeddings from neo4j_graphrag.retrievers import VectorRetriever -URI = "neo4j://localhost:7687" -AUTH = ("neo4j", "password") - -INDEX_NAME = "embedding-name" +URI = "neo4j+s://demo.neo4jlabs.com" +AUTH = ("recommendations", "recommendations") +DATABASE = "recommendations" +INDEX_NAME = "moviePlotsEmbedding" DIMENSION = 1536 -# Connect to Neo4j database -driver = neo4j.GraphDatabase.driver(URI, auth=AUTH) - -# Initialize the retriever -retriever = VectorRetriever(driver, INDEX_NAME, embedder=OpenAIEmbeddings()) +# Connect to Neo4j database +with neo4j.GraphDatabase.driver(URI, auth=AUTH) as driver: + # Initialize the retriever, explicitly targeting the DATABASE defined above + retriever = VectorRetriever(driver, INDEX_NAME, embedder=OpenAIEmbeddings(), neo4j_database=DATABASE) -# Perform the search -query_text = "Find me a book about Fremen" -pre_filters = {"int_property": {"$gt": 100}} -print( - retriever.search( + # Perform the search + query_text = "Find me a movie about love" + pre_filters = {"int_property": {"$gt": 100}} + # pre_filters = { + # "year": { + # "$nin": ["1999", "2000"] + # } + # } + retriever_result = retriever.search( query_text=query_text, top_k=1, filters=pre_filters, ) -) + print(retriever_result) diff --git a/src/neo4j_graphrag/filters.py b/src/neo4j_graphrag/filters.py index 
2431a12b0..ed51863e8 100644 --- a/src/neo4j_graphrag/filters.py +++ b/src/neo4j_graphrag/filters.py @@ -100,7 +100,9 @@ def cleaned_value(self, value: list[Union[str, int, float]]) -> Any: class NinOperator(InOperator): - CYPHER_OPERATOR = "NOT IN" + def lhs(self, field: str) -> str: + lhs = super().lhs(field) + return f"NOT {lhs}" class LikeOperator(Operator): @@ -114,8 +116,8 @@ def cleaned_value(self, value: str) -> str: class ILikeOperator(LikeOperator): def lhs(self, field: str) -> str: - safe_field_cypher = self.safe_field_cypher(field) - return f"toLower({self.node_alias}.{safe_field_cypher})" + lhs = super().lhs(field) + return f"toLower({lhs})" def cleaned_value(self, value: str) -> str: value = super().cleaned_value(value) @@ -368,6 +370,5 @@ def get_metadata_filter( contains the query parameters """ param_store = ParameterStore() - return _construct_metadata_filter( - filter, param_store, node_alias=node_alias - ), param_store.params + query = _construct_metadata_filter(filter, param_store, node_alias=node_alias) + return query, param_store.params diff --git a/tests/unit/test_filters.py b/tests/unit/test_filters.py index 14065ccec..a62168d9f 100644 --- a/tests/unit/test_filters.py +++ b/tests/unit/test_filters.py @@ -165,7 +165,7 @@ def test_single_condition_cypher_nin(param_store_empty: ParameterStore) -> None: generated = _single_condition_cypher( "field", NinOperator, ["a", "b", "c"], param_store=param_store_empty ) - assert generated == "node.field NOT IN $param_0" + assert generated == "NOT node.field IN $param_0" assert param_store_empty.params == {"param_0": ["a", "b", "c"]} @@ -552,7 +552,7 @@ def test_get_metadata_filter_in_operator() -> None: def test_get_metadata_filter_not_in_operator() -> None: filters = {"field": {"$nin": ["a", "b"]}} query, params = get_metadata_filter(filters) - assert query == "node.field NOT IN $param_0" + assert query == "NOT node.field IN $param_0" assert params == {"param_0": ["a", "b"]}