Skip to content

Fix "NOT IN" operator #327

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits
Apr 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@

- Improved log output readability in Retrievers and GraphRAG and added embedded vector to retriever result metadata for debugging.

### Fixed

- Fixed a bug where the `$nin` operator for metadata pre-filtering in retrievers would create an invalid Cypher query (`node.field NOT IN $param`); the filter is now generated as `NOT node.field IN $param`.


## 1.6.1

### Added
Expand Down
33 changes: 18 additions & 15 deletions examples/customize/retrievers/use_pre_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,29 @@
from neo4j_graphrag.embeddings import OpenAIEmbeddings
from neo4j_graphrag.retrievers import VectorRetriever

URI = "neo4j://localhost:7687"
AUTH = ("neo4j", "password")

INDEX_NAME = "embedding-name"
URI = "neo4j+s://demo.neo4jlabs.com"
AUTH = ("recommendations", "recommendations")
DATABASE = "recommendations"
INDEX_NAME = "moviePlotsEmbedding"
DIMENSION = 1536

# Connect to Neo4j database
driver = neo4j.GraphDatabase.driver(URI, auth=AUTH)


# Initialize the retriever
retriever = VectorRetriever(driver, INDEX_NAME, embedder=OpenAIEmbeddings())
# Connect to Neo4j database
with neo4j.GraphDatabase.driver(URI, auth=AUTH) as driver:
# Initialize the retriever
retriever = VectorRetriever(driver, INDEX_NAME, embedder=OpenAIEmbeddings())

# Perform the search
query_text = "Find me a book about Fremen"
pre_filters = {"int_property": {"$gt": 100}}
print(
retriever.search(
# Perform the search
query_text = "Find me a movie about love"
pre_filters = {"int_property": {"$gt": 100}}
# pre_filters = {
# "year": {
# "$nin": ["1999", "2000"]
# }
# }
retriever_result = retriever.search(
query_text=query_text,
top_k=1,
filters=pre_filters,
)
)
print(retriever_result)
13 changes: 7 additions & 6 deletions src/neo4j_graphrag/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@ def cleaned_value(self, value: list[Union[str, int, float]]) -> Any:


class NinOperator(InOperator):
CYPHER_OPERATOR = "NOT IN"
def lhs(self, field: str) -> str:
lhs = super().lhs(field)
return f"NOT {lhs}"


class LikeOperator(Operator):
Expand All @@ -114,8 +116,8 @@ def cleaned_value(self, value: str) -> str:

class ILikeOperator(LikeOperator):
def lhs(self, field: str) -> str:
safe_field_cypher = self.safe_field_cypher(field)
return f"toLower({self.node_alias}.{safe_field_cypher})"
lhs = super().lhs(field)
return f"toLower({lhs})"

def cleaned_value(self, value: str) -> str:
value = super().cleaned_value(value)
Expand Down Expand Up @@ -368,6 +370,5 @@ def get_metadata_filter(
contains the query parameters
"""
param_store = ParameterStore()
return _construct_metadata_filter(
filter, param_store, node_alias=node_alias
), param_store.params
query = _construct_metadata_filter(filter, param_store, node_alias=node_alias)
return query, param_store.params
4 changes: 2 additions & 2 deletions tests/unit/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def test_single_condition_cypher_nin(param_store_empty: ParameterStore) -> None:
generated = _single_condition_cypher(
"field", NinOperator, ["a", "b", "c"], param_store=param_store_empty
)
assert generated == "node.field NOT IN $param_0"
assert generated == "NOT node.field IN $param_0"
assert param_store_empty.params == {"param_0": ["a", "b", "c"]}


Expand Down Expand Up @@ -552,7 +552,7 @@ def test_get_metadata_filter_in_operator() -> None:
def test_get_metadata_filter_not_in_operator() -> None:
filters = {"field": {"$nin": ["a", "b"]}}
query, params = get_metadata_filter(filters)
assert query == "node.field NOT IN $param_0"
assert query == "NOT node.field IN $param_0"
assert params == {"param_0": ["a", "b"]}


Expand Down