From 4d5af5a580b724a910cff0b1be01b4c5fff182ce Mon Sep 17 00:00:00 2001 From: Will Tai Date: Wed, 26 Feb 2025 13:55:44 +0000 Subject: [PATCH 1/4] Raise SearchQueryParseError when HybridRetriever and HybridCypherRetriever encounters invalid Lucene string --- src/neo4j_graphrag/exceptions.py | 4 ++ src/neo4j_graphrag/retrievers/hybrid.py | 39 +++++++++----- tests/unit/retrievers/test_hybrid.py | 71 +++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 12 deletions(-) diff --git a/src/neo4j_graphrag/exceptions.py b/src/neo4j_graphrag/exceptions.py index bde7cfe83..3c0fdc0b3 100644 --- a/src/neo4j_graphrag/exceptions.py +++ b/src/neo4j_graphrag/exceptions.py @@ -128,3 +128,7 @@ class PromptMissingPlaceholderError(Neo4jGraphRagError): class InvalidHybridSearchRankerError(Neo4jGraphRagError): """Exception raised when an invalid ranker type for Hybrid Search is provided.""" + + +class SearchQueryParseError(Neo4jGraphRagError): + """Exception raised when there is a query parse error in the text search string.""" diff --git a/src/neo4j_graphrag/retrievers/hybrid.py b/src/neo4j_graphrag/retrievers/hybrid.py index 2edd5449a..fb257bf94 100644 --- a/src/neo4j_graphrag/retrievers/hybrid.py +++ b/src/neo4j_graphrag/retrievers/hybrid.py @@ -26,6 +26,7 @@ EmbeddingRequiredError, RetrieverInitializationError, SearchValidationError, + SearchQueryParseError, ) from neo4j_graphrag.neo4j_queries import get_search_query from neo4j_graphrag.retrievers.base import Retriever @@ -218,12 +219,19 @@ def get_search_results( logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters) logger.debug("HybridRetriever Cypher query: %s", search_query) - records, _, _ = self.driver.execute_query( - search_query, - parameters, - database_=self.neo4j_database, - routing_=neo4j.RoutingControl.READ, - ) + try: + records, _, _ = self.driver.execute_query( + search_query, + parameters, + database_=self.neo4j_database, + routing_=neo4j.RoutingControl.READ, + ) + except neo4j.exceptions.ClientError as e: + if "org.apache.lucene.queryparser.classic.ParseException" in str(e): + raise SearchQueryParseError( + f"Invalid Lucene query generated from query_text: {query_text}" + ) from e + raise return RawSearchResult( records=records, ) @@ -395,12 +403,19 @@ def get_search_results( logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters) logger.debug("HybridRetriever Cypher query: %s", search_query) - records, _, _ = self.driver.execute_query( - search_query, - parameters, - database_=self.neo4j_database, - routing_=neo4j.RoutingControl.READ, - ) + try: + records, _, _ = self.driver.execute_query( + search_query, + parameters, + database_=self.neo4j_database, + routing_=neo4j.RoutingControl.READ, + ) + except neo4j.exceptions.ClientError as e: + if "org.apache.lucene.queryparser.classic.ParseException" in str(e): + raise SearchQueryParseError( + f"Invalid Lucene query generated from query_text: {query_text}" + ) from e + raise return RawSearchResult( records=records, ) diff --git a/tests/unit/retrievers/test_hybrid.py b/tests/unit/retrievers/test_hybrid.py index d38f1ab10..7a6fc505b 100644 --- a/tests/unit/retrievers/test_hybrid.py +++ b/tests/unit/retrievers/test_hybrid.py @@ -21,6 +21,7 @@ EmbeddingRequiredError, RetrieverInitializationError, SearchValidationError, + SearchQueryParseError, ) from neo4j_graphrag.neo4j_queries import get_search_query from neo4j_graphrag.retrievers import HybridCypherRetriever, HybridRetriever @@ -793,3 +794,73 @@ def test_hybrid_cypher_linear_ranker( ], metadata={"__retriever": "HybridCypherRetriever"}, ) + + +@patch("neo4j_graphrag.retrievers.HybridRetriever._fetch_index_infos") +@patch("neo4j_graphrag.retrievers.base.get_version") +def test_hybrid_retriever_invalid_lucene_query_error( + mock_get_version: MagicMock, + _fetch_index_infos_mock: MagicMock, + driver: MagicMock, + embedder: MagicMock, +) -> None: + mock_get_version.return_value = ((5, 23, 0), False, False) + + error_message = ( + "Failed to invoke procedure `db.index.fulltext.queryNodes`: " + "Caused by: org.apache.lucene.queryparser.classic.ParseException: " + 'Encountered " "~aliens " at line 1, column 0.' + ) + client_error = neo4j.exceptions.ClientError(error_message) + driver.execute_query.side_effect = client_error + + retriever = HybridRetriever( + driver=driver, + vector_index_name="vector-index", + fulltext_index_name="fulltext-index", + embedder=embedder, + ) + retriever.neo4j_version_is_5_23_or_above = True + retriever._embedding_node_property = "embedding" + + with pytest.raises( + SearchQueryParseError, match="Invalid Lucene query generated from query_text" + ): + retriever.search(query_text="~aliens", top_k=5) + + +@patch("neo4j_graphrag.retrievers.HybridCypherRetriever._fetch_index_infos") +@patch("neo4j_graphrag.retrievers.base.get_version") +def test_hybrid_cypher_retriever_invalid_lucene_query_error( + mock_get_version: MagicMock, + _fetch_index_infos_mock: MagicMock, + driver: MagicMock, + embedder: MagicMock, +) -> None: + mock_get_version.return_value = ((5, 23, 0), False, False) + retrieval_query = """ + RETURN node.id AS node_id, node.text AS text, score, {test: $param} AS metadata + """ + + error_message = ( + "Failed to invoke procedure `db.index.fulltext.queryNodes`: " + "Caused by: org.apache.lucene.queryparser.classic.ParseException: " + 'Encountered " "~aliens " at line 1, column 0.' + ) + client_error = neo4j.exceptions.ClientError(error_message) + driver.execute_query.side_effect = client_error + + retriever = HybridCypherRetriever( + driver=driver, + vector_index_name="vector-index", + fulltext_index_name="fulltext-index", + embedder=embedder, + retrieval_query=retrieval_query, + ) + retriever.neo4j_version_is_5_23_or_above = True + retriever._embedding_node_property = "embedding" + + with pytest.raises( + SearchQueryParseError, match="Invalid Lucene query generated from query_text" + ): + retriever.search(query_text="~aliens", top_k=5) From 4e36222abbfedd9d123c7463ea8b16406a5264b6 Mon Sep 17 00:00:00 2001 From: Will Tai Date: Wed, 26 Feb 2025 14:23:39 +0000 Subject: [PATCH 2/4] Update Documentation for exceptions --- docs/Makefile | 2 +- docs/source/api.rst | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/docs/Makefile b/docs/Makefile index d2403af1f..8b4991a81 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -2,7 +2,7 @@ # # You can set these variables from the command line. -VERSION = SNAPSHOT +VERSION = 1.4.0 PRODUCT = neo4j-graphrag-python SPHINXOPTS = SPHINXBUILD = poetry run sphinx-build diff --git a/docs/source/api.rst b/docs/source/api.rst index b94985098..f27bf3af7 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -445,6 +445,16 @@ Errors * :class:`neo4j_graphrag.exceptions.LLMGenerationError` + * :class:`neo4j_graphrag.exceptions.SchemaValidationError` + + * :class:`neo4j_graphrag.exceptions.PdfLoaderError` + + * :class:`neo4j_graphrag.exceptions.PromptMissingPlaceholderError` + + * :class:`neo4j_graphrag.exceptions.InvalidHybridSearchRankerError` + + * :class:`neo4j_graphrag.exceptions.SearchQueryParseError` + * :class:`neo4j_graphrag.experimental.pipeline.exceptions.PipelineDefinitionError` * :class:`neo4j_graphrag.experimental.pipeline.exceptions.PipelineMissingDependencyError` @@ -559,6 +569,41 @@ LLMGenerationError :show-inheritance: +SchemaValidationError +===================== + +.. autoclass:: neo4j_graphrag.exceptions.SchemaValidationError + :show-inheritance: + + +PdfLoaderError +============== + +.. autoclass:: neo4j_graphrag.exceptions.PdfLoaderError + :show-inheritance: + + +PromptMissingPlaceholderError +============================= + +.. autoclass:: neo4j_graphrag.exceptions.PromptMissingPlaceholderError + :show-inheritance: + + +InvalidHybridSearchRankerError +============================== + +.. autoclass:: neo4j_graphrag.exceptions.InvalidHybridSearchRankerError + :show-inheritance: + + +SearchQueryParseError +===================== + +.. autoclass:: neo4j_graphrag.exceptions.SearchQueryParseError + :show-inheritance: + + PipelineDefinitionError ======================= From 78565273d8a006a0572d51ceacaa3329bed9edd6 Mon Sep 17 00:00:00 2001 From: Will Tai Date: Wed, 26 Feb 2025 14:26:10 +0000 Subject: [PATCH 3/4] Update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b70d1a82..8123110bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## Next +### Added +- Introduced SearchQueryParseError for handling invalid Lucene query strings in HybridRetriever and HybridCypherRetriever. + ## 1.5.0 ### Added From 1c42743630cbda7c43de2fb3dd6bc936e8eb33d4 Mon Sep 17 00:00:00 2001 From: Will Tai Date: Fri, 28 Feb 2025 11:28:13 +0000 Subject: [PATCH 4/4] Revert docs Makefile version to SNAPSHOT --- docs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Makefile b/docs/Makefile index 8b4991a81..d2403af1f 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -2,7 +2,7 @@ # # You can set these variables from the command line. -VERSION = 1.4.0 +VERSION = SNAPSHOT PRODUCT = neo4j-graphrag-python SPHINXOPTS = SPHINXBUILD = poetry run sphinx-build