diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b70d1a82..8123110bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## Next +### Added +- Introduced SearchQueryParseError for handling invalid Lucene query strings in HybridRetriever and HybridCypherRetriever. + ## 1.5.0 ### Added diff --git a/docs/source/api.rst b/docs/source/api.rst index b94985098..f27bf3af7 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -445,6 +445,16 @@ Errors * :class:`neo4j_graphrag.exceptions.LLMGenerationError` + * :class:`neo4j_graphrag.exceptions.SchemaValidationError` + + * :class:`neo4j_graphrag.exceptions.PdfLoaderError` + + * :class:`neo4j_graphrag.exceptions.PromptMissingPlaceholderError` + + * :class:`neo4j_graphrag.exceptions.InvalidHybridSearchRankerError` + + * :class:`neo4j_graphrag.exceptions.SearchQueryParseError` + * :class:`neo4j_graphrag.experimental.pipeline.exceptions.PipelineDefinitionError` * :class:`neo4j_graphrag.experimental.pipeline.exceptions.PipelineMissingDependencyError` @@ -559,6 +569,41 @@ LLMGenerationError :show-inheritance: +SchemaValidationError +===================== + +.. autoclass:: neo4j_graphrag.exceptions.SchemaValidationError + :show-inheritance: + + +PdfLoaderError +============== + +.. autoclass:: neo4j_graphrag.exceptions.PdfLoaderError + :show-inheritance: + + +PromptMissingPlaceholderError +============================= + +.. autoclass:: neo4j_graphrag.exceptions.PromptMissingPlaceholderError + :show-inheritance: + + +InvalidHybridSearchRankerError +============================== + +.. autoclass:: neo4j_graphrag.exceptions.InvalidHybridSearchRankerError + :show-inheritance: + + +SearchQueryParseError +===================== + +.. autoclass:: neo4j_graphrag.exceptions.SearchQueryParseError + :show-inheritance: + + PipelineDefinitionError ======================= diff --git a/src/neo4j_graphrag/exceptions.py b/src/neo4j_graphrag/exceptions.py index bde7cfe83..3c0fdc0b3 100644 --- a/src/neo4j_graphrag/exceptions.py +++ b/src/neo4j_graphrag/exceptions.py @@ -128,3 +128,7 @@ class PromptMissingPlaceholderError(Neo4jGraphRagError): class InvalidHybridSearchRankerError(Neo4jGraphRagError): """Exception raised when an invalid ranker type for Hybrid Search is provided.""" + + +class SearchQueryParseError(Neo4jGraphRagError): + """Exception raised when there is a query parse error in the text search string.""" diff --git a/src/neo4j_graphrag/retrievers/hybrid.py b/src/neo4j_graphrag/retrievers/hybrid.py index 2edd5449a..fb257bf94 100644 --- a/src/neo4j_graphrag/retrievers/hybrid.py +++ b/src/neo4j_graphrag/retrievers/hybrid.py @@ -26,6 +26,7 @@ EmbeddingRequiredError, RetrieverInitializationError, SearchValidationError, + SearchQueryParseError, ) from neo4j_graphrag.neo4j_queries import get_search_query from neo4j_graphrag.retrievers.base import Retriever @@ -218,12 +219,19 @@ def get_search_results( logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters) logger.debug("HybridRetriever Cypher query: %s", search_query) - records, _, _ = self.driver.execute_query( - search_query, - parameters, - database_=self.neo4j_database, - routing_=neo4j.RoutingControl.READ, - ) + try: + records, _, _ = self.driver.execute_query( + search_query, + parameters, + database_=self.neo4j_database, + routing_=neo4j.RoutingControl.READ, + ) + except neo4j.exceptions.ClientError as e: + if "org.apache.lucene.queryparser.classic.ParseException" in str(e): + raise SearchQueryParseError( + f"Invalid Lucene query generated from query_text: {query_text}" + ) from e + raise return RawSearchResult( records=records, ) @@ -395,12 +403,19 @@ def get_search_results( logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters) logger.debug("HybridRetriever Cypher query: %s", search_query) - records, _, _ = self.driver.execute_query( - search_query, - parameters, - database_=self.neo4j_database, - routing_=neo4j.RoutingControl.READ, - ) + try: + records, _, _ = self.driver.execute_query( + search_query, + parameters, + database_=self.neo4j_database, + routing_=neo4j.RoutingControl.READ, + ) + except neo4j.exceptions.ClientError as e: + if "org.apache.lucene.queryparser.classic.ParseException" in str(e): + raise SearchQueryParseError( + f"Invalid Lucene query generated from query_text: {query_text}" + ) from e + raise return RawSearchResult( records=records, ) diff --git a/tests/unit/retrievers/test_hybrid.py b/tests/unit/retrievers/test_hybrid.py index d38f1ab10..7a6fc505b 100644 --- a/tests/unit/retrievers/test_hybrid.py +++ b/tests/unit/retrievers/test_hybrid.py @@ -21,6 +21,7 @@ EmbeddingRequiredError, RetrieverInitializationError, SearchValidationError, + SearchQueryParseError, ) from neo4j_graphrag.neo4j_queries import get_search_query from neo4j_graphrag.retrievers import HybridCypherRetriever, HybridRetriever @@ -793,3 +794,73 @@ def test_hybrid_cypher_linear_ranker( ], metadata={"__retriever": "HybridCypherRetriever"}, ) + + +@patch("neo4j_graphrag.retrievers.HybridRetriever._fetch_index_infos") +@patch("neo4j_graphrag.retrievers.base.get_version") +def test_hybrid_retriever_invalid_lucene_query_error( + mock_get_version: MagicMock, + _fetch_index_infos_mock: MagicMock, + driver: MagicMock, + embedder: MagicMock, +) -> None: + mock_get_version.return_value = ((5, 23, 0), False, False) + + error_message = ( + "Failed to invoke procedure `db.index.fulltext.queryNodes`: " + "Caused by: org.apache.lucene.queryparser.classic.ParseException: " + 'Encountered " "~aliens " at line 1, column 0.' + ) + client_error = neo4j.exceptions.ClientError(error_message) + driver.execute_query.side_effect = client_error + + retriever = HybridRetriever( + driver=driver, + vector_index_name="vector-index", + fulltext_index_name="fulltext-index", + embedder=embedder, + ) + retriever.neo4j_version_is_5_23_or_above = True + retriever._embedding_node_property = "embedding" + + with pytest.raises( + SearchQueryParseError, match="Invalid Lucene query generated from query_text" + ): + retriever.search(query_text="~aliens", top_k=5) + + +@patch("neo4j_graphrag.retrievers.HybridCypherRetriever._fetch_index_infos") +@patch("neo4j_graphrag.retrievers.base.get_version") +def test_hybrid_cypher_retriever_invalid_lucene_query_error( + mock_get_version: MagicMock, + _fetch_index_infos_mock: MagicMock, + driver: MagicMock, + embedder: MagicMock, +) -> None: + mock_get_version.return_value = ((5, 23, 0), False, False) + retrieval_query = """ + RETURN node.id AS node_id, node.text AS text, score, {test: $param} AS metadata + """ + + error_message = ( + "Failed to invoke procedure `db.index.fulltext.queryNodes`: " + "Caused by: org.apache.lucene.queryparser.classic.ParseException: " + 'Encountered " "~aliens " at line 1, column 0.' + ) + client_error = neo4j.exceptions.ClientError(error_message) + driver.execute_query.side_effect = client_error + + retriever = HybridCypherRetriever( + driver=driver, + vector_index_name="vector-index", + fulltext_index_name="fulltext-index", + embedder=embedder, + retrieval_query=retrieval_query, + ) + retriever.neo4j_version_is_5_23_or_above = True + retriever._embedding_node_property = "embedding" + + with pytest.raises( + SearchQueryParseError, match="Invalid Lucene query generated from query_text" + ): + retriever.search(query_text="~aliens", top_k=5)