diff --git a/CHANGELOG.md b/CHANGELOG.md index 78b445549..d276f17cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Next +### Changed + +- Improved log output readability in Retrievers and GraphRAG and added embedded vector to retriever result metadata for debugging. + ## 1.6.1 ### Added @@ -12,6 +16,7 @@ - Added `enforce_schema` parameter to `SimpleKGPipeline` for optional schema enforcement. + ## 1.6.0 ### Added @@ -19,7 +24,6 @@ - Added optional schema enforcement as a validation layer after entity and relation extraction. - Introduced a linear hybrid search ranker for HybridRetriever and HybridCypherRetriever, allowing customizable ranking with an `alpha` parameter. - Introduced SearchQueryParseError for handling invalid Lucene query strings in HybridRetriever and HybridCypherRetriever. -- Components can now be called with the `run_with_context` method that gets an extra `context_` argument containing information about the pipeline it's run from: the `run_id`, `task_name` and a `notify` function that can be used to send `TASK_PROGRESS` events to the same callback as the pipeline events. ### Fixed diff --git a/src/neo4j_graphrag/generation/graphrag.py b/src/neo4j_graphrag/generation/graphrag.py index a92f615d9..4f0fcdbbc 100644 --- a/src/neo4j_graphrag/generation/graphrag.py +++ b/src/neo4j_graphrag/generation/graphrag.py @@ -30,6 +30,7 @@ from neo4j_graphrag.message_history import MessageHistory from neo4j_graphrag.retrievers.base import Retriever from neo4j_graphrag.types import LLMMessage, RetrieverResult +from neo4j_graphrag.utils.logging import prettify logger = logging.getLogger(__name__) @@ -138,7 +139,7 @@ def search( prompt = self.prompt_template.format( query_text=query_text, context=context, examples=validated_data.examples ) - logger.debug(f"RAG: retriever_result={retriever_result}") + logger.debug(f"RAG: retriever_result={prettify(retriever_result)}") logger.debug(f"RAG: prompt={prompt}") answer = self.llm.invoke( prompt, diff --git a/src/neo4j_graphrag/retrievers/hybrid.py b/src/neo4j_graphrag/retrievers/hybrid.py index fb257bf94..cb71d9396 100644 --- a/src/neo4j_graphrag/retrievers/hybrid.py +++ b/src/neo4j_graphrag/retrievers/hybrid.py @@ -14,7 +14,6 @@ # limitations under the License. from __future__ import annotations -import copy import logging from typing import Any, Callable, Optional, Union @@ -42,6 +41,7 @@ SearchType, HybridSearchRanker, ) +from neo4j_graphrag.utils.logging import prettify logger = logging.getLogger(__name__) @@ -213,10 +213,7 @@ def get_search_results( if "ranker" in parameters: del parameters["ranker"] - sanitized_parameters = copy.deepcopy(parameters) - if "query_vector" in sanitized_parameters: - sanitized_parameters["query_vector"] = "..." - logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters) + logger.debug("HybridRetriever Cypher parameters: %s", prettify(parameters)) logger.debug("HybridRetriever Cypher query: %s", search_query) try: @@ -234,6 +231,7 @@ def get_search_results( raise return RawSearchResult( records=records, + metadata={"query_vector": query_vector}, ) @@ -397,10 +395,7 @@ def get_search_results( if "ranker" in parameters: del parameters["ranker"] - sanitized_parameters = copy.deepcopy(parameters) - if "query_vector" in sanitized_parameters: - sanitized_parameters["query_vector"] = "..." - logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters) + logger.debug("HybridRetriever Cypher parameters: %s", prettify(parameters)) logger.debug("HybridRetriever Cypher query: %s", search_query) try: @@ -418,4 +413,5 @@ def get_search_results( raise return RawSearchResult( records=records, + metadata={"query_vector": query_vector}, ) diff --git a/src/neo4j_graphrag/retrievers/vector.py b/src/neo4j_graphrag/retrievers/vector.py index 62023333b..fd4970d4b 100644 --- a/src/neo4j_graphrag/retrievers/vector.py +++ b/src/neo4j_graphrag/retrievers/vector.py @@ -39,6 +39,7 @@ VectorRetrieverModel, VectorSearchModel, ) +from neo4j_graphrag.utils.logging import prettify logger = logging.getLogger(__name__) @@ -207,7 +208,7 @@ def get_search_results( ) parameters.update(search_params) - logger.debug("VectorRetriever Cypher parameters: %s", parameters) + logger.debug("VectorRetriever Cypher parameters: %s", prettify(parameters)) logger.debug("VectorRetriever Cypher query: %s", search_query) records, _, _ = self.driver.execute_query( @@ -216,7 +217,10 @@ def get_search_results( database_=self.neo4j_database, routing_=neo4j.RoutingControl.READ, ) - return RawSearchResult(records=records) + return RawSearchResult( + records=records, + metadata={"query_vector": query_vector}, + ) class VectorCypherRetriever(Retriever): @@ -351,7 +355,8 @@ def get_search_results( raise EmbeddingRequiredError( "Embedding method required for text query." ) - parameters["query_vector"] = self.embedder.embed_query(query_text) + query_vector = self.embedder.embed_query(query_text) + parameters["query_vector"] = query_vector del parameters["query_text"] if query_params: @@ -370,7 +375,9 @@ def get_search_results( ) parameters.update(search_params) - logger.debug("VectorCypherRetriever Cypher parameters: %s", parameters) + logger.debug( + "VectorCypherRetriever Cypher parameters: %s", prettify(parameters) + ) logger.debug("VectorCypherRetriever Cypher query: %s", search_query) records, _, _ = self.driver.execute_query( @@ -381,4 +388,5 @@ def get_search_results( ) return RawSearchResult( records=records, + metadata={"query_vector": query_vector}, ) diff --git a/tests/unit/retrievers/test_hybrid.py b/tests/unit/retrievers/test_hybrid.py index 7a6fc505b..8430b9375 100644 --- a/tests/unit/retrievers/test_hybrid.py +++ b/tests/unit/retrievers/test_hybrid.py @@ -110,7 +110,7 @@ def test_hybrid_retriever_with_result_format_function( content="dummy-node", metadata={"score": 1.0, "node_id": 123} ), ], - metadata={"__retriever": "HybridRetriever"}, + metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector}, ) @@ -229,7 +229,7 @@ def test_hybrid_search_text_happy_path( items=[ RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}), ], - metadata={"__retriever": "HybridRetriever"}, + metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector}, ) @@ -436,7 +436,7 @@ def test_hybrid_retriever_return_properties( items=[ RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}), ], - metadata={"__retriever": "HybridRetriever"}, + metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector}, ) @@ -511,7 +511,10 @@ def test_hybrid_cypher_retrieval_query_with_params( metadata=None, ), ], - metadata={"__retriever": "HybridCypherRetriever"}, + metadata={ + "__retriever": "HybridCypherRetriever", + "query_vector": embed_query_vector, + }, ) @@ -554,7 +557,10 @@ def test_hybrid_cypher_retriever_with_result_format_function( content="dummy-node", metadata={"score": 1.0, "node_id": 123} ), ], - metadata={"__retriever": "HybridCypherRetriever"}, + metadata={ + "__retriever": "HybridCypherRetriever", + "query_vector": embed_query_vector, + }, ) @@ -710,7 +716,7 @@ def test_hybrid_search_linear_ranker_happy_path( items=[ RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}), ], - metadata={"__retriever": "HybridRetriever"}, + metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector}, ) @@ -792,7 +798,10 @@ def test_hybrid_cypher_linear_ranker( metadata=None, ), ], - metadata={"__retriever": "HybridCypherRetriever"}, + metadata={ + "__retriever": "HybridCypherRetriever", + "query_vector": embed_query_vector, + }, ) diff --git a/tests/unit/retrievers/test_vector.py b/tests/unit/retrievers/test_vector.py index 18ba9d0ba..f2ec57019 100644 --- a/tests/unit/retrievers/test_vector.py +++ b/tests/unit/retrievers/test_vector.py @@ -155,7 +155,7 @@ def test_similarity_search_vector_happy_path( metadata={"score": 1.0, "nodeLabels": None, "id": None}, ), ], - metadata={"__retriever": "VectorRetriever"}, + metadata={"__retriever": "VectorRetriever", "query_vector": query_vector}, ) @@ -208,7 +208,7 @@ def test_similarity_search_text_happy_path( metadata={"score": 1.0, "nodeLabels": None, "id": None}, ), ], - metadata={"__retriever": "VectorRetriever"}, + metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector}, ) @@ -270,7 +270,7 @@ def test_similarity_search_text_return_properties( metadata={"score": 1.0, "nodeLabels": None, "id": None}, ), ], - metadata={"__retriever": "VectorRetriever"}, + metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector}, ) @@ -344,7 +344,7 @@ def test_vector_retriever_with_result_format_function( content="dummy-node", metadata={"score": 1.0, "node_id": 123} ), ], - metadata={"__retriever": "VectorRetriever"}, + metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector}, ) @@ -439,7 +439,10 @@ def test_retrieval_query_happy_path( metadata=None, ), ], - metadata={"__retriever": "VectorCypherRetriever"}, + metadata={ + "__retriever": "VectorCypherRetriever", + "query_vector": embed_query_vector, + }, ) @@ -504,7 +507,10 @@ def test_retrieval_query_with_result_format_function( content="dummy-node", metadata={"score": 1.0, "node_id": 123} ), ], - metadata={"__retriever": "VectorCypherRetriever"}, + metadata={ + "__retriever": "VectorCypherRetriever", + "query_vector": embed_query_vector, + }, ) @@ -573,7 +579,10 @@ def test_retrieval_query_with_params( metadata=None, ), ], - metadata={"__retriever": "VectorCypherRetriever"}, + metadata={ + "__retriever": "VectorCypherRetriever", + "query_vector": embed_query_vector, + }, )