From feea1ca0d2852870b39f86ccbf9645b47f0cf944 Mon Sep 17 00:00:00 2001 From: estelle Date: Thu, 20 Mar 2025 14:11:40 +0100 Subject: [PATCH 1/3] Prettify logs in retrievers and GraphRAG and add the embedded vector to the returned result --- src/neo4j_graphrag/generation/graphrag.py | 3 ++- src/neo4j_graphrag/retrievers/hybrid.py | 14 +++++--------- src/neo4j_graphrag/retrievers/vector.py | 16 ++++++++++++---- tests/unit/retrievers/test_hybrid.py | 23 ++++++++++++++++------- tests/unit/retrievers/test_vector.py | 23 ++++++++++++++++------- 5 files changed, 51 insertions(+), 28 deletions(-) diff --git a/src/neo4j_graphrag/generation/graphrag.py b/src/neo4j_graphrag/generation/graphrag.py index a92f615d9..4f0fcdbbc 100644 --- a/src/neo4j_graphrag/generation/graphrag.py +++ b/src/neo4j_graphrag/generation/graphrag.py @@ -30,6 +30,7 @@ from neo4j_graphrag.message_history import MessageHistory from neo4j_graphrag.retrievers.base import Retriever from neo4j_graphrag.types import LLMMessage, RetrieverResult +from neo4j_graphrag.utils.logging import prettify logger = logging.getLogger(__name__) @@ -138,7 +139,7 @@ def search( prompt = self.prompt_template.format( query_text=query_text, context=context, examples=validated_data.examples ) - logger.debug(f"RAG: retriever_result={retriever_result}") + logger.debug(f"RAG: retriever_result={prettify(retriever_result)}") logger.debug(f"RAG: prompt={prompt}") answer = self.llm.invoke( prompt, diff --git a/src/neo4j_graphrag/retrievers/hybrid.py b/src/neo4j_graphrag/retrievers/hybrid.py index fb257bf94..cb71d9396 100644 --- a/src/neo4j_graphrag/retrievers/hybrid.py +++ b/src/neo4j_graphrag/retrievers/hybrid.py @@ -14,7 +14,6 @@ # limitations under the License. from __future__ import annotations -import copy import logging from typing import Any, Callable, Optional, Union @@ -42,6 +41,7 @@ SearchType, HybridSearchRanker, ) +from neo4j_graphrag.utils.logging import prettify logger = logging.getLogger(__name__) @@ -213,10 +213,7 @@ def get_search_results( if "ranker" in parameters: del parameters["ranker"] - sanitized_parameters = copy.deepcopy(parameters) - if "query_vector" in sanitized_parameters: - sanitized_parameters["query_vector"] = "..." - logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters) + logger.debug("HybridRetriever Cypher parameters: %s", prettify(parameters)) logger.debug("HybridRetriever Cypher query: %s", search_query) try: @@ -234,6 +231,7 @@ def get_search_results( raise return RawSearchResult( records=records, + metadata={"query_vector": query_vector}, ) @@ -397,10 +395,7 @@ def get_search_results( if "ranker" in parameters: del parameters["ranker"] - sanitized_parameters = copy.deepcopy(parameters) - if "query_vector" in sanitized_parameters: - sanitized_parameters["query_vector"] = "..." - logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters) + logger.debug("HybridRetriever Cypher parameters: %s", prettify(parameters)) logger.debug("HybridRetriever Cypher query: %s", search_query) try: @@ -418,4 +413,5 @@ def get_search_results( raise return RawSearchResult( records=records, + metadata={"query_vector": query_vector}, ) diff --git a/src/neo4j_graphrag/retrievers/vector.py b/src/neo4j_graphrag/retrievers/vector.py index 62023333b..fd4970d4b 100644 --- a/src/neo4j_graphrag/retrievers/vector.py +++ b/src/neo4j_graphrag/retrievers/vector.py @@ -39,6 +39,7 @@ VectorRetrieverModel, VectorSearchModel, ) +from neo4j_graphrag.utils.logging import prettify logger = logging.getLogger(__name__) @@ -207,7 +208,7 @@ def get_search_results( ) parameters.update(search_params) - logger.debug("VectorRetriever Cypher parameters: %s", parameters) + logger.debug("VectorRetriever Cypher parameters: %s", prettify(parameters)) logger.debug("VectorRetriever Cypher query: %s", search_query) records, _, _ = self.driver.execute_query( @@ -216,7 +217,10 @@ def get_search_results( database_=self.neo4j_database, routing_=neo4j.RoutingControl.READ, ) - return RawSearchResult(records=records) + return RawSearchResult( + records=records, + metadata={"query_vector": query_vector}, + ) class VectorCypherRetriever(Retriever): @@ -351,7 +355,8 @@ def get_search_results( raise EmbeddingRequiredError( "Embedding method required for text query." ) - parameters["query_vector"] = self.embedder.embed_query(query_text) + query_vector = self.embedder.embed_query(query_text) + parameters["query_vector"] = query_vector del parameters["query_text"] if query_params: @@ -370,7 +375,9 @@ def get_search_results( ) parameters.update(search_params) - logger.debug("VectorCypherRetriever Cypher parameters: %s", parameters) + logger.debug( + "VectorCypherRetriever Cypher parameters: %s", prettify(parameters) + ) logger.debug("VectorCypherRetriever Cypher query: %s", search_query) records, _, _ = self.driver.execute_query( @@ -381,4 +388,5 @@ def get_search_results( ) return RawSearchResult( records=records, + metadata={"query_vector": query_vector}, ) diff --git a/tests/unit/retrievers/test_hybrid.py b/tests/unit/retrievers/test_hybrid.py index 7a6fc505b..8430b9375 100644 --- a/tests/unit/retrievers/test_hybrid.py +++ b/tests/unit/retrievers/test_hybrid.py @@ -110,7 +110,7 @@ def test_hybrid_retriever_with_result_format_function( content="dummy-node", metadata={"score": 1.0, "node_id": 123} ), ], - metadata={"__retriever": "HybridRetriever"}, + metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector}, ) @@ -229,7 +229,7 @@ def test_hybrid_search_text_happy_path( items=[ RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}), ], - metadata={"__retriever": "HybridRetriever"}, + metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector}, ) @@ -436,7 +436,7 @@ def test_hybrid_retriever_return_properties( items=[ RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}), ], - metadata={"__retriever": "HybridRetriever"}, + metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector}, ) @@ -511,7 +511,10 @@ def test_hybrid_cypher_retrieval_query_with_params( metadata=None, ), ], - metadata={"__retriever": "HybridCypherRetriever"}, + metadata={ + "__retriever": "HybridCypherRetriever", + "query_vector": embed_query_vector, + }, ) @@ -554,7 +557,10 @@ def test_hybrid_cypher_retriever_with_result_format_function( content="dummy-node", metadata={"score": 1.0, "node_id": 123} ), ], - metadata={"__retriever": "HybridCypherRetriever"}, + metadata={ + "__retriever": "HybridCypherRetriever", + "query_vector": embed_query_vector, + }, ) @@ -710,7 +716,7 @@ def test_hybrid_search_linear_ranker_happy_path( items=[ RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}), ], - metadata={"__retriever": "HybridRetriever"}, + metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector}, ) @@ -792,7 +798,10 @@ def test_hybrid_cypher_linear_ranker( metadata=None, ), ], - metadata={"__retriever": "HybridCypherRetriever"}, + metadata={ + "__retriever": "HybridCypherRetriever", + "query_vector": embed_query_vector, + }, ) diff --git a/tests/unit/retrievers/test_vector.py b/tests/unit/retrievers/test_vector.py index 18ba9d0ba..f2ec57019 100644 --- a/tests/unit/retrievers/test_vector.py +++ b/tests/unit/retrievers/test_vector.py @@ -155,7 +155,7 @@ def test_similarity_search_vector_happy_path( metadata={"score": 1.0, "nodeLabels": None, "id": None}, ), ], - metadata={"__retriever": "VectorRetriever"}, + metadata={"__retriever": "VectorRetriever", "query_vector": query_vector}, ) @@ -208,7 +208,7 @@ def test_similarity_search_text_happy_path( metadata={"score": 1.0, "nodeLabels": None, "id": None}, ), ], - metadata={"__retriever": "VectorRetriever"}, + metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector}, ) @@ -270,7 +270,7 @@ def test_similarity_search_text_return_properties( metadata={"score": 1.0, "nodeLabels": None, "id": None}, ), ], - metadata={"__retriever": "VectorRetriever"}, + metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector}, ) @@ -344,7 +344,7 @@ def test_vector_retriever_with_result_format_function( content="dummy-node", metadata={"score": 1.0, "node_id": 123} ), ], - metadata={"__retriever": "VectorRetriever"}, + metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector}, ) @@ -439,7 +439,10 @@ def test_retrieval_query_happy_path( metadata=None, ), ], - metadata={"__retriever": "VectorCypherRetriever"}, + metadata={ + "__retriever": "VectorCypherRetriever", + "query_vector": embed_query_vector, + }, ) @@ -504,7 +507,10 @@ def test_retrieval_query_with_result_format_function( content="dummy-node", metadata={"score": 1.0, "node_id": 123} ), ], - metadata={"__retriever": "VectorCypherRetriever"}, + metadata={ + "__retriever": "VectorCypherRetriever", + "query_vector": embed_query_vector, + }, ) @@ -573,7 +579,10 @@ def test_retrieval_query_with_params( metadata=None, ), ], - metadata={"__retriever": "VectorCypherRetriever"}, + metadata={ + "__retriever": "VectorCypherRetriever", + "query_vector": embed_query_vector, + }, ) From bee8c6e8ff70f855d1e834ff40162f15e829a82a Mon Sep 17 00:00:00 2001 From: estelle Date: Thu, 20 Mar 2025 14:16:53 +0100 Subject: [PATCH 2/3] Update CHANGELOG --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78b445549..bdd243312 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,11 @@ - Added `enforce_schema` parameter to `SimpleKGPipeline` for optional schema enforcement. +### Changed + +- Improved log output readability in Retrievers and GraphRAG and added embedded vector to retriever result metadata for debugging. + + ## 1.6.0 ### Added From 53b55fcfddb9a9d90be4f324c775fefbbaee2ccf Mon Sep 17 00:00:00 2001 From: estelle Date: Fri, 28 Mar 2025 13:27:59 +0100 Subject: [PATCH 3/3] Rebase --- CHANGELOG.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bdd243312..d276f17cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Next +### Changed + +- Improved log output readability in Retrievers and GraphRAG and added embedded vector to retriever result metadata for debugging. + ## 1.6.1 ### Added @@ -12,10 +16,6 @@ - Added `enforce_schema` parameter to `SimpleKGPipeline` for optional schema enforcement. -### Changed - -- Improved log output readability in Retrievers and GraphRAG and added embedded vector to retriever result metadata for debugging. - ## 1.6.0 @@ -24,7 +24,6 @@ - Added optional schema enforcement as a validation layer after entity and relation extraction. - Introduced a linear hybrid search ranker for HybridRetriever and HybridCypherRetriever, allowing customizable ranking with an `alpha` parameter. - Introduced SearchQueryParseError for handling invalid Lucene query strings in HybridRetriever and HybridCypherRetriever. -- Components can now be called with the `run_with_context` method that gets an extra `context_` argument containing information about the pipeline it's run from: the `run_id`, `task_name` and a `notify` function that can be used to send `TASK_PROGRESS` events to the same callback as the pipeline events. ### Fixed