Skip to content

Prettify logs in retrievers and GraphRAG #313

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Next

### Changed

- Improved log output readability in Retrievers and GraphRAG, and added the query vector (`query_vector`) to the retriever result metadata for debugging.

## 1.6.1

### Added
Expand All @@ -12,14 +16,14 @@

- Added `enforce_schema` parameter to `SimpleKGPipeline` for optional schema enforcement.


## 1.6.0

### Added

- Added optional schema enforcement as a validation layer after entity and relation extraction.
- Introduced a linear hybrid search ranker for HybridRetriever and HybridCypherRetriever, allowing customizable ranking with an `alpha` parameter.
- Introduced SearchQueryParseError for handling invalid Lucene query strings in HybridRetriever and HybridCypherRetriever.
- Components can now be called with the `run_with_context` method, which receives an extra `context_` argument containing information about the pipeline it is run from: the `run_id`, the `task_name`, and a `notify` function that can be used to send `TASK_PROGRESS` events to the same callback as the pipeline events.

### Fixed

Expand Down
3 changes: 2 additions & 1 deletion src/neo4j_graphrag/generation/graphrag.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from neo4j_graphrag.message_history import MessageHistory
from neo4j_graphrag.retrievers.base import Retriever
from neo4j_graphrag.types import LLMMessage, RetrieverResult
from neo4j_graphrag.utils.logging import prettify

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -138,7 +139,7 @@ def search(
prompt = self.prompt_template.format(
query_text=query_text, context=context, examples=validated_data.examples
)
logger.debug(f"RAG: retriever_result={retriever_result}")
logger.debug(f"RAG: retriever_result={prettify(retriever_result)}")
logger.debug(f"RAG: prompt={prompt}")
answer = self.llm.invoke(
prompt,
Expand Down
14 changes: 5 additions & 9 deletions src/neo4j_graphrag/retrievers/hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
# limitations under the License.
from __future__ import annotations

import copy
import logging
from typing import Any, Callable, Optional, Union

Expand Down Expand Up @@ -42,6 +41,7 @@
SearchType,
HybridSearchRanker,
)
from neo4j_graphrag.utils.logging import prettify

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -213,10 +213,7 @@ def get_search_results(
if "ranker" in parameters:
del parameters["ranker"]

sanitized_parameters = copy.deepcopy(parameters)
if "query_vector" in sanitized_parameters:
sanitized_parameters["query_vector"] = "..."
logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters)
logger.debug("HybridRetriever Cypher parameters: %s", prettify(parameters))
logger.debug("HybridRetriever Cypher query: %s", search_query)

try:
Expand All @@ -234,6 +231,7 @@ def get_search_results(
raise
return RawSearchResult(
records=records,
metadata={"query_vector": query_vector},
)


Expand Down Expand Up @@ -397,10 +395,7 @@ def get_search_results(
if "ranker" in parameters:
del parameters["ranker"]

sanitized_parameters = copy.deepcopy(parameters)
if "query_vector" in sanitized_parameters:
sanitized_parameters["query_vector"] = "..."
logger.debug("HybridRetriever Cypher parameters: %s", sanitized_parameters)
logger.debug("HybridRetriever Cypher parameters: %s", prettify(parameters))
logger.debug("HybridRetriever Cypher query: %s", search_query)

try:
Expand All @@ -418,4 +413,5 @@ def get_search_results(
raise
return RawSearchResult(
records=records,
metadata={"query_vector": query_vector},
)
16 changes: 12 additions & 4 deletions src/neo4j_graphrag/retrievers/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
VectorRetrieverModel,
VectorSearchModel,
)
from neo4j_graphrag.utils.logging import prettify

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -207,7 +208,7 @@ def get_search_results(
)
parameters.update(search_params)

logger.debug("VectorRetriever Cypher parameters: %s", parameters)
logger.debug("VectorRetriever Cypher parameters: %s", prettify(parameters))
logger.debug("VectorRetriever Cypher query: %s", search_query)

records, _, _ = self.driver.execute_query(
Expand All @@ -216,7 +217,10 @@ def get_search_results(
database_=self.neo4j_database,
routing_=neo4j.RoutingControl.READ,
)
return RawSearchResult(records=records)
return RawSearchResult(
records=records,
metadata={"query_vector": query_vector},
)


class VectorCypherRetriever(Retriever):
Expand Down Expand Up @@ -351,7 +355,8 @@ def get_search_results(
raise EmbeddingRequiredError(
"Embedding method required for text query."
)
parameters["query_vector"] = self.embedder.embed_query(query_text)
query_vector = self.embedder.embed_query(query_text)
parameters["query_vector"] = query_vector
del parameters["query_text"]

if query_params:
Expand All @@ -370,7 +375,9 @@ def get_search_results(
)
parameters.update(search_params)

logger.debug("VectorCypherRetriever Cypher parameters: %s", parameters)
logger.debug(
"VectorCypherRetriever Cypher parameters: %s", prettify(parameters)
)
logger.debug("VectorCypherRetriever Cypher query: %s", search_query)

records, _, _ = self.driver.execute_query(
Expand All @@ -381,4 +388,5 @@ def get_search_results(
)
return RawSearchResult(
records=records,
metadata={"query_vector": query_vector},
)
23 changes: 16 additions & 7 deletions tests/unit/retrievers/test_hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def test_hybrid_retriever_with_result_format_function(
content="dummy-node", metadata={"score": 1.0, "node_id": 123}
),
],
metadata={"__retriever": "HybridRetriever"},
metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector},
)


Expand Down Expand Up @@ -229,7 +229,7 @@ def test_hybrid_search_text_happy_path(
items=[
RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}),
],
metadata={"__retriever": "HybridRetriever"},
metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector},
)


Expand Down Expand Up @@ -436,7 +436,7 @@ def test_hybrid_retriever_return_properties(
items=[
RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}),
],
metadata={"__retriever": "HybridRetriever"},
metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector},
)


Expand Down Expand Up @@ -511,7 +511,10 @@ def test_hybrid_cypher_retrieval_query_with_params(
metadata=None,
),
],
metadata={"__retriever": "HybridCypherRetriever"},
metadata={
"__retriever": "HybridCypherRetriever",
"query_vector": embed_query_vector,
},
)


Expand Down Expand Up @@ -554,7 +557,10 @@ def test_hybrid_cypher_retriever_with_result_format_function(
content="dummy-node", metadata={"score": 1.0, "node_id": 123}
),
],
metadata={"__retriever": "HybridCypherRetriever"},
metadata={
"__retriever": "HybridCypherRetriever",
"query_vector": embed_query_vector,
},
)


Expand Down Expand Up @@ -710,7 +716,7 @@ def test_hybrid_search_linear_ranker_happy_path(
items=[
RetrieverResultItem(content="dummy-node", metadata={"score": 1.0}),
],
metadata={"__retriever": "HybridRetriever"},
metadata={"__retriever": "HybridRetriever", "query_vector": embed_query_vector},
)


Expand Down Expand Up @@ -792,7 +798,10 @@ def test_hybrid_cypher_linear_ranker(
metadata=None,
),
],
metadata={"__retriever": "HybridCypherRetriever"},
metadata={
"__retriever": "HybridCypherRetriever",
"query_vector": embed_query_vector,
},
)


Expand Down
23 changes: 16 additions & 7 deletions tests/unit/retrievers/test_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def test_similarity_search_vector_happy_path(
metadata={"score": 1.0, "nodeLabels": None, "id": None},
),
],
metadata={"__retriever": "VectorRetriever"},
metadata={"__retriever": "VectorRetriever", "query_vector": query_vector},
)


Expand Down Expand Up @@ -208,7 +208,7 @@ def test_similarity_search_text_happy_path(
metadata={"score": 1.0, "nodeLabels": None, "id": None},
),
],
metadata={"__retriever": "VectorRetriever"},
metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector},
)


Expand Down Expand Up @@ -270,7 +270,7 @@ def test_similarity_search_text_return_properties(
metadata={"score": 1.0, "nodeLabels": None, "id": None},
),
],
metadata={"__retriever": "VectorRetriever"},
metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector},
)


Expand Down Expand Up @@ -344,7 +344,7 @@ def test_vector_retriever_with_result_format_function(
content="dummy-node", metadata={"score": 1.0, "node_id": 123}
),
],
metadata={"__retriever": "VectorRetriever"},
metadata={"__retriever": "VectorRetriever", "query_vector": embed_query_vector},
)


Expand Down Expand Up @@ -439,7 +439,10 @@ def test_retrieval_query_happy_path(
metadata=None,
),
],
metadata={"__retriever": "VectorCypherRetriever"},
metadata={
"__retriever": "VectorCypherRetriever",
"query_vector": embed_query_vector,
},
)


Expand Down Expand Up @@ -504,7 +507,10 @@ def test_retrieval_query_with_result_format_function(
content="dummy-node", metadata={"score": 1.0, "node_id": 123}
),
],
metadata={"__retriever": "VectorCypherRetriever"},
metadata={
"__retriever": "VectorCypherRetriever",
"query_vector": embed_query_vector,
},
)


Expand Down Expand Up @@ -573,7 +579,10 @@ def test_retrieval_query_with_params(
metadata=None,
),
],
metadata={"__retriever": "VectorCypherRetriever"},
metadata={
"__retriever": "VectorCypherRetriever",
"query_vector": embed_query_vector,
},
)


Expand Down