Skip to content

Commit 358fafd

Browse files
authored
Fix: do not return embeddings in HybridRetriever (#187)
* Fix: do not return embedding property in HybridRetriever * Update changelog * Fix changelog
1 parent ca86f26 commit 358fafd

File tree

6 files changed

+26
-5
lines changed

6 files changed

+26
-5
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
### Changed
1010
- Vector and Hybrid retrievers used with `return_properties` now also return the node labels (`nodeLabels`) and the node's element ID (`id`).
11+
- `HybridRetriever` now filters out the embedding property of the vector index named by `self.vector_index_name` from the retriever result by default.
12+
1113

1214
## 1.1.0
1315

src/neo4j_graphrag/retrievers/base.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,12 @@ def _verify_version(self) -> None:
131131
if version_tuple < target_version:
132132
raise Neo4jVersionError()
133133

134-
def _fetch_index_infos(self) -> None:
135-
"""Fetch the node label and embedding property from the index definition"""
134+
def _fetch_index_infos(self, vector_index_name: str) -> None:
135+
"""Fetch the node label and embedding property from the index definition
136+
137+
Args:
138+
vector_index_name (str): Name of the vector index
139+
"""
136140
query = (
137141
"SHOW VECTOR INDEXES "
138142
"YIELD name, labelsOrTypes, properties, options "
@@ -141,7 +145,7 @@ def _fetch_index_infos(self) -> None:
141145
"options.indexConfig.`vector.dimensions` as dimensions"
142146
)
143147
query_result = self.driver.execute_query(
144-
query, {"index_name": self.index_name}, database_=self.neo4j_database
148+
query, {"index_name": vector_index_name}, database_=self.neo4j_database
145149
)
146150
try:
147151
result = query_result.records[0]

src/neo4j_graphrag/retrievers/hybrid.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ def __init__(
117117
else None
118118
)
119119
self.result_formatter = validated_data.result_formatter
120+
self._embedding_node_property = None
121+
self._embedding_dimension = None
122+
self._fetch_index_infos(self.vector_index_name)
120123

121124
def default_record_formatter(self, record: neo4j.Record) -> RetrieverResultItem:
122125
"""
@@ -187,6 +190,7 @@ def get_search_results(
187190
search_query, _ = get_search_query(
188191
SearchType.HYBRID,
189192
self.return_properties,
193+
embedding_node_property=self._embedding_node_property,
190194
neo4j_version_is_5_23_or_above=self.neo4j_version_is_5_23_or_above,
191195
)
192196

src/neo4j_graphrag/retrievers/vector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def __init__(
123123
self._node_label = None
124124
self._embedding_node_property = None
125125
self._embedding_dimension = None
126-
self._fetch_index_infos()
126+
self._fetch_index_infos(self.index_name)
127127

128128
def default_record_formatter(self, record: neo4j.Record) -> RetrieverResultItem:
129129
"""
@@ -287,7 +287,7 @@ def __init__(
287287
self._node_label = None
288288
self._node_embedding_property = None
289289
self._embedding_dimension = None
290-
self._fetch_index_infos()
290+
self._fetch_index_infos(self.index_name)
291291

292292
def get_search_results(
293293
self,

tests/e2e/test_hybrid_e2e.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def test_hybrid_retriever_search_text(
3939
assert len(results.items) == 5
4040
for result in results.items:
4141
assert isinstance(result, RetrieverResultItem)
42+
assert "'vectorProperty': None," in result.content
4243

4344

4445
@pytest.mark.usefixtures("setup_neo4j_for_retrieval")

tests/unit/retrievers/test_hybrid.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,11 @@ def test_hybrid_cypher_retriever_invalid_database_name(
158158
assert "Input should be a valid string" in str(exc_info.value)
159159

160160

161+
@patch("neo4j_graphrag.retrievers.HybridRetriever._fetch_index_infos")
161162
@patch("neo4j_graphrag.retrievers.HybridRetriever._verify_version")
162163
def test_hybrid_search_text_happy_path(
163164
_verify_version_mock: MagicMock,
165+
_fetch_index_infos_mock: MagicMock,
164166
driver: MagicMock,
165167
embedder: MagicMock,
166168
neo4j_record: MagicMock,
@@ -176,13 +178,17 @@ def test_hybrid_search_text_happy_path(
176178
driver, vector_index_name, fulltext_index_name, embedder
177179
)
178180
retriever.neo4j_version_is_5_23_or_above = True
181+
retriever._embedding_node_property = (
182+
"embedding" # variable normally filled by fetch_index_infos
183+
)
179184
retriever.driver.execute_query.return_value = [ # type: ignore
180185
[neo4j_record],
181186
None,
182187
None,
183188
]
184189
search_query, _ = get_search_query(
185190
SearchType.HYBRID,
191+
embedding_node_property="embedding",
186192
neo4j_version_is_5_23_or_above=retriever.neo4j_version_is_5_23_or_above,
187193
)
188194

@@ -208,9 +214,11 @@ def test_hybrid_search_text_happy_path(
208214
)
209215

210216

217+
@patch("neo4j_graphrag.retrievers.HybridRetriever._fetch_index_infos")
211218
@patch("neo4j_graphrag.retrievers.HybridRetriever._verify_version")
212219
def test_hybrid_search_favors_query_vector_over_embedding_vector(
213220
_verify_version_mock: MagicMock,
221+
_fetch_index_infos_mock: MagicMock,
214222
driver: MagicMock,
215223
embedder: MagicMock,
216224
neo4j_record: MagicMock,
@@ -288,9 +296,11 @@ def test_hybrid_search_retriever_search_missing_embedder_for_text(
288296
)
289297

290298

299+
@patch("neo4j_graphrag.retrievers.HybridRetriever._fetch_index_infos")
291300
@patch("neo4j_graphrag.retrievers.HybridRetriever._verify_version")
292301
def test_hybrid_retriever_return_properties(
293302
_verify_version_mock: MagicMock,
303+
_fetch_index_infos_mock: MagicMock,
294304
driver: MagicMock,
295305
embedder: MagicMock,
296306
neo4j_record: MagicMock,

0 commit comments

Comments
 (0)