Skip to content

Commit 5c40b03

Browse files
authored
Fixes the hybrid retriever query (#114)
* Fixes the hybrid retriever query
* Updated hybrid query to make it more readable
* Updated CHANGELOG
1 parent cced13b commit 5c40b03

File tree

3 files changed

+25
-12
lines changed

3 files changed

+25
-12
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
## Next
44

5+
## 0.5.0
6+
7+
### Fixed
8+
- Corrected the hybrid retriever query to ensure proper normalization of scores in vector search results.
9+
510
## 0.4.0
611

712
### Added

src/neo4j_genai/neo4j_queries.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,14 @@
5353
def _get_hybrid_query() -> str:
5454
return (
5555
f"CALL {{ {VECTOR_INDEX_QUERY} "
56-
f"RETURN node, score "
56+
f"WITH collect({{node:node, score:score}}) AS nodes, max(score) AS vector_index_max_score "
57+
f"UNWIND nodes AS n "
58+
f"RETURN n.node AS node, (n.score / vector_index_max_score) AS score "
5759
f"UNION "
5860
f"{FULL_TEXT_SEARCH_QUERY} "
59-
f"WITH collect({{node:node, score:score}}) AS nodes, max(score) AS max "
61+
f"WITH collect({{node:node, score:score}}) AS nodes, max(score) AS ft_index_max_score "
6062
f"UNWIND nodes AS n "
61-
f"RETURN n.node AS node, (n.score / max) AS score }} "
63+
f"RETURN n.node AS node, (n.score / ft_index_max_score) AS score }} "
6264
f"WITH node, max(score) AS score ORDER BY score DESC LIMIT $top_k"
6365
)
6466

tests/unit/test_neo4j_queries.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,14 @@ def test_hybrid_search_basic() -> None:
3535
"CALL { "
3636
"CALL db.index.vector.queryNodes($vector_index_name, $top_k, $query_vector) "
3737
"YIELD node, score "
38-
"RETURN node, score UNION "
38+
"WITH collect({node:node, score:score}) AS nodes, max(score) AS vector_index_max_score "
39+
"UNWIND nodes AS n "
40+
"RETURN n.node AS node, (n.score / vector_index_max_score) AS score UNION "
3941
"CALL db.index.fulltext.queryNodes($fulltext_index_name, $query_text, {limit: $top_k}) "
4042
"YIELD node, score "
41-
"WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
43+
"WITH collect({node:node, score:score}) AS nodes, max(score) AS ft_index_max_score "
4244
"UNWIND nodes AS n "
43-
"RETURN n.node AS node, (n.score / max) AS score "
45+
"RETURN n.node AS node, (n.score / ft_index_max_score) AS score "
4446
"} "
4547
"WITH node, max(score) AS score ORDER BY score DESC LIMIT $top_k "
4648
"RETURN node { .*, `None`: null } AS node, labels(node) AS nodeLabels, elementId(node) AS id, score"
@@ -125,12 +127,14 @@ def test_hybrid_search_with_retrieval_query() -> None:
125127
"CALL { "
126128
"CALL db.index.vector.queryNodes($vector_index_name, $top_k, $query_vector) "
127129
"YIELD node, score "
128-
"RETURN node, score UNION "
130+
"WITH collect({node:node, score:score}) AS nodes, max(score) AS vector_index_max_score "
131+
"UNWIND nodes AS n "
132+
"RETURN n.node AS node, (n.score / vector_index_max_score) AS score UNION "
129133
"CALL db.index.fulltext.queryNodes($fulltext_index_name, $query_text, {limit: $top_k}) "
130134
"YIELD node, score "
131-
"WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
135+
"WITH collect({node:node, score:score}) AS nodes, max(score) AS ft_index_max_score "
132136
"UNWIND nodes AS n "
133-
"RETURN n.node AS node, (n.score / max) AS score "
137+
"RETURN n.node AS node, (n.score / ft_index_max_score) AS score "
134138
"} "
135139
"WITH node, max(score) AS score ORDER BY score DESC LIMIT $top_k "
136140
+ retrieval_query
@@ -145,12 +149,14 @@ def test_hybrid_search_with_properties() -> None:
145149
"CALL { "
146150
"CALL db.index.vector.queryNodes($vector_index_name, $top_k, $query_vector) "
147151
"YIELD node, score "
148-
"RETURN node, score UNION "
152+
"WITH collect({node:node, score:score}) AS nodes, max(score) AS vector_index_max_score "
153+
"UNWIND nodes AS n "
154+
"RETURN n.node AS node, (n.score / vector_index_max_score) AS score UNION "
149155
"CALL db.index.fulltext.queryNodes($fulltext_index_name, $query_text, {limit: $top_k}) "
150156
"YIELD node, score "
151-
"WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
157+
"WITH collect({node:node, score:score}) AS nodes, max(score) AS ft_index_max_score "
152158
"UNWIND nodes AS n "
153-
"RETURN n.node AS node, (n.score / max) AS score "
159+
"RETURN n.node AS node, (n.score / ft_index_max_score) AS score "
154160
"} "
155161
"WITH node, max(score) AS score ORDER BY score DESC LIMIT $top_k "
156162
"RETURN node {.name, .age} as node, score"

0 commit comments

Comments
 (0)