14
14
# limitations under the License.
15
15
from __future__ import annotations
16
16
17
+ import warnings
17
18
from typing import Any , Optional
18
19
19
20
from neo4j_graphrag .filters import get_metadata_filter
20
21
from neo4j_graphrag .types import IndexType , SearchType
21
22
22
23
# Scoring/ordering tail of the exact (non-index) vector search.
# Format with `embedding_property`; the resulting Cypher reads the
# `$query_vector` and `$top_k` query parameters at execution time.
VECTOR_EXACT_QUERY = (
    "WITH node, "
    "vector.similarity.cosine(node.`{embedding_property}`, $query_vector) AS score "
    "ORDER BY score DESC LIMIT $top_k"
)
27
28
28
29
# MATCH/WHERE head of the exact vector search: keeps only nodes of the given
# label whose embedding property is set and has the expected dimension.
# Format with `node_label` and `embedding_property`; the resulting Cypher reads
# the `$embedding_dimension` query parameter at execution time.
BASE_VECTOR_EXACT_QUERY = (
    "MATCH (node:`{node_label}`) "
    "WHERE node.`{embedding_property}` IS NOT NULL "
    "AND size(node.`{embedding_property}`) = toInteger($embedding_dimension)"
)
33
34
34
35
@@ -151,7 +152,7 @@ def _get_hybrid_query(
151
152
def _get_filtered_vector_query(
    filters: dict[str, Any],
    node_label: str,
    embedding_property: str,
    embedding_dimension: int,
) -> tuple[str, dict[str, Any]]:
    """Build Cypher query for vector search with filters

    The query is assembled from ``BASE_VECTOR_EXACT_QUERY`` (label, non-null
    and dimension checks), the WHERE clause produced by
    ``get_metadata_filter`` and ``VECTOR_EXACT_QUERY`` (cosine scoring,
    ordering and limit).

    Args:
        filters (dict[str, Any]): filters used to pre-filter the nodes before vector search
        node_label (str): node label we want to search for
        embedding_property (str): the name of the property holding the embeddings
        embedding_dimension (int): the dimension of the embeddings

    Returns:
        tuple[str, dict[str, Any]]: the Cypher query and its parameters. The
            parameters are those returned by ``get_metadata_filter`` plus
            ``embedding_dimension``; ``$query_vector`` and ``$top_k`` are
            referenced by the query but are not set here.
    """
    where_filters, query_params = get_metadata_filter(filters, node_alias="node")
    base_query = BASE_VECTOR_EXACT_QUERY.format(
        node_label=node_label,
        embedding_property=embedding_property,
    )
    vector_query = VECTOR_EXACT_QUERY.format(
        embedding_property=embedding_property,
    )
    # The base query compares the stored vector size against this parameter.
    query_params["embedding_dimension"] = embedding_dimension
    return f"{base_query} AND ({where_filters}) {vector_query}", query_params
179
180
180
181
181
182
def get_search_query(
    search_type: SearchType,
    index_type: IndexType = IndexType.NODE,
    return_properties: Optional[list[str]] = None,
    retrieval_query: Optional[str] = None,
    node_label: Optional[str] = None,
    embedding_node_property: Optional[str] = None,
    embedding_property: Optional[str] = None,
    embedding_dimension: Optional[int] = None,
    filters: Optional[dict[str, Any]] = None,
    neo4j_version_is_5_23_or_above: bool = False,
) -> tuple[str, dict[str, Any]]:
    """
    Constructs a search query for vector or hybrid search, including optional pre-filtering
    and return clause.

    Args:
        search_type (SearchType): Specifies whether to perform a vector or hybrid search.
        index_type (IndexType): Specifies whether to search over node or
            relationship indexes. Defaults to ``IndexType.NODE``.
        return_properties (Optional[list[str]]): List of property names to return.
            Cannot be provided alongside `retrieval_query`.
        retrieval_query (Optional[str]): Query used to retrieve search results.
            Cannot be provided alongside `return_properties`.
        node_label (Optional[str]): Label of the nodes to search.
        embedding_node_property (Optional[str]): Deprecated alias of
            `embedding_property`. Used only when `embedding_property` is not provided.
        embedding_property (Optional[str]): Name of the property containing the embeddings.
        embedding_dimension (Optional[int]): Dimension of the embeddings.
        filters (Optional[dict[str, Any]]): Filters to pre-filter nodes before vector search.
        neo4j_version_is_5_23_or_above (bool): Whether the Neo4j version is 5.23 or above.
            Forwarded to the hybrid query builder.

    Returns:
        tuple[str, dict[str, Any]]: A tuple containing the constructed query string and
            a dictionary of query parameters.

    Raises:
        Exception: If filters are used with Hybrid Search.
        Exception: If Vector Search with filters is missing required parameters.
        Exception: If Hybrid Search is requested on a relationship index.
        ValueError: If an unsupported search type or index type is provided.
    """
    # Only warn when the deprecated argument is actually supplied; warning on
    # every call would flag callers that already use `embedding_property`.
    if embedding_node_property:
        warnings.warn(
            "embedding_node_property is deprecated, use embedding_property instead",
            DeprecationWarning,
            stacklevel=2,
        )
        if embedding_property:
            warnings.warn(
                "Both embedding_node_property and embedding_property provided, using embedding_property",
                UserWarning,
                stacklevel=2,
            )
        else:
            embedding_property = embedding_node_property

    params: dict[str, Any] = {}
    if index_type == IndexType.NODE:
        if search_type == SearchType.HYBRID:
            if filters:
                raise Exception("Filters are not supported with Hybrid Search")
            query = _get_hybrid_query(neo4j_version_is_5_23_or_above)
        elif search_type == SearchType.VECTOR:
            if filters:
                if (
                    node_label is not None
                    and embedding_property is not None
                    and embedding_dimension is not None
                ):
                    query, params = _get_filtered_vector_query(
                        filters,
                        node_label,
                        embedding_property,
                        embedding_dimension,
                    )
                else:
                    raise Exception(
                        "Vector Search with filters requires: node_label, embedding_property, embedding_dimension"
                    )
            else:
                query = _get_vector_search_query(index_type=index_type)
        else:
            raise ValueError(f"Search type is not supported: {search_type}")
        # Default RETURN: every node property, with the embedding overridden
        # to null. NOTE(review): when no embedding property is given the
        # f-string renders the key as `None`; kept as-is for compatibility.
        fallback_return = (
            f"RETURN node {{ .*, `{embedding_property}`: null }} AS node, "
            "labels(node) AS nodeLabels, elementId(node) AS elementId, score"
        )
    elif index_type == IndexType.RELATIONSHIP:
        if search_type == SearchType.HYBRID:
            raise Exception("Hybrid search is not support for relationship indexes")
        elif search_type == SearchType.VECTOR:
            query = _get_vector_search_query(index_type=index_type)
        else:
            raise ValueError(f"Search type is not supported: {search_type}")
        fallback_return = (
            f"RETURN relationship {{ .*, `{embedding_property}`: null }} AS relationship, "
            "elementId(relationship) AS elementId, score"
        )
    else:
        raise ValueError(f"Index type is not supported: {index_type}")

    query_tail = get_query_tail(
        retrieval_query,
        return_properties,
        fallback_return=fallback_return,
        index_type=index_type,
    )

    return f"{query} {query_tail}", params
237
289
238
290
239
291
def get_query_tail(
    retrieval_query: Optional[str] = None,
    return_properties: Optional[list[str]] = None,
    fallback_return: Optional[str] = None,
    index_type: IndexType = IndexType.NODE,
) -> str:
    """Build the RETURN statement after the search is performed

    The first non-empty option wins, in this order: `retrieval_query`,
    `return_properties`, `fallback_return`.

    Args:
        retrieval_query (Optional[str]): custom query tail, returned unchanged
            when provided.
        return_properties (Optional[list[str]]): property names to project
            from the matched node or relationship.
        fallback_return (Optional[str]): RETURN statement used when neither
            of the above is provided.
        index_type (IndexType): whether the projected variable is `node` or
            `relationship`. Only consulted when `return_properties` is used.

    Returns:
        str: the query tail, or an empty string when nothing was provided.

    Raises:
        ValueError: If `return_properties` is given with an unsupported index type.
    """
    # NOTE(review): this guard is not visible in the diff hunk and was
    # reconstructed from the `return retrieval_query` that follows it; confirm.
    if retrieval_query:
        return retrieval_query
    if return_properties:
        return_properties_cypher = ", ".join(f".{prop}" for prop in return_properties)
        if index_type == IndexType.NODE:
            return f"RETURN node {{{return_properties_cypher}}} AS node, labels(node) AS nodeLabels, elementId(node) AS elementId, score"
        elif index_type == IndexType.RELATIONSHIP:
            return f"RETURN relationship {{{return_properties_cypher}}} AS relationship, elementId(relationship) AS elementId, score"
        else:
            raise ValueError(f"Index type is not supported: {index_type}")
    return fallback_return if fallback_return else ""
0 commit comments