|
52 | 52 | "YIELD node, score"
|
53 | 53 | )
|
54 | 54 |
|
55 |
| -UPSERT_NODE_QUERY = ( |
56 |
| - "UNWIND $rows AS row " |
57 |
| - "CREATE (n:__KGBuilder__) " |
58 |
| - "SET n += row.properties " |
59 |
| - "WITH n, row CALL apoc.create.addLabels(n, row.labels) YIELD node " |
60 |
| - "WITH node as n, row CALL { " |
61 |
| - "WITH n, row WITH n, row WHERE row.embedding_properties IS NOT NULL " |
62 |
| - "UNWIND keys(row.embedding_properties) as emb " |
63 |
| - "CALL db.create.setNodeVectorProperty(n, emb, row.embedding_properties[emb]) " |
64 |
| - "RETURN count(*) as nbEmb " |
65 |
| - "} " |
66 |
| - "RETURN row.id as _internal_id, elementId(n) as element_id" |
67 |
| -) |
68 | 55 |
|
69 |
| -UPSERT_NODE_QUERY_VARIABLE_SCOPE_CLAUSE = ( |
70 |
| - "UNWIND $rows AS row " |
71 |
| - "CREATE (n:__KGBuilder__) " |
72 |
| - "SET n += row.properties " |
73 |
| - "WITH n, row CALL apoc.create.addLabels(n, row.labels) YIELD node " |
74 |
| - "WITH node as n, row CALL (n, row) { " |
75 |
| - "WITH n, row WITH n, row WHERE row.embedding_properties IS NOT NULL " |
76 |
| - "UNWIND keys(row.embedding_properties) as emb " |
77 |
| - "CALL db.create.setNodeVectorProperty(n, emb, row.embedding_properties[emb]) " |
78 |
| - "RETURN count(*) as nbEmb " |
79 |
| - "} " |
80 |
| - "RETURN row.id as _internal_id, elementId(n) as element_id" |
81 |
| -) |
| 56 | +def _call_subquery_syntax( |
| 57 | + support_variable_scope_clause: bool, variable_list: list[str] |
| 58 | +) -> str: |
| 59 | + """A helper function to return the CALL subquery syntax: |
| 60 | + - Either CALL { WITH <variables> |
| 61 | + - or CALL (variables) { |
| 62 | + """ |
| 63 | + variables = ",".join(variable_list) |
| 64 | + if support_variable_scope_clause: |
| 65 | + return f"CALL ({variables}) {{ " |
| 66 | + if variables: |
| 67 | + return f"CALL {{ WITH {variables} " |
| 68 | + return "CALL { " |
| 69 | + |
| 70 | + |
| 71 | +def upsert_node_query(support_variable_scope_clause: bool) -> str: |
| 72 | + """Build the Cypher query to upsert a batch of nodes: |
| 73 | + - Create the new node |
| 74 | + - Set its label(s) and properties |
| 75 | + - Set its embedding properties if any |
| 76 | + - Return the node elementId |
| 77 | + """ |
| 78 | + call_prefix = _call_subquery_syntax( |
| 79 | + support_variable_scope_clause, variable_list=["n", "row"] |
| 80 | + ) |
| 81 | + return ( |
| 82 | + "UNWIND $rows AS row " |
| 83 | + "CREATE (n:__KGBuilder__ {__tmp_internal_id: row.id}) " |
| 84 | + "SET n += row.properties " |
| 85 | + "WITH n, row CALL apoc.create.addLabels(n, row.labels) YIELD node " |
| 86 | + "WITH node as n, row " |
| 87 | + f"{call_prefix} " |
| 88 | + "WITH n, row WHERE row.embedding_properties IS NOT NULL " |
| 89 | + "UNWIND keys(row.embedding_properties) as emb " |
| 90 | + "CALL db.create.setNodeVectorProperty(n, emb, row.embedding_properties[emb]) " |
| 91 | + "RETURN count(*) as nbEmb " |
| 92 | + "} " |
| 93 | + "RETURN elementId(n) as element_id" |
| 94 | + ) |
82 | 95 |
|
83 |
| -UPSERT_RELATIONSHIP_QUERY = ( |
84 |
| - "UNWIND $rows as row " |
85 |
| - "MATCH (start:__KGBuilder__), (end:__KGBuilder__) " |
86 |
| - "WHERE elementId(start) = row.start_node_element_id AND elementId(end) = row.end_node_element_id " |
87 |
| - "WITH start, end, row " |
88 |
| - "CALL apoc.merge.relationship(start, row.type, {}, row.properties, end, row.properties) YIELD rel " |
89 |
| - "WITH rel, row CALL { " |
90 |
| - "WITH rel, row WITH rel, row WHERE row.embedding_properties IS NOT NULL " |
91 |
| - "UNWIND keys(row.embedding_properties) as emb " |
92 |
| - "CALL db.create.setRelationshipVectorProperty(rel, emb, row.embedding_properties[emb]) " |
93 |
| - "} " |
94 |
| - "RETURN elementId(rel)" |
95 |
| -) |
96 | 96 |
|
97 |
| -UPSERT_RELATIONSHIP_QUERY_VARIABLE_SCOPE_CLAUSE = ( |
98 |
| - "UNWIND $rows as row " |
99 |
| - "MATCH (start:__KGBuilder__), (end:__KGBuilder__) " |
100 |
| - "WHERE elementId(start) = row.start_node_element_id AND elementId(end) = row.end_node_element_id " |
101 |
| - "WITH start, end, row " |
102 |
| - "CALL apoc.merge.relationship(start, row.type, {}, row.properties, end, row.properties) YIELD rel " |
103 |
| - "WITH rel, row CALL (rel, row) { " |
104 |
| - "WITH rel, row WITH rel, row WHERE row.embedding_properties IS NOT NULL " |
105 |
| - "UNWIND keys(row.embedding_properties) as emb " |
106 |
| - "CALL db.create.setRelationshipVectorProperty(rel, emb, row.embedding_properties[emb]) " |
107 |
| - "} " |
108 |
| - "RETURN elementId(rel)" |
109 |
| -) |
| 97 | +def upsert_relationship_query(support_variable_scope_clause: bool) -> str: |
| 98 | + """Build the Cypher query to upsert a batch of relationships: |
| 99 | + - Create the new relationship: |
| 100 | + only one relationship of a specific type is allowed between the same two nodes |
| 101 | + - Set its properties |
| 102 | + - Set its embedding properties if any |
| 103 | + - Return the relationship elementId |
| 104 | + """ |
| 105 | + call_prefix = _call_subquery_syntax( |
| 106 | + support_variable_scope_clause, variable_list=["rel", "row"] |
| 107 | + ) |
| 108 | + return ( |
| 109 | + "UNWIND $rows as row " |
| 110 | + "MATCH (start:__KGBuilder__ {__tmp_internal_id: row.start_node_id}), " |
| 111 | + " (end:__KGBuilder__ {__tmp_internal_id: row.end_node_id}) " |
| 112 | + "WITH start, end, row " |
| 113 | + "CALL apoc.merge.relationship(start, row.type, {}, row.properties, end, row.properties) YIELD rel " |
| 114 | + "WITH rel, row " |
| 115 | + f"{call_prefix} " |
| 116 | + "WITH rel, row WHERE row.embedding_properties IS NOT NULL " |
| 117 | + "UNWIND keys(row.embedding_properties) as emb " |
| 118 | + "CALL db.create.setRelationshipVectorProperty(rel, emb, row.embedding_properties[emb]) " |
| 119 | + "} " |
| 120 | + "RETURN elementId(rel)" |
| 121 | + ) |
| 122 | + |
| 123 | + |
| 124 | +def db_cleaning_query(support_variable_scope_clause: bool, batch_size: int) -> str: |
| 125 | + """Removes the temporary __tmp_internal_id property from all __KGBuilder__ nodes, in transactions of batch_size rows.""" |
| 126 | + call_prefix = _call_subquery_syntax( |
| 127 | + support_variable_scope_clause, variable_list=["n"] |
| 128 | + ) |
| 129 | + return ( |
| 130 | + "MATCH (n:__KGBuilder__) " |
| 131 | + "WHERE n.__tmp_internal_id IS NOT NULL " |
| 132 | + f"{call_prefix} " |
| 133 | + " SET n.__tmp_internal_id = NULL " |
| 134 | + "} " |
| 135 | + f"IN TRANSACTIONS OF {batch_size} ROWS" |
| 136 | + ) |
| 137 | + |
110 | 138 |
|
111 | 139 | # Deprecated, remove along with upsert_vector
|
112 | 140 | UPSERT_VECTOR_ON_NODE_QUERY = (
|
@@ -150,13 +178,15 @@ def _get_hybrid_query(neo4j_version_is_5_23_or_above: bool) -> str:
|
150 | 178 | Construct a cypher query for hybrid search.
|
151 | 179 |
|
152 | 180 | Args:
|
153 |
| - neo4j_version_is_5_23_or_above (bool): Whether or not the Neo4j version is 5.23 or above; |
| 181 | + neo4j_version_is_5_23_or_above (bool): Whether the Neo4j version is 5.23 or above; |
154 | 182 | determines which call syntax is used.
|
155 | 183 |
|
156 | 184 | Returns:
|
157 | 185 | str: The constructed Cypher query string.
|
158 | 186 | """
|
159 |
| - call_prefix = "CALL () { " if neo4j_version_is_5_23_or_above else "CALL { " |
| 187 | + call_prefix = _call_subquery_syntax( |
| 188 | + neo4j_version_is_5_23_or_above, variable_list=[] |
| 189 | + ) |
160 | 190 | query_body = (
|
161 | 191 | f"{NODE_VECTOR_INDEX_QUERY} "
|
162 | 192 | "WITH collect({node:node, score:score}) AS nodes, max(score) AS vector_index_max_score "
|
|
0 commit comments