@@ -180,7 +180,13 @@ def query_database(
180
180
return json_data
181
181
182
182
183
- def get_schema (driver : neo4j .Driver , is_enhanced : bool = False ) -> str :
183
+ def get_schema (
184
+ driver : neo4j .Driver ,
185
+ is_enhanced : bool = False ,
186
+ database : Optional [str ] = None ,
187
+ timeout : Optional [float ] = None ,
188
+ sanitize : bool = False ,
189
+ ) -> str :
184
190
"""
185
191
Returns the schema of the graph as a string with following format:
186
192
@@ -197,16 +203,34 @@ def get_schema(driver: neo4j.Driver, is_enhanced: bool = False) -> str:
197
203
driver (neo4j.Driver): Neo4j Python driver instance.
198
204
is_enhanced (bool): Flag indicating whether to format the schema with
199
205
detailed statistics (True) or in a simpler overview format (False).
206
+ database (Optional[str]): The name of the database to connect to. Defaults to None, which leaves the choice to the driver/server default database (usually 'neo4j').
207
+ timeout (Optional[float]): The timeout for transactions in seconds.
208
+ Useful for terminating long-running queries.
209
+ By default, there is no timeout set.
210
+ sanitize (bool): A flag to indicate whether to remove lists with
211
+ more than 128 elements from results. Useful for removing
212
+ embedding-like properties from database responses. Default is False.
213
+
200
214
201
215
Returns:
202
216
str: the graph schema information in a serialized format.
203
217
"""
204
- structured_schema = get_structured_schema (driver , is_enhanced )
218
+ structured_schema = get_structured_schema (
219
+ driver = driver ,
220
+ is_enhanced = is_enhanced ,
221
+ database = database ,
222
+ timeout = timeout ,
223
+ sanitize = sanitize ,
224
+ )
205
225
return format_schema (structured_schema , is_enhanced )
206
226
207
227
208
228
def get_structured_schema (
209
- driver : neo4j .Driver , is_enhanced : bool = False
229
+ driver : neo4j .Driver ,
230
+ is_enhanced : bool = False ,
231
+ database : Optional [str ] = None ,
232
+ timeout : Optional [float ] = None ,
233
+ sanitize : bool = False ,
210
234
) -> dict [str , Any ]:
211
235
"""
212
236
Returns the structured schema of the graph.
@@ -249,45 +273,75 @@ def get_structured_schema(
249
273
driver (neo4j.Driver): Neo4j Python driver instance.
250
274
is_enhanced (bool): Flag indicating whether to format the schema with
251
275
detailed statistics (True) or in a simpler overview format (False).
276
+ database (Optional[str]): The name of the database to connect to. Defaults to None, which leaves the choice to the driver/server default database (usually 'neo4j').
277
+ timeout (Optional[float]): The timeout for transactions in seconds.
278
+ Useful for terminating long-running queries.
279
+ By default, there is no timeout set.
280
+ sanitize (bool): A flag to indicate whether to remove lists with
281
+ more than 128 elements from results. Useful for removing
282
+ embedding-like properties from database responses. Default is False.
252
283
253
284
Returns:
254
285
dict[str, Any]: the graph schema information in a structured format.
255
286
"""
256
287
node_properties = [
257
288
data ["output" ]
258
289
for data in query_database (
259
- driver ,
260
- NODE_PROPERTIES_QUERY ,
290
+ driver = driver ,
291
+ query = NODE_PROPERTIES_QUERY ,
261
292
params = {
262
293
"EXCLUDED_LABELS" : EXCLUDED_LABELS
263
294
+ [BASE_ENTITY_LABEL , BASE_KG_BUILDER_LABEL ]
264
295
},
296
+ database = database ,
297
+ timeout = timeout ,
298
+ sanitize = sanitize ,
265
299
)
266
300
]
267
301
268
302
rel_properties = [
269
303
data ["output" ]
270
304
for data in query_database (
271
- driver , REL_PROPERTIES_QUERY , params = {"EXCLUDED_LABELS" : EXCLUDED_RELS }
305
+ driver = driver ,
306
+ query = REL_PROPERTIES_QUERY ,
307
+ params = {"EXCLUDED_LABELS" : EXCLUDED_RELS },
308
+ database = database ,
309
+ timeout = timeout ,
310
+ sanitize = sanitize ,
272
311
)
273
312
]
274
313
275
314
relationships = [
276
315
data ["output" ]
277
316
for data in query_database (
278
- driver ,
279
- REL_QUERY ,
317
+ driver = driver ,
318
+ query = REL_QUERY ,
280
319
params = {
281
320
"EXCLUDED_LABELS" : EXCLUDED_LABELS
282
321
+ [BASE_ENTITY_LABEL , BASE_KG_BUILDER_LABEL ]
283
322
},
323
+ database = database ,
324
+ timeout = timeout ,
325
+ sanitize = sanitize ,
284
326
)
285
327
]
286
328
287
329
# Get constraints and indexes
288
330
try :
289
- constraint = query_database (driver , "SHOW CONSTRAINTS" )
290
- index = query_database (driver , INDEX_QUERY )
331
+ constraint = query_database (
332
+ driver = driver ,
333
+ query = "SHOW CONSTRAINTS" ,
334
+ database = database ,
335
+ timeout = timeout ,
336
+ sanitize = sanitize ,
337
+ )
338
+ index = query_database (
339
+ driver = driver ,
340
+ query = INDEX_QUERY ,
341
+ database = database ,
342
+ timeout = timeout ,
343
+ sanitize = sanitize ,
344
+ )
291
345
except ClientError :
292
346
constraint = []
293
347
index = []
@@ -299,7 +353,13 @@ def get_structured_schema(
299
353
"metadata" : {"constraint" : constraint , "index" : index },
300
354
}
301
355
if is_enhanced :
302
- enhance_schema (driver = driver , structured_schema = structured_schema )
356
+ enhance_schema (
357
+ driver = driver ,
358
+ structured_schema = structured_schema ,
359
+ database = database ,
360
+ timeout = timeout ,
361
+ sanitize = sanitize ,
362
+ )
303
363
return structured_schema
304
364
305
365
@@ -436,6 +496,9 @@ def _build_str_clauses(
436
496
label_or_type : str ,
437
497
exhaustive : bool ,
438
498
prop_index : Optional [List [Any ]] = None ,
499
+ database : Optional [str ] = None ,
500
+ timeout : Optional [float ] = None ,
501
+ sanitize : bool = False ,
439
502
) -> Tuple [List [str ], List [str ]]:
440
503
"""
441
504
Build Cypher clauses for string property statistics.
@@ -455,6 +518,13 @@ def _build_str_clauses(
455
518
prop_index (Optional[List[Any]]): Optional metadata about the property's
456
519
index. If provided, certain optimizations are applied based on
457
520
distinct value limits and index availability.
521
+ database (Optional[str]): The name of the database to connect to. Defaults to None, which leaves the choice to the driver/server default database (usually 'neo4j').
522
+ timeout (Optional[float]): The timeout for transactions in seconds.
523
+ Useful for terminating long-running queries.
524
+ By default, there is no timeout set.
525
+ sanitize (bool): A flag to indicate whether to remove lists with
526
+ more than 128 elements from results. Useful for removing
527
+ embedding-like properties from database responses. Default is False.
458
528
459
529
Returns:
460
530
Tuple[List[str], List[str]]:
@@ -471,9 +541,14 @@ def _build_str_clauses(
471
541
and prop_index [0 ].get ("distinctValues" ) <= DISTINCT_VALUE_LIMIT
472
542
):
473
543
distinct_values = query_database (
474
- driver ,
475
- f"CALL apoc.schema.properties.distinct("
476
- f"'{ label_or_type } ', '{ prop_name } ') YIELD value" ,
544
+ driver = driver ,
545
+ query = (
546
+ f"CALL apoc.schema.properties.distinct("
547
+ f"'{ label_or_type } ', '{ prop_name } ') YIELD value"
548
+ ),
549
+ database = database ,
550
+ timeout = timeout ,
551
+ sanitize = sanitize ,
477
552
)[0 ]["value" ]
478
553
return_clauses .append (
479
554
(f"values: { distinct_values } ," f" distinct_count: { len (distinct_values )} " )
@@ -582,6 +657,9 @@ def get_enhanced_schema_cypher(
582
657
exhaustive : bool ,
583
658
sample_size : int = 5 ,
584
659
is_relationship : bool = False ,
660
+ database : Optional [str ] = None ,
661
+ timeout : Optional [float ] = None ,
662
+ sanitize : bool = False ,
585
663
) -> str :
586
664
"""
587
665
Build a Cypher query for enhanced schema information.
@@ -605,6 +683,13 @@ def get_enhanced_schema_cypher(
605
683
exhaustive is False. Defaults to 5.
606
684
is_relationship (bool, optional): Indicates if the query is for
607
685
a relationship type (True) or a node label (False). Defaults to False.
686
+ database (Optional[str]): The name of the database to connect to. Defaults to None, which leaves the choice to the driver/server default database (usually 'neo4j').
687
+ timeout (Optional[float]): The timeout for transactions in seconds.
688
+ Useful for terminating long-running queries.
689
+ By default, there is no timeout set.
690
+ sanitize (bool): A flag to indicate whether to remove lists with
691
+ more than 128 elements from results. Useful for removing
692
+ embedding-like properties from database responses. Default is False.
608
693
609
694
Returns:
610
695
str: A Cypher query string that gathers enhanced property metadata.
@@ -643,6 +728,9 @@ def get_enhanced_schema_cypher(
643
728
label_or_type = label_or_type ,
644
729
exhaustive = exhaustive ,
645
730
prop_index = prop_index ,
731
+ database = database ,
732
+ timeout = timeout ,
733
+ sanitize = sanitize ,
646
734
)
647
735
with_clauses += str_w_clauses
648
736
return_clauses += str_r_clauses
@@ -682,6 +770,9 @@ def enhance_properties(
682
770
structured_schema : Dict [str , Any ],
683
771
prop_dict : Dict [str , Any ],
684
772
is_relationship : bool ,
773
+ database : Optional [str ] = None ,
774
+ timeout : Optional [float ] = None ,
775
+ sanitize : bool = False ,
685
776
) -> None :
686
777
"""
687
778
Enhance the structured schema with detailed statistics for a single node label or relationship type.
@@ -699,6 +790,13 @@ def enhance_properties(
699
790
relationship type to be enhanced.
700
791
is_relationship (bool): Indicates whether the properties to be enhanced belong to a relationship
701
792
(True) or a node (False).
793
+ database (Optional[str]): The name of the database to connect to. Defaults to None, which leaves the choice to the driver/server default database (usually 'neo4j').
794
+ timeout (Optional[float]): The timeout for transactions in seconds.
795
+ Useful for terminating long-running queries.
796
+ By default, there is no timeout set.
797
+ sanitize (bool): A flag to indicate whether to remove lists with
798
+ more than 128 elements from results. Useful for removing
799
+ embedding-like properties from database responses. Default is False.
702
800
703
801
Returns:
704
802
None
@@ -720,6 +818,9 @@ def enhance_properties(
720
818
properties = props ,
721
819
exhaustive = count < EXHAUSTIVE_SEARCH_LIMIT ,
722
820
is_relationship = is_relationship ,
821
+ database = database ,
822
+ timeout = timeout ,
823
+ sanitize = sanitize ,
723
824
)
724
825
# Due to schema-flexible nature of neo4j errors can happen
725
826
try :
@@ -733,9 +834,12 @@ def enhance_properties(
733
834
else {}
734
835
)
735
836
enhanced_info = query_database (
736
- driver ,
737
- enhanced_cypher ,
837
+ driver = driver ,
838
+ query = enhanced_cypher ,
738
839
session_params = session_params ,
840
+ database = database ,
841
+ timeout = timeout ,
842
+ sanitize = sanitize ,
739
843
)[0 ]["output" ]
740
844
for prop in props :
741
845
if prop ["property" ] in enhanced_info :
@@ -744,7 +848,13 @@ def enhance_properties(
744
848
return
745
849
746
850
747
- def enhance_schema (driver : neo4j .Driver , structured_schema : Dict [str , Any ]) -> None :
851
+ def enhance_schema (
852
+ driver : neo4j .Driver ,
853
+ structured_schema : Dict [str , Any ],
854
+ database : Optional [str ] = None ,
855
+ timeout : Optional [float ] = None ,
856
+ sanitize : bool = False ,
857
+ ) -> None :
748
858
"""
749
859
Enhance the structured schema with detailed property statistics.
750
860
@@ -759,18 +869,34 @@ def enhance_schema(driver: neo4j.Driver, structured_schema: Dict[str, Any]) -> N
759
869
structured_schema (Dict[str, Any]): The initial structured schema
760
870
containing node and relationship properties, which will be updated
761
871
with enhanced statistics.
872
+ database (Optional[str]): The name of the database to connect to. Defaults to None, which leaves the choice to the driver/server default database (usually 'neo4j').
873
+ timeout (Optional[float]): The timeout for transactions in seconds.
874
+ Useful for terminating long-running queries.
875
+ By default, there is no timeout set.
876
+ sanitize (bool): A flag to indicate whether to remove lists with
877
+ more than 128 elements from results. Useful for removing
878
+ embedding-like properties from database responses. Default is False.
762
879
763
880
Returns:
764
881
None
765
882
"""
766
- schema_counts = query_database (driver , SCHEMA_COUNTS_QUERY )
883
+ schema_counts = query_database (
884
+ driver = driver ,
885
+ query = SCHEMA_COUNTS_QUERY ,
886
+ database = database ,
887
+ timeout = timeout ,
888
+ sanitize = sanitize ,
889
+ )
767
890
# Update node info
768
891
for node in schema_counts [0 ]["nodes" ]:
769
892
enhance_properties (
770
893
driver = driver ,
771
894
structured_schema = structured_schema ,
772
895
prop_dict = node ,
773
896
is_relationship = False ,
897
+ database = database ,
898
+ timeout = timeout ,
899
+ sanitize = sanitize ,
774
900
)
775
901
# Update rel info
776
902
for rel in schema_counts [0 ]["relationships" ]:
@@ -779,4 +905,7 @@ def enhance_schema(driver: neo4j.Driver, structured_schema: Dict[str, Any]) -> N
779
905
structured_schema = structured_schema ,
780
906
prop_dict = rel ,
781
907
is_relationship = True ,
908
+ database = database ,
909
+ timeout = timeout ,
910
+ sanitize = sanitize ,
782
911
)
0 commit comments