71
71
)
72
72
73
73
74
- def clean_string_values (text : str ) -> str :
74
+ def _clean_string_values (text : str ) -> str :
75
75
"""Clean string values for schema.
76
76
77
77
Cleans the input text by replacing newline and carriage return characters.
@@ -85,7 +85,7 @@ def clean_string_values(text: str) -> str:
85
85
return text .replace ("\n " , " " ).replace ("\r " , " " )
86
86
87
87
88
- def value_sanitize (d : Any ) -> Any :
88
+ def _value_sanitize (d : Any ) -> Any :
89
89
"""Sanitize the input dictionary or list.
90
90
91
91
Sanitizes the input by removing embedding-like values,
@@ -104,14 +104,14 @@ def value_sanitize(d: Any) -> Any:
104
104
new_dict = {}
105
105
for key , value in d .items ():
106
106
if isinstance (value , dict ):
107
- sanitized_value = value_sanitize (value )
107
+ sanitized_value = _value_sanitize (value )
108
108
if (
109
109
sanitized_value is not None
110
110
): # Check if the sanitized value is not None
111
111
new_dict [key ] = sanitized_value
112
112
elif isinstance (value , list ):
113
113
if len (value ) < LIST_LIMIT :
114
- sanitized_value = value_sanitize (value )
114
+ sanitized_value = _value_sanitize (value )
115
115
if (
116
116
sanitized_value is not None
117
117
): # Check if the sanitized value is not None
@@ -123,7 +123,7 @@ def value_sanitize(d: Any) -> Any:
123
123
elif isinstance (d , list ):
124
124
if len (d ) < LIST_LIMIT :
125
125
return [
126
- value_sanitize (item ) for item in d if value_sanitize (item ) is not None
126
+ _value_sanitize (item ) for item in d if _value_sanitize (item ) is not None
127
127
]
128
128
else :
129
129
return None
@@ -169,7 +169,7 @@ def query_database(
169
169
)
170
170
json_data = [r .data () for r in data .records ]
171
171
if sanitize :
172
- json_data = [value_sanitize (el ) for el in json_data ]
172
+ json_data = [_value_sanitize (el ) for el in json_data ]
173
173
return json_data
174
174
except Neo4jError as e :
175
175
if not (
@@ -198,7 +198,7 @@ def query_database(
198
198
result = session .run (Query (text = query , timeout = timeout ), params )
199
199
json_data = [r .data () for r in result ]
200
200
if sanitize :
201
- json_data = [value_sanitize (el ) for el in json_data ]
201
+ json_data = [_value_sanitize (el ) for el in json_data ]
202
202
return json_data
203
203
204
204
@@ -325,7 +325,7 @@ def get_structured_schema(
325
325
return structured_schema
326
326
327
327
328
- def format_property (prop : Dict [str , Any ]) -> Optional [str ]:
328
+ def _format_property (prop : Dict [str , Any ]) -> Optional [str ]:
329
329
"""
330
330
Format a single property based on its type and available metadata.
331
331
@@ -343,11 +343,11 @@ def format_property(prop: Dict[str, Any]) -> Optional[str]:
343
343
"""
344
344
if prop ["type" ] == "STRING" and prop .get ("values" ):
345
345
if prop .get ("distinct_count" , 11 ) > DISTINCT_VALUE_LIMIT :
346
- return f'Example: "{ clean_string_values (prop ["values" ][0 ])} "'
346
+ return f'Example: "{ _clean_string_values (prop ["values" ][0 ])} "'
347
347
else :
348
348
return (
349
349
"Available options: "
350
- + f'{ [clean_string_values (el ) for el in prop ["values" ]]} '
350
+ + f'{ [_clean_string_values (el ) for el in prop ["values" ]]} '
351
351
)
352
352
elif prop ["type" ] in [
353
353
"INTEGER" ,
@@ -368,7 +368,7 @@ def format_property(prop: Dict[str, Any]) -> Optional[str]:
368
368
return ""
369
369
370
370
371
- def format_properties (property_dict : Dict [str , Any ], is_enhanced : bool ) -> List [str ]:
371
+ def _format_properties (property_dict : Dict [str , Any ], is_enhanced : bool ) -> List [str ]:
372
372
"""
373
373
Format a collection of properties for nodes or relationships.
374
374
@@ -389,7 +389,7 @@ def format_properties(property_dict: Dict[str, Any], is_enhanced: bool) -> List[
389
389
for label , props in property_dict .items ():
390
390
formatted_props .append (f"- **{ label } **" )
391
391
for prop in props :
392
- example = format_property (prop )
392
+ example = _format_property (prop )
393
393
if example is not None :
394
394
formatted_props .append (
395
395
f" - `{ prop ['property' ]} `: { prop ['type' ]} { example } "
@@ -403,7 +403,7 @@ def format_properties(property_dict: Dict[str, Any], is_enhanced: bool) -> List[
403
403
return formatted_props
404
404
405
405
406
- def format_relationships (rels : List [Dict [str , Any ]]) -> List [str ]:
406
+ def _format_relationships (rels : List [Dict [str , Any ]]) -> List [str ]:
407
407
"""
408
408
Format relationships into a structured string representation.
409
409
@@ -437,9 +437,9 @@ def format_schema(schema: Dict[str, Any], is_enhanced: bool) -> str:
437
437
str: A formatted string representation of the graph schema, including
438
438
node properties, relationship properties, and relationship patterns.
439
439
"""
440
- formatted_node_props = format_properties (schema ["node_props" ], is_enhanced )
441
- formatted_rel_props = format_properties (schema ["rel_props" ], is_enhanced )
442
- formatted_rels = format_relationships (schema ["relationships" ])
440
+ formatted_node_props = _format_properties (schema ["node_props" ], is_enhanced )
441
+ formatted_rel_props = _format_properties (schema ["rel_props" ], is_enhanced )
442
+ formatted_rels = _format_relationships (schema ["relationships" ])
443
443
return "\n " .join (
444
444
[
445
445
"Node properties:" ,
@@ -452,7 +452,7 @@ def format_schema(schema: Dict[str, Any], is_enhanced: bool) -> str:
452
452
)
453
453
454
454
455
- def build_str_clauses (
455
+ def _build_str_clauses (
456
456
prop_name : str ,
457
457
driver : neo4j .Driver ,
458
458
label_or_type : str ,
@@ -519,7 +519,7 @@ def build_str_clauses(
519
519
return with_clauses , return_clauses
520
520
521
521
522
- def build_list_clauses (prop_name : str ) -> Tuple [str , str ]:
522
+ def _build_list_clauses (prop_name : str ) -> Tuple [str , str ]:
523
523
"""
524
524
Build Cypher clauses for list property size statistics.
525
525
@@ -547,7 +547,7 @@ def build_list_clauses(prop_name: str) -> Tuple[str, str]:
547
547
return with_clause , return_clause
548
548
549
549
550
- def build_num_date_clauses (
550
+ def _build_num_date_clauses (
551
551
prop_name : str , exhaustive : bool , prop_index : Optional [List [Any ]] = None
552
552
) -> Tuple [List [str ], List [str ]]:
553
553
"""
@@ -602,6 +602,7 @@ def get_enhanced_schema_cypher(
602
602
label_or_type : str ,
603
603
properties : List [Dict [str , Any ]],
604
604
exhaustive : bool ,
605
+ sample_size : int = 5 ,
605
606
is_relationship : bool = False ,
606
607
) -> str :
607
608
"""
@@ -622,6 +623,8 @@ def get_enhanced_schema_cypher(
622
623
the node label or relationship type.
623
624
exhaustive (bool): Whether to perform an exhaustive search or a
624
625
sampled query approach.
626
+ sample_size (int): The number of nodes or relationships to sample when
627
+ exhaustive is False. Defaults to 5.
625
628
is_relationship (bool, optional): Indicates if the query is for
626
629
a relationship type (True) or a node label (False). Defaults to False.
627
630
@@ -637,8 +640,8 @@ def get_enhanced_schema_cypher(
637
640
return_clauses = []
638
641
output_dict = {}
639
642
if not exhaustive :
640
- # Sample 5 random nodes if not exhaustive
641
- match_clause += " WITH n LIMIT 5 "
643
+ # Sample random nodes if not exhaustive
644
+ match_clause += f " WITH n LIMIT { sample_size } "
642
645
# Build the with and return clauses
643
646
for prop in properties :
644
647
prop_name = prop ["property" ]
@@ -656,7 +659,7 @@ def get_enhanced_schema_cypher(
656
659
else None
657
660
)
658
661
if prop_type == "STRING" :
659
- str_w_clauses , str_r_clauses = build_str_clauses (
662
+ str_w_clauses , str_r_clauses = _build_str_clauses (
660
663
prop_name = prop_name ,
661
664
driver = driver ,
662
665
label_or_type = label_or_type ,
@@ -672,13 +675,13 @@ def get_enhanced_schema_cypher(
672
675
"DATE_TIME" ,
673
676
"LOCAL_DATE_TIME" ,
674
677
]:
675
- num_date_w_clauses , num_date_r_clauses = build_num_date_clauses (
678
+ num_date_w_clauses , num_date_r_clauses = _build_num_date_clauses (
676
679
prop_name = prop_name , exhaustive = exhaustive , prop_index = prop_index
677
680
)
678
681
with_clauses += num_date_w_clauses
679
682
return_clauses += num_date_r_clauses
680
683
elif prop_type == "LIST" :
681
- list_w_clause , list_r_clause = build_list_clauses (prop_name = prop_name )
684
+ list_w_clause , list_r_clause = _build_list_clauses (prop_name = prop_name )
682
685
with_clauses .append (list_w_clause )
683
686
return_clauses .append (list_r_clause )
684
687
elif prop_type in ["BOOLEAN" , "POINT" , "DURATION" ]:
0 commit comments