Skip to content

Commit 067390b

Browse files
committed
Makes helper functions private
1 parent 7f1851d commit 067390b

File tree

2 files changed

+31
-28
lines changed

2 files changed

+31
-28
lines changed

src/neo4j_graphrag/schema.py

Lines changed: 27 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@
7171
)
7272

7373

74-
def clean_string_values(text: str) -> str:
74+
def _clean_string_values(text: str) -> str:
7575
"""Clean string values for schema.
7676
7777
Cleans the input text by replacing newline and carriage return characters.
@@ -85,7 +85,7 @@ def clean_string_values(text: str) -> str:
8585
return text.replace("\n", " ").replace("\r", " ")
8686

8787

88-
def value_sanitize(d: Any) -> Any:
88+
def _value_sanitize(d: Any) -> Any:
8989
"""Sanitize the input dictionary or list.
9090
9191
Sanitizes the input by removing embedding-like values,
@@ -104,14 +104,14 @@ def value_sanitize(d: Any) -> Any:
104104
new_dict = {}
105105
for key, value in d.items():
106106
if isinstance(value, dict):
107-
sanitized_value = value_sanitize(value)
107+
sanitized_value = _value_sanitize(value)
108108
if (
109109
sanitized_value is not None
110110
): # Check if the sanitized value is not None
111111
new_dict[key] = sanitized_value
112112
elif isinstance(value, list):
113113
if len(value) < LIST_LIMIT:
114-
sanitized_value = value_sanitize(value)
114+
sanitized_value = _value_sanitize(value)
115115
if (
116116
sanitized_value is not None
117117
): # Check if the sanitized value is not None
@@ -123,7 +123,7 @@ def value_sanitize(d: Any) -> Any:
123123
elif isinstance(d, list):
124124
if len(d) < LIST_LIMIT:
125125
return [
126-
value_sanitize(item) for item in d if value_sanitize(item) is not None
126+
_value_sanitize(item) for item in d if _value_sanitize(item) is not None
127127
]
128128
else:
129129
return None
@@ -169,7 +169,7 @@ def query_database(
169169
)
170170
json_data = [r.data() for r in data.records]
171171
if sanitize:
172-
json_data = [value_sanitize(el) for el in json_data]
172+
json_data = [_value_sanitize(el) for el in json_data]
173173
return json_data
174174
except Neo4jError as e:
175175
if not (
@@ -198,7 +198,7 @@ def query_database(
198198
result = session.run(Query(text=query, timeout=timeout), params)
199199
json_data = [r.data() for r in result]
200200
if sanitize:
201-
json_data = [value_sanitize(el) for el in json_data]
201+
json_data = [_value_sanitize(el) for el in json_data]
202202
return json_data
203203

204204

@@ -325,7 +325,7 @@ def get_structured_schema(
325325
return structured_schema
326326

327327

328-
def format_property(prop: Dict[str, Any]) -> Optional[str]:
328+
def _format_property(prop: Dict[str, Any]) -> Optional[str]:
329329
"""
330330
Format a single property based on its type and available metadata.
331331
@@ -343,11 +343,11 @@ def format_property(prop: Dict[str, Any]) -> Optional[str]:
343343
"""
344344
if prop["type"] == "STRING" and prop.get("values"):
345345
if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
346-
return f'Example: "{clean_string_values(prop["values"][0])}"'
346+
return f'Example: "{_clean_string_values(prop["values"][0])}"'
347347
else:
348348
return (
349349
"Available options: "
350-
+ f'{[clean_string_values(el) for el in prop["values"]]}'
350+
+ f'{[_clean_string_values(el) for el in prop["values"]]}'
351351
)
352352
elif prop["type"] in [
353353
"INTEGER",
@@ -368,7 +368,7 @@ def format_property(prop: Dict[str, Any]) -> Optional[str]:
368368
return ""
369369

370370

371-
def format_properties(property_dict: Dict[str, Any], is_enhanced: bool) -> List[str]:
371+
def _format_properties(property_dict: Dict[str, Any], is_enhanced: bool) -> List[str]:
372372
"""
373373
Format a collection of properties for nodes or relationships.
374374
@@ -389,7 +389,7 @@ def format_properties(property_dict: Dict[str, Any], is_enhanced: bool) -> List[
389389
for label, props in property_dict.items():
390390
formatted_props.append(f"- **{label}**")
391391
for prop in props:
392-
example = format_property(prop)
392+
example = _format_property(prop)
393393
if example is not None:
394394
formatted_props.append(
395395
f" - `{prop['property']}`: {prop['type']} {example}"
@@ -403,7 +403,7 @@ def format_properties(property_dict: Dict[str, Any], is_enhanced: bool) -> List[
403403
return formatted_props
404404

405405

406-
def format_relationships(rels: List[Dict[str, Any]]) -> List[str]:
406+
def _format_relationships(rels: List[Dict[str, Any]]) -> List[str]:
407407
"""
408408
Format relationships into a structured string representation.
409409
@@ -437,9 +437,9 @@ def format_schema(schema: Dict[str, Any], is_enhanced: bool) -> str:
437437
str: A formatted string representation of the graph schema, including
438438
node properties, relationship properties, and relationship patterns.
439439
"""
440-
formatted_node_props = format_properties(schema["node_props"], is_enhanced)
441-
formatted_rel_props = format_properties(schema["rel_props"], is_enhanced)
442-
formatted_rels = format_relationships(schema["relationships"])
440+
formatted_node_props = _format_properties(schema["node_props"], is_enhanced)
441+
formatted_rel_props = _format_properties(schema["rel_props"], is_enhanced)
442+
formatted_rels = _format_relationships(schema["relationships"])
443443
return "\n".join(
444444
[
445445
"Node properties:",
@@ -452,7 +452,7 @@ def format_schema(schema: Dict[str, Any], is_enhanced: bool) -> str:
452452
)
453453

454454

455-
def build_str_clauses(
455+
def _build_str_clauses(
456456
prop_name: str,
457457
driver: neo4j.Driver,
458458
label_or_type: str,
@@ -519,7 +519,7 @@ def build_str_clauses(
519519
return with_clauses, return_clauses
520520

521521

522-
def build_list_clauses(prop_name: str) -> Tuple[str, str]:
522+
def _build_list_clauses(prop_name: str) -> Tuple[str, str]:
523523
"""
524524
Build Cypher clauses for list property size statistics.
525525
@@ -547,7 +547,7 @@ def build_list_clauses(prop_name: str) -> Tuple[str, str]:
547547
return with_clause, return_clause
548548

549549

550-
def build_num_date_clauses(
550+
def _build_num_date_clauses(
551551
prop_name: str, exhaustive: bool, prop_index: Optional[List[Any]] = None
552552
) -> Tuple[List[str], List[str]]:
553553
"""
@@ -602,6 +602,7 @@ def get_enhanced_schema_cypher(
602602
label_or_type: str,
603603
properties: List[Dict[str, Any]],
604604
exhaustive: bool,
605+
sample_size: int = 5,
605606
is_relationship: bool = False,
606607
) -> str:
607608
"""
@@ -622,6 +623,8 @@ def get_enhanced_schema_cypher(
622623
the node label or relationship type.
623624
exhaustive (bool): Whether to perform an exhaustive search or a
624625
sampled query approach.
626+
sample_size (int): The number of nodes or relationships to sample when
627+
exhaustive is False. Defaults to 5.
625628
is_relationship (bool, optional): Indicates if the query is for
626629
a relationship type (True) or a node label (False). Defaults to False.
627630
@@ -637,8 +640,8 @@ def get_enhanced_schema_cypher(
637640
return_clauses = []
638641
output_dict = {}
639642
if not exhaustive:
640-
# Sample 5 random nodes if not exhaustive
641-
match_clause += " WITH n LIMIT 5"
643+
# Sample random nodes if not exhaustive
644+
match_clause += f" WITH n LIMIT {sample_size}"
642645
# Build the with and return clauses
643646
for prop in properties:
644647
prop_name = prop["property"]
@@ -656,7 +659,7 @@ def get_enhanced_schema_cypher(
656659
else None
657660
)
658661
if prop_type == "STRING":
659-
str_w_clauses, str_r_clauses = build_str_clauses(
662+
str_w_clauses, str_r_clauses = _build_str_clauses(
660663
prop_name=prop_name,
661664
driver=driver,
662665
label_or_type=label_or_type,
@@ -672,13 +675,13 @@ def get_enhanced_schema_cypher(
672675
"DATE_TIME",
673676
"LOCAL_DATE_TIME",
674677
]:
675-
num_date_w_clauses, num_date_r_clauses = build_num_date_clauses(
678+
num_date_w_clauses, num_date_r_clauses = _build_num_date_clauses(
676679
prop_name=prop_name, exhaustive=exhaustive, prop_index=prop_index
677680
)
678681
with_clauses += num_date_w_clauses
679682
return_clauses += num_date_r_clauses
680683
elif prop_type == "LIST":
681-
list_w_clause, list_r_clause = build_list_clauses(prop_name=prop_name)
684+
list_w_clause, list_r_clause = _build_list_clauses(prop_name=prop_name)
682685
with_clauses.append(list_w_clause)
683686
return_clauses.append(list_r_clause)
684687
elif prop_type in ["BOOLEAN", "POINT", "DURATION"]:

tests/unit/test_schema.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535
get_schema,
3636
get_structured_schema,
3737
query_database,
38-
value_sanitize,
38+
_value_sanitize,
3939
)
4040

4141

@@ -203,12 +203,12 @@ def test_get_schema_ensure_structured_response(driver: MagicMock) -> None:
203203
),
204204
],
205205
)
206-
def test_value_sanitize(
206+
def test__value_sanitize(
207207
description: str, input_value: Dict[str, Any], expected_output: Any
208208
) -> None:
209-
"""Test the value_sanitize function."""
209+
"""Test the _value_sanitize function."""
210210
assert (
211-
value_sanitize(input_value) == expected_output
211+
_value_sanitize(input_value) == expected_output
212212
), f"Failed test case: {description}"
213213

214214

0 commit comments

Comments
 (0)