Skip to content

Commit c166afc

Browse files
authored
Document schema functions (#228)
* Document schema functions * Ruff * Ruff again and fix tests
1 parent 6ac97b7 commit c166afc

File tree

4 files changed

+78
-5
lines changed

4 files changed

+78
-5
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
## Next
44

5+
### Fixed
6+
- Added schema functions to the documentation.
7+
58
## 1.2.1
69

710
### Added

docs/source/api.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,11 @@ Database Interaction
340340

341341
.. autofunction:: neo4j_graphrag.indexes.async_upsert_vector_on_relationship
342342

343+
.. autofunction:: neo4j_graphrag.schema.get_structured_schema
344+
345+
.. autofunction:: neo4j_graphrag.schema.get_schema
346+
347+
343348
******
344349
Errors
345350
******
@@ -408,6 +413,13 @@ FilterValidationError
408413
:show-inheritance:
409414

410415

416+
EmbeddingsGenerationError
417+
=========================
418+
419+
.. autoclass:: neo4j_graphrag.exceptions.EmbeddingsGenerationError
420+
:show-inheritance:
421+
422+
411423
EmbeddingRequiredError
412424
======================
413425

src/neo4j_graphrag/schema.py

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import neo4j
2020
from neo4j.exceptions import ClientError
2121

22+
BASE_KG_BUILDER_LABEL = "__KGBuilder__"
2223
BASE_ENTITY_LABEL = "__Entity__"
2324
EXCLUDED_LABELS = ["_Bloom_Perspective_", "_Bloom_Scene_"]
2425
EXCLUDED_RELS = ["_Bloom_HAS_SCENE_"]
@@ -82,13 +83,23 @@ def get_schema(
8283
driver: neo4j.Driver,
8384
) -> str:
8485
"""
85-
Returns the schema of the graph.
86+
Returns the schema of the graph as a string with the following format:
87+
88+
.. code-block:: text
89+
90+
Node properties:
91+
Person {id: INTEGER, name: STRING}
92+
Relationship properties:
93+
KNOWS {fromDate: DATE}
94+
The relationships:
95+
(:Person)-[:KNOWS]->(:Person)
8696
8797
Args:
8898
driver (neo4j.Driver): Neo4j Python driver instance.
8999
90100
Returns:
91101
str: the graph schema information in a serialized format.
102+
92103
"""
93104
structured_schema = get_structured_schema(driver)
94105

@@ -129,6 +140,40 @@ def get_structured_schema(driver: neo4j.Driver) -> dict[str, Any]:
129140
"""
130141
Returns the structured schema of the graph.
131142
143+
Returns a dict with the following format:
144+
145+
.. code:: python
146+
147+
{
148+
'node_props': {
149+
'Person': [{'property': 'id', 'type': 'INTEGER'}, {'property': 'name', 'type': 'STRING'}]
150+
},
151+
'rel_props': {
152+
'KNOWS': [{'property': 'fromDate', 'type': 'DATE'}]
153+
},
154+
'relationships': [
155+
{'start': 'Person', 'type': 'KNOWS', 'end': 'Person'}
156+
],
157+
'metadata': {
158+
'constraint': [
159+
{'id': 7, 'name': 'person_id', 'type': 'UNIQUENESS', 'entityType': 'NODE', 'labelsOrTypes': ['Person'], 'properties': ['id'], 'ownedIndex': 'person_id', 'propertyType': None},
160+
],
161+
'index': [
162+
{'label': 'Person', 'properties': ['name'], 'size': 2, 'type': 'RANGE', 'valuesSelectivity': 1.0, 'distinctValues': 2.0},
163+
]
164+
}
165+
}
166+
167+
Note:
168+
The internal structure of the returned dict depends on the apoc.meta.data
169+
and apoc.schema.nodes procedures.
170+
171+
Warning:
172+
Some labels are excluded from the output schema:
173+
174+
- The ``__Entity__`` and ``__KGBuilder__`` node labels, which are created by the KG Builder pipeline within this package
175+
- Some labels related to Bloom internals.
176+
132177
Args:
133178
driver (neo4j.Driver): Neo4j Python driver instance.
134179
@@ -140,7 +185,10 @@ def get_structured_schema(driver: neo4j.Driver) -> dict[str, Any]:
140185
for data in query_database(
141186
driver,
142187
NODE_PROPERTIES_QUERY,
143-
params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]},
188+
params={
189+
"EXCLUDED_LABELS": EXCLUDED_LABELS
190+
+ [BASE_ENTITY_LABEL, BASE_KG_BUILDER_LABEL]
191+
},
144192
)
145193
]
146194

@@ -156,7 +204,10 @@ def get_structured_schema(driver: neo4j.Driver) -> dict[str, Any]:
156204
for data in query_database(
157205
driver,
158206
REL_QUERY,
159-
params={"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]},
207+
params={
208+
"EXCLUDED_LABELS": EXCLUDED_LABELS
209+
+ [BASE_ENTITY_LABEL, BASE_KG_BUILDER_LABEL]
210+
},
160211
)
161212
]
162213

tests/unit/test_schema.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from neo4j import Driver
2121
from neo4j_graphrag.schema import (
2222
BASE_ENTITY_LABEL,
23+
BASE_KG_BUILDER_LABEL,
2324
EXCLUDED_LABELS,
2425
EXCLUDED_RELS,
2526
INDEX_QUERY,
@@ -84,15 +85,21 @@ def test_get_structured_schema_happy_path(driver: MagicMock) -> None:
8485
assert 5 == driver.execute_query.call_count
8586
driver.execute_query.assert_any_call(
8687
NODE_PROPERTIES_QUERY,
87-
{"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]},
88+
{
89+
"EXCLUDED_LABELS": EXCLUDED_LABELS
90+
+ [BASE_ENTITY_LABEL, BASE_KG_BUILDER_LABEL]
91+
},
8892
)
8993
driver.execute_query.assert_any_call(
9094
REL_PROPERTIES_QUERY,
9195
{"EXCLUDED_LABELS": EXCLUDED_RELS},
9296
)
9397
driver.execute_query.assert_any_call(
9498
REL_QUERY,
95-
{"EXCLUDED_LABELS": EXCLUDED_LABELS + [BASE_ENTITY_LABEL]},
99+
{
100+
"EXCLUDED_LABELS": EXCLUDED_LABELS
101+
+ [BASE_ENTITY_LABEL, BASE_KG_BUILDER_LABEL]
102+
},
96103
)
97104
driver.execute_query.assert_any_call("SHOW CONSTRAINTS", {})
98105
driver.execute_query.assert_any_call(INDEX_QUERY, {})

0 commit comments

Comments
 (0)