Skip to content

Commit b96a9bd

Browse files
authored
Fix schema validation (#345)
* Fix schema validation
* Ruff
* Ruff 2
* Mypy
* Mypy 2
* Test fix
* Update the config
1 parent 2686329 commit b96a9bd

File tree

9 files changed

+200
-202
lines changed

9 files changed

+200
-202
lines changed

examples/build_graph/from_config_files/simple_kg_pipeline_config.json

Lines changed: 57 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -42,65 +42,67 @@
4242
}
4343
},
4444
"from_pdf": false,
45-
"entities": [
46-
"Person",
47-
{
48-
"label": "House",
49-
"description": "Family the person belongs to",
50-
"properties": [
51-
{
52-
"name": "name",
53-
"type": "STRING"
54-
}
55-
]
56-
},
57-
{
58-
"label": "Planet",
59-
"properties": [
60-
{
61-
"name": "name",
62-
"type": "STRING"
63-
},
64-
{
65-
"name": "weather",
66-
"type": "STRING"
67-
}
68-
]
69-
}
70-
],
71-
"relations": [
72-
"PARENT_OF",
73-
{
74-
"label": "HEIR_OF",
75-
"description": "Used for inheritor relationship between father and sons"
76-
},
77-
{
78-
"label": "RULES",
79-
"properties": [
80-
{
81-
"name": "fromYear",
82-
"type": "INTEGER"
83-
}
84-
]
85-
}
86-
],
87-
"potential_schema": [
88-
[
45+
"schema": {
46+
"node_types": [
8947
"Person",
90-
"PARENT_OF",
91-
"Person"
48+
{
49+
"label": "House",
50+
"description": "Family the person belongs to",
51+
"properties": [
52+
{
53+
"name": "name",
54+
"type": "STRING"
55+
}
56+
]
57+
},
58+
{
59+
"label": "Planet",
60+
"properties": [
61+
{
62+
"name": "name",
63+
"type": "STRING"
64+
},
65+
{
66+
"name": "weather",
67+
"type": "STRING"
68+
}
69+
]
70+
}
9271
],
93-
[
94-
"Person",
95-
"HEIR_OF",
96-
"House"
72+
"relationship_types": [
73+
"PARENT_OF",
74+
{
75+
"label": "HEIR_OF",
76+
"description": "Used for inheritor relationship between father and sons"
77+
},
78+
{
79+
"label": "RULES",
80+
"properties": [
81+
{
82+
"name": "fromYear",
83+
"type": "INTEGER"
84+
}
85+
]
86+
}
9787
],
98-
[
99-
"House",
100-
"RULES",
101-
"Planet"
88+
"patterns": [
89+
[
90+
"Person",
91+
"PARENT_OF",
92+
"Person"
93+
],
94+
[
95+
"Person",
96+
"HEIR_OF",
97+
"House"
98+
],
99+
[
100+
"House",
101+
"RULES",
102+
"Planet"
103+
]
102104
]
103-
],
105+
},
104106
"text_splitter": {
105107
"class_": "text_splitters.fixed_size_splitter.FixedSizeSplitter",
106108
"params_": {

examples/build_graph/from_config_files/simple_kg_pipeline_config.yaml

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -30,31 +30,32 @@ embedder_config:
3030
resolver_: ENV
3131
var_: OPENAI_API_KEY
3232
from_pdf: false
33-
entities:
34-
- label: Person
35-
- label: House
36-
description: Family the person belongs to
37-
properties:
38-
- name: name
39-
type: STRING
40-
- label: Planet
41-
properties:
42-
- name: name
43-
type: STRING
44-
- name: weather
45-
type: STRING
46-
relations:
47-
- label: PARENT_OF
48-
- label: HEIR_OF
49-
description: Used for inheritor relationship between father and sons
50-
- label: RULES
51-
properties:
52-
- name: fromYear
53-
type: INTEGER
54-
potential_schema:
55-
- ["Person", "PARENT_OF", "Person"]
56-
- ["Person", "HEIR_OF", "House"]
57-
- ["House", "RULES", "Planet"]
33+
schema:
34+
node_types:
35+
- label: Person
36+
- label: House
37+
description: Family the person belongs to
38+
properties:
39+
- name: name
40+
type: STRING
41+
- label: Planet
42+
properties:
43+
- name: name
44+
type: STRING
45+
- name: weather
46+
type: STRING
47+
relationship_types:
48+
- label: PARENT_OF
49+
- label: HEIR_OF
50+
description: Used for inheritor relationship between father and sons
51+
- label: RULES
52+
properties:
53+
- name: fromYear
54+
type: INTEGER
55+
patterns:
56+
- ["Person", "PARENT_OF", "Person"]
57+
- ["Person", "HEIR_OF", "House"]
58+
- ["House", "RULES", "Planet"]
5859
text_splitter:
5960
class_: text_splitters.fixed_size_splitter.FixedSizeSplitter
6061
params_:

examples/build_graph/from_config_files/simple_kg_pipeline_config_url.json

Lines changed: 57 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -42,65 +42,67 @@
4242
}
4343
},
4444
"from_pdf": true,
45-
"entities": [
46-
"Person",
47-
{
48-
"label": "House",
49-
"description": "Family the person belongs to",
50-
"properties": [
51-
{
52-
"name": "name",
53-
"type": "STRING"
54-
}
55-
]
56-
},
57-
{
58-
"label": "Planet",
59-
"properties": [
60-
{
61-
"name": "name",
62-
"type": "STRING"
63-
},
64-
{
65-
"name": "weather",
66-
"type": "STRING"
67-
}
68-
]
69-
}
70-
],
71-
"relations": [
72-
"PARENT_OF",
73-
{
74-
"label": "HEIR_OF",
75-
"description": "Used for inheritor relationship between father and sons"
76-
},
77-
{
78-
"label": "RULES",
79-
"properties": [
80-
{
81-
"name": "fromYear",
82-
"type": "INTEGER"
83-
}
84-
]
85-
}
86-
],
87-
"potential_schema": [
88-
[
45+
"schema": {
46+
"node_types": [
8947
"Person",
90-
"PARENT_OF",
91-
"Person"
48+
{
49+
"label": "House",
50+
"description": "Family the person belongs to",
51+
"properties": [
52+
{
53+
"name": "name",
54+
"type": "STRING"
55+
}
56+
]
57+
},
58+
{
59+
"label": "Planet",
60+
"properties": [
61+
{
62+
"name": "name",
63+
"type": "STRING"
64+
},
65+
{
66+
"name": "weather",
67+
"type": "STRING"
68+
}
69+
]
70+
}
9271
],
93-
[
94-
"Person",
95-
"HEIR_OF",
96-
"House"
72+
"relationship_types": [
73+
"PARENT_OF",
74+
{
75+
"label": "HEIR_OF",
76+
"description": "Used for inheritor relationship between father and sons"
77+
},
78+
{
79+
"label": "RULES",
80+
"properties": [
81+
{
82+
"name": "fromYear",
83+
"type": "INTEGER"
84+
}
85+
]
86+
}
9787
],
98-
[
99-
"House",
100-
"RULES",
101-
"Planet"
88+
"patterns": [
89+
[
90+
"Person",
91+
"PARENT_OF",
92+
"Person"
93+
],
94+
[
95+
"Person",
96+
"HEIR_OF",
97+
"House"
98+
],
99+
[
100+
"House",
101+
"RULES",
102+
"Planet"
103+
]
102104
]
103-
],
105+
},
104106
"text_splitter": {
105107
"class_": "text_splitters.fixed_size_splitter.FixedSizeSplitter",
106108
"params_": {

src/neo4j_graphrag/experimental/components/entity_relation_extractor.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,9 @@ async def extract_for_chunk(
213213
) -> Neo4jGraph:
214214
"""Run entity extraction for a given text chunk."""
215215
prompt = self.prompt_template.format(
216-
text=chunk.text, schema=schema.model_dump(), examples=examples
216+
text=chunk.text,
217+
schema=schema.model_dump(exclude_none=True),
218+
examples=examples,
217219
)
218220
llm_result = await self.llm.ainvoke(prompt)
219221
try:
@@ -326,7 +328,7 @@ async def run(
326328
elif lexical_graph_config:
327329
lexical_graph_builder = LexicalGraphBuilder(config=lexical_graph_config)
328330
schema = schema or GraphSchema(
329-
node_types=(), relationship_types=(), patterns=()
331+
node_types=(), relationship_types=None, patterns=None
330332
)
331333
examples = examples or ""
332334
sem = asyncio.Semaphore(self.max_concurrency)

src/neo4j_graphrag/experimental/components/schema.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,12 @@ class NodeType(BaseModel):
8181
description: str = ""
8282
properties: list[PropertyType] = []
8383

84+
@model_validator(mode="before")
8485
@classmethod
85-
def from_text_or_dict(cls, input: EntityInputType) -> Self:
86-
if isinstance(input, NodeType):
87-
return input
88-
if isinstance(input, str):
89-
return cls(label=input)
90-
return cls.model_validate(input)
86+
def validate_input_if_string(cls, data: EntityInputType) -> EntityInputType:
87+
if isinstance(data, str):
88+
return {"label": data}
89+
return data
9190

9291

9392
class RelationshipType(BaseModel):
@@ -99,13 +98,12 @@ class RelationshipType(BaseModel):
9998
description: str = ""
10099
properties: list[PropertyType] = []
101100

101+
@model_validator(mode="before")
102102
@classmethod
103-
def from_text_or_dict(cls, input: RelationInputType) -> Self:
104-
if isinstance(input, RelationshipType):
105-
return input
106-
if isinstance(input, str):
107-
return cls(label=input)
108-
return cls.model_validate(input)
103+
def validate_input_if_string(cls, data: RelationInputType) -> RelationInputType:
104+
if isinstance(data, str):
105+
return {"label": data}
106+
return data
109107

110108

111109
class GraphSchema(DataModel):

0 commit comments

Comments
 (0)