Skip to content

Commit 68a49d7

Browse files
authored
Bug fix (#149)
1 parent 4600422 commit 68a49d7

File tree

3 files changed

+140
-10
lines changed

3 files changed

+140
-10
lines changed

examples/pipeline/kg_builder.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# Copyright (c) "Neo4j"
2+
# Neo4j Sweden AB [https://neo4j.com]
3+
# #
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
# #
8+
# https://www.apache.org/licenses/LICENSE-2.0
9+
# #
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
from __future__ import annotations
16+
17+
import asyncio
18+
import logging
19+
20+
import neo4j
21+
from neo4j_graphrag.experimental.components.entity_relation_extractor import (
22+
LLMEntityRelationExtractor,
23+
OnError,
24+
)
25+
from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter
26+
from neo4j_graphrag.experimental.components.pdf_loader import PdfLoader
27+
from neo4j_graphrag.experimental.components.schema import (
28+
SchemaBuilder,
29+
SchemaEntity,
30+
SchemaRelation,
31+
)
32+
from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import (
33+
FixedSizeSplitter,
34+
)
35+
from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
36+
from neo4j_graphrag.llm import OpenAILLM
37+
38+
# Configure the root logger at INFO level for this example script.
logging.basicConfig(level=logging.INFO)
39+
40+
41+
async def main(neo4j_driver: neo4j.Driver) -> PipelineResult:
    """Assemble and run the example knowledge-graph building pipeline.

    The pipeline is made of five components, registered in this order:
    ``pdf_loader`` -> ``splitter`` -> (``schema``) -> ``extractor`` -> ``writer``.
    The schema (entities, relations and allowed triples) and the PDF path
    are supplied as run-time inputs to the ``schema`` and ``pdf_loader``
    components respectively.

    Args:
        neo4j_driver: Driver instance handed to the ``Neo4jWriter`` component.

    Returns:
        Whatever ``Pipeline.run`` returns for the configured inputs.
    """
    # Kept as a function-scope import, exactly as in the original example.
    from neo4j_graphrag.experimental.pipeline import Pipeline

    # --- Schema definition -------------------------------------------------
    # (label, description) pairs for the node types the LLM should extract.
    entity_specs = (
        ("PERSON", "An individual human being."),
        ("ORGANIZATION", "A structured group of people with a common purpose."),
        ("LOCATION", "A location or place."),
        ("HORCRUX", "A magical item in the Harry Potter universe."),
    )
    entities = [
        SchemaEntity(label=name, description=text) for name, text in entity_specs
    ]

    # (label, description) pairs for the relationship types.
    relation_specs = (
        ("SITUATED_AT", "Indicates the location of a person."),
        ("LED_BY", "Indicates the leader of an organization."),
        ("OWNS", "Indicates the ownership of an item such as a Horcrux."),
        ("INTERACTS", "The interaction between two people."),
    )
    relations = [
        SchemaRelation(label=name, description=text) for name, text in relation_specs
    ]

    # Allowed (source label, relation label, target label) triples.
    potential_schema = [
        ("PERSON", "SITUATED_AT", "LOCATION"),
        ("PERSON", "INTERACTS", "PERSON"),
        ("PERSON", "OWNS", "HORCRUX"),
        ("ORGANIZATION", "LED_BY", "PERSON"),
    ]

    # --- Components --------------------------------------------------------
    pipe = Pipeline()
    pipe.add_component(PdfLoader(), "pdf_loader")

    splitter = FixedSizeSplitter(chunk_size=4000, chunk_overlap=200)
    pipe.add_component(splitter, "splitter")

    pipe.add_component(SchemaBuilder(), "schema")

    llm = OpenAILLM(
        model_name="gpt-4o",
        model_params={
            "max_tokens": 2000,
            "response_format": {"type": "json_object"},
        },
    )
    extractor = LLMEntityRelationExtractor(llm=llm, on_error=OnError.RAISE)
    pipe.add_component(extractor, "extractor")

    pipe.add_component(Neo4jWriter(neo4j_driver), "writer")

    # --- Wiring ------------------------------------------------------------
    # (upstream, downstream, input mapping) for each edge, in connection order.
    edges = (
        ("pdf_loader", "splitter", {"text": "pdf_loader.text"}),
        ("splitter", "extractor", {"chunks": "splitter"}),
        (
            "schema",
            "extractor",
            {
                "schema": "schema",
                "document_info": "pdf_loader.document_info",
            },
        ),
        ("extractor", "writer", {"graph": "extractor"}),
    )
    for upstream, downstream, mapping in edges:
        pipe.connect(upstream, downstream, input_config=mapping)

    # --- Run ---------------------------------------------------------------
    # Inputs are keyed by the component name they are delivered to.
    pipe_inputs = {
        "pdf_loader": {
            "filepath": "examples/pipeline/Harry Potter and the Death Hallows Summary.pdf"
        },
        "schema": {
            "entities": entities,
            "relations": relations,
            "potential_schema": potential_schema,
        },
    }
    result = await pipe.run(pipe_inputs)
    return result
128+
129+
130+
if __name__ == "__main__":
    # Connection settings used by this example script.
    uri = "bolt://localhost:7687"
    credentials = ("neo4j", "password")

    # The driver is used as a context manager so it is closed on exit.
    with neo4j.GraphDatabase.driver(uri, auth=credentials) as driver:
        result = asyncio.run(main(driver))
        print(result)

poetry.lock

Lines changed: 5 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ pygraphviz = [
4242
{version = "^1.0.0", python = "<3.10", optional = true}
4343
]
4444
google-cloud-aiplatform = {version = "^1.66.0", optional = true}
45+
fsspec = "^2024.9.0"
4546

4647
[tool.poetry.group.dev.dependencies]
4748
pylint = "^3.1.0"

0 commit comments

Comments
 (0)