Skip to content

Commit 7558b56

Browse files
Rename schema from text component
1 parent 8458b75 commit 7558b56

File tree

6 files changed

+60
-55
lines changed

6 files changed

+60
-55
lines changed

docs/source/api.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,10 @@ SchemaBuilder
7777
.. autoclass:: neo4j_graphrag.experimental.components.schema.SchemaBuilder
7878
:members: run
7979

80-
SchemaFromText
81-
=============
80+
SchemaFromTextExtractor
81+
-----------------------
8282

83-
.. autoclass:: neo4j_graphrag.experimental.components.schema.SchemaFromText
83+
.. autoclass:: neo4j_graphrag.experimental.components.schema.SchemaFromTextExtractor
8484
:members: run
8585

8686
EntityRelationExtractor

docs/source/user_guide_kg_builder.rst

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ This schema information can be provided to the `SimpleKGBuilder` as demonstrated
139139
)
140140
141141
.. note::
142-
By default, if no schema is provided to the SimpleKGPipeline, automatic schema extraction will be performed using the LLM (See the :ref:`Automatic Schema Extraction with SchemaFromText` section.
142+
By default, if no schema is provided to the SimpleKGPipeline, automatic schema extraction will be performed using the LLM (See the :ref:`Automatic Schema Extraction` section).
143143

144144
Extra configurations
145145
--------------------
@@ -817,19 +817,18 @@ Here is a code block illustrating these concepts:
817817
After validation, this schema is saved in a `SchemaConfig` object, whose dict representation is passed
818818
to the LLM.
819819

820-
Automatic Schema Extraction with SchemaFromText
821-
----------------------------------------------
822-
.. _automatic-schema-extraction:
820+
Automatic Schema Extraction
821+
---------------------------
823822

824-
Instead of manually defining the schema, you can use the `SchemaFromText` component to automatically extract a schema from your text using an LLM:
823+
Instead of manually defining the schema, you can use the `SchemaFromTextExtractor` component to automatically extract a schema from your text using an LLM:
825824

826825
.. code:: python
827826
828-
from neo4j_graphrag.experimental.components.schema import SchemaFromText
827+
from neo4j_graphrag.experimental.components.schema import SchemaFromTextExtractor
829828
from neo4j_graphrag.llm import OpenAILLM
830829
831830
# Create the automatic schema extractor
832-
schema_extractor = SchemaFromText(
831+
schema_extractor = SchemaFromTextExtractor(
833832
llm=OpenAILLM(
834833
model_name="gpt-4o",
835834
model_params={
@@ -839,14 +838,7 @@ Instead of manually defining the schema, you can use the `SchemaFromText` compon
839838
)
840839
)
841840
842-
# Extract schema from text
843-
schema_config = await schema_extractor.run(text="Your document text here...")
844-
845-
# Use the extracted schema with other components
846-
extractor = LLMEntityRelationExtractor(llm=llm)
847-
result = await extractor.run(chunks=chunks, schema=schema_config)
848-
849-
The `SchemaFromText` component analyzes the text and identifies entity types, relationship types, and their property types. It creates a complete `SchemaConfig` object that can be used in the same way as a manually defined schema.
841+
The `SchemaFromTextExtractor` component analyzes the text and identifies entity types, relationship types, and their property types. It creates a complete `SchemaConfig` object that can be used in the same way as a manually defined schema.
850842

851843
You can also save and reload the extracted schema:
852844

examples/automatic_schema_extraction/schema_from_text.py

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
"""This example demonstrates how to use the SchemaFromText component
1+
"""This example demonstrates how to use the SchemaFromTextExtractor component
22
to automatically extract a schema from text and save it to JSON and YAML files.
33
4-
The SchemaFromText component uses an LLM to analyze the text and identify entities,
4+
The SchemaFromTextExtractor component uses an LLM to analyze the text and identify entities,
55
relations, and their properties.
66
77
Note: This example requires an OpenAI API key to be set in the .env file.
@@ -12,7 +12,10 @@
1212
import os
1313
from dotenv import load_dotenv
1414

15-
from neo4j_graphrag.experimental.components.schema import SchemaFromText, SchemaConfig
15+
from neo4j_graphrag.experimental.components.schema import (
16+
SchemaFromTextExtractor,
17+
SchemaConfig,
18+
)
1619
from neo4j_graphrag.llm import OpenAILLM
1720

1821
# Load environment variables from .env file
@@ -54,77 +57,81 @@
5457
"""
5558

5659
# Define the file paths for saving the schema
57-
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data")
60+
OUTPUT_DIR = os.path.join(
61+
os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data"
62+
)
5863
JSON_FILE_PATH = os.path.join(OUTPUT_DIR, "extracted_schema.json")
5964
YAML_FILE_PATH = os.path.join(OUTPUT_DIR, "extracted_schema.yaml")
6065

6166

6267
async def extract_and_save_schema() -> SchemaConfig:
6368
"""Extract schema from text and save it to JSON and YAML files."""
64-
69+
6570
# Define LLM parameters
6671
llm_model_params = {
6772
"max_tokens": 2000,
6873
"response_format": {"type": "json_object"},
6974
"temperature": 0, # Lower temperature for more consistent output
7075
}
71-
76+
7277
# Create the LLM instance
7378
llm = OpenAILLM(
7479
model_name="gpt-4o",
7580
model_params=llm_model_params,
7681
)
77-
82+
7883
try:
79-
# Create a SchemaFromText component with the default template
80-
schema_extractor = SchemaFromText(llm=llm)
81-
84+
# Create a SchemaFromTextExtractor component with the default template
85+
schema_extractor = SchemaFromTextExtractor(llm=llm)
86+
8287
print("Extracting schema from text...")
8388
# Extract schema from text
8489
inferred_schema = await schema_extractor.run(text=TEXT)
85-
90+
8691
# Ensure the output directory exists
8792
os.makedirs(OUTPUT_DIR, exist_ok=True)
88-
93+
8994
print(f"Saving schema to JSON file: {JSON_FILE_PATH}")
9095
# Save the schema to JSON file
9196
inferred_schema.store_as_json(JSON_FILE_PATH)
92-
97+
9398
print(f"Saving schema to YAML file: {YAML_FILE_PATH}")
9499
# Save the schema to YAML file
95100
inferred_schema.store_as_yaml(YAML_FILE_PATH)
96-
101+
97102
print("\nExtracted Schema Summary:")
98103
print(f"Entities: {list(inferred_schema.entities.keys())}")
99-
print(f"Relations: {list(inferred_schema.relations.keys() if inferred_schema.relations else [])}")
100-
104+
print(
105+
f"Relations: {list(inferred_schema.relations.keys() if inferred_schema.relations else [])}"
106+
)
107+
101108
if inferred_schema.potential_schema:
102109
print("\nPotential Schema:")
103110
for entity1, relation, entity2 in inferred_schema.potential_schema:
104111
print(f" {entity1} --[{relation}]--> {entity2}")
105-
112+
106113
return inferred_schema
107-
114+
108115
finally:
109116
# Close the LLM client
110117
await llm.async_client.close()
111118

112119

113120
async def main() -> None:
114121
"""Run the example."""
115-
122+
116123
# Extract schema and save to files
117124
schema_config = await extract_and_save_schema()
118-
125+
119126
print(f"\nSchema files have been saved to:")
120127
print(f" - JSON: {JSON_FILE_PATH}")
121128
print(f" - YAML: {YAML_FILE_PATH}")
122-
129+
123130
print("\nExample of how to load the schema from files:")
124131
print(" from neo4j_graphrag.experimental.components.schema import SchemaConfig")
125132
print(f" schema_from_json = SchemaConfig.from_file('{JSON_FILE_PATH}')")
126133
print(f" schema_from_yaml = SchemaConfig.from_file('{YAML_FILE_PATH}')")
127134

128135

129136
if __name__ == "__main__":
130-
asyncio.run(main())
137+
asyncio.run(main())

src/neo4j_graphrag/experimental/components/schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ async def run(
335335
return self.create_schema_model(entities, relations, potential_schema)
336336

337337

338-
class SchemaFromText(Component):
338+
class SchemaFromTextExtractor(Component):
339339
"""
340340
A component for constructing SchemaConfig objects from the output of an LLM after
341341
automatic schema extraction from text.

src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@
3131
)
3232
from neo4j_graphrag.experimental.components.schema import (
3333
SchemaBuilder,
34+
SchemaConfig,
3435
SchemaEntity,
3536
SchemaRelation,
36-
SchemaFromText,
37-
SchemaConfig,
37+
SchemaFromTextExtractor,
3838
)
3939
from neo4j_graphrag.experimental.components.text_splitters.base import TextSplitter
4040
from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import (
@@ -164,13 +164,13 @@ def _get_run_params_for_splitter(self) -> dict[str, Any]:
164164
def _get_chunk_embedder(self) -> TextChunkEmbedder:
165165
return TextChunkEmbedder(embedder=self.get_default_embedder())
166166

167-
def _get_schema(self) -> Union[SchemaBuilder, SchemaFromText]:
167+
def _get_schema(self) -> Union[SchemaBuilder, SchemaFromTextExtractor]:
168168
"""
169169
Get the appropriate schema component based on configuration.
170-
Return SchemaFromText for automatic extraction or SchemaBuilder for manual schema.
170+
Return SchemaFromTextExtractor for automatic extraction or SchemaBuilder for manual schema.
171171
"""
172172
if self.auto_schema_extraction and not self.has_user_provided_schema():
173-
return SchemaFromText(llm=self.get_default_llm())
173+
return SchemaFromTextExtractor(llm=self.get_default_llm())
174174
return SchemaBuilder()
175175

176176
def _process_schema_with_precedence(

tests/unit/experimental/components/test_schema.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
SchemaEntity,
2525
SchemaProperty,
2626
SchemaRelation,
27-
SchemaFromText,
27+
SchemaFromTextExtractor,
2828
SchemaConfig,
2929
)
3030
from pydantic import ValidationError
@@ -498,13 +498,15 @@ def invalid_schema_json() -> str:
498498

499499

500500
@pytest.fixture
501-
def schema_from_text(mock_llm: AsyncMock) -> SchemaFromText:
502-
return SchemaFromText(llm=mock_llm)
501+
def schema_from_text(mock_llm: AsyncMock) -> SchemaFromTextExtractor:
502+
return SchemaFromTextExtractor(llm=mock_llm)
503503

504504

505505
@pytest.mark.asyncio
506506
async def test_schema_from_text_run_valid_response(
507-
schema_from_text: SchemaFromText, mock_llm: AsyncMock, valid_schema_json: str
507+
schema_from_text: SchemaFromTextExtractor,
508+
mock_llm: AsyncMock,
509+
valid_schema_json: str,
508510
) -> None:
509511
# configure the mock LLM to return a valid schema JSON
510512
mock_llm.ainvoke.return_value = valid_schema_json
@@ -533,7 +535,9 @@ async def test_schema_from_text_run_valid_response(
533535

534536
@pytest.mark.asyncio
535537
async def test_schema_from_text_run_invalid_json(
536-
schema_from_text: SchemaFromText, mock_llm: AsyncMock, invalid_schema_json: str
538+
schema_from_text: SchemaFromTextExtractor,
539+
mock_llm: AsyncMock,
540+
invalid_schema_json: str,
537541
) -> None:
538542
# configure the mock LLM to return invalid JSON
539543
mock_llm.ainvoke.return_value = invalid_schema_json
@@ -553,8 +557,10 @@ async def test_schema_from_text_custom_template(
553557
custom_prompt = "This is a custom prompt with text: {text}"
554558
custom_template = PromptTemplate(template=custom_prompt, expected_inputs=["text"])
555559

556-
# create SchemaFromText with the custom template
557-
schema_from_text = SchemaFromText(llm=mock_llm, prompt_template=custom_template)
560+
# create SchemaFromTextExtractor with the custom template
561+
schema_from_text = SchemaFromTextExtractor(
562+
llm=mock_llm, prompt_template=custom_template
563+
)
558564

559565
# configure mock LLM to return valid JSON and capture the prompt that was sent to it
560566
mock_llm.ainvoke.return_value = valid_schema_json
@@ -574,8 +580,8 @@ async def test_schema_from_text_llm_params(
574580
# configure custom LLM parameters
575581
llm_params = {"temperature": 0.1, "max_tokens": 500}
576582

577-
# create SchemaFromText with custom LLM parameters
578-
schema_from_text = SchemaFromText(llm=mock_llm, llm_params=llm_params)
583+
# create SchemaFromTextExtractor with custom LLM parameters
584+
schema_from_text = SchemaFromTextExtractor(llm=mock_llm, llm_params=llm_params)
579585

580586
# configure the mock LLM to return a valid schema JSON
581587
mock_llm.ainvoke.return_value = valid_schema_json

0 commit comments

Comments
 (0)