Refactor SchemaConfig -> GraphSchema #340

Merged · 16 commits · May 21, 2025
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -12,8 +12,17 @@

### Changed

#### Strict mode

- Strict mode in `SimpleKGPipeline`: now properties and relationships are pruned only if they are defined in the input schema.

#### Schema definition

- The `SchemaEntity` model has been renamed `NodeType`.
- The `SchemaRelation` model has been renamed `RelationshipType`.
- The `SchemaProperty` model has been renamed `PropertyType`.
- `SchemaConfig` has been removed in favor of `GraphSchema` (used in the `SchemaBuilder` and `EntityRelationExtractor` classes). The `entities`, `relations` and `potential_schema` fields have also been renamed to `node_types`, `relationship_types` and `patterns` respectively.
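
A minimal migration sketch under the new names (an illustration only; it assumes the classes are importable from `neo4j_graphrag.experimental.components.schema` as described in the documentation):

```python
from neo4j_graphrag.experimental.components.schema import (
    GraphSchema,       # previously SchemaConfig
    NodeType,          # previously SchemaEntity
    RelationshipType,  # previously SchemaRelation
    PropertyType,      # previously SchemaProperty
)

# Fields renamed: entities -> node_types, relations -> relationship_types,
# potential_schema -> patterns.
schema = GraphSchema(
    node_types=(
        NodeType(label="Person", properties=[PropertyType(name="name", type="STRING")]),
        NodeType(label="House"),
    ),
    relationship_types=(
        RelationshipType(label="PARENT_OF"),
        RelationshipType(label="HEIR_OF"),
    ),
    patterns=(
        ("Person", "PARENT_OF", "Person"),
        ("Person", "HEIR_OF", "House"),
    ),
)
```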


## 1.7.0

15 changes: 9 additions & 6 deletions README.md
@@ -102,9 +102,9 @@ NEO4J_PASSWORD = "password"
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

# List the entities and relations the LLM should look for in the text
entities = ["Person", "House", "Planet"]
relations = ["PARENT_OF", "HEIR_OF", "RULES"]
potential_schema = [
node_types = ["Person", "House", "Planet"]
relationship_types = ["PARENT_OF", "HEIR_OF", "RULES"]
patterns = [
("Person", "PARENT_OF", "Person"),
("Person", "HEIR_OF", "House"),
("House", "RULES", "Planet"),
@@ -128,8 +128,11 @@ kg_builder = SimpleKGPipeline(
llm=llm,
driver=driver,
embedder=embedder,
entities=entities,
relations=relations,
schema={
"node_types": node_types,
"relationship_types": relationship_types,
"patterns": patterns,
},
on_error="IGNORE",
from_pdf=False,
)
@@ -365,7 +368,7 @@ When you're finished with your changes, create a pull request (PR) using the fol

## 🧪 Tests

To be able to run all tests, all extra packages need to be installed.
This is achieved by:

```bash
24 changes: 12 additions & 12 deletions docs/source/types.rst
@@ -75,25 +75,25 @@ KGWriterModel

.. autoclass:: neo4j_graphrag.experimental.components.kg_writer.KGWriterModel

SchemaProperty
==============
PropertyType
============

.. autoclass:: neo4j_graphrag.experimental.components.schema.SchemaProperty
.. autoclass:: neo4j_graphrag.experimental.components.schema.PropertyType

SchemaEntity
============
NodeType
========

.. autoclass:: neo4j_graphrag.experimental.components.schema.SchemaEntity
.. autoclass:: neo4j_graphrag.experimental.components.schema.NodeType

SchemaRelation
==============
RelationshipType
================

.. autoclass:: neo4j_graphrag.experimental.components.schema.SchemaRelation
.. autoclass:: neo4j_graphrag.experimental.components.schema.RelationshipType

SchemaConfig
============
GraphSchema
===========

.. autoclass:: neo4j_graphrag.experimental.components.schema.SchemaConfig
.. autoclass:: neo4j_graphrag.experimental.components.schema.GraphSchema

LexicalGraphConfig
===================
86 changes: 42 additions & 44 deletions docs/source/user_guide_kg_builder.rst
@@ -21,7 +21,7 @@ A Knowledge Graph (KG) construction pipeline requires a few components (some of
- **Data loader**: extract text from files (PDFs, ...).
- **Text splitter**: split the text into smaller pieces of text (chunks), manageable by the LLM context window (token limit).
- **Chunk embedder** (optional): compute the chunk embeddings.
- **Schema builder**: provide a schema to ground the LLM extracted entities and relations and obtain an easily navigable KG. Schema can be provided manually or extracted automatically using LLMs.
- **Schema builder**: provide a schema to ground the LLM-extracted node and relationship types and obtain an easily navigable KG. The schema can be provided manually or extracted automatically using LLMs.
- **Lexical graph builder**: build the lexical graph (Document, Chunk and their relationships) (optional).
- **Entity and relation extractor**: extract relevant entities and relations from the text.
- **Knowledge Graph writer**: save the identified entities and relations.
@@ -73,18 +73,18 @@ Customizing the SimpleKGPipeline
Graph Schema
------------

It is possible to guide the LLM by supplying a list of entities, relationships,
and instructions on how to connect them. However, note that the extracted graph
may not fully adhere to these guidelines unless schema enforcement is enabled
(see :ref:`Schema Enforcement Behaviour`). Entities and relationships can be represented
It is possible to guide the LLM by supplying a list of node and relationship types,
and instructions on how to connect them (patterns). However, note that the extracted graph
may not fully adhere to these guidelines unless schema enforcement is enabled
(see :ref:`Schema Enforcement Behaviour`). Node and relationship types can be represented
as either simple strings (for their labels) or dictionaries. If using a dictionary,
it must include a label key and can optionally include description and properties keys,
as shown below:

.. code:: python

ENTITIES = [
# entities can be defined with a simple label...
NODE_TYPES = [
# node types can be defined with a simple label...
"Person",
# ... or with a dict if more details are needed,
# such as a description:
@@ -93,7 +93,7 @@ as shown below:
{"label": "Planet", "properties": [{"name": "weather", "type": "STRING"}]},
]
# same thing for relationships:
RELATIONS = [
RELATIONSHIP_TYPES = [
"PARENT_OF",
{
"label": "HEIR_OF",
@@ -102,13 +102,13 @@ as shown below:
{"label": "RULES", "properties": [{"name": "fromYear", "type": "INTEGER"}]},
]

The `potential_schema` is defined by a list of triplet in the format:
The `patterns` are defined as a list of triplets in the format:
`(source_node_label, relationship_label, target_node_label)`. For instance:


.. code:: python

POTENTIAL_SCHEMA = [
PATTERNS = [
("Person", "PARENT_OF", "Person"),
("Person", "HEIR_OF", "House"),
("House", "RULES", "Planet"),
@@ -122,15 +122,15 @@ This schema information can be provided to the `SimpleKGBuilder` as demonstrated
kg_builder = SimpleKGPipeline(
# ...
schema={
"entities": ENTITIES,
"relations": RELATIONS,
"potential_schema": POTENTIAL_SCHEMA
"node_types": NODE_TYPES,
"relationship_types": RELATIONSHIP_TYPES,
"patterns": PATTERNS
},
# ...
)

.. note::
By default, if no schema is provided to the SimpleKGPipeline, automatic schema extraction will be performed using the LLM (See the :ref:`Automatic Schema Extraction with SchemaFromTextExtractor`).
By default, if no schema is provided to the SimpleKGPipeline, automatic schema extraction will be performed using the LLM (See the :ref:`Automatic Schema Extraction`).
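
For instance, a minimal sketch (assuming `llm`, `driver` and `embedder` are defined as in the previous examples):

.. code:: python

    # No "schema" argument: the schema is extracted automatically from the
    # input text by the LLM before entity and relation extraction.
    kg_builder = SimpleKGPipeline(
        llm=llm,
        driver=driver,
        embedder=embedder,
        from_pdf=False,
    )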

Extra configurations
--------------------
@@ -419,9 +418,8 @@ within the configuration file.
"neo4j_database": "myDb",
"on_error": "IGNORE",
"prompt_template": "...",

"schema": {
"entities": [
"node_types": [
"Person",
{
"label": "House",
@@ -438,7 +437,7 @@
]
}
],
"relations": [
"relationship_types": [
"PARENT_OF",
{
"label": "HEIR_OF",
@@ -451,7 +450,7 @@
]
}
],
"potential_schema": [
"patterns": [
["Person", "PARENT_OF", "Person"],
["Person", "HEIR_OF", "House"],
["House", "RULES", "Planet"]
@@ -473,7 +472,7 @@ or in YAML:
on_error: IGNORE
prompt_template: ...
schema:
entities:
node_types:
- Person
- label: House
description: Family the person belongs to
@@ -486,15 +485,15 @@ or in YAML:
type: STRING
- name: weather
type: STRING
relations:
relationship_types:
- PARENT_OF
- label: HEIR_OF
description: Used for inheritor relationship between father and sons
- label: RULES
properties:
- name: fromYear
type: INTEGER
potential_schema:
patterns:
- ["Person", "PARENT_OF", "Person"]
- ["Person", "HEIR_OF", "House"]
- ["House", "RULES", "Planet"]
@@ -747,62 +746,62 @@ Optionally, the document and chunk node labels can be configured using a `Lexica
Schema Builder
==============

The schema is used to try and ground the LLM to a list of possible entities and relations of interest.
The schema is used to try and ground the LLM to a list of possible node and relationship types of interest.
So far, the schema must be created manually by specifying:

- **Entities** the LLM should look for in the text, including their properties (name and type).
- **Relations** of interest between these entities, including the relation properties (name and type).
- **Triplets** to define the start (source) and end (target) entity types for each relation.
- **Node types** the LLM should look for in the text, including their properties (name and type).
- **Relationship types** of interest between these node types, including the relationship properties (name and type).
- **Patterns** (triplets) to define the start (source) and end (target) entity types for each relationship.

Here is a code block illustrating these concepts:

.. code:: python

from neo4j_graphrag.experimental.components.schema import (
SchemaBuilder,
SchemaEntity,
SchemaProperty,
SchemaRelation,
NodeType,
PropertyType,
RelationshipType,
)

schema_builder = SchemaBuilder()

await schema_builder.run(
entities=[
SchemaEntity(
node_types=[
NodeType(
label="Person",
properties=[
PropertyType(name="name", type="STRING"),
PropertyType(name="place_of_birth", type="STRING"),
PropertyType(name="date_of_birth", type="DATE"),
],
),
SchemaEntity(
NodeType(
label="Organization",
properties=[
PropertyType(name="name", type="STRING"),
PropertyType(name="country", type="STRING"),
],
),
],
relations=[
SchemaRelation(
relationship_types=[
RelationshipType(
label="WORKED_ON",
),
SchemaRelation(
RelationshipType(
label="WORKED_FOR",
),
],
possible_schema=[
patterns=[
("Person", "WORKED_ON", "Field"),
("Person", "WORKED_FOR", "Organization"),
],
)

After validation, this schema is saved in a `SchemaConfig` object, whose dict representation is passed
After validation, this schema is saved in a `GraphSchema` object, whose dict representation is passed
to the LLM.
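
For example, a short sketch of inspecting that representation (`model_dump` is the standard Pydantic method and is assumed to be available on `GraphSchema`):

.. code:: python

    # Capture the validated GraphSchema returned by the SchemaBuilder
    schema = await schema_builder.run(
        node_types=[
            NodeType(label="Person"),
            NodeType(label="Organization"),
        ],
        relationship_types=[RelationshipType(label="WORKED_FOR")],
        patterns=[("Person", "WORKED_FOR", "Organization")],
    )
    # Dict representation that is passed to the LLM
    print(schema.model_dump())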

Automatic Schema Extraction
---------------------------

Instead of manually defining the schema, you can use the `SchemaFromTextExtractor` component to automatically extract a schema from your text using an LLM:
@@ -826,19 +825,19 @@ Instead of manually defining the schema, you can use the `SchemaFromTextExtracto
# Extract the schema from the text
extracted_schema = await schema_extractor.run(text="Some text")

The `SchemaFromTextExtractor` component analyzes the text and identifies entity types, relationship types, and their property types. It creates a complete `SchemaConfig` object that can be used in the same way as a manually defined schema.
The `SchemaFromTextExtractor` component analyzes the text and identifies entity types, relationship types, and their property types. It creates a complete `GraphSchema` object that can be used in the same way as a manually defined schema.
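
Because the result is a regular `GraphSchema`, it can be reused wherever a manually defined schema is expected. A sketch (passing the schema as a dict via `model_dump()` is an assumption made here for illustration; `llm`, `driver` and `embedder` are assumed to be defined as in the previous examples):

.. code:: python

    # Sketch: reuse the automatically extracted schema in the pipeline
    kg_builder = SimpleKGPipeline(
        llm=llm,
        driver=driver,
        embedder=embedder,
        schema=extracted_schema.model_dump(),
        from_pdf=False,
    )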

You can also save and reload the extracted schema:

.. code:: python

# Save the schema to JSON or YAML files
schema_config.store_as_json("my_schema.json")
schema_config.store_as_yaml("my_schema.yaml")
extracted_schema.store_as_json("my_schema.json")
extracted_schema.store_as_yaml("my_schema.yaml")

# Later, reload the schema from file
from neo4j_graphrag.experimental.components.schema import SchemaConfig
restored_schema = SchemaConfig.from_file("my_schema.json") # or my_schema.yaml
from neo4j_graphrag.experimental.components.schema import GraphSchema
restored_schema = GraphSchema.from_file("my_schema.json") # or my_schema.yaml


Entity and Relation Extractor
@@ -993,7 +992,6 @@ If more customization is needed, it is possible to subclass the `EntityRelationE

from pydantic import validate_call
from neo4j_graphrag.experimental.components.entity_relation_extractor import EntityRelationExtractor
from neo4j_graphrag.experimental.components.schema import SchemaConfig
from neo4j_graphrag.experimental.components.types import (
Neo4jGraph,
Neo4jNode,
16 changes: 9 additions & 7 deletions examples/build_graph/simple_kg_builder_from_pdf.py
@@ -27,11 +27,11 @@
file_path = root_dir / "data" / "Harry Potter and the Chamber of Secrets Summary.pdf"


# Instantiate Entity and Relation objects. This defines the
# Instantiate NodeType and RelationshipType objects. This defines the
# entities and relations the LLM will be looking for in the text.
ENTITIES = ["Person", "Organization", "Location"]
RELATIONS = ["SITUATED_AT", "INTERACTS", "LED_BY"]
POTENTIAL_SCHEMA = [
NODE_TYPES = ["Person", "Organization", "Location"]
RELATIONSHIP_TYPES = ["SITUATED_AT", "INTERACTS", "LED_BY"]
PATTERNS = [
("Person", "SITUATED_AT", "Location"),
("Person", "INTERACTS", "Person"),
("Organization", "LED_BY", "Person"),
@@ -47,9 +47,11 @@ async def define_and_run_pipeline(
llm=llm,
driver=neo4j_driver,
embedder=OpenAIEmbeddings(),
entities=ENTITIES,
relations=RELATIONS,
potential_schema=POTENTIAL_SCHEMA,
schema={
"node_types": NODE_TYPES,
"relationship_types": RELATIONSHIP_TYPES,
"patterns": PATTERNS,
},
neo4j_database=DATABASE,
)
return await kg_builder.run_async(file_path=str(file_path))