neo4j · stellasia · Jul 1, 2025 · Jul 1, 2025 · Jul 1, 2025 · NathalieCharbel
@@ -131,8 +131,19 @@ This schema information can be provided to the `SimpleKGBuilder` as demonstrated
         # ...
     )
 
-.. note::
-   By default, if no schema is provided to the SimpleKGPipeline, automatic schema extraction will be performed using the LLM (See the :ref:`Automatic Schema Extraction`).
+
+Schema Parameter Behavior
+-------------------------
+
+The ``schema`` parameter controls how entity and relation extraction is performed:
+
+* **AUTO_EXTRACTION**: ``schema="AUTO_EXTRACTION"`` or ``schema=None`` (the default)
+  The schema is automatically extracted from the input text once, using the LLM. This guiding schema is then used to structure entity and relation extraction for every chunk, which guarantees that all chunks share the same guiding schema.
+  (See :ref:`Automatic Schema Extraction`)
+
+* **NO_EXTRACTION**: ``schema="NO_EXTRACTION"`` or empty schema (``{"node_types": ()}``)
+  No schema extraction is performed. Entity and relation extraction proceed without a predefined or derived schema, resulting in unguided extraction.
+
 
 Extra configurations
 --------------------

@@ -226,6 +226,10 @@ def node_type_from_label(self, label: str) -> Optional[NodeType]:
     def relationship_type_from_label(self, label: str) -> Optional[RelationshipType]:
         return self._relationship_type_index.get(label)
 
+    @classmethod
+    def create_empty(cls) -> Self:
+        """Build an instance of this class with an empty tuple of node types."""
+        return cls(node_types=())
+
     def save(
         self,
         file_path: Union[str, Path],

@@ -22,10 +22,9 @@
     Sequence,
     Union,
 )
-import logging
 import warnings
 
-from pydantic import ConfigDict, Field, model_validator
+from pydantic import ConfigDict, Field, model_validator, field_validator
 from typing_extensions import Self
 
 from neo4j_graphrag.experimental.components.embedder import TextChunkEmbedder
@@ -66,8 +65,6 @@
 )
 from neo4j_graphrag.generation.prompts import ERExtractionTemplate
 
-logger = logging.getLogger(__name__)
-
 
 class SimpleKGPipelineConfig(TemplatePipelineConfig):
     COMPONENTS: ClassVar[list[str]] = [
@@ -102,6 +99,15 @@ class SimpleKGPipelineConfig(TemplatePipelineConfig):
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
+    @field_validator("schema_", mode="before")
+    @classmethod
+    def validate_schema_literal(cls, v: Any) -> Any:
+        """Translate the accepted schema string literals before validation.
+
+        ``"NO_EXTRACTION"`` is replaced by an empty ``GraphSchema`` and
+        ``"AUTO_EXTRACTION"`` by ``None``; any other value is returned
+        unchanged.
+        """
+        if v == "NO_EXTRACTION":
+            # same as an "empty" schema
+            return GraphSchema.create_empty()
+        # "AUTO_EXTRACTION" is the same as providing no schema
+        return None if v == "AUTO_EXTRACTION" else v
+
     @model_validator(mode="after")
     def handle_schema_precedence(self) -> Self:
         """Handle schema precedence and warnings"""

@@ -15,7 +15,7 @@
 
 from __future__ import annotations
 
-from typing import List, Optional, Sequence, Union, Any
+from typing import List, Optional, Sequence, Union, Any, Literal
 import logging
 
 import neo4j
@@ -99,7 +99,13 @@ def __init__(
         entities: Optional[Sequence[EntityInputType]] = None,
         relations: Optional[Sequence[RelationInputType]] = None,
         potential_schema: Optional[List[tuple[str, str, str]]] = None,
-        schema: Optional[Union[GraphSchema, dict[str, list[Any]]]] = None,
+        schema: Optional[
+            Union[
+                GraphSchema,
+                dict[str, list[Any]],
+                Literal["NO_EXTRACTION", "AUTO_EXTRACTION"],
+            ],
+        ] = None,
         from_pdf: bool = True,
         text_splitter: Optional[TextSplitter] = None,
         pdf_loader: Optional[DataLoader] = None,

@@ -138,6 +138,14 @@ def test_simple_kg_pipeline_config_manual_schema() -> None:
     assert isinstance(config._get_schema(), SchemaBuilder)
 
 
+def test_simple_kg_pipeline_config_literal_schema_validation() -> None:
+    # "NO_EXTRACTION" must be stored as an empty GraphSchema
+    empty_cfg = SimpleKGPipelineConfig(schema="NO_EXTRACTION")  # type: ignore
+    assert empty_cfg.schema_ == GraphSchema.create_empty()
+
+    # "AUTO_EXTRACTION" must be stored as None (no schema provided)
+    auto_cfg = SimpleKGPipelineConfig(schema="AUTO_EXTRACTION")  # type: ignore
+    assert auto_cfg.schema_ is None
+
+
 def test_simple_kg_pipeline_config_schema_run_params() -> None:
     config = SimpleKGPipelineConfig(
         entities=["Person"],