Fixes

stellasia · stellasia · commit 523df1534e55 · 2025-06-12T15:21:47.000+02:00
diff --git a/src/neo4j_graphrag/experimental/components/graph_pruning.py b/src/neo4j_graphrag/experimental/components/graph_pruning.py
@@ -28,6 +28,7 @@
     Neo4jGraph,
     Neo4jNode,
     Neo4jRelationship,
+    LexicalGraphConfig,
 )
 from neo4j_graphrag.experimental.pipeline import Component, DataModel
 
@@ -135,9 +136,14 @@ async def run(
         self,
         graph: Neo4jGraph,
         schema: Optional[GraphSchema] = None,
+        lexical_graph_config: Optional[LexicalGraphConfig] = None,
     ) -> GraphPruningResult:
+        if lexical_graph_config is None:
+            lexical_graph_config = LexicalGraphConfig()
         if schema is not None:
-            new_graph, pruning_stats = self._clean_graph(graph, schema)
+            new_graph, pruning_stats = self._clean_graph(
+                graph, schema, lexical_graph_config
+            )
         else:
             new_graph = graph
             pruning_stats = PruningStats()
@@ -150,6 +156,7 @@ def _clean_graph(
         self,
         graph: Neo4jGraph,
         schema: GraphSchema,
+        lexical_graph_config: LexicalGraphConfig,
     ) -> tuple[Neo4jGraph, PruningStats]:
         """
         Verify that the graph conforms to the provided schema.
@@ -162,6 +169,7 @@ def _clean_graph(
         filtered_nodes = self._enforce_nodes(
             graph.nodes,
             schema,
+            lexical_graph_config,
             pruning_stats,
         )
         if not filtered_nodes:
@@ -174,6 +182,7 @@ def _clean_graph(
             graph.relationships,
             filtered_nodes,
             schema,
+            lexical_graph_config,
             pruning_stats,
         )
 
@@ -216,6 +225,7 @@ def _enforce_nodes(
         self,
         extracted_nodes: list[Neo4jNode],
         schema: GraphSchema,
+        lexical_graph_config: LexicalGraphConfig,
         pruning_stats: PruningStats,
     ) -> list[Neo4jNode]:
         """
@@ -228,6 +238,9 @@ def _enforce_nodes(
         """
         valid_nodes = []
         for node in extracted_nodes:
+            if node.label in lexical_graph_config.lexical_graph_node_labels:
+                valid_nodes.append(node)
+                continue
             schema_entity = schema.node_type_from_label(node.label)
             new_node = self._validate_node(
                 node,
@@ -319,6 +332,7 @@ def _enforce_relationships(
         extracted_relationships: list[Neo4jRelationship],
         filtered_nodes: list[Neo4jNode],
         schema: GraphSchema,
+        lexical_graph_config: LexicalGraphConfig,
         pruning_stats: PruningStats,
     ) -> list[Neo4jRelationship]:
         """
@@ -334,6 +348,9 @@ def _enforce_relationships(
         valid_rels = []
         valid_nodes = {node.id: node.label for node in filtered_nodes}
         for rel in extracted_relationships:
+            if rel.type in lexical_graph_config.lexical_graph_relationship_types:
+                valid_rels.append(rel)
+                continue
             schema_relation = schema.relationship_type_from_label(rel.type)
             new_rel = self._validate_relationship(
                 rel,
diff --git a/src/neo4j_graphrag/experimental/components/schema.py b/src/neo4j_graphrag/experimental/components/schema.py
@@ -334,6 +334,7 @@ def create_schema_model(
         node_types: Sequence[NodeType],
         relationship_types: Optional[Sequence[RelationshipType]] = None,
         patterns: Optional[Sequence[Tuple[str, str, str]]] = None,
+        **kwargs: Any,
     ) -> GraphSchema:
         """
         Creates a GraphSchema object from Lists of Entity and Relation objects
@@ -343,6 +344,7 @@ def create_schema_model(
             node_types (Sequence[NodeType]): List or tuple of NodeType objects.
             relationship_types (Optional[Sequence[RelationshipType]]): List or tuple of RelationshipType objects.
             patterns (Optional[Sequence[Tuple[str, str, str]]]): List or tuples of triplets: (source_entity_label, relation_label, target_entity_label).
+            kwargs: other arguments passed to GraphSchema validator.
 
         Returns:
             GraphSchema: A configured schema object.
@@ -353,17 +355,19 @@ def create_schema_model(
                     node_types=node_types,
                     relationship_types=relationship_types or (),
                     patterns=patterns or (),
+                    **kwargs,
                 )
             )
-        except (ValidationError, SchemaValidationError) as e:
-            raise SchemaValidationError(e) from e
+        except ValidationError as e:
+            raise SchemaValidationError() from e
 
     @validate_call
     async def run(
         self,
         node_types: Sequence[NodeType],
         relationship_types: Optional[Sequence[RelationshipType]] = None,
         patterns: Optional[Sequence[Tuple[str, str, str]]] = None,
+        **kwargs: Any,
     ) -> GraphSchema:
         """
         Asynchronously constructs and returns a GraphSchema object.
@@ -376,7 +380,12 @@ async def run(
         Returns:
             GraphSchema: A configured schema object, constructed asynchronously.
         """
-        return self.create_schema_model(node_types, relationship_types, patterns)
+        return self.create_schema_model(
+            node_types,
+            relationship_types,
+            patterns,
+            **kwargs,
+        )
 
 
 class SchemaFromTextExtractor(Component):
diff --git a/src/neo4j_graphrag/experimental/components/types.py b/src/neo4j_graphrag/experimental/components/types.py
@@ -174,6 +174,14 @@ class LexicalGraphConfig(BaseModel):
     def lexical_graph_node_labels(self) -> tuple[str, ...]:
         return self.document_node_label, self.chunk_node_label
 
+    @property
+    def lexical_graph_relationship_types(self) -> tuple[str, ...]:
+        return (
+            self.chunk_to_document_relationship_type,
+            self.next_chunk_relationship_type,
+            self.node_to_chunk_relationship_type,
+        )
+
 
 class GraphResult(DataModel):
     graph: Neo4jGraph
diff --git a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
@@ -184,66 +184,33 @@ def _get_schema(self) -> Union[SchemaBuilder, SchemaFromTextExtractor]:
             return SchemaFromTextExtractor(llm=self.get_default_llm())
         return SchemaBuilder()
 
-    def _process_schema_with_precedence(
-        self,
-    ) -> Tuple[
-        Tuple[NodeType, ...],
-        Tuple[RelationshipType, ...] | None,
-        Optional[Tuple[Tuple[str, str, str], ...]] | None,
-    ]:
+    def _process_schema_with_precedence(self) -> GraphSchema:
         """
         Process schema inputs according to precedence rules:
         1. If schema is provided as GraphSchema object, use it
         2. If schema is provided as dictionary, extract from it
         3. Otherwise, use individual schema components
 
         Returns:
-            Tuple of (node_types, relationship_types, patterns)
+            A GraphSchema object
         """
         if self.schema_ is not None:
-            # schema takes precedence over individual components
-            node_types = self.schema_.node_types
+            return self.schema_
 
-            # handle case where relations could be None
-            if self.schema_.relationship_types is not None:
-                relationship_types = self.schema_.relationship_types
-            else:
-                relationship_types = None
-
-            patterns = self.schema_.patterns
-        else:
-            # use individual components
-            node_types = tuple(
-                [NodeType.model_validate(e) for e in self.entities]
-                if self.entities
-                else []
-            )
-            relationship_types = (
-                tuple([RelationshipType.model_validate(r) for r in self.relations])
-                if self.relations is not None
-                else None
-            )
-            patterns = (
-                tuple(self.potential_schema) if self.potential_schema else tuple()
-            )
-
-        return node_types, relationship_types, patterns
+        return GraphSchema(
+            node_types=tuple(self.entities) if self.entities else tuple(),
+            relationship_types=tuple(self.relations) if self.relations else tuple(),
+            patterns=tuple(self.potential_schema) if self.potential_schema else tuple(),
+        )
 
     def _get_run_params_for_schema(self) -> dict[str, Any]:
         if not self.has_user_provided_schema():
             # for automatic extraction, the text parameter is needed (will flow through the pipeline connections)
             return {}
         else:
             # process schema components according to precedence rules
-            node_types, relationship_types, patterns = (
-                self._process_schema_with_precedence()
-            )
-
-            return {
-                "node_types": node_types,
-                "relationship_types": relationship_types,
-                "patterns": patterns,
-            }
+            schema = self._process_schema_with_precedence()
+            return schema.model_dump()
 
     def _get_extractor(self) -> EntityRelationExtractor:
         return LLMEntityRelationExtractor(
@@ -368,7 +335,13 @@ def get_run_params(self, user_input: dict[str, Any]) -> dict[str, Any]:
         run_params = {}
         if self.lexical_graph_config:
             run_params["extractor"] = {
-                "lexical_graph_config": self.lexical_graph_config
+                "lexical_graph_config": self.lexical_graph_config,
+            }
+            run_params["writer"] = {
+                "lexical_graph_config": self.lexical_graph_config,
+            }
+            run_params["pruner"] = {
+                "lexical_graph_config": self.lexical_graph_config,
             }
         text = user_input.get("text")
         file_path = user_input.get("file_path")