neo4j · stellasia · May 20, 2025 · Apr 28, 2025 · Apr 28, 2025 · May 9, 2025
@@ -4,12 +4,16 @@
 
 ### Added
 
-- Added support for automatic schema extraction from text using LLMs. In the `SimpleKGPipeline`, when the user provides no schema, the automatic schema extraction is enabled by default. 
+- Added support for automatic schema extraction from text using LLMs. In the `SimpleKGPipeline`, when the user provides no schema, the automatic schema extraction is enabled by default.
 
 ### Fixed
 
 - Fixed a bug where `spacy` and `rapidfuzz` needed to be installed even if not using the relevant entity resolvers.
 
+### Changed
+
+- Strict mode in `SimpleKGPipeline`: properties and relationships are now pruned only when the corresponding properties or relations are defined in the input schema.
+
 
 ## 1.7.0
 

@@ -901,6 +901,13 @@ Any relation whose start node or end node does not conform to the provided tuple
 If a relation's start/end nodes are valid but the direction is incorrect, the direction will be inverted.
 If a node is left with no properties, it will also be pruned.
 
+.. note::
+
+    If the input schema lacks a certain type of information, the corresponding pruning step is skipped.
+    For example, if an entity is defined only by a label and has no properties,
+    property pruning is not performed and all properties returned by the LLM are kept.
+
+
 .. warning::
 
     Note that if the schema enforcement mode is on but the schema is not provided, no schema enforcement will be applied.

@@ -403,8 +403,13 @@ def _enforce_nodes(
             schema_entity = schema.entities.get(node.label)
             if not schema_entity:
                 continue
-            allowed_props = schema_entity.get("properties", [])
-            filtered_props = self._enforce_properties(node.properties, allowed_props)
+            allowed_props = schema_entity.get("properties")
+            if allowed_props:
+                filtered_props = self._enforce_properties(
+                    node.properties, allowed_props
+                )
+            else:
+                filtered_props = node.properties
             if filtered_props:
                 valid_nodes.append(
                     Neo4jNode(
@@ -434,16 +439,17 @@ def _enforce_relationships(
         if self.enforce_schema != SchemaEnforcementMode.STRICT:
             return extracted_relationships
 
+        if schema.relations is None:
+            return extracted_relationships
+
         valid_rels = []
 
         valid_nodes = {node.id: node.label for node in filtered_nodes}
 
         potential_schema = schema.potential_schema
 
         for rel in extracted_relationships:
-            schema_relation = (
-                schema.relations.get(rel.type) if schema.relations else None
-            )
+            schema_relation = schema.relations.get(rel.type)
             if not schema_relation:
                 continue
 
@@ -468,8 +474,11 @@ def _enforce_relationships(
                 if not tuple_valid and not reverse_tuple_valid:
                     continue
 
-            allowed_props = schema_relation.get("properties", [])
-            filtered_props = self._enforce_properties(rel.properties, allowed_props)
+            allowed_props = schema_relation.get("properties")
+            if allowed_props:
+                filtered_props = self._enforce_properties(rel.properties, allowed_props)
+            else:
+                filtered_props = rel.properties
 
             valid_rels.append(
                 Neo4jRelationship(

@@ -109,7 +109,7 @@ class SchemaConfig(DataModel):
     @model_validator(mode="before")
     def check_schema(cls, data: Dict[str, Any]) -> Dict[str, Any]:
         entities = data.get("entities", {}).keys()
-        relations = data.get("relations", {}).keys()
+        relations = (data.get("relations") or {}).keys()
         potential_schema = data.get("potential_schema", [])
 
         if potential_schema:

@@ -374,7 +374,7 @@ async def test_extractor_schema_enforcement_valid_nodes_with_empty_props() -> No
 
     result: Neo4jGraph = await extractor.run(chunks, schema=schema)
 
-    assert len(result.nodes) == 0
+    assert len(result.nodes) == 1
 
 
 @pytest.mark.asyncio
@@ -564,6 +564,74 @@ async def test_extractor_schema_enforcement_inverted_relation_direction() -> Non
     assert result.relationships[0].end_node_id.split(":")[1] == "2"
 
 
+@pytest.mark.asyncio
+async def test_extractor_schema_enforcement_none_relationships_in_schema() -> None:
+    llm = MagicMock(spec=LLMInterface)
+    llm.ainvoke.return_value = LLMResponse(
+        content='{"nodes":[{"id":"1","label":"Person","properties":'
+        '{"name":"Alice"}},{"id":"2","label":"Person","properties":'
+        '{"name":"Bob"}}],'
+        '"relationships":[{"start_node_id":"1","end_node_id":"2",'
+        '"type":"FRIENDS_WITH","properties":{}}]}'
+    )
+
+    extractor = LLMEntityRelationExtractor(
+        llm=llm, create_lexical_graph=False, enforce_schema=SchemaEnforcementMode.STRICT
+    )
+
+    schema = SchemaConfig(
+        entities={
+            "Person": {
+                "label": "Person",
+                "properties": [{"name": "name", "type": "STRING"}],
+            }
+        },
+        relations=None,
+        potential_schema=None,
+    )
+
+    chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)])
+
+    result: Neo4jGraph = await extractor.run(chunks, schema=schema)
+
+    assert len(result.nodes) == 2
+    assert len(result.relationships) == 1
+    assert result.relationships[0].type == "FRIENDS_WITH"
+
+
+@pytest.mark.asyncio
+async def test_extractor_schema_enforcement_empty_relationships_in_schema() -> None:
+    llm = MagicMock(spec=LLMInterface)
+    llm.ainvoke.return_value = LLMResponse(
+        content='{"nodes":[{"id":"1","label":"Person","properties":'
+        '{"name":"Alice"}},{"id":"2","label":"Person","properties":'
+        '{"name":"Bob"}}],'
+        '"relationships":[{"start_node_id":"1","end_node_id":"2",'
+        '"type":"FRIENDS_WITH","properties":{}}]}'
+    )
+
+    extractor = LLMEntityRelationExtractor(
+        llm=llm, create_lexical_graph=False, enforce_schema=SchemaEnforcementMode.STRICT
+    )
+
+    schema = SchemaConfig(
+        entities={
+            "Person": {
+                "label": "Person",
+                "properties": [{"name": "name", "type": "STRING"}],
+            }
+        },
+        relations={},
+        potential_schema=None,
+    )
+
+    chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)])
+
+    result: Neo4jGraph = await extractor.run(chunks, schema=schema)
+
+    assert len(result.relationships) == 0
+
+
 def test_fix_invalid_json_empty_result() -> None:
     json_string = "invalid json"