neo4j · martinohanlon · Oct 14, 2025 · Oct 15, 2025 · Oct 16, 2025
@@ -7,6 +7,7 @@
 - Document node is now always created when running SimpleKGPipeline, even if `from_pdf=False`.
 - Document metadata is exposed in SimpleKGPipeline run method.
 - Added automatic rate limiting with retry logic and exponential backoff for all Embedding providers using tenacity. The `RateLimitHandler` interface allows for custom rate limiting strategies, including the ability to disable rate limiting entirely.
+- The JSON response returned to `SchemaFromTextExtractor` is now stripped of any Markdown code-fence markers before being parsed.
 
 ### Fixed
 

@@ -15,6 +15,7 @@
 from __future__ import annotations
 
 import json
+import re
 
 import neo4j
 import logging
@@ -554,6 +555,15 @@ def _filter_relationships_without_labels(
             relationship_types, "relationship type"
         )
 
+    def _clean_json_content(self, content: str) -> str:
+        """Strip Markdown code-fence wrapping from a text response.
+
+        If the text contains a complete fenced block (an opening fence line,
+        optionally carrying a language tag, and a closing fence line), only
+        the text inside the first such block is returned, so any prose before
+        or after the block is discarded. Otherwise fence markers found at the
+        start or end of a line are removed and the remaining text is kept.
+
+        Args:
+            content: Raw text to clean.
+
+        Returns:
+            The text with fence markers removed and surrounding whitespace
+            stripped. Text without fences is returned stripped but otherwise
+            unchanged.
+        """
+        content = content.strip()
+
+        # Prefer the body of a complete fenced block. Fences are recognised
+        # only when they sit on their own line, so backticks in the middle of
+        # a line are not mistaken for a fence.
+        fenced = re.search(
+            r"^```[\w+-]*[ \t\r]*\n(.*?)^```[ \t\r]*$",
+            content,
+            flags=re.MULTILINE | re.DOTALL,
+        )
+        if fenced:
+            return fenced.group(1).strip()
+
+        # Fallback for single-line or unterminated fences: drop the markers.
+        content = re.sub(r"^```(?:json)?\s*", "", content, flags=re.MULTILINE)
+        content = re.sub(r"```\s*$", "", content, flags=re.MULTILINE)
+
+        return content.strip()
+
     @validate_call
     async def run(self, text: str, examples: str = "", **kwargs: Any) -> GraphSchema:
         """
@@ -575,6 +585,9 @@ async def run(self, text: str, examples: str = "", **kwargs: Any) -> GraphSchema
             # Re-raise the LLMGenerationError
             raise LLMGenerationError("Failed to generate schema from text") from e
 
+        # Clean response
+        content = self._clean_json_content(content)
+
         try:
             extracted_schema: Dict[str, Any] = json.loads(content)
 

@@ -960,6 +960,37 @@ async def test_schema_from_text_filters_relationships_without_labels(
     assert ("Person", "MANAGES", "Organization") in schema.patterns
 
 
+def test_clean_json_content_markdown_with_json_language(
+    schema_from_text: SchemaFromTextExtractor,
+) -> None:
+    """A fence tagged ``json`` is removed, leaving only the JSON payload."""
+    expected = '{"node_types": [{"label": "Person"}]}'
+    fenced_response = """```json
+{"node_types": [{"label": "Person"}]}
+```"""
+
+    assert schema_from_text._clean_json_content(fenced_response) == expected
+
+
+def test_clean_json_content_markdown_without_language(
+    schema_from_text: SchemaFromTextExtractor,
+) -> None:
+    """An untagged fence is removed, leaving only the JSON payload."""
+    expected = '{"node_types": [{"label": "Person"}]}'
+    fenced_response = """```
+{"node_types": [{"label": "Person"}]}
+```"""
+
+    assert schema_from_text._clean_json_content(fenced_response) == expected
+
+
+def test_clean_json_content_plain_json(
+    schema_from_text: SchemaFromTextExtractor,
+) -> None:
+    """JSON that is not wrapped in a fence comes back unchanged."""
+    plain_response = '{"node_types": [{"label": "Person"}]}'
+    result = schema_from_text._clean_json_content(plain_response)
+
+    assert result == '{"node_types": [{"label": "Person"}]}'
+
+
 @pytest.mark.asyncio
 @patch("neo4j_graphrag.experimental.components.schema.get_structured_schema")
 async def test_schema_from_existing_graph(mock_get_structured_schema: Mock) -> None: