diff --git a/CHANGELOG.md b/CHANGELOG.md index 09d264afc..88ee296d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ ## Next ### Added + +- Added optional schema enforcement as a validation layer after entity and relation extraction. - Introduced SearchQueryParseError for handling invalid Lucene query strings in HybridRetriever and HybridCypherRetriever. ## 1.5.0 diff --git a/docs/source/user_guide_kg_builder.rst b/docs/source/user_guide_kg_builder.rst index edc89f609..20a7db63f 100644 --- a/docs/source/user_guide_kg_builder.rst +++ b/docs/source/user_guide_kg_builder.rst @@ -125,8 +125,8 @@ This schema information can be provided to the `SimpleKGBuilder` as demonstrated # ... ) -Prompt Template, Lexical Graph Config and Error Behavior --------------------------------------------------------- +Extra configurations +-------------------- These parameters are part of the `EntityAndRelationExtractor` component. For detailed information, refer to the section on :ref:`Entity and Relation Extractor`. @@ -138,6 +138,7 @@ They are also accessible via the `SimpleKGPipeline` interface. # ... prompt_template="", lexical_graph_config=my_config, + enforce_schema="STRICT", on_error="RAISE", # ... ) @@ -829,6 +830,30 @@ It can be used in this way: The LLM to use can be customized, the only constraint is that it obeys the :ref:`LLMInterface `. +Schema Enforcement Behaviour +---------------------------- +By default, even if a schema is provided to guide the LLM in the entity and relation extraction, the LLM response is not validated against that schema. +This behaviour can be changed by using the `enforce_schema` flag in the `LLMEntityRelationExtractor` constructor: + +.. code:: python + + from neo4j_graphrag.experimental.components.entity_relation_extractor import LLMEntityRelationExtractor + from neo4j_graphrag.experimental.components.types import SchemaEnforcementMode + + extractor = LLMEntityRelationExtractor( + # ... 
+ enforce_schema=SchemaEnforcementMode.STRICT, + ) + +In this scenario, any extracted node/relation/property that is not part of the provided schema will be pruned. +Any relation whose start node or end node does not conform to the provided tuple in `potential_schema` will be pruned. +If a relation's start/end nodes are valid but the direction is incorrect, the direction will be inverted. +If a node is left with no properties, it will also be pruned. + +.. warning:: + + Note that if the schema enforcement mode is on but the schema is not provided, no schema enforcement will be applied. + Error Behaviour --------------- diff --git a/src/neo4j_graphrag/experimental/components/entity_relation_extractor.py b/src/neo4j_graphrag/experimental/components/entity_relation_extractor.py index 1d6861232..2137cfa27 100644 --- a/src/neo4j_graphrag/experimental/components/entity_relation_extractor.py +++ b/src/neo4j_graphrag/experimental/components/entity_relation_extractor.py @@ -19,7 +19,7 @@ import enum import json import logging -from typing import Any, List, Optional, Union, cast +from typing import Any, List, Optional, Union, cast, Dict import json_repair from pydantic import ValidationError, validate_call @@ -31,8 +31,11 @@ DocumentInfo, LexicalGraphConfig, Neo4jGraph, + Neo4jNode, + Neo4jRelationship, TextChunk, TextChunks, + SchemaEnforcementMode, ) from neo4j_graphrag.experimental.pipeline.component import Component from neo4j_graphrag.experimental.pipeline.exceptions import InvalidJSONError @@ -168,6 +171,7 @@ class LLMEntityRelationExtractor(EntityRelationExtractor): llm (LLMInterface): The language model to use for extraction. prompt_template (ERExtractionTemplate | str): A custom prompt template to use for extraction. create_lexical_graph (bool): Whether to include the text chunks in the graph in addition to the extracted entities and relations. Defaults to True. 
+ enforce_schema (SchemaEnforcementMode): Whether or not to validate the extracted entities/rels against the provided schema. Defaults to SchemaEnforcementMode.NONE. on_error (OnError): What to do when an error occurs during extraction. Defaults to raising an error. max_concurrency (int): The maximum number of concurrent tasks which can be used to make requests to the LLM. @@ -192,11 +196,13 @@ def __init__( llm: LLMInterface, prompt_template: ERExtractionTemplate | str = ERExtractionTemplate(), create_lexical_graph: bool = True, + enforce_schema: SchemaEnforcementMode = SchemaEnforcementMode.NONE, on_error: OnError = OnError.RAISE, max_concurrency: int = 5, ) -> None: super().__init__(on_error=on_error, create_lexical_graph=create_lexical_graph) self.llm = llm # with response_format={ "type": "json_object" }, + self.enforce_schema = enforce_schema self.max_concurrency = max_concurrency if isinstance(prompt_template, str): template = PromptTemplate(prompt_template, expected_inputs=[]) @@ -275,15 +281,16 @@ async def run_for_chunk( examples: str, lexical_graph_builder: Optional[LexicalGraphBuilder] = None, ) -> Neo4jGraph: - """Run extraction and post processing for a single chunk""" + """Run extraction, validation and post processing for a single chunk""" async with sem: chunk_graph = await self.extract_for_chunk(schema, examples, chunk) + final_chunk_graph = self.validate_chunk(chunk_graph, schema) await self.post_process_chunk( - chunk_graph, + final_chunk_graph, chunk, lexical_graph_builder, ) - return chunk_graph + return final_chunk_graph @validate_call async def run( @@ -306,7 +313,7 @@ chunks (TextChunks): List of text chunks to extract entities and relations from. document_info (Optional[DocumentInfo], optional): Document the chunks are coming from. Used in the lexical graph creation step. lexical_graph_config (Optional[LexicalGraphConfig], optional): Lexical graph configuration to customize node labels and relationship types in the lexical graph. 
- schema (SchemaConfig | None): Definition of the schema to guide the LLM in its extraction. Caution: at the moment, there is no guarantee that the extracted entities and relations will strictly obey the schema. + schema (SchemaConfig | None): Definition of the schema to guide the LLM in its extraction. examples (str): Examples for few-shot learning in the prompt. """ lexical_graph_builder = None @@ -337,3 +344,157 @@ async def run( graph = self.combine_chunk_graphs(lexical_graph, chunk_graphs) logger.debug(f"Extracted graph: {prettify(graph)}") return graph + + def validate_chunk( + self, + chunk_graph: Neo4jGraph, + schema: SchemaConfig + ) -> Neo4jGraph: + """ + Perform validation after entity and relation extraction: + - Enforce schema if schema enforcement mode is on and schema is provided + """ + if self.enforce_schema != SchemaEnforcementMode.NONE: + if not schema or not schema.entities: # schema is not provided + logger.warning( + "Schema enforcement is ON but the guiding schema is not provided." + ) + else: + # if enforcing_schema is on and schema is provided, clean the graph + return self._clean_graph(chunk_graph, schema) + return chunk_graph + + def _clean_graph( + self, + graph: Neo4jGraph, + schema: SchemaConfig, + ) -> Neo4jGraph: + """ + Verify that the graph conforms to the provided schema. + + Remove invalid entities,relationships, and properties. + If an entity is removed, all of its relationships are also removed. + If no valid properties remain for an entity, remove that entity. 
+ """ + # enforce nodes (remove invalid labels, strip invalid properties) + filtered_nodes = self._enforce_nodes(graph.nodes, schema) + + # enforce relationships (remove those referencing invalid nodes or with invalid + # types or with start/end nodes not conforming to the schema, and strip invalid + # properties) + filtered_rels = self._enforce_relationships( + graph.relationships, filtered_nodes, schema + ) + + return Neo4jGraph(nodes=filtered_nodes, relationships=filtered_rels) + + def _enforce_nodes( + self, + extracted_nodes: List[Neo4jNode], + schema: SchemaConfig + ) -> List[Neo4jNode]: + """ + Filter extracted nodes to be conformant to the schema. + + Keep only those whose label is in schema. + For each valid node, filter out properties not present in the schema. + Remove a node if it ends up with no valid properties. + """ + if self.enforce_schema != SchemaEnforcementMode.STRICT: + return extracted_nodes + + valid_nodes = [] + + for node in extracted_nodes: + schema_entity = schema.entities.get(node.label) + if not schema_entity: + continue + allowed_props = schema_entity.get("properties", []) + filtered_props = self._enforce_properties(node.properties, allowed_props) + if filtered_props: + valid_nodes.append( + Neo4jNode( + id=node.id, + label=node.label, + properties=filtered_props, + embedding_properties=node.embedding_properties, + ) + ) + + return valid_nodes + + def _enforce_relationships( + self, + extracted_relationships: List[Neo4jRelationship], + filtered_nodes: List[Neo4jNode], + schema: SchemaConfig + ) -> List[Neo4jRelationship]: + """ + Filter extracted nodes to be conformant to the schema. + + Keep only those whose types are in schema, start/end node conform to schema, + and start/end nodes are in filtered nodes (i.e., kept after node enforcement). + For each valid relationship, filter out properties not present in the schema. + If a relationship direct is incorrect, invert it. 
+ """ + if self.enforce_schema != SchemaEnforcementMode.STRICT: + return extracted_relationships + + valid_rels = [] + + valid_nodes = {node.id: node.label for node in filtered_nodes} + + potential_schema = schema.potential_schema + + for rel in extracted_relationships: + schema_relation = schema.relations.get(rel.type) + if not schema_relation: + continue + + if (rel.start_node_id not in valid_nodes or + rel.end_node_id not in valid_nodes): + continue + + start_label = valid_nodes[rel.start_node_id] + end_label = valid_nodes[rel.end_node_id] + + tuple_valid = True + if potential_schema: + tuple_valid = (start_label, rel.type, end_label) in potential_schema + reverse_tuple_valid = ((end_label, rel.type, start_label) in + potential_schema) + + if not tuple_valid and not reverse_tuple_valid: + continue + + allowed_props = schema_relation.get("properties", []) + filtered_props = self._enforce_properties(rel.properties, allowed_props) + + valid_rels.append( + Neo4jRelationship( + start_node_id=rel.start_node_id if tuple_valid else rel.end_node_id, + end_node_id=rel.end_node_id if tuple_valid else rel.start_node_id, + type=rel.type, + properties=filtered_props, + embedding_properties=rel.embedding_properties, + ) + ) + + return valid_rels + + def _enforce_properties( + self, + properties: Dict[str, Any], + valid_properties: List[Dict[str, Any]] + ) -> Dict[str, Any]: + """ + Filter properties. + Keep only those that exist in schema (i.e., valid properties). 
+ """ + valid_prop_names = {prop["name"] for prop in valid_properties} + return { + key: value + for key, value in properties.items() + if key in valid_prop_names + } + diff --git a/src/neo4j_graphrag/experimental/components/types.py b/src/neo4j_graphrag/experimental/components/types.py index 689e6b6ce..b1ed46569 100644 --- a/src/neo4j_graphrag/experimental/components/types.py +++ b/src/neo4j_graphrag/experimental/components/types.py @@ -15,6 +15,7 @@ from __future__ import annotations import uuid +from enum import Enum from typing import Any, Dict, Optional from pydantic import BaseModel, Field, field_validator @@ -170,3 +171,8 @@ def lexical_graph_node_labels(self) -> tuple[str, ...]: class GraphResult(DataModel): graph: Neo4jGraph config: LexicalGraphConfig + + +class SchemaEnforcementMode(str, Enum): + NONE = "NONE" + STRICT = "STRICT" diff --git a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py index 14ee112ab..2e45a2db1 100644 --- a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py +++ b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py @@ -37,7 +37,10 @@ from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import ( FixedSizeSplitter, ) -from neo4j_graphrag.experimental.components.types import LexicalGraphConfig +from neo4j_graphrag.experimental.components.types import ( + LexicalGraphConfig, + SchemaEnforcementMode +) from neo4j_graphrag.experimental.pipeline.config.object_config import ComponentType from neo4j_graphrag.experimental.pipeline.config.template_pipeline.base import ( TemplatePipelineConfig, @@ -71,6 +74,7 @@ class SimpleKGPipelineConfig(TemplatePipelineConfig): entities: Sequence[EntityInputType] = [] relations: Sequence[RelationInputType] = [] potential_schema: Optional[list[tuple[str, str, str]]] = None + 
enforce_schema: SchemaEnforcementMode = SchemaEnforcementMode.NONE on_error: OnError = OnError.IGNORE prompt_template: Union[ERExtractionTemplate, str] = ERExtractionTemplate() perform_entity_resolution: bool = True @@ -124,6 +128,7 @@ def _get_extractor(self) -> EntityRelationExtractor: return LLMEntityRelationExtractor( llm=self.get_default_llm(), prompt_template=self.prompt_template, + enforce_schema=self.enforce_schema, on_error=self.on_error, ) diff --git a/tests/unit/experimental/components/test_entity_relation_extractor.py b/tests/unit/experimental/components/test_entity_relation_extractor.py index f76ab5c9c..f117c1893 100644 --- a/tests/unit/experimental/components/test_entity_relation_extractor.py +++ b/tests/unit/experimental/components/test_entity_relation_extractor.py @@ -25,11 +25,13 @@ balance_curly_braces, fix_invalid_json, ) +from neo4j_graphrag.experimental.components.schema import SchemaConfig from neo4j_graphrag.experimental.components.types import ( DocumentInfo, Neo4jGraph, TextChunk, TextChunks, + SchemaEnforcementMode, ) from neo4j_graphrag.experimental.pipeline.exceptions import InvalidJSONError from neo4j_graphrag.llm import LLMInterface, LLMResponse @@ -229,6 +231,288 @@ async def test_extractor_custom_prompt() -> None: llm.ainvoke.assert_called_once_with("this is my prompt") +@pytest.mark.asyncio +async def test_extractor_no_schema_enforcement() -> None: + llm = MagicMock(spec=LLMInterface) + llm.ainvoke.return_value = LLMResponse( + content='{"nodes":[{"id":"0","label":"Alien","properties":{"foo":"bar"}}],' + '"relationships":[]}' + ) + + extractor = LLMEntityRelationExtractor(llm=llm, + create_lexical_graph=False, + enforce_schema=SchemaEnforcementMode.NONE) + + schema = SchemaConfig( + entities={"Person": {"label": "Person", + "properties": [{"name": "name", "type": "STRING"}]}}, + relations={}, + potential_schema=[]) + + chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)]) + + result: Neo4jGraph = await 
extractor.run(chunks=chunks, schema=schema) + + assert len(result.nodes) == 1 + assert result.nodes[0].label == "Alien" + assert result.nodes[0].properties == {"chunk_index": 0, "foo": "bar"} + + +@pytest.mark.asyncio +async def test_extractor_schema_enforcement_when_no_schema_provided(): + llm = MagicMock(spec=LLMInterface) + llm.ainvoke.return_value = LLMResponse( + content='{"nodes":[{"id":"0","label":"Alien","properties":{"foo":"bar"}}],' + '"relationships":[]}' + ) + + extractor = LLMEntityRelationExtractor(llm=llm, + create_lexical_graph=False, + enforce_schema=SchemaEnforcementMode.STRICT) + + chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)]) + + result: Neo4jGraph = await extractor.run(chunks=chunks) + + assert len(result.nodes) == 1 + assert result.nodes[0].label == "Alien" + assert result.nodes[0].properties == {"chunk_index": 0, "foo": "bar"} + + +@pytest.mark.asyncio +async def test_extractor_schema_enforcement_invalid_nodes(): + llm = MagicMock(spec=LLMInterface) + llm.ainvoke.return_value = LLMResponse( + content='{"nodes":[{"id":"0","label":"Alien","properties":{"foo":"bar"}},' + '{"id":"1","label":"Person","properties":{"name":"Alice"}}],' + '"relationships":[]}' + ) + + extractor = LLMEntityRelationExtractor(llm=llm, + create_lexical_graph=False, + enforce_schema=SchemaEnforcementMode.STRICT) + + schema = SchemaConfig( + entities={"Person": {"label": "Person", + "properties": [{"name": "name", "type": "STRING"}]}}, + relations={}, + potential_schema=[]) + + chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)]) + + result: Neo4jGraph = await extractor.run(chunks=chunks, schema=schema) + + assert len(result.nodes) == 1 + assert result.nodes[0].label == "Person" + assert result.nodes[0].properties == {"chunk_index": 0, "name": "Alice"} + + +@pytest.mark.asyncio +async def test_extraction_schema_enforcement_invalid_node_properties(): + llm = MagicMock(spec=LLMInterface) + llm.ainvoke.return_value = LLMResponse( + 
content='{"nodes":[{"id":"1","label":"Person","properties":' + '{"name":"Alice","age":30,"foo":"bar"}}],' + '"relationships":[]}' + ) + + extractor = LLMEntityRelationExtractor(llm=llm, + create_lexical_graph=False, + enforce_schema=SchemaEnforcementMode.STRICT) + + schema = SchemaConfig( + entities={"Person": {"label": "Person", + "properties": [{"name": "name", "type": "STRING"}, + {"name": "age", "type": "INTEGER"}]}}, + relations={}, + potential_schema=[]) + + chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)]) + + result: Neo4jGraph = await extractor.run(chunks, schema=schema) + + # "foo" is removed + assert len(result.nodes) == 1 + assert len(result.nodes[0].properties) == 3 + assert "foo" not in result.nodes[0].properties + + +@pytest.mark.asyncio +async def test_extractor_schema_enforcement_valid_nodes_with_empty_props(): + llm = MagicMock(spec=LLMInterface) + llm.ainvoke.return_value = LLMResponse( + content='{"nodes":[{"id":"1","label":"Person","properties":{"foo":"bar"}}],' + '"relationships":[]}' + ) + + extractor = LLMEntityRelationExtractor(llm=llm, + create_lexical_graph=False, + enforce_schema=SchemaEnforcementMode.STRICT) + + schema = SchemaConfig(entities={"Person": {"label": "Person"}}, + relations={}, + potential_schema=[]) + + chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)]) + + result: Neo4jGraph = await extractor.run(chunks, schema=schema) + + assert len(result.nodes) == 0 + + +@pytest.mark.asyncio +async def test_extractor_schema_enforcement_invalid_relations_wrong_types(): + llm = MagicMock(spec=LLMInterface) + llm.ainvoke.return_value = LLMResponse( + content='{"nodes":[{"id":"1","label":"Person","properties":' + '{"name":"Alice"}},{"id":"2","label":"Person","properties":' + '{"name":"Bob"}}],' + '"relationships":[{"start_node_id":"1","end_node_id":"2",' + '"type":"FRIENDS_WITH","properties":{}}]}' + ) + + extractor = LLMEntityRelationExtractor(llm=llm, + create_lexical_graph=False, + 
enforce_schema=SchemaEnforcementMode.STRICT) + + schema = SchemaConfig( + entities={"Person": {"label": "Person", + "properties": [{"name": "name", "type": "STRING"}]}}, + relations={"LIKES": {"label": "LIKES"}}, + potential_schema=[]) + + chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)]) + + result: Neo4jGraph = await extractor.run(chunks, schema=schema) + + assert len(result.nodes) == 2 + assert len(result.relationships) == 0 + + +@pytest.mark.asyncio +async def test_extractor_schema_enforcement_invalid_relations_wrong_start_node(): + llm = MagicMock(spec=LLMInterface) + llm.ainvoke.return_value = LLMResponse( + content='{"nodes":[{"id":"1","label":"Person","properties":{"name":"Alice"}},' + '{"id":"2","label":"Person","properties":{"name":"Bob"}}, ' + '{"id":"3","label":"City","properties":{"name":"London"}}],' + '"relationships":[{"start_node_id":"1","end_node_id":"2",' + '"type":"LIVES_IN","properties":{}}]}' + ) + + extractor = LLMEntityRelationExtractor(llm=llm, + create_lexical_graph=False, + enforce_schema=SchemaEnforcementMode.STRICT) + + schema = SchemaConfig( + entities={"Person": {"label": "Person", + "properties": [{"name": "name", "type": "STRING"}]}, + "City": {"label": "City", + "properties": [{"name": "name", "type": "STRING"}]}}, + relations={"LIVES_IN": {"label": "LIVES_IN"}}, + potential_schema=[("Person", "LIVES_IN", "City")]) + + chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)]) + + result: Neo4jGraph = await extractor.run(chunks, schema=schema) + + assert len(result.nodes) == 3 + assert len(result.relationships) == 0 + + +@pytest.mark.asyncio +async def test_extractor_schema_enforcement_invalid_relation_properties(): + llm = MagicMock(spec=LLMInterface) + llm.ainvoke.return_value = LLMResponse( + content='{"nodes":[{"id":"1","label":"Person","properties":{"name":"Alice"}},' + '{"id":"2","label":"Person","properties":{"name":"Bob"}}],' + '"relationships":[{"start_node_id":"1","end_node_id":"2",' + 
'"type":"LIKES","properties":{"strength":"high","foo":"bar"}}]}' + ) + + extractor = LLMEntityRelationExtractor(llm=llm, + create_lexical_graph=False, + enforce_schema=SchemaEnforcementMode.STRICT) + + schema = SchemaConfig( + entities={"Person": {"label": "Person", + "properties": [{"name": "name", "type": "STRING"}]}}, + relations={"LIKES": {"label": "LIKES", + "properties": [{"name": "strength", "type": "STRING"}]}}, + potential_schema=[] + ) + + chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)]) + + result: Neo4jGraph = await extractor.run(chunks, schema=schema) + + assert len(result.nodes) == 2 + assert len(result.relationships) == 1 + rel = result.relationships[0] + assert "foo" not in rel.properties + assert rel.properties["strength"] == "high" + + +@pytest.mark.asyncio +async def test_extractor_schema_enforcement_removed_relation_start_end_nodes(): + llm = MagicMock(spec=LLMInterface) + llm.ainvoke.return_value = LLMResponse( + content='{"nodes":[{"id":"1","label":"Alien","properties":{}},' + '{"id":"2","label":"Robot","properties":{}}],' + '"relationships":[{"start_node_id":"1","end_node_id":"2",' + '"type":"LIKES","properties":{}}]}' + ) + + extractor = LLMEntityRelationExtractor(llm=llm, + create_lexical_graph=False, + enforce_schema=SchemaEnforcementMode.STRICT) + + schema = SchemaConfig( + entities={"Person": {"label": "Person", + "properties": [{"name": "name", "type": "STRING"}]}}, + relations={"LIKES": {"label": "LIKES"}}, + potential_schema=[("Person", "LIKES", "Person")]) + + chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)]) + + result: Neo4jGraph = await extractor.run(chunks, schema=schema) + + assert len(result.nodes) == 0 + assert len(result.relationships) == 0 + + +@pytest.mark.asyncio +async def test_extractor_schema_enforcement_inverted_relation_direction(): + llm = MagicMock(spec=LLMInterface) + llm.ainvoke.return_value = LLMResponse( + 
content='{"nodes":[{"id":"1","label":"Person","properties":{"name":"Alice"}},' + '{"id":"2","label":"City","properties":{"name":"London"}}],' + '"relationships":[{"start_node_id":"2","end_node_id":"1",' + '"type":"LIVES_IN","properties":{}}]}' + ) + + extractor = LLMEntityRelationExtractor(llm=llm, + create_lexical_graph=False, + enforce_schema=SchemaEnforcementMode.STRICT) + + schema = SchemaConfig( + entities={"Person": {"label": "Person", + "properties": [{"name": "name", "type": "STRING"}]}, + "City": {"label": "City", + "properties": [{"name": "name", "type": "STRING"}]}}, + relations={"LIVES_IN": {"label": "LIVES_IN"}}, + potential_schema=[("Person", "LIVES_IN", "City")]) + + chunks = TextChunks(chunks=[TextChunk(text="some text", index=0)]) + + result: Neo4jGraph = await extractor.run(chunks, schema=schema) + + assert len(result.nodes) == 2 + assert len(result.relationships) == 1 + assert result.relationships[0].start_node_id.split(":")[1] == "1" + assert result.relationships[0].end_node_id.split(":")[1] == "2" + + def test_fix_invalid_json_empty_result() -> None: json_string = "invalid json"