Ruff

NathalieCharbel · NathalieCharbel · commit 52a2686155ab · 2025-04-29T09:02:55.000+03:00
diff --git a/src/neo4j_graphrag/experimental/components/schema.py b/src/neo4j_graphrag/experimental/components/schema.py
@@ -136,81 +136,83 @@ def store_as_json(self, file_path: str) -> None:
         Args:
             file_path (str): The path where the schema configuration will be saved.
         """
-        with open(file_path, 'w') as f:
+        with open(file_path, "w") as f:
             json.dump(self.model_dump(), f, indent=2)
-            
+
     def store_as_yaml(self, file_path: str) -> None:
         """
         Save the schema configuration to a YAML file.
 
         Args:
             file_path (str): The path where the schema configuration will be saved.
-        """         
+        """
         # create a copy of the data and convert tuples to lists for YAML compatibility
         data = self.model_dump()
-        if data.get('potential_schema'):
-            data['potential_schema'] = [list(item) for item in data['potential_schema']]
-        
-        with open(file_path, 'w') as f:
+        if data.get("potential_schema"):
+            data["potential_schema"] = [list(item) for item in data["potential_schema"]]
+
+        with open(file_path, "w") as f:
             yaml.dump(data, f, default_flow_style=False, sort_keys=False)
-            
+
     @classmethod
     def from_file(cls, file_path: Union[str, Path]) -> Self:
         """
         Load a schema configuration from a file (either JSON or YAML).
-        
+
         The file format is automatically detected based on the file extension.
-        
+
         Args:
             file_path (Union[str, Path]): The path to the schema configuration file.
-            
+
         Returns:
             SchemaConfig: The loaded schema configuration.
         """
         file_path = Path(file_path)
-        
+
         if not file_path.exists():
             raise FileNotFoundError(f"Schema file not found: {file_path}")
-            
-        if file_path.suffix.lower() in ['.json']:
+
+        if file_path.suffix.lower() in [".json"]:
             return cls.from_json(file_path)
-        elif file_path.suffix.lower() in ['.yaml', '.yml']:
+        elif file_path.suffix.lower() in [".yaml", ".yml"]:
             return cls.from_yaml(file_path)
         else:
-            raise ValueError(f"Unsupported file format: {file_path.suffix}. Use .json, .yaml, or .yml")
-            
+            raise ValueError(
+                f"Unsupported file format: {file_path.suffix}. Use .json, .yaml, or .yml"
+            )
+
     @classmethod
     def from_json(cls, file_path: Union[str, Path]) -> Self:
         """
         Load a schema configuration from a JSON file.
-        
+
         Args:
             file_path (Union[str, Path]): The path to the JSON schema configuration file.
-            
+
         Returns:
             SchemaConfig: The loaded schema configuration.
         """
-        with open(file_path, 'r') as f:
+        with open(file_path, "r") as f:
             try:
                 data = json.load(f)
                 return cls.model_validate(data)
             except json.JSONDecodeError as e:
                 raise ValueError(f"Invalid JSON file: {e}")
             except ValidationError as e:
                 raise SchemaValidationError(f"Schema validation failed: {e}")
-                
+
     @classmethod
     def from_yaml(cls, file_path: Union[str, Path]) -> Self:
         """
         Load a schema configuration from a YAML file.
-        
+
         Args:
             file_path (Union[str, Path]): The path to the YAML schema configuration file.
-            
+
         Returns:
             SchemaConfig: The loaded schema configuration.
         """
-        with open(file_path, 'r') as f:
+        with open(file_path, "r") as f:
             try:
                 data = yaml.safe_load(f)
                 return cls.model_validate(data)
@@ -348,11 +350,13 @@ def __init__(
     ) -> None:
         super().__init__()
         self._llm: LLMInterface = llm
-        self._prompt_template: PromptTemplate = prompt_template or SchemaExtractionTemplate()
+        self._prompt_template: PromptTemplate = (
+            prompt_template or SchemaExtractionTemplate()
+        )
         self._llm_params: dict[str, Any] = llm_params or {}
 
     @validate_call
-    async def run(self, text: str, examples:str = "", **kwargs: Any) -> SchemaConfig:
+    async def run(self, text: str, examples: str = "", **kwargs: Any) -> SchemaConfig:
         """
         Asynchronously extracts the schema from text and returns a SchemaConfig object.
 
@@ -367,23 +371,27 @@ async def run(self, text: str, examples:str = "", **kwargs: Any) -> SchemaConfig
 
         response = await self._llm.invoke(prompt, **self._llm_params)
         content: str = (
-            response if isinstance(response, str) else getattr(response, "content", str(response))
+            response
+            if isinstance(response, str)
+            else getattr(response, "content", str(response))
         )
 
         try:
             extracted_schema: Dict[str, Any] = json.loads(content)
         except json.JSONDecodeError as exc:
-            raise ValueError(
-                "LLM response is not valid JSON."
-            ) from exc
+            raise ValueError("LLM response is not valid JSON.") from exc
 
         extracted_entities: List[dict] = extracted_schema.get("entities", [])
         extracted_relations: Optional[List[dict]] = extracted_schema.get("relations")
-        potential_schema: Optional[List[Tuple[str, str, str]]] = extracted_schema.get("potential_schema")
+        potential_schema: Optional[List[Tuple[str, str, str]]] = extracted_schema.get(
+            "potential_schema"
+        )
 
         entities: List[SchemaEntity] = [SchemaEntity(**e) for e in extracted_entities]
         relations: Optional[List[SchemaRelation]] = (
-            [SchemaRelation(**r) for r in extracted_relations] if extracted_relations else None
+            [SchemaRelation(**r) for r in extracted_relations]
+            if extracted_relations
+            else None
         )
 
         return await super().run(
diff --git a/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py b/src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py
@@ -59,7 +59,8 @@
 
 logger = logging.getLogger(__name__)
 
-T = TypeVar('T', bound='SimpleKGPipelineConfig')
+T = TypeVar("T", bound="SimpleKGPipelineConfig")
+
 
 class SimpleKGPipelineConfig(TemplatePipelineConfig):
     COMPONENTS: ClassVar[list[str]] = [
@@ -94,40 +95,47 @@ class SimpleKGPipelineConfig(TemplatePipelineConfig):
     text_splitter: Optional[ComponentType] = None
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
-    
-    @model_validator(mode='after')
+
+    @model_validator(mode="after")
     def handle_schema_precedence(self) -> T:
         """Handle schema precedence and warnings"""
         self._process_schema_parameters()
         return self
-    
+
     def _process_schema_parameters(self) -> None:
         """
         Process schema parameters and handle precedence between 'schema' parameter and individual components.
         Also logs warnings for deprecated usage.
         """
         # check if both schema and individual components are provided
-        has_individual_schema_components = any([self.entities, self.relations, self.potential_schema])
-        
+        has_individual_schema_components = any(
+            [self.entities, self.relations, self.potential_schema]
+        )
+
         if has_individual_schema_components and self.schema is not None:
             logger.warning(
                 "Both 'schema' and individual schema components (entities, relations, potential_schema) "
                 "were provided. The 'schema' parameter takes precedence. In the future, individual "
                 "components will be removed. Please use only the 'schema' parameter.",
-                stacklevel=2
+                stacklevel=2,
             )
-            
+
         elif has_individual_schema_components:
             logger.warning(
                 "The 'entities', 'relations', and 'potential_schema' parameters are deprecated "
                 "and will be removed in a future version. "
                 "Please use the 'schema' parameter instead.",
-                stacklevel=2
+                stacklevel=2,
             )
 
     def has_user_provided_schema(self) -> bool:
         """Check if the user has provided schema information"""
-        return bool(self.entities or self.relations or self.potential_schema or self.schema is not None)
+        return bool(
+            self.entities
+            or self.relations
+            or self.potential_schema
+            or self.schema is not None
+        )
 
     def _get_pdf_loader(self) -> Optional[PdfLoader]:
         if not self.from_pdf:
@@ -165,13 +173,17 @@ def _get_schema(self) -> Union[SchemaBuilder, SchemaFromText]:
             return SchemaFromText(llm=self.get_default_llm())
         return SchemaBuilder()
 
-    def _process_schema_with_precedence(self) -> tuple[list[SchemaEntity], list[SchemaRelation], Optional[list[tuple[str, str, str]]]]:
+    def _process_schema_with_precedence(
+        self,
+    ) -> tuple[
+        list[SchemaEntity], list[SchemaRelation], Optional[list[tuple[str, str, str]]]
+    ]:
         """
         Process schema inputs according to precedence rules:
         1. If schema is provided as SchemaConfig object, use it
         2. If schema is provided as dictionary, extract from it
         3. Otherwise, use individual schema components
-        
+
         Returns:
             Tuple of (entities, relations, potential_schema)
         """
@@ -184,15 +196,29 @@ def _process_schema_with_precedence(self) -> tuple[list[SchemaEntity], list[Sche
                 potential_schema = self.schema.potential_schema
             else:
                 # extract from dictionary
-                entities = [SchemaEntity.from_text_or_dict(e) for e in self.schema.get("entities", [])]
-                relations = [SchemaRelation.from_text_or_dict(r) for r in self.schema.get("relations", [])]
+                entities = [
+                    SchemaEntity.from_text_or_dict(e)
+                    for e in self.schema.get("entities", [])
+                ]
+                relations = [
+                    SchemaRelation.from_text_or_dict(r)
+                    for r in self.schema.get("relations", [])
+                ]
                 potential_schema = self.schema.get("potential_schema")
         else:
             # use individual components
-            entities = [SchemaEntity.from_text_or_dict(e) for e in self.entities] if self.entities else []
-            relations = [SchemaRelation.from_text_or_dict(r) for r in self.relations] if self.relations else []
+            entities = (
+                [SchemaEntity.from_text_or_dict(e) for e in self.entities]
+                if self.entities
+                else []
+            )
+            relations = (
+                [SchemaRelation.from_text_or_dict(r) for r in self.relations]
+                if self.relations
+                else []
+            )
             potential_schema = self.potential_schema
-        
+
         return entities, relations, potential_schema
 
     def _get_run_params_for_schema(self) -> dict[str, Any]:
@@ -201,8 +227,10 @@ def _get_run_params_for_schema(self) -> dict[str, Any]:
             return {}
         else:
             # process schema components according to precedence rules
-            entities, relations, potential_schema = self._process_schema_with_precedence()
-            
+            entities, relations, potential_schema = (
+                self._process_schema_with_precedence()
+            )
+
             return {
                 "entities": entities,
                 "relations": relations,
@@ -248,7 +276,7 @@ def _get_connections(self) -> list[ConnectionDefinition]:
                     input_config={"text": "pdf_loader.text"},
                 )
             )
-            
+
             # handle automatic schema extraction
             if self.auto_schema_extraction and not self.has_user_provided_schema():
                 connections.append(
@@ -258,7 +286,7 @@ def _get_connections(self) -> list[ConnectionDefinition]:
                         input_config={"text": "pdf_loader.text"},
                     )
                 )
-            
+
             connections.append(
                 ConnectionDefinition(
                     start="schema",
@@ -279,7 +307,7 @@ def _get_connections(self) -> list[ConnectionDefinition]:
                         input_config={"text": "text"},  # use the original text input
                     )
                 )
-            
+
             connections.append(
                 ConnectionDefinition(
                     start="schema",
diff --git a/src/neo4j_graphrag/experimental/pipeline/kg_builder.py b/src/neo4j_graphrag/experimental/pipeline/kg_builder.py
@@ -47,6 +47,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class SimpleKGPipeline:
     """
     A class to simplify the process of building a knowledge graph from text documents.
@@ -123,7 +124,9 @@ def __init__(
                 perform_entity_resolution=perform_entity_resolution,
                 lexical_graph_config=lexical_graph_config,
                 neo4j_database=neo4j_database,
-                auto_schema_extraction=not bool(schema or entities or relations or potential_schema),
+                auto_schema_extraction=not bool(
+                    schema or entities or relations or potential_schema
+                ),
             )
         except (ValidationError, ValueError) as e:
             raise PipelineDefinitionError() from e
diff --git a/src/neo4j_graphrag/generation/__init__.py b/src/neo4j_graphrag/generation/__init__.py
@@ -1,9 +1,4 @@
 from .graphrag import GraphRAG
 from .prompts import PromptTemplate, RagTemplate, SchemaExtractionTemplate
 
-__all__ = [
-    "GraphRAG",
-    "PromptTemplate",
-    "RagTemplate",
-    "SchemaExtractionTemplate"
-]
+__all__ = ["GraphRAG", "PromptTemplate", "RagTemplate", "SchemaExtractionTemplate"]
diff --git a/tests/unit/experimental/components/test_schema.py b/tests/unit/experimental/components/test_schema.py