
Commit 77a41eb

fix: fix lint errors
Parent: f80c4de

8 files changed (+41 / −30 lines)

generate.py

Lines changed: 5 additions & 1 deletion

@@ -78,5 +78,9 @@
 graph_gen.judge(re_judge=False)

 graph_gen.traverse()
-with open(os.path.join(sys_path, "cache", "configs", f"graphgen_{unique_id}.yaml"), "w", encoding='utf-8') as f:
+
+config_path = os.path.join(sys_path, "cache", "configs", f"graphgen_{unique_id}.yaml")
+if not os.path.exists(os.path.dirname(config_path)):
+    os.makedirs(os.path.dirname(config_path))
+with open(config_path, "w", encoding='utf-8') as f:
     yaml.dump(traverse_strategy.to_yaml(), f)
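
The intent of the new lines is to guarantee that the cache/configs directory exists before the YAML file is written (note that makedirs must target the parent directory, not the file path itself, or the subsequent open() would fail). A minimal standalone sketch of the same pattern; dump_config and the example path are illustrative, not part of the commit:

    import os
    import yaml  # PyYAML

    def dump_config(config: dict, config_path: str) -> None:
        # Create the parent directory, not the file path itself; exist_ok=True
        # also avoids the exists-check/makedirs race between concurrent runs.
        os.makedirs(os.path.dirname(config_path), exist_ok=True)
        with open(config_path, "w", encoding="utf-8") as f:
            yaml.dump(config, f)

    dump_config({"example_key": "example_value"}, "cache/configs/graphgen_example.yaml")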

models/llm/openai_model.py

Lines changed: 2 additions & 2 deletions

@@ -71,8 +71,8 @@ async def generate_topk_per_token(self, text: str, history: Optional[List[str]]
         kwargs["logprobs"] = True
         kwargs["top_logprobs"] = self.topk_per_token

-        # Limit max_tokens to 2 to avoid long completions
-        kwargs["max_tokens"] = 2
+        # Limit max_tokens to 1 to avoid long completions
+        kwargs["max_tokens"] = 1

         completion = await self.client.chat.completions.create(
             model=self.model_name,
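
The cap matters because this method only needs the top-k candidate log-probabilities for the next token, not a full completion, and generating a single token is the cheapest request that still returns them. A sketch of the request shape against the openai>=1.x async client; the function name and model argument are placeholders:

    from openai import AsyncOpenAI

    async def next_token_logprobs(client: AsyncOpenAI, model: str, text: str, k: int = 5):
        # logprobs=True enables per-token log-probabilities; top_logprobs=k asks
        # for the k most likely alternatives at each position; max_tokens=1
        # keeps the completion to a single token.
        completion = await client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": text}],
            logprobs=True,
            top_logprobs=k,
            max_tokens=1,
        )
        return completion.choices[0].logprobs.content[0].top_logprobs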

templates/kg_extraction.py

Lines changed: 8 additions & 3 deletions

@@ -1,3 +1,5 @@
+# pylint: disable=C0301
+
 TEMPLATE_EN: str = """You are an NLP expert, skilled at analyzing text to extract named entities and their relationships.

 -Goal-
@@ -174,12 +176,14 @@
 输出:
 """

-CONTINUE_EN: str = """MANY entities and relationships were missed in the last extraction. Add them below using the same format:
+CONTINUE_EN: str = """MANY entities and relationships were missed in the last extraction. \
+Add them below using the same format:
 """

 CONTINUE_ZH: str = """很多实体和关系在上一次的提取中可能被遗漏了。请在下面使用相同的格式添加它们:"""

-IF_LOOP_EN: str = """It appears some entities and relationships may have still been missed. Answer YES | NO if there are still entities and relationships that need to be added.
+IF_LOOP_EN: str = """It appears some entities and relationships may have still been missed. \
+Answer YES | NO if there are still entities and relationships that need to be added.
 """

 IF_LOOP_ZH: str = """看起来可能仍然遗漏了一些实体和关系。如果仍有实体和关系需要添加,请回答YES | NO。"""
@@ -199,7 +203,8 @@
         "tuple_delimiter": "<|>",
         "record_delimiter": "##",
         "completion_delimiter": "<|COMPLETE|>",
-        "entity_types": "concept, date, location, keyword, organization, person, event, work, nature, artificial, science, technology, mission, gene",
+        "entity_types": "concept, date, location, keyword, organization, person, event, work, nature, artificial, \
+science, technology, mission, gene",
         "language": "English",
     },
 }
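
Two things happen here: C0301 (line-too-long) is disabled for the whole module, and the over-long prompt literals are wrapped with backslash continuations. Inside a string literal, a backslash before the newline deletes the line break, so the wrapped prompts stay byte-for-byte identical to the originals. A quick illustration:

    WRAPPED: str = """first half of a long prompt \
    second half on the next source line"""

    # The continuation leaves no newline behind.
    assert "\n" not in WRAPPED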

templates/search_judgement.py

Lines changed: 2 additions & 0 deletions

@@ -1,3 +1,5 @@
+# pylint: disable=C0301
+
 TEMPLATE: str = """-Goal-
 Please select the most relevant search result for the given entity.
 The name and description of the entity are provided. The search results are provided as a list.
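
Same fix as in kg_extraction.py: a module-level disable exempts every line in the file from the length check. When only one literal is too long, a narrower, line-scoped disable is also possible; the snippet below is illustrative, not from the commit:

    # Module-wide, as in this commit: applies to the whole file.
    # pylint: disable=C0301

    # Line-scoped alternative: applies to this statement only.
    PROMPT = "one deliberately long template string ..."  # pylint: disable=line-too-long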

utils/detect_lang.py

Lines changed: 1 addition & 2 deletions

@@ -26,8 +26,7 @@ def is_english_char(char):

     if chinese_ratio >= 0.5:
         return 'zh'
-    else:
-        return 'en'
+    return 'en'

 def detect_if_chinese(text):
     """

utils/format.py

Lines changed: 12 additions & 12 deletions

@@ -43,12 +43,12 @@ async def handle_single_entity_extraction(
     entity_type = clean_str(record_attributes[2].upper())
     entity_description = clean_str(record_attributes[3])
     entity_source_id = chunk_key
-    return dict(
-        entity_name=entity_name,
-        entity_type=entity_type,
-        description=entity_description,
-        source_id=entity_source_id,
-    )
+    return {
+        "entity_name": entity_name,
+        "entity_type": entity_type,
+        "description": entity_description,
+        "source_id": entity_source_id,
+    }

 def is_float_regex(value):
     return bool(re.match(r"^[-+]?[0-9]*\.?[0-9]+$", value))
@@ -65,12 +65,12 @@ async def handle_single_relationship_extraction(
     edge_description = clean_str(record_attributes[3])

     edge_source_id = chunk_key
-    return dict(
-        src_id=source,
-        tgt_id=target,
-        description=edge_description,
-        source_id=edge_source_id,
-    )
+    return {
+        "src_id": source,
+        "tgt_id": target,
+        "description": edge_description,
+        "source_id": edge_source_id,
+    }

 def load_json(file_name):
     if not os.path.exists(file_name):
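
Both hunks swap dict(key=value, ...) calls for dict literals, which recent pylint releases flag as R1735 (use-dict-literal). Besides silencing the warning, the literal skips a global name lookup and a function call, so it is also marginally faster. A rough way to see this; exact timings will vary by machine:

    from timeit import timeit

    call_form = timeit("dict(a=1, b=2)", number=1_000_000)
    literal_form = timeit("{'a': 1, 'b': 2}", number=1_000_000)
    print(f"dict() call: {call_form:.3f}s   literal: {literal_form:.3f}s")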

utils/help_nltk.py

Lines changed: 7 additions & 8 deletions

@@ -1,7 +1,7 @@
 import os
+from typing import Dict, List, Optional
 import nltk
 import jieba
-from typing import Dict, List, Optional

 resource_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources")

@@ -30,11 +30,10 @@ def get_stopwords(self, lang: str) -> List[str]:
 def word_tokenize(text: str, lang: str) -> List[str]:
     if lang == "zh":
         return jieba.lcut(text)
-    else:
-        nltk.data.path.append(os.path.join(resource_path, "nltk_data"))
-        try:
-            nltk.data.find("tokenizers/punkt_tab")
-        except LookupError:
-            nltk.download("punkt_tab", download_dir=os.path.join(resource_path, "nltk_data"))
+    nltk.data.path.append(os.path.join(resource_path, "nltk_data"))
+    try:
+        nltk.data.find("tokenizers/punkt_tab")
+    except LookupError:
+        nltk.download("punkt_tab", download_dir=os.path.join(resource_path, "nltk_data"))

-        return nltk.word_tokenize(text)
+    return nltk.word_tokenize(text)
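
Two lint fixes in one file: the typing import moves into the standard-library group ahead of the third-party imports (pylint C0411, wrong-import-order), and the else after jieba's early return is flattened (R1705), which also de-indents the NLTK fallback path. The conventional grouping looks like this:

    # Standard library first, then third-party, each group separated by a
    # blank line (PEP 8 ordering, enforced by pylint C0411).
    import os
    from typing import Dict, List, Optional

    import jieba
    import nltk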

utils/log.py

Lines changed: 4 additions & 2 deletions

@@ -13,18 +13,20 @@ def set_logger(log_file: str, log_level: int = logging.INFO, if_stream: bool = T
     file_handler.setLevel(log_level)
     file_handler.setFormatter(formatter)

+    stream_handler = None
+
     if if_stream:
         stream_handler = logging.StreamHandler()
         stream_handler.setLevel(log_level)
         stream_handler.setFormatter(formatter)

     if not logger.handlers:
         logger.addHandler(file_handler)
-        if if_stream:
+        if if_stream and stream_handler:
             logger.addHandler(stream_handler)


 def parse_log(log_file: str):
-    with open(log_file, "r") as f:
+    with open(log_file, "r", encoding='utf-8') as f:
         lines = f.readlines()
     return lines
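
Binding stream_handler = None up front means the name is defined on every code path, so the later reference no longer trips pylint's possibly-used-before-assignment; the explicit encoding addresses W1514 (unspecified-encoding), where open() would otherwise fall back to the platform's locale default. A self-contained sketch of the same guard; build_handlers is illustrative, not the module's API:

    import logging

    def build_handlers(log_file: str, if_stream: bool = True) -> list:
        # Define the name unconditionally so every later read is safe.
        stream_handler = None
        if if_stream:
            stream_handler = logging.StreamHandler()

        handlers: list = [logging.FileHandler(log_file, encoding="utf-8")]
        if stream_handler is not None:
            handlers.append(stream_handler)
        return handlers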
