open-sciencelab
diff --git a/graphgen/bases/base_generator.py b/graphgen/bases/base_generator.py
Lines changed: 2 additions & 3 deletions
diff --git a/graphgen/bases/base_kg_builder.py b/graphgen/bases/base_kg_builder.py
Lines changed: 4 additions & 8 deletions
diff --git a/graphgen/bases/base_partitioner.py b/graphgen/bases/base_partitioner.py
Lines changed: 0 additions & 2 deletions
diff --git a/graphgen/bases/base_splitter.py b/graphgen/bases/base_splitter.py
Lines changed: 15 additions & 8 deletions
diff --git a/graphgen/bases/base_storage.py b/graphgen/bases/base_storage.py
Lines changed: 0 additions & 3 deletions
diff --git a/graphgen/bases/base_tokenizer.py b/graphgen/bases/base_tokenizer.py
Lines changed: 2 additions & 3 deletions
diff --git a/graphgen/models/evaluator/base_evaluator.py b/graphgen/models/evaluator/base_evaluator.py
Lines changed: 3 additions & 4 deletions
diff --git a/graphgen/models/evaluator/length_evaluator.py b/graphgen/models/evaluator/length_evaluator.py
Lines changed: 3 additions & 6 deletions
diff --git a/graphgen/models/evaluator/mtld_evaluator.py b/graphgen/models/evaluator/mtld_evaluator.py
Lines changed: 4 additions & 8 deletions
diff --git a/graphgen/models/generator/aggregated_generator.py b/graphgen/models/generator/aggregated_generator.py
Lines changed: 0 additions & 2 deletions
@@ -1,17 +1,16 @@
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import Any
 
 from graphgen.bases.base_llm_client import BaseLLMClient
 
 
-@dataclass
 class BaseGenerator(ABC):
     """
     Generate QAs based on given prompts.
     """
 
-    llm_client: BaseLLMClient
+    def __init__(self, llm_client: BaseLLMClient):
+        self.llm_client = llm_client
 
     @staticmethod
     @abstractmethod
 
@@ -1,21 +1,17 @@
 from abc import ABC, abstractmethod
 from collections import defaultdict
-from dataclasses import dataclass, field
 from typing import Dict, List, Tuple
 
 from graphgen.bases.base_llm_client import BaseLLMClient
 from graphgen.bases.base_storage import BaseGraphStorage
 from graphgen.bases.datatypes import Chunk
 
 
-@dataclass
 class BaseKGBuilder(ABC):
-    llm_client: BaseLLMClient
-
-    _nodes: Dict[str, List[dict]] = field(default_factory=lambda: defaultdict(list))
-    _edges: Dict[Tuple[str, str], List[dict]] = field(
-        default_factory=lambda: defaultdict(list)
-    )
+    def __init__(self, llm_client: BaseLLMClient):
+        self.llm_client = llm_client
+        self._nodes: Dict[str, List[dict]] = defaultdict(list)
+        self._edges: Dict[Tuple[str, str], List[dict]] = defaultdict(list)
 
     @abstractmethod
     async def extract(
 
@@ -1,12 +1,10 @@
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import Any, List
 
 from graphgen.bases.base_storage import BaseGraphStorage
 from graphgen.bases.datatypes import Community
 
 
-@dataclass
 class BasePartitioner(ABC):
     @abstractmethod
     async def partition(
 
@@ -1,25 +1,32 @@
 import copy
 import re
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import Callable, Iterable, List, Literal, Optional, Union
 
 from graphgen.bases.datatypes import Chunk
 from graphgen.utils import logger
 
 
-@dataclass
 class BaseSplitter(ABC):
     """
     Abstract base class for splitting text into smaller chunks.
     """
 
-    chunk_size: int = 1024
-    chunk_overlap: int = 100
-    length_function: Callable[[str], int] = len
-    keep_separator: bool = False
-    add_start_index: bool = False
-    strip_whitespace: bool = True
+    def __init__(
+        self,
+        chunk_size: int = 1024,
+        chunk_overlap: int = 100,
+        length_function: Callable[[str], int] = len,
+        keep_separator: bool = False,
+        add_start_index: bool = False,
+        strip_whitespace: bool = True,
+    ):
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+        self.length_function = length_function
+        self.keep_separator = keep_separator
+        self.add_start_index = add_start_index
+        self.strip_whitespace = strip_whitespace
 
     @abstractmethod
     def split_text(self, text: str) -> List[str]:
 
@@ -16,7 +16,6 @@ async def query_done_callback(self):
         """commit the storage operations after querying"""
 
 
-@dataclass
 class BaseListStorage(Generic[T], StorageNameSpace):
     async def all_items(self) -> list[T]:
         raise NotImplementedError
@@ -34,7 +33,6 @@ async def drop(self):
         raise NotImplementedError
 
 
-@dataclass
 class BaseKVStorage(Generic[T], StorageNameSpace):
     async def all_keys(self) -> list[str]:
         raise NotImplementedError
@@ -58,7 +56,6 @@ async def drop(self):
         raise NotImplementedError
 
 
-@dataclass
 class BaseGraphStorage(StorageNameSpace):
     async def has_node(self, node_id: str) -> bool:
         raise NotImplementedError
 
@@ -1,13 +1,12 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import List
 
 
-@dataclass
 class BaseTokenizer(ABC):
-    model_name: str = "cl100k_base"
+    def __init__(self, model_name: str = "cl100k_base"):
+        self.model_name = model_name
 
     @abstractmethod
     def encode(self, text: str) -> List[int]:
 
@@ -1,16 +1,15 @@
 import asyncio
-from dataclasses import dataclass
 
 from tqdm.asyncio import tqdm as tqdm_async
 
 from graphgen.bases.datatypes import QAPair
 from graphgen.utils import create_event_loop
 
 
-@dataclass
 class BaseEvaluator:
-    max_concurrent: int = 100
-    results: list[float] = None
+    def __init__(self, max_concurrent: int = 100):
+        self.max_concurrent = max_concurrent
+        self.results: list[float] = None
 
     def evaluate(self, pairs: list[QAPair]) -> list[float]:
         """
 
@@ -1,16 +1,13 @@
-from dataclasses import dataclass
-
 from graphgen.bases.datatypes import QAPair
 from graphgen.models.evaluator.base_evaluator import BaseEvaluator
 from graphgen.models.tokenizer import Tokenizer
 from graphgen.utils import create_event_loop
 
 
-@dataclass
 class LengthEvaluator(BaseEvaluator):
-    tokenizer_name: str = "cl100k_base"
-
-    def __post_init__(self):
+    def __init__(self, tokenizer_name: str = "cl100k_base", max_concurrent: int = 100):
+        super().__init__(max_concurrent)
+        self.tokenizer_name = tokenizer_name
         self.tokenizer = Tokenizer(model_name=self.tokenizer_name)
 
     async def evaluate_single(self, pair: QAPair) -> float:
 
@@ -1,4 +1,3 @@
-from dataclasses import dataclass, field
 from typing import Set
 
 from graphgen.bases.datatypes import QAPair
@@ -8,18 +7,15 @@
 nltk_helper = NLTKHelper()
 
 
-@dataclass
 class MTLDEvaluator(BaseEvaluator):
     """
     衡量文本词汇多样性的指标
     """
 
-    stopwords_en: Set[str] = field(
-        default_factory=lambda: set(nltk_helper.get_stopwords("english"))
-    )
-    stopwords_zh: Set[str] = field(
-        default_factory=lambda: set(nltk_helper.get_stopwords("chinese"))
-    )
+    def __init__(self, max_concurrent: int = 100):
+        super().__init__(max_concurrent)
+        self.stopwords_en: Set[str] = set(nltk_helper.get_stopwords("english"))
+        self.stopwords_zh: Set[str] = set(nltk_helper.get_stopwords("chinese"))
 
     async def evaluate_single(self, pair: QAPair) -> float:
         loop = create_event_loop()
 
@@ -1,12 +1,10 @@
-from dataclasses import dataclass
 from typing import Any
 
 from graphgen.bases import BaseGenerator
 from graphgen.templates import AGGREGATED_GENERATION_PROMPT
 from graphgen.utils import compute_content_hash, detect_main_language, logger
 
 
-@dataclass
 class AggregatedGenerator(BaseGenerator):
     """
     Aggregated Generator follows a TWO-STEP process: