| 
1 | 1 | import copy  | 
2 | 2 | import re  | 
3 | 3 | from abc import ABC, abstractmethod  | 
4 |  | -from dataclasses import dataclass  | 
5 | 4 | from typing import Callable, Iterable, List, Literal, Optional, Union  | 
6 | 5 | 
 
  | 
7 | 6 | from graphgen.bases.datatypes import Chunk  | 
8 | 7 | from graphgen.utils import logger  | 
9 | 8 | 
 
  | 
10 | 9 | 
 
  | 
11 |  | -@dataclass  | 
12 | 10 | class BaseSplitter(ABC):  | 
13 | 11 |     """  | 
14 | 12 |     Abstract base class for splitting text into smaller chunks.  | 
15 | 13 |     """  | 
16 | 14 | 
 
  | 
17 |  | -    chunk_size: int = 1024  | 
18 |  | -    chunk_overlap: int = 100  | 
19 |  | -    length_function: Callable[[str], int] = len  | 
20 |  | -    keep_separator: bool = False  | 
21 |  | -    add_start_index: bool = False  | 
22 |  | -    strip_whitespace: bool = True  | 
 | 15 | +    def __init__(  | 
 | 16 | +        self,  | 
 | 17 | +        chunk_size: int = 1024,  | 
 | 18 | +        chunk_overlap: int = 100,  | 
 | 19 | +        length_function: Callable[[str], int] = len,  | 
 | 20 | +        keep_separator: bool = False,  | 
 | 21 | +        add_start_index: bool = False,  | 
 | 22 | +        strip_whitespace: bool = True,  | 
 | 23 | +    ):  | 
 | 24 | +        self.chunk_size = chunk_size  | 
 | 25 | +        self.chunk_overlap = chunk_overlap  | 
 | 26 | +        self.length_function = length_function  | 
 | 27 | +        self.keep_separator = keep_separator  | 
 | 28 | +        self.add_start_index = add_start_index  | 
 | 29 | +        self.strip_whitespace = strip_whitespace  | 
23 | 30 | 
 
  | 
24 | 31 |     @abstractmethod  | 
25 | 32 |     def split_text(self, text: str) -> List[str]:  | 
 | 
0 commit comments