| 
7 | 7 | from dataclasses import dataclass  | 
8 | 8 | from tqdm.asyncio import tqdm as tqdm_async  | 
9 | 9 | 
 
  | 
10 |  | -from .operators import *  | 
11 | 10 | from models import Chunk, JsonKVStorage, OpenAIModel, NetworkXStorage, WikiSearch, Tokenizer, TraverseStrategy  | 
12 | 11 | from utils import create_event_loop, logger, compute_content_hash  | 
13 | 12 | from models.storage.base_storage import StorageNameSpace  | 
 | 13 | +from .operators import *  | 
14 | 14 | 
 
  | 
15 | 15 | 
 
  | 
16 | 16 | sys_path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))  | 
@@ -81,7 +81,8 @@ async def async_split_chunks(self, data: Union[List[list], List[dict]], data_typ  | 
81 | 81 |                     compute_content_hash(dp["content"], prefix="chunk-"): {  | 
82 | 82 |                         **dp,  | 
83 | 83 |                         'full_doc_id': doc_key  | 
84 |  | -                    } for dp in self.tokenizer_instance.chunk_by_token_size(doc["content"], self.chunk_overlap_size, self.chunk_size)  | 
 | 84 | +                    } for dp in self.tokenizer_instance.chunk_by_token_size(doc["content"],  | 
 | 85 | +                                                                            self.chunk_overlap_size, self.chunk_size)  | 
85 | 86 |                 }  | 
86 | 87 |                 inserting_chunks.update(chunks)  | 
87 | 88 |             _add_chunk_keys = await self.text_chunks_storage.filter_keys(list(inserting_chunks.keys()))  | 
@@ -156,21 +157,29 @@ async def async_insert(self, data: Union[List[list], List[dict]], data_type: str  | 
156 | 157 | 
 
  | 
157 | 158 |     async def _insert_done(self):  | 
158 | 159 |         tasks = []  | 
159 |  | -        for storage_instance in [self.full_docs_storage, self.text_chunks_storage, self.graph_storage, self.wiki_storage]:  | 
 | 160 | +        for storage_instance in [self.full_docs_storage, self.text_chunks_storage,  | 
 | 161 | +                                 self.graph_storage, self.wiki_storage]:  | 
160 | 162 |             if storage_instance is None:  | 
161 | 163 |                 continue  | 
162 | 164 |             tasks.append(cast(StorageNameSpace, storage_instance).index_done_callback())  | 
163 | 165 |         await asyncio.gather(*tasks)  | 
164 | 166 | 
 
  | 
165 |  | -    def judge(self, re_judge=False, max_samples=1):  | 
 | 167 | +    def quiz(self, max_samples=1):  | 
166 | 168 |         loop = create_event_loop()  | 
167 |  | -        loop.run_until_complete(self.async_judge(re_judge, max_samples))  | 
 | 169 | +        loop.run_until_complete(self.async_quiz(max_samples))  | 
 | 170 | + | 
 | 171 | +    async def async_quiz(self, max_samples=1):  | 
 | 172 | +        await quiz_relations(self.teacher_llm_client, self.graph_storage, self.rephrase_storage, max_samples)  | 
 | 173 | +        await self.rephrase_storage.index_done_callback()  | 
168 | 174 | 
 
  | 
169 |  | -    async def async_judge(self, re_judge=False, max_samples=1):  | 
 | 175 | +    def judge(self, re_judge=False):  | 
 | 176 | +        loop = create_event_loop()  | 
 | 177 | +        loop.run_until_complete(self.async_judge(re_judge))  | 
 | 178 | + | 
 | 179 | +    async def async_judge(self, re_judge=False):  | 
170 | 180 |         _update_relations = await judge_relations(self.teacher_llm_client, self.student_llm_client,  | 
171 |  | -                                                  self.graph_storage, self.rephrase_storage, re_judge, max_samples)  | 
 | 181 | +                                                  self.graph_storage, self.rephrase_storage, re_judge)  | 
172 | 182 |         await _update_relations.index_done_callback()  | 
173 |  | -        await self.rephrase_storage.index_done_callback()  | 
174 | 183 | 
 
  | 
175 | 184 |     def traverse(self):  | 
176 | 185 |         loop = create_event_loop()  | 
 | 
0 commit comments