Skip to content

Commit 410b1f0

Browse files
committed
feat: 重构数据处理与程序退出逻辑
- 修复了多选字段(如剧本、平台)因包含 ,、/ 等分隔符而导致的同步失败问题。新逻辑能智能区分不同字段和分隔符,确保数据被正确分割。
1 parent 7ff70e6 commit 410b1f0

File tree

5 files changed

+71
-15
lines changed

5 files changed

+71
-15
lines changed

clients/dlsite_client.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,16 @@ async def get_game_detail(self, url):
125125
key = th.get_text(strip=True)
126126

127127
def extract_list_from_td(table_cell):
    """Split the text of a table cell into a list of clean names.

    Every ``<br>`` element inside the cell is first replaced by a comma.
    The cell text is then collected with a comma between text nodes, all
    recognised separators are normalised to an ASCII comma, and the result
    is split. Surrounding whitespace is stripped and empty pieces dropped.

    Recognised separators: ASCII comma, full-width comma (U+FF0C),
    ideographic comma (U+3001) and slash.

    Args:
        table_cell: Element exposing ``find_all()`` and ``get_text()``
            (presumably a BeautifulSoup tag -- confirm against the caller).
            It is mutated in place: its ``<br>`` children are replaced.

    Returns:
        The non-empty, stripped names in document order.
    """
    # Turn explicit line breaks into the common separator.
    for br in table_cell.find_all("br"):
        br.replace_with(",")

    all_text = table_cell.get_text(separator=",")

    # The separators are written as escapes on purpose: a literal full-width
    # comma is easily folded to ASCII by tooling, which silently turns the
    # normalisation into a no-op replace(",", ",").
    separators = str.maketrans({"\u3001": ",", "/": ",", "\uff0c": ","})
    pieces = all_text.translate(separators).split(",")
    return [name.strip() for name in pieces if name.strip()]
132138

133139
if key in self.STAFF_MAPPING:
134140
details[self.STAFF_MAPPING[key]] = extract_list_from_td(td)

clients/notion_client.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -375,14 +375,34 @@ async def create_or_update_game(self, properties_schema: dict, page_id=None, **i
375375
options = []
376376
values_to_process = value if isinstance(value, list) else [value]
377377

378+
# For tags, trust the TagManager and don't split further.
378379
if notion_prop_name == FIELDS["tags"]:
379380
for item in values_to_process:
380381
if item:
381382
options.append(str(item))
382383
else:
384+
# For other multi-selects, use a smart splitting heuristic.
383385
for item in values_to_process:
384-
if item:
385-
options.append(str(item))
386+
if not isinstance(item, str):
387+
if item:
388+
options.append(str(item))
389+
continue
390+
391+
# Heuristic: Only split by '/' if it doesn't create very short segments.
392+
# This helps differentiate "Artist A / Artist B" from "Windows 7 / 8".
393+
use_slash_as_separator = True
394+
if '/' in item:
395+
# Threshold for a segment to be considered "too short"
396+
MIN_SEGMENT_LEN = 3
397+
if any(len(part.strip()) < MIN_SEGMENT_LEN for part in item.split('/')):
398+
use_slash_as_separator = False
399+
400+
# Standardize separators
401+
processed_item = item.replace(',', ',').replace('、', ',')
402+
if use_slash_as_separator:
403+
processed_item = processed_item.replace('/', ',')
404+
405+
options.extend([opt.strip() for opt in processed_item.split(',') if opt.strip()])
386406

387407
if options:
388408
unique_options = list(dict.fromkeys(options))

core/driver_factory.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -89,26 +89,42 @@ async def get_driver(self, driver_key: str) -> Optional[WebDriver]:
8989
logger.warn(f"{driver_key} 既未创建也无创建任务。可能需要先调用 start_background_creation。")
9090
return None
9191

92-
def shutdown(self):
93-
"""关闭所有驱动并停止后台事件循环。"""
92+
def shutdown_sync(self, timeout: Optional[float] = None):
    """Synchronously close all drivers and stop the background event loop.

    Blocks the calling thread. Driver shutdown is submitted to the
    factory's own loop with ``asyncio.run_coroutine_threadsafe``; the loop
    is then asked to stop and the background thread is joined.

    Args:
        timeout: Maximum number of seconds to wait for the drivers to
            close, and again for the background thread to finish. ``None``
            (the default) waits indefinitely, which is the existing
            behaviour. Pass a number to keep a stuck driver from blocking
            program exit forever.

    Returns:
        None. Errors raised while closing drivers (including a timeout)
        are logged and do not prevent the loop from being stopped.
    """
    if not self._loop:
        return
    logger.system("正在关闭驱动工厂...")

    if self._drivers or self._creation_futures:
        future = asyncio.run_coroutine_threadsafe(self.close_all_drivers(), self._loop)
        try:
            # Block until the drivers are closed (or the timeout expires).
            future.result(timeout=timeout)
        except Exception as e:
            # Best effort: still stop the loop and join the thread below.
            logger.error(f"关闭驱动时发生错误: {e}")

    if self._loop.is_running():
        # stop() must be scheduled from the loop's own thread.
        self._loop.call_soon_threadsafe(self._loop.stop)

    if self._thread:
        self._thread.join(timeout=timeout)
    logger.system("驱动工厂已关闭。")
111+
112+
async def shutdown_async(self):
    """Asynchronously close all drivers and stop the background event loop.

    Intended to be awaited from a running event loop. Does nothing when the
    factory has no loop. Errors raised while closing drivers are logged, as
    in ``shutdown_sync``, so the background loop is always stopped and its
    thread always joined.

    NOTE(review): ``close_all_drivers()`` is awaited on the caller's loop,
    whereas ``shutdown_sync`` submits it to ``self._loop``. Confirm that the
    coroutine does not depend on running inside the factory's own loop.
    """
    if not self._loop:
        return
    logger.system("正在关闭驱动工厂...")

    if self._drivers or self._creation_futures:
        try:
            await self.close_all_drivers()
        except Exception as e:
            # Best effort: a failed close must not leave the background
            # loop running and its thread un-joined.
            logger.error(f"关闭驱动时发生错误: {e}")

    if self._loop.is_running():
        # stop() must be scheduled from the loop's own thread.
        self._loop.call_soon_threadsafe(self._loop.stop)

    if self._thread:
        # Thread.join() blocks, so run it in a worker thread to keep the
        # caller's event loop responsive while the background thread exits.
        await asyncio.to_thread(self._thread.join)
    logger.system("驱动工厂已关闭。")
113129

114130
async def close_all_drivers(self):

core/init.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ async def init_context():
1515

1616
async def close_context(context: dict):
1717
# Shutdown browser drivers first
18-
driver_factory.shutdown()
18+
await driver_factory.shutdown_async()
1919

2020
# Close loop-specific resources
2121
if context.get("async_client"):

utils/tag_manager.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,18 @@ async def process_tags(
164164
interaction_provider: InteractionProvider,
165165
) -> List[str]:
166166
async with self._interaction_lock:
167+
168+
def _split_tags(tags: List[str]) -> List[str]:
169+
processed_tags = []
170+
for tag in tags:
171+
# 拆分可能包含逗号或分号的标签字符串
172+
processed_tags.extend([t.strip() for t in tag.replace(',', ',').replace(';', ',').split(',') if t.strip()])
173+
return processed_tags
174+
175+
dlsite_tags = _split_tags(dlsite_tags)
176+
fanza_tags = _split_tags(fanza_tags)
177+
ggbases_tags = _split_tags(ggbases_tags)
178+
167179
translated_tags = []
168180
source_maps = [
169181
(dlsite_tags, self._jp_to_cn_map, TAG_JP_TO_CN_PATH, "DLsite"),
@@ -179,12 +191,14 @@ async def process_tags(
179191
tag, source_map, map_path, source_name, interaction_provider
180192
)
181193
if translation:
182-
translated_tags.append(translation)
194+
# Also split the translated tags
195+
translated_tags.extend([t.strip() for t in translation.replace(',', ',').replace(';', ',').split(',') if t.strip()])
183196

184197
for tag in ggbases_tags:
185198
translated = self._ggbase_map.get(tag, tag) or tag
186199
if translated:
187-
translated_tags.append(translated)
200+
# Also split the translated tags
201+
translated_tags.extend([t.strip() for t in translated.replace(',', ',').replace(';', ',').split(',') if t.strip()])
188202

189203
final_tags_set: Set[str] = set()
190204
for concept in list(dict.fromkeys(translated_tags)):

0 commit comments

Comments
 (0)