
Commit 5c9a6c5

Comprehensively refactored the codebase and modernized the interface, significantly improving code quality, user experience, and maintainability.

### ✨ Key Changes

- **GUI Modernization**
  - Removed `qdarkstyle` in favor of a custom light theme in `gui/style.qss`, giving the app a cleaner, more consistent look.
  - Fully reworked the mapping-file editor: a split two-pane layout with key search, dynamic adding/removal of key-value pairs, and unsaved-changes tracking.
  - The batch-tools area now uses a `FlowLayout` for a responsive button arrangement.
- **Code Quality & Style**
  - Formatted and lint-fixed nearly all `.py` files.
  - Removed unused imports and stray blank lines, unifying the code style.
- **Development & Testing**
  - Added `pyproject.toml` as the foundation for builds and package management.
  - Added unit tests for `TagManager` (`tests/test_tag_manager.py`).
  - Added dev dependencies to `requirements.txt`: `pytest`, `ruff`, `mypy`.
- **Refactor**
  - Reworked the `TagManager` constructor to support path injection for easier testing, sketched below.
1 parent ebd33e1 commit 5c9a6c5
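
The `TagManager` path-injection refactor is what makes the new unit tests possible: instead of hard-coding its mapping-file location, the constructor accepts an optional path, so tests can point it at a temp file. The class itself is not shown in this commit view, so the following is only a minimal sketch of the pattern; the default path, internals, and method names are assumptions.

```python
import json
from pathlib import Path

DEFAULT_TAG_PATH = Path("mapping/tag_jp_to_cn.json")  # assumed default location


class TagManager:
    def __init__(self, tag_path: Path | None = None) -> None:
        # Path injection: fall back to the shipped mapping file,
        # but let tests supply their own file instead.
        self.tag_path = tag_path or DEFAULT_TAG_PATH
        self._tags: dict[str, str] = {}
        if self.tag_path.exists():
            self._tags = json.loads(self.tag_path.read_text(encoding="utf-8"))

    def translate(self, jp_tag: str) -> str:
        # Unknown tags pass through unchanged.
        return self._tags.get(jp_tag, jp_tag)


# A pytest test in the spirit of tests/test_tag_manager.py, using tmp_path:
def test_translate_known_tag(tmp_path: Path) -> None:
    mapping = tmp_path / "tags.json"
    mapping.write_text(json.dumps({"ファンタジー": "奇幻"}), encoding="utf-8")
    manager = TagManager(tag_path=mapping)
    assert manager.translate("ファンタジー") == "奇幻"
    assert manager.translate("unknown") == "unknown"
```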


47 files changed: +589 −326 lines

.gitignore
Lines changed: 2 additions & 0 deletions

@@ -12,6 +12,8 @@ venv/
 .venv/
 
 # Application-generated files
+.mypy_cache/
+.pytest_cache
 cache/
 *.tmp
 *.bak_*

batch_updater.py
Lines changed: 8 additions & 7 deletions

@@ -2,12 +2,13 @@
 import asyncio
 import logging
 import re
+from typing import Any, Dict, List
+
 from tqdm import tqdm
-from typing import List, Dict, Any
 
-from core.init import init_context, close_context
-from config.config_token import GAME_DB_ID, BRAND_DB_ID, CHARACTER_DB_ID
 from config.config_fields import FIELDS
+from config.config_token import BRAND_DB_ID, CHARACTER_DB_ID, GAME_DB_ID
+from core.init import close_context, init_context
 
 # --- Configurable options ---
 # This is now the number of items processed concurrently per batch
@@ -146,20 +147,20 @@ async def write_item_to_notion(context, item_data: Dict[str, Any], db_key: str):
     try:
         if db_key == "games":
            schema = context["schema_manager"].get_schema(config["id"])
-
+
            # [Critical fix] Process the fields that need splitting before submitting
            fields_to_split = ["剧本", "原画", "声优", "音乐", "作品形式"]
            for field in fields_to_split:
                if field in bangumi_data:
                    raw_values = bangumi_data[field]
                    if not isinstance(raw_values, list):
                        raw_values = [raw_values]
-
+
                    processed_names = set()
                    for raw_item in raw_values:
                        split_results = await name_splitter.smart_split(raw_item, interaction_provider)
                        processed_names.update(split_results)
-
+
                    bangumi_data[field] = sorted(list(processed_names))
 
            await notion_client.create_or_update_game(
@@ -293,4 +294,4 @@ def get_user_choice():
 if __name__ == "__main__":
     from utils.logger import setup_logging_for_cli
     setup_logging_for_cli()
-    asyncio.run(main())
\ No newline at end of file
+    asyncio.run(main())
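
The second hunk aggregates the results of `name_splitter.smart_split` into a set and then sorts them, so multi-name credit fields end up deduplicated and in a stable order. The same post-processing in isolation, with a naive splitter standing in for `smart_split` (whose real implementation is not shown in this commit):

```python
def naive_split(raw: str) -> set[str]:
    # Stand-in for name_splitter.smart_split: split on common separators.
    parts = raw.replace("、", "/").replace(",", "/").split("/")
    return {p.strip() for p in parts if p.strip()}


raw_values = ["山田太郎/鈴木一郎", "山田太郎", "佐藤花子、山田太郎"]
processed_names: set[str] = set()
for raw_item in raw_values:
    processed_names.update(naive_split(raw_item))

print(sorted(processed_names))  # ['佐藤花子', '山田太郎', '鈴木一郎']
```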

clients/bangumi_client.py
Lines changed: 11 additions & 15 deletions

@@ -2,16 +2,12 @@
 # This module interacts with the Bangumi API to fetch game and character information
 import asyncio
 import logging
-from rapidfuzz import fuzz
-
-import json
-import os
 import re
-import time
 import unicodedata
-from typing import Set
+from typing import Any, Dict, Set
 
 import httpx
+from rapidfuzz import fuzz
 
 from clients.notion_client import NotionClient
 from config.config_fields import FIELDS
@@ -134,9 +130,9 @@ async def search_and_select_bangumi_id(self, keyword: str) -> str | None:
                f"🔍 [Bangumi] 模糊匹配成功(放宽判定): {best['name']} (相似度 {candidates[0][0]:.2f})"
            )
            return str(best["id"])
-
+
        logging.warning("⚠️ Bangumi自动匹配相似度不足,请手动选择:")
-
+
        # Format candidates for display in GUI
        gui_candidates = []
        for idx, (ratio, item) in enumerate(candidates[:10]):
@@ -145,7 +141,7 @@ async def search_and_select_bangumi_id(self, keyword: str) -> str | None:
 
        # Use the interaction provider to get the user's choice
        selected_id = await self.interaction_provider.get_bangumi_game_choice(keyword, gui_candidates)
-
+
        return selected_id
 
    async def fetch_game(self, subject_id: str) -> dict:
@@ -170,9 +166,9 @@ async def fetch_game(self, subject_id: str) -> dict:
        game_data.update(infobox_data)
        return game_data
 
-    async def _process_infobox(self, infobox: list, target_db_id: str, bangumi_url: str) -> dict:
-        processed = {}
-        if not infobox:
+    async def _process_infobox(self, infobox: list, target_db_id: str | None, bangumi_url: str) -> dict:
+        processed: Dict[str, Any] = {}
+        if not infobox or not target_db_id:
            return processed
 
        async def _map_and_set_prop(key, value):
@@ -265,7 +261,7 @@ async def fetch_characters(self, subject_id: str) -> list:
 
        characters = []
        for char_summary, detail_resp in zip(char_list_with_actors, responses):
-            if isinstance(detail_resp, Exception) or detail_resp.status_code != 200:
+            if not isinstance(detail_resp, httpx.Response) or detail_resp.status_code != 200:
                continue
 
            detail = detail_resp.json()
@@ -327,7 +323,7 @@ async def create_or_update_character(self, char: dict, warned_keys: Set[str]) ->
            "BWH": FIELDS["character_bwh"],
            "身高": FIELDS["character_height"],
        }
-        props = {}
+        props: Dict[str, Any] = {}
        for internal_key, value in char.items():
            if not value:
                continue
@@ -383,7 +379,7 @@ async def create_or_link_characters(self, game_page_id: str, subject_id: str):
                "PATCH", f"https://api.notion.com/v1/pages/{game_page_id}", patch
            )
            return
-        warned_keys_for_this_game = set()
+        warned_keys_for_this_game: Set[str] = set()
        tasks = [
            self.create_or_update_character(ch, warned_keys_for_this_game) for ch in characters
        ]
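
Note how the `fetch_characters` change flips a negative exception check into a positive `httpx.Response` check. If the detail responses come from something like `asyncio.gather(..., return_exceptions=True)` (an assumption, since that call is not in this hunk), the results are a mix of responses and exceptions, and testing for the type you actually want is both safer and friendlier to mypy's type narrowing. A minimal sketch:

```python
import asyncio

import httpx


async def fetch_details(urls: list[str]) -> list[httpx.Response]:
    async with httpx.AsyncClient() as client:
        # return_exceptions=True turns failures into returned values instead
        # of cancelling the batch, so each result is Response | BaseException.
        results = await asyncio.gather(
            *(client.get(u) for u in urls), return_exceptions=True
        )
    # Positive narrowing: after this check mypy knows each item is an
    # httpx.Response, so .status_code is safe to touch.
    return [r for r in results if isinstance(r, httpx.Response) and r.status_code == 200]


# asyncio.run(fetch_details(["https://api.bgm.tv/v0/characters/1"]))
```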

clients/base_client.py
Lines changed: 5 additions & 4 deletions

@@ -2,6 +2,7 @@
 import json
 import logging
 import os
+
 import httpx
 
 
@@ -45,20 +46,20 @@ async def _request(self, method: str, url: str, **kwargs) -> httpx.Response | None:
        try:
            # Make sure the URL is absolute
            full_url = url if url.startswith("http") else f"{self.base_url}{url}"
-
+
            # Merge the default headers with any headers passed in by the caller
            request_headers = self.headers.copy()
            if "headers" in kwargs:
                request_headers.update(kwargs.pop("headers"))
 
            logging.info(f"🔍 [{self.__class__.__name__}] {method.upper()} {full_url}")
-
+
            response = await self.client.request(method, full_url, headers=request_headers, **kwargs)
            response.raise_for_status()
-
+
            logging.debug(f"✅ [{self.__class__.__name__}] 请求成功: {response.status_code} {response.reason_phrase}")
            return response
-
+
        except httpx.HTTPStatusError as e:
            logging.error(f"❌ [{self.__class__.__name__}] 请求失败: {e.response.status_code} for url: {e.request.url}")
            logging.error(f" -> 响应: {e.response.text[:300]}")  # log a portion of the response body

clients/brand_cache.py
Lines changed: 1 addition & 1 deletion

@@ -95,4 +95,4 @@ def _hash_content(self, data: dict) -> str:
            serialized = json.dumps(data, sort_keys=True)
            return hashlib.md5(serialized.encode("utf-8")).hexdigest()
        except Exception:
-            return ""
\ No newline at end of file
+            return ""

clients/dlsite_client.py
Lines changed: 5 additions & 5 deletions

@@ -12,8 +12,8 @@
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium_stealth import stealth
 
-from utils.driver import create_driver
 from utils.tag_logger import append_new_tags
+
 from .base_client import BaseClient
 
 TAG_JP_PATH = os.path.join(os.path.dirname(__file__), "..", "mapping", "tag_jp_to_cn.json")
@@ -48,7 +48,7 @@ async def search(self, keyword, limit=30):
        logging.info(f"🔍 [Dlsite] 正在搜索关键词: {keyword}")
        query = urllib.parse.quote_plus(keyword)
        url = f"/maniax/fsr/=/language/jp/sex_category%5B0%5D/male/keyword/{query}/work_category%5B0%5D/doujin/work_category%5B1%5D/books/work_category%5B2%5D/pc/work_category%5B3%5D/app/order%5B0%5D/trend/options_and_or/and/per_page/30/page/1/from/fs.header"
-
+
        resp = await self.get(url, timeout=15)
        if not resp:
            return []
@@ -128,10 +128,10 @@ def extract_list_from_td(table_cell):
            # Replace <br> tags with a common separator
            for br in table_cell.find_all("br"):
                br.replace_with(",")
-
+
            # Get all text, using the common separator
            all_text = table_cell.get_text(separator=",")
-
+
            # Standardize all separators to the common one and then split
            processed_text = all_text.replace('、', ',').replace('/', ',').replace(',', ',')
            return [name.strip() for name in processed_text.split(',') if name.strip()]
@@ -234,4 +234,4 @@ def _blocking_task():
            )
            return {}
 
-        return await asyncio.to_thread(_blocking_task)
\ No newline at end of file
+        return await asyncio.to_thread(_blocking_task)
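
The separator-normalization helper touched above is easy to sanity-check in isolation. Here is a self-contained sketch: the standalone function mirrors the nested `extract_list_from_td` from the diff, and the sample HTML is made up.

```python
from bs4 import BeautifulSoup


def extract_list_from_td(table_cell):
    # Replace <br> tags with a common separator
    for br in table_cell.find_all("br"):
        br.replace_with(",")
    # Get all text, using the common separator
    all_text = table_cell.get_text(separator=",")
    # Standardize 、 / and full-width , to ASCII commas, then split
    processed_text = all_text.replace("、", ",").replace("/", ",").replace(",", ",")
    return [name.strip() for name in processed_text.split(",") if name.strip()]


cell = BeautifulSoup("<td>山田太郎、鈴木/佐藤<br>田中</td>", "lxml").td
print(extract_list_from_td(cell))  # ['山田太郎', '鈴木', '佐藤', '田中']
```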

clients/fanza_client.py
Lines changed: 39 additions & 20 deletions

@@ -1,6 +1,7 @@
 # clients/fanza_client.py
 import logging
 import re
+from typing import Any, Dict, List
 from urllib.parse import quote, urljoin
 
 from bs4 import BeautifulSoup, Tag
@@ -20,26 +21,32 @@ async def search(self, keyword: str, limit=30):
        encoded_keyword = quote(keyword.encode("utf-8", errors="ignore"))
        url = f"/search/?service=pcgame&searchstr={encoded_keyword}&sort=date"
        resp = await self.get(url, timeout=15, cookies=self.cookies)
-
+
        results = []
        if resp:
            soup = BeautifulSoup(resp.text, "lxml")
            result_list = soup.select_one("ul.component-legacy-productTile")
            if result_list:
                for li in result_list.find_all("li", class_="component-legacy-productTile__item", limit=limit):
+                    if not isinstance(li, Tag):
+                        continue
                    title_tag = li.select_one(".component-legacy-productTile__title")
                    price_tag = li.select_one(".component-legacy-productTile__price")
                    url_tag = li.select_one("a.component-legacy-productTile__detailLink")
                    type_tag = li.select_one(".component-legacy-productTile__relatedInfo")
                    item_type = type_tag.get_text(strip=True) if type_tag else "未知"
 
-                    if not (title_tag and url_tag and url_tag.has_attr("href")):
+                    if not (title_tag and url_tag):
+                        continue
+
+                    href = url_tag.get("href")
+                    if not isinstance(href, str):
                        continue
 
                    title = title_tag.get_text(strip=True)
                    price_text = price_tag.get_text(strip=True) if price_tag else "未知"
                    price = price_text.split("円")[0].replace(",", "").strip()
-                    full_url = urljoin(self.base_url, url_tag["href"])
+                    full_url = urljoin(self.base_url, href)
 
                    results.append({
                        "title": title, "url": full_url,
@@ -59,13 +66,13 @@ async def search(self, keyword: str, limit=30):
        if final_count > 0:
            logging.info(f"✅ [Fanza] 主搜索成功,找到 {initial_count} 个原始结果,筛选后剩余 {final_count} 个游戏。")
            return filtered_results
-
+
        # --- Fallback search logic (if the primary search finds nothing) ---
        logging.warning("⚠️ [Fanza] 主搜索 (dlsoft) 未找到结果,尝试后备搜索 (mono)...")
-
+
        fallback_base_url = "https://www.dmm.co.jp"
        url_fallback = f"{fallback_base_url}/mono/-/search/=/searchstr={encoded_keyword}/sort=date/"
-
+
        resp_fallback = await self.get(url_fallback, timeout=15, cookies=self.cookies)
        if not resp_fallback:
            logging.error("❌ [Fanza] 后备搜索请求失败。")
@@ -79,24 +86,30 @@ async def search(self, keyword: str, limit=30):
            return []
 
        for li in result_list_fallback.find_all("li", limit=limit):
+            if not isinstance(li, Tag):
+                continue
            url_tag = li.select_one(".tmb a")
            if not url_tag: continue
-
+
            title_tag = url_tag.select_one(".txt")
            price_tag = li.select_one(".price")
 
-            if not (title_tag and url_tag.has_attr("href")): continue
+            if not (title_tag and url_tag): continue
+
+            href = url_tag.get("href")
+            if not isinstance(href, str):
+                continue
 
            title = title_tag.get_text(strip=True)
            price_text = price_tag.get_text(strip=True) if price_tag else "未知"
            price = price_text.split("円")[0].replace(",", "").strip()
-            full_url = urljoin(fallback_base_url, url_tag["href"])
+            full_url = urljoin(fallback_base_url, href)
 
            results_fallback.append({
                "title": title, "url": full_url,
                "价格": price or "未知", "类型": "未知(后备)",
            })
-
+
        initial_count_fallback = len(results_fallback)
        filtered_results_fallback = [
            item for item in results_fallback
@@ -117,28 +130,32 @@ async def get_game_detail(self, url: str) -> dict:
 
        try:
            soup = BeautifulSoup(resp.text, "lxml")
-            details = {}
+            details: Dict[str, Any] = {}
+            game_types: List[str] = []
 
            # ==================================================================
            # Smart parsing: choose the parsing logic based on the URL
            # ==================================================================
            if "/mono/" in url:
                # --- Parsing logic for the legacy/fallback (`/mono/`) endpoint ---
                logging.info("🔍 [Fanza] 检测到 /mono/ 链接,使用旧版表格解析器。")
-
+
                if title_tag := soup.select_one("h1#title"):
                    details["标题"] = title_tag.get_text(strip=True)
-
+
                if cover_tag := soup.select_one("#sample-video img, .area-img img"):
                    if src := cover_tag.get("src"):
-                        details["封面图链接"] = urljoin(self.base_url, src)
+                        if isinstance(src, str):
+                            details["封面图链接"] = urljoin(self.base_url, src)
 
                if main_table := soup.select_one("table.mg-b20"):
                    rows = main_table.find_all("tr")
                    for row in rows:
+                        if not isinstance(row, Tag):
+                            continue
                        cells = row.find_all("td")
                        if len(cells) < 2: continue
-
+
                        key = cells[0].get_text(strip=True)
                        value_cell = cells[1]
 
@@ -153,11 +170,9 @@ async def get_game_detail(self, url: str) -> dict:
                        elif key.startswith("ジャンル"):
                            details["标签"] = [a.get_text(strip=True) for a in value_cell.find_all("a")]
                        elif "ゲームジャンル" in key:
-                            game_types = details.get("作品形式", [])
                            genre_text = value_cell.get_text(strip=True).upper()
                            for genre_key, genre_value in self._genre_reverse_mapping.items():
                                if genre_key in genre_text: game_types.append(genre_value)
-                            if game_types: details["作品形式"] = list(dict.fromkeys(game_types))
                        elif "ボイス" in key:
                            if "あり" in value_cell.get_text(strip=True):
                                game_types = details.get("作品形式", [])
@@ -168,6 +183,8 @@ async def get_game_detail(self, url: str) -> dict:
            logging.info("🔍 [Fanza] 未检测到 /mono/ 链接,使用新版解析器。")
            if top_table := soup.select_one(".contentsDetailTop__table"):
                for row in top_table.find_all("div", class_="contentsDetailTop__tableRow"):
+                    if not isinstance(row, Tag):
+                        continue
                    key_tag = row.select_one(".contentsDetailTop__tableDataLeft p")
                    value_tag = row.select_one(".contentsDetailTop__tableDataRight")
                    if not (key_tag and value_tag): continue
@@ -200,7 +217,6 @@ def extract_list(value_div: Tag | None) -> list[str]:
            for key in details:
                if isinstance(details[key], list): details[key] = sorted(list(set(details[key])))
 
-            game_types = []
            if genre_div := find_row_value("ゲームジャンル"):
                genre_text = genre_div.get_text(strip=True).upper()
                for key, value in self._genre_reverse_mapping.items():
@@ -220,13 +236,16 @@ def extract_list(value_div: Tag | None) -> list[str]:
            cover_selector = (".productPreview__mainImage img, #fn-main_image, .main-visual img")
            if cover_img_tag := soup.select_one(cover_selector):
                if src := cover_img_tag.get("src"): details["封面图链接"] = urljoin(self.base_url, src)
-
+
            if title_tag := soup.select_one("h1.productTitle__txt"):
                details["标题"] = title_tag.get_text(strip=True)
            if price_tag := soup.select_one(".priceInformation__price"):
                details["价格"] = price_tag.get_text(strip=True).replace("円", "").replace(",", "")
 
+            if game_types:
+                details["作品形式"] = sorted(list(dict.fromkeys(game_types)))
+
            return details
        except Exception as e:
            logging.error(f"❌ [Fanza] 解析详情页失败: {e}")
-            return {}
\ No newline at end of file
+            return {}
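
Most of the `isinstance` checks added in this file exist to satisfy mypy against BeautifulSoup's loose types: `find_all()` yields page elements that are not guaranteed to be `Tag`s, and `Tag.get()` is typed roughly as `str | list[str] | None`, since multi-valued attributes such as `class` come back as lists. A minimal sketch of the narrowing pattern (sample HTML is made up):

```python
from bs4 import BeautifulSoup, Tag

html = '<ul><li><a href="/a" class="x y">A</a></li><li>no link</li></ul>'
soup = BeautifulSoup(html, "lxml")

for li in soup.find_all("li"):
    if not isinstance(li, Tag):  # narrows a PageElement down to Tag for mypy
        continue
    a = li.select_one("a")
    if a is None:
        continue
    href = a.get("href")         # typed str | list[str] | None
    if not isinstance(href, str):
        continue                 # skips missing or multi-valued attributes
    print(href)                  # here href is a plain str
```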
