Skip to content

Commit 7ff70e6

Browse files
committed
feat: 增强品牌匹配并修复客户端问题
- 增强 Fanza 客户端: - 优先使用 og:image 提取封面图,兼容未发售游戏。 - 实现智能品牌匹配: - 修复日志乱码:
1 parent e3bc981 commit 7ff70e6

File tree

10 files changed

+120
-33
lines changed

10 files changed

+120
-33
lines changed

clients/fanza_client.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,19 @@ def extract_list(value_div: Tag | None) -> list[str]:
150150
if tags_div := find_row_value("ジャンル"):
151151
details["标签"] = [a.get_text(strip=True) for a in tags_div.select("li a")]
152152

153-
if cover_img_tag := soup.select_one(".productPreview__mainImage img, #fn-main_image"):
154-
if cover_img_tag.has_attr("src"):
155-
details["封面图链接"] = urljoin(self.base_url, cover_img_tag["src"])
153+
# 优先使用 og:image 获取封面图
154+
if cover_tag := soup.find("meta", property="og:image"):
155+
details["封面图链接"] = urljoin(self.base_url, cover_tag["content"])
156+
else:
157+
# 兼容旧的封面图提取逻辑,并为未发售游戏增加选择器
158+
cover_selector = (
159+
".productPreview__mainImage img, "
160+
"#fn-main_image, "
161+
".main-visual img"
162+
)
163+
if cover_img_tag := soup.select_one(cover_selector):
164+
if src := cover_img_tag.get("src"):
165+
details["封面图链接"] = urljoin(self.base_url, src)
156166
if title_tag := soup.select_one("h1.productTitle__txt"):
157167
details["标题"] = title_tag.get_text(strip=True)
158168
if price_tag := soup.select_one(".priceInformation__price"):

clients/notion_client.py

Lines changed: 34 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from config.config_fields import FIELDS
1111
from utils import logger
12-
from utils.utils import convert_date_jp_to_iso
12+
from utils.utils import convert_date_jp_to_iso, normalize_brand_name
1313

1414

1515
class NotionClient:
@@ -23,6 +23,7 @@ def __init__(self, token, game_db_id, brand_db_id, client: httpx.AsyncClient):
2323
"Notion-Version": "2022-06-28",
2424
"Content-Type": "application/json",
2525
}
26+
self._all_brands_cache = None
2627

2728
async def _request(self, method, url, json_data=None):
2829
try:
@@ -88,29 +89,39 @@ async def check_page_exists(self, page_id):
8889
except Exception:
8990
return False
9091

91-
async def search_brand(self, brand_name):
92-
url = f"https://api.notion.com/v1/databases/{self.brand_db_id}/query"
93-
payload = {"filter": {"property": FIELDS["brand_name"], "title": {"equals": brand_name}}}
94-
resp = await self._request("POST", url, payload)
95-
return resp.get("results", []) if resp else []
92+
async def get_all_brands(self):
    """Return every titled brand page as a list of ``{"title", "id"}`` dicts.

    The first call reads all pages of the brand database through
    ``get_all_pages_from_db`` and stores the result on the instance; later
    calls return that stored list (even when it is empty) without issuing
    another request.
    """
    cached = self._all_brands_cache
    if cached is not None:
        return cached

    collected = []
    for page in await self.get_all_pages_from_db(self.brand_db_id):
        # The title property holds a list of text fragments; join their
        # plain text to get the full brand name.
        fragments = page.get("properties", {}).get(FIELDS["brand_name"], {}).get("title", [])
        title = "".join(part.get("plain_text", "") for part in fragments).strip()
        if not title:
            # Pages without a usable title cannot be matched by name.
            continue
        collected.append({"title": title, "id": page["id"]})

    self._all_brands_cache = collected
    return collected
96106

97107
async def get_brand_details_by_name(self, name: str) -> dict | None:
98108
"""根据品牌名称查找品牌,并返回其ID和图标状态。"""
99-
results = await self.search_brand(name)
100-
if not results:
109+
if not name:
101110
return None
102-
103-
# 假设第一个结果是正确的
104-
page = results[0]
105-
page_id = page.get("id")
106-
properties = page.get("properties", {})
107-
icon_prop = properties.get(FIELDS["brand_icon"], {})
108-
109-
# 修正:仅当品牌logo文件(files属性)存在时,才认为已有图标。
110-
# 移除 or bool(page.get("icon")),避免将页面的Emoji误判为有效图标。
111-
has_icon = bool(icon_prop.get("files"))
112-
113-
return {"page_id": page_id, "has_icon": has_icon}
111+
112+
all_brands = await self.get_all_brands()
113+
normalized_name = normalize_brand_name(name)
114+
115+
for brand in all_brands:
116+
if normalize_brand_name(brand["title"]) == normalized_name:
117+
page = await self.get_page(brand["id"])
118+
if not page:
119+
continue
120+
properties = page.get("properties", {})
121+
icon_prop = properties.get(FIELDS["brand_icon"], {})
122+
has_icon = bool(icon_prop.get("files"))
123+
return {"page_id": brand["id"], "has_icon": has_icon}
124+
return None
114125

115126
async def get_all_game_titles(self):
116127
url = f"https://api.notion.com/v1/databases/{self.game_db_id}/query"
@@ -399,8 +410,9 @@ async def create_or_update_game(self, properties_schema: dict, page_id=None, **i
399410

400411
async def create_or_update_brand(self, brand_name, page_id=None, **info):
401412
if not page_id:
402-
existing = await self.search_brand(brand_name)
403-
page_id = existing[0]["id"] if existing else None
413+
brand_details = await self.get_brand_details_by_name(brand_name)
414+
if brand_details:
415+
page_id = brand_details.get("id")
404416

405417
schema_data = await self.get_database_schema(self.brand_db_id)
406418
if not schema_data:

core/context_factory.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from clients.notion_client import NotionClient
1212
from config.config_token import BRAND_DB_ID, CHARACTER_DB_ID, GAME_DB_ID, NOTION_TOKEN
1313
from core.interaction import InteractionProvider
14-
from core.mapping_manager import BangumiMappingManager
14+
from core.mapping_manager import BangumiMappingManager, BrandMappingManager
1515
from core.name_splitter import NameSplitter
1616
from core.schema_manager import NotionSchemaManager
1717
from utils import logger
@@ -30,6 +30,7 @@ def create_shared_context():
3030
# 管理器是共享的
3131
tag_manager = TagManager()
3232
name_splitter = NameSplitter()
33+
brand_mapping_manager = BrandMappingManager()
3334

3435
brand_cache = BrandCache()
3536
brand_cache.load_cache()
@@ -43,6 +44,7 @@ def create_shared_context():
4344
"data_manager": data_manager,
4445
"tag_manager": tag_manager,
4546
"name_splitter": name_splitter,
47+
"brand_mapping_manager": brand_mapping_manager,
4648
}
4749

4850

core/gui_worker.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,8 @@ async def game_flow(self) -> bool:
257257
secondary_tasks["ggbases_info"] = self.context["ggbases"].get_info_by_url_with_selenium(ggbases_url)
258258

259259
# --- 准备品牌任务 ---
260-
brand_name = detail.get("品牌")
260+
raw_brand_name = detail.get("品牌")
261+
brand_name = self.context["brand_mapping_manager"].get_canonical_name(raw_brand_name)
261262
brand_page_id, needs_fetching = await check_brand_status(self.context, brand_name)
262263
if needs_fetching and brand_name:
263264
logger.step(f"品牌 '{brand_name}' 需要抓取新信息...")

core/mapping_manager.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,55 @@
88
from utils.similarity_check import get_close_matches_with_ratio
99
from utils import logger
1010
from core.interaction import InteractionProvider
11+
from utils.utils import normalize_brand_name
1112

1213

1314
MAPPING_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "mapping")
1415
BGM_PROP_MAPPING_PATH = os.path.join(MAPPING_DIR, "bangumi_prop_mapping.json")
1516
BGM_IGNORE_LIST_PATH = os.path.join(MAPPING_DIR, "bangumi_ignore_list.json")
17+
BRAND_MAPPING_PATH = os.path.join(MAPPING_DIR, "brand_mapping.json")
1618

1719
DB_ID_TO_NAMESPACE = {
1820
GAME_DB_ID: "games",
1921
CHARACTER_DB_ID: "characters",
2022
BRAND_DB_ID: "brands",
2123
}
2224

25+
class BrandMappingManager:
    """Resolve brand-name aliases to a canonical brand name.

    The mapping file is expected to be a JSON object of the form
    ``{"Canonical Name": ["alias 1", "alias 2", ...]}``. Lookups compare
    names after ``normalize_brand_name`` has been applied to both sides.
    """

    def __init__(self, file_path: str = BRAND_MAPPING_PATH):
        """Load the mapping from ``file_path`` and build the lookup table."""
        self.file_path = file_path
        # Canonical name -> aliases, as loaded from the mapping file.
        self._mapping: Dict[str, List[str]] = {}
        # Normalized alias (or normalized canonical name) -> canonical name.
        self._reverse_mapping: Dict[str, str] = {}
        self._load_mapping()

    def _load_mapping(self):
        """Read the mapping file; fall back to an empty mapping on failure."""
        if not os.path.exists(self.file_path):
            logger.warn(f"品牌映射文件不存在: {self.file_path}")
            return
        try:
            with open(self.file_path, "r", encoding="utf-8") as f:
                content = f.read()
            loaded = json.loads(content) if content else {}
        except (json.JSONDecodeError, IOError) as e:
            logger.error(f"加载品牌映射文件失败: {e}")
            loaded = {}
        if not isinstance(loaded, dict):
            # Valid JSON that is not an object (e.g. a list) has no
            # ``.items()`` and would otherwise crash the constructor.
            logger.error(f"品牌映射文件格式无效(顶层应为 JSON 对象): {self.file_path}")
            loaded = {}
        self._mapping = loaded
        self._build_reverse_mapping()

    def _build_reverse_mapping(self):
        """Rebuild the normalized-name -> canonical-name lookup table."""
        self._reverse_mapping = {}
        for canonical_name, aliases in self._mapping.items():
            # A bare string value is treated as a single alias; iterating
            # it directly would register every character as an alias.
            if isinstance(aliases, str):
                aliases = [aliases]
            elif not isinstance(aliases, (list, tuple)):
                aliases = []

            # The canonical name itself is also a valid lookup key.
            for candidate in (canonical_name, *aliases):
                if not isinstance(candidate, str):
                    continue
                key = normalize_brand_name(candidate)
                if key:
                    # Skip names that normalize to "" so an empty key can
                    # never capture unrelated lookups.
                    self._reverse_mapping[key] = canonical_name

    def get_canonical_name(self, name: str) -> str:
        """Return the canonical name for ``name``.

        Returns ``""`` for an empty or ``None`` input, and ``name``
        unchanged when no mapping entry matches its normalized form.
        """
        if not name:
            return ""
        normalized_name = normalize_brand_name(name)
        return self._reverse_mapping.get(normalized_name, name)
2360

2461
class BangumiMappingManager:
2562
def __init__(self, interaction_provider: InteractionProvider, file_path: str = BGM_PROP_MAPPING_PATH):
@@ -212,4 +249,4 @@ async def handle_new_key(
212249
# Default case if action is unknown or None
213250
logger.error("无效操作,将忽略此属性。")
214251
self.ignore_key_session(bangumi_key)
215-
return None
252+
return None

main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,8 @@ async def run_single_game_flow(context: dict) -> bool:
153153
secondary_tasks["ggbases_info"] = context["ggbases"].get_info_by_url_with_selenium(ggbases_url)
154154

155155
# --- 准备品牌任务 ---
156-
brand_name = detail.get("品牌")
156+
raw_brand_name = detail.get("品牌")
157+
brand_name = context["brand_mapping_manager"].get_canonical_name(raw_brand_name)
157158
brand_page_id, needs_fetching = await check_brand_status(context, brand_name)
158159
if needs_fetching and brand_name:
159160
logger.step(f"品牌 '{brand_name}' 需要抓取新信息...")

mapping/tag_fanza_to_cn.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"アナル": "肛交",
66
"ウェイトレス": "女服务员",
77
"オナニー": "自慰",
8+
"コスプレ": "角色扮演",
89
"シナリオがいい": "剧情不错",
910
"ダーク系": "暗黑系",
1011
"ツインテール": "双马尾",
@@ -36,10 +37,12 @@
3637
"恋愛": "恋爱",
3738
"悪堕ち": "恶堕",
3839
"母乳": "母乳",
40+
"水着": "泳装",
3941
"泣きゲー": "催泪",
4042
"淫乱": "淫乱",
4143
"淫語": "淫语",
4244
"演出がいい": "声优不错",
45+
"濡れスケ": "湿身透视",
4346
"田舎が舞台のゲーム": "乡下",
4447
"癒されるゲーム": "治愈",
4548
"褐色肌": "小麦肤",

mapping/tag_ignore_list.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"セット商品",
1717
"デモ・体験版あり",
1818
"ブラウザ対応",
19+
"予約作品最大20%ポイント還元キャンペーン",
1920
"初心者おすすめ",
2021
"感謝祭の最大16%ポイント還元キャンペーン 第1弾",
2122
"感謝祭の最大16%ポイント還元キャンペーン 第2弾",

utils/similarity_check.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,15 +234,15 @@ def _interactive_selection():
234234
choice, sorted_candidates = await asyncio.to_thread(_interactive_selection)
235235

236236
if choice == "s":
237-
logger.info("已选择跳过。 ולאחר מכן")
237+
logger.info("已选择跳过。")
238238
return False, cached_titles, "skip", None
239239
elif choice == "c":
240240
confirm_check = await notion_client.search_game(new_title)
241241
if confirm_check:
242-
logger.warn("注意:你选择了强制新建,但Notion中已存在完全同名的游戏,自动转为更新。 ולאחר מכן")
242+
logger.warn("注意:你选择了强制新建,但Notion中已存在完全同名的游戏,自动转为更新。")
243243
return True, cached_titles, "update", confirm_check[0].get("id")
244244
else:
245-
logger.success("确认创建为新游戏。 ולאחר מכן")
245+
logger.success("确认创建为新游戏。")
246246
return True, cached_titles, "create", None
247247
else: # 默认为 u
248248
selected_id = sorted_candidates[0][0].get("id")

utils/utils.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,26 @@
44
from datetime import datetime
55

66

7+
# Full-width ASCII variants (U+FF01..U+FF5E) sit exactly 0xFEE0 above their
# half-width counterparts; the ideographic space U+3000 maps to a plain space.
_FULLWIDTH_TO_HALFWIDTH = {code: code - 0xFEE0 for code in range(0xFF01, 0xFF5F)}
_FULLWIDTH_TO_HALFWIDTH[0x3000] = 0x20

# Punctuation treated as a separator. The brackets are escaped so the
# character class spans the whole list instead of closing early.
_BRAND_SYMBOLS_RE = re.compile(r"[\'\"`’.,!@#$%^&*()_\-+\\=\[\]{};:<>/?~]")
_WHITESPACE_RUN_RE = re.compile(r"\s+")


def normalize_brand_name(name: str) -> str:
    """Return a canonical comparison key for a brand name.

    Steps, in order: convert full-width ASCII characters and the
    ideographic space to their half-width forms, lowercase, replace the
    listed punctuation characters with spaces, then collapse runs of
    whitespace and strip both ends.

    Args:
        name: Raw brand name; an empty or ``None`` value yields ``""``.

    Returns:
        The normalized name, which may be ``""`` if nothing but
        punctuation and whitespace was present.
    """
    if not name:
        return ""

    # Full-width -> half-width.
    name = name.translate(_FULLWIDTH_TO_HALFWIDTH)

    # Case-insensitive comparison.
    name = name.lower()

    # Symbols become spaces so "a.b" and "a b" compare equal.
    name = _BRAND_SYMBOLS_RE.sub(" ", name)

    # Collapse consecutive whitespace into a single space.
    name = _WHITESPACE_RUN_RE.sub(" ", name).strip()

    return name
26+
727
def extract_main_keyword(raw_keyword):
828
pattern = re.compile(r"[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FFA-Za-z0-9\-〜~]+")
929
matches = pattern.findall(raw_keyword)
@@ -37,4 +57,4 @@ def convert_date_jp_to_iso(date_str):
3757
except ValueError:
3858
continue
3959

40-
return None
60+
return None

0 commit comments

Comments
 (0)