feat: 增强品牌匹配并修复客户端问题

baiheyyds · baiheyyds · commit 7ff70e64b712 · 2025-10-02T19:19:39.000+08:00
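- 增强 Fanza 客户端:
    - 优先使用 og:image 提取封面图，兼容未发售游戏。

- 实现智能品牌匹配:
    - 新增 normalize_brand_name 与 BrandMappingManager，按归一化后的名称和别名映射匹配 Notion 中的品牌。

- 修复日志乱码:
    - 移除 similarity_check.py 日志消息末尾误混入的希伯来文字符。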
diff --git a/clients/fanza_client.py b/clients/fanza_client.py
@@ -150,9 +150,19 @@ def extract_list(value_div: Tag | None) -> list[str]:
                 if tags_div := find_row_value("ジャンル"):
                     details["标签"] = [a.get_text(strip=True) for a in tags_div.select("li a")]
 
-            if cover_img_tag := soup.select_one(".productPreview__mainImage img, #fn-main_image"):
-                if cover_img_tag.has_attr("src"):
-                    details["封面图链接"] = urljoin(self.base_url, cover_img_tag["src"])
+            # Prefer og:image. A meta tag without a usable content attribute
+            # falls through to the selectors instead of raising KeyError.
+            og_tag = soup.find("meta", property="og:image")
+            if og_tag and (og_src := og_tag.get("content")):
+                details["封面图链接"] = urljoin(self.base_url, og_src)
+            else:
+                # Legacy selectors, plus .main-visual for unreleased titles.
+                cover_selector = (
+                    ".productPreview__mainImage img, #fn-main_image, .main-visual img"
+                )
+                if cover_img_tag := soup.select_one(cover_selector):
+                    if src := cover_img_tag.get("src"):
+                        details["封面图链接"] = urljoin(self.base_url, src)
             if title_tag := soup.select_one("h1.productTitle__txt"):
                 details["标题"] = title_tag.get_text(strip=True)
             if price_tag := soup.select_one(".priceInformation__price"):
diff --git a/clients/notion_client.py b/clients/notion_client.py
@@ -9,7 +9,7 @@
 
 from config.config_fields import FIELDS
 from utils import logger
-from utils.utils import convert_date_jp_to_iso
+from utils.utils import convert_date_jp_to_iso, normalize_brand_name
 
 
 class NotionClient:
@@ -23,6 +23,7 @@ def __init__(self, token, game_db_id, brand_db_id, client: httpx.AsyncClient):
             "Notion-Version": "2022-06-28",
             "Content-Type": "application/json",
         }
+        self._all_brands_cache = None
 
     async def _request(self, method, url, json_data=None):
         try:
@@ -88,29 +89,39 @@ async def check_page_exists(self, page_id):
         except Exception:
             return False
 
-    async def search_brand(self, brand_name):
-        url = f"https://api.notion.com/v1/databases/{self.brand_db_id}/query"
-        payload = {"filter": {"property": FIELDS["brand_name"], "title": {"equals": brand_name}}}
-        resp = await self._request("POST", url, payload)
-        return resp.get("results", []) if resp else []
+    async def get_all_brands(self) -> list[dict]:
+        if self._all_brands_cache is not None:
+            return self._all_brands_cache
+        # Cache miss: build the list from get_all_pages_from_db; pages with an empty title are skipped.
+        all_pages = await self.get_all_pages_from_db(self.brand_db_id)
+        brands = []
+        for page in all_pages:
+            props = page.get("properties", {})
+            title_data = props.get(FIELDS["brand_name"], {}).get("title", [])
+            title = "".join([t.get("plain_text", "") for t in title_data]).strip()
+            if title:
+                brands.append({"title": title, "id": page["id"]})
+        self._all_brands_cache = brands
+        return brands
 
     async def get_brand_details_by_name(self, name: str) -> dict | None:
         """根据品牌名称查找品牌，并返回其ID和图标状态。"""
-        results = await self.search_brand(name)
-        if not results:
+        if not name:
             return None
-
-        # 假设第一个结果是正确的
-        page = results[0]
-        page_id = page.get("id")
-        properties = page.get("properties", {})
-        icon_prop = properties.get(FIELDS["brand_icon"], {})
-
-        # 修正：仅当品牌logo文件（files属性）存在时，才认为已有图标。
-        # 移除 or bool(page.get("icon"))，避免将页面的Emoji误判为有效图标。
-        has_icon = bool(icon_prop.get("files"))
-
-        return {"page_id": page_id, "has_icon": has_icon}
+        # Match on normalized names rather than exact titles.
+        all_brands = await self.get_all_brands()
+        normalized_name = normalize_brand_name(name)
+        # Cached entries carry only title and id, so the page is fetched to read the icon property.
+        for brand in all_brands:
+            if normalize_brand_name(brand["title"]) == normalized_name:
+                page = await self.get_page(brand["id"])
+                if not page:
+                    continue
+                properties = page.get("properties", {})
+                icon_prop = properties.get(FIELDS["brand_icon"], {})
+                has_icon = bool(icon_prop.get("files"))
+                return {"page_id": brand["id"], "has_icon": has_icon}
+        return None
 
     async def get_all_game_titles(self):
         url = f"https://api.notion.com/v1/databases/{self.game_db_id}/query"
@@ -399,8 +410,9 @@ async def create_or_update_game(self, properties_schema: dict, page_id=None, **i
 
     async def create_or_update_brand(self, brand_name, page_id=None, **info):
         if not page_id:
-            existing = await self.search_brand(brand_name)
-            page_id = existing[0]["id"] if existing else None
+            brand_details = await self.get_brand_details_by_name(brand_name)
+            if brand_details:
+                page_id = brand_details.get("page_id")  # helper returns "page_id", not "id"
 
         schema_data = await self.get_database_schema(self.brand_db_id)
         if not schema_data:
diff --git a/core/context_factory.py b/core/context_factory.py
@@ -11,7 +11,7 @@
 from clients.notion_client import NotionClient
 from config.config_token import BRAND_DB_ID, CHARACTER_DB_ID, GAME_DB_ID, NOTION_TOKEN
 from core.interaction import InteractionProvider
-from core.mapping_manager import BangumiMappingManager
+from core.mapping_manager import BangumiMappingManager, BrandMappingManager
 from core.name_splitter import NameSplitter
 from core.schema_manager import NotionSchemaManager
 from utils import logger
@@ -30,6 +30,7 @@ def create_shared_context():
     # 管理器是共享的
     tag_manager = TagManager()
     name_splitter = NameSplitter()
+    brand_mapping_manager = BrandMappingManager()
 
     brand_cache = BrandCache()
     brand_cache.load_cache()
@@ -43,6 +44,7 @@ def create_shared_context():
         "data_manager": data_manager,
         "tag_manager": tag_manager,
         "name_splitter": name_splitter,
+        "brand_mapping_manager": brand_mapping_manager,
     }
 
 
diff --git a/core/gui_worker.py b/core/gui_worker.py
@@ -257,7 +257,8 @@ async def game_flow(self) -> bool:
                         secondary_tasks["ggbases_info"] = self.context["ggbases"].get_info_by_url_with_selenium(ggbases_url)
 
             # --- 准备品牌任务 ---
-            brand_name = detail.get("品牌")
+            raw_brand_name = detail.get("品牌")
+            brand_name = self.context["brand_mapping_manager"].get_canonical_name(raw_brand_name)
             brand_page_id, needs_fetching = await check_brand_status(self.context, brand_name)
             if needs_fetching and brand_name:
                 logger.step(f"品牌 '{brand_name}' 需要抓取新信息...")
diff --git a/core/mapping_manager.py b/core/mapping_manager.py
@@ -8,18 +8,55 @@
 from utils.similarity_check import get_close_matches_with_ratio
 from utils import logger
 from core.interaction import InteractionProvider
+from utils.utils import normalize_brand_name
 
 
 MAPPING_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "mapping")
 BGM_PROP_MAPPING_PATH = os.path.join(MAPPING_DIR, "bangumi_prop_mapping.json")
 BGM_IGNORE_LIST_PATH = os.path.join(MAPPING_DIR, "bangumi_ignore_list.json")
+BRAND_MAPPING_PATH = os.path.join(MAPPING_DIR, "brand_mapping.json")
 
 DB_ID_TO_NAMESPACE = {
     GAME_DB_ID: "games",
     CHARACTER_DB_ID: "characters",
     BRAND_DB_ID: "brands",
 }
 
+class BrandMappingManager:
+    """Map brand aliases to canonical names, as defined in a JSON mapping file."""
+
+    def __init__(self, file_path: str = BRAND_MAPPING_PATH):
+        self.file_path = file_path
+        self._mapping: Dict[str, List[str]] = {}
+        self._reverse_mapping: Dict[str, str] = {}
+        self._load_mapping()
+
+    def _load_mapping(self):
+        if not os.path.exists(self.file_path):
+            logger.warn(f"品牌映射文件不存在: {self.file_path}")
+            return
+        try:
+            with open(self.file_path, "r", encoding="utf-8") as f:
+                content = f.read()
+            loaded = json.loads(content) if content else {}
+            # A non-object top level would crash .items() below; treat it as empty.
+            self._mapping = loaded if isinstance(loaded, dict) else {}
+        except (json.JSONDecodeError, IOError) as e:
+            logger.error(f"加载品牌映射文件失败: {e}")
+            self._mapping = {}
+        self._build_reverse_mapping()
+
+    def _build_reverse_mapping(self):
+        self._reverse_mapping = {}
+        for canonical_name, aliases in self._mapping.items():
+            # A bare string alias would otherwise be iterated character by character.
+            aliases = [aliases] if isinstance(aliases, str) else aliases or []
+            for alias in (canonical_name, *aliases):
+                self._reverse_mapping[normalize_brand_name(alias)] = canonical_name
+
+    def get_canonical_name(self, name: str) -> str:
+        """Return the canonical name for *name*; unmapped names pass through, falsy gives an empty string."""
+        return self._reverse_mapping.get(normalize_brand_name(name), name) if name else ""
 
 class BangumiMappingManager:
     def __init__(self, interaction_provider: InteractionProvider, file_path: str = BGM_PROP_MAPPING_PATH):
@@ -212,4 +249,4 @@ async def handle_new_key(
             # Default case if action is unknown or None
             logger.error("无效操作，将忽略此属性。")
             self.ignore_key_session(bangumi_key)
-            return None
+            return None
diff --git a/main.py b/main.py
@@ -153,7 +153,8 @@ async def run_single_game_flow(context: dict) -> bool:
                     secondary_tasks["ggbases_info"] = context["ggbases"].get_info_by_url_with_selenium(ggbases_url)
 
         # --- 准备品牌任务 ---
-        brand_name = detail.get("品牌")
+        raw_brand_name = detail.get("品牌")
+        brand_name = context["brand_mapping_manager"].get_canonical_name(raw_brand_name)
         brand_page_id, needs_fetching = await check_brand_status(context, brand_name)
         if needs_fetching and brand_name:
             logger.step(f"品牌 '{brand_name}' 需要抓取新信息...")
diff --git a/mapping/tag_fanza_to_cn.json b/mapping/tag_fanza_to_cn.json
@@ -5,6 +5,7 @@
   "アナル": "肛交",
   "ウェイトレス": "女服务员",
   "オナニー": "自慰",
+  "コスプレ": "角色扮演",
   "シナリオがいい": "剧情不错",
   "ダーク系": "暗黑系",
   "ツインテール": "双马尾",
@@ -36,10 +37,12 @@
   "恋愛": "恋爱",
   "悪堕ち": "恶堕",
   "母乳": "母乳",
+  "水着": "泳装",
   "泣きゲー": "催泪",
   "淫乱": "淫乱",
   "淫語": "淫语",
   "演出がいい": "声优不错",
+  "濡れスケ": "湿身透视",
   "田舎が舞台のゲーム": "乡下",
   "癒されるゲーム": "治愈",
   "褐色肌": "小麦肤",
diff --git a/mapping/tag_ignore_list.json b/mapping/tag_ignore_list.json
@@ -16,6 +16,7 @@
   "セット商品",
   "デモ・体験版あり",
   "ブラウザ対応",
+  "予約作品最大20%ポイント還元キャンペーン",
   "初心者おすすめ",
   "感謝祭の最大16%ポイント還元キャンペーン 第1弾",
   "感謝祭の最大16%ポイント還元キャンペーン 第2弾",
diff --git a/utils/similarity_check.py b/utils/similarity_check.py
@@ -234,15 +234,15 @@ def _interactive_selection():
     choice, sorted_candidates = await asyncio.to_thread(_interactive_selection)
 
     if choice == "s":
-        logger.info("已选择跳过。 ולאחר מכן")
+        logger.info("已选择跳过。")
         return False, cached_titles, "skip", None
     elif choice == "c":
         confirm_check = await notion_client.search_game(new_title)
         if confirm_check:
-            logger.warn("注意：你选择了强制新建，但Notion中已存在完全同名的游戏，自动转为更新。 ולאחר מכן")
+            logger.warn("注意：你选择了强制新建，但Notion中已存在完全同名的游戏，自动转为更新。")
             return True, cached_titles, "update", confirm_check[0].get("id")
         else:
-            logger.success("确认创建为新游戏。 ולאחר מכן")
+            logger.success("确认创建为新游戏。")
             return True, cached_titles, "create", None
     else:  # 默认为 u
         selected_id = sorted_candidates[0][0].get("id")
diff --git a/utils/utils.py b/utils/utils.py
@@ -4,6 +4,26 @@
 from datetime import datetime
 
 
+def normalize_brand_name(name: str) -> str:
+    """Return a normalized key for comparing brand names.
+
+    Full-width ASCII (and the ideographic space) becomes half-width, letters are
+    lowercased, ASCII punctuation becomes a space, and whitespace runs collapse
+    to a single space. Falsy input yields an empty string.
+    """
+    if not name:
+        return ""
+    full_width_chars = "０１２３４５６７８９ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ！＂＃＄％＆＇（）＊＋，－．／：；＜＝＞？＠［＼］＾＿｀｛｜｝～　"
+    half_width_chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ "
+    name = name.translate(str.maketrans(full_width_chars, half_width_chars)).lower()
+
+    # "[" and "]" must be escaped inside the class: unescaped, the first "]" ended
+    # the class early, so punctuation was effectively never stripped.
+    name = re.sub(r'[\'"`’.,!@#$%^&*()_\-+\\=\[\]{};:<>/?~]', ' ', name)
+
+    # Collapse the spaces left behind by the substitution.
+    return re.sub(r'\s+', ' ', name).strip()
+
 def extract_main_keyword(raw_keyword):
     pattern = re.compile(r"[\u4E00-\u9FFF\u3040-\u309F\u30A0-\u30FFA-Za-z0-9\-〜～]+")
     matches = pattern.findall(raw_keyword)
@@ -37,4 +57,4 @@ def convert_date_jp_to_iso(date_str):
         except ValueError:
             continue
 
-    return None
+    return None