11# clients/fanza_client.py
22import logging
33import re
4+ from typing import Any , Dict , List
45from urllib .parse import quote , urljoin
56
67from bs4 import BeautifulSoup , Tag
@@ -20,26 +21,32 @@ async def search(self, keyword: str, limit=30):
2021 encoded_keyword = quote (keyword .encode ("utf-8" , errors = "ignore" ))
2122 url = f"/search/?service=pcgame&searchstr={ encoded_keyword } &sort=date"
2223 resp = await self .get (url , timeout = 15 , cookies = self .cookies )
23-
24+
2425 results = []
2526 if resp :
2627 soup = BeautifulSoup (resp .text , "lxml" )
2728 result_list = soup .select_one ("ul.component-legacy-productTile" )
2829 if result_list :
2930 for li in result_list .find_all ("li" , class_ = "component-legacy-productTile__item" , limit = limit ):
31+ if not isinstance (li , Tag ):
32+ continue
3033 title_tag = li .select_one (".component-legacy-productTile__title" )
3134 price_tag = li .select_one (".component-legacy-productTile__price" )
3235 url_tag = li .select_one ("a.component-legacy-productTile__detailLink" )
3336 type_tag = li .select_one (".component-legacy-productTile__relatedInfo" )
3437 item_type = type_tag .get_text (strip = True ) if type_tag else "未知"
3538
36- if not (title_tag and url_tag and url_tag .has_attr ("href" )):
39+ if not (title_tag and url_tag ):
40+ continue
41+
42+ href = url_tag .get ("href" )
43+ if not isinstance (href , str ):
3744 continue
3845
3946 title = title_tag .get_text (strip = True )
4047 price_text = price_tag .get_text (strip = True ) if price_tag else "未知"
4148 price = price_text .split ("円" )[0 ].replace ("," , "" ).strip ()
42- full_url = urljoin (self .base_url , url_tag [ " href" ] )
49+ full_url = urljoin (self .base_url , href )
4350
4451 results .append ({
4552 "title" : title , "url" : full_url ,
@@ -59,13 +66,13 @@ async def search(self, keyword: str, limit=30):
5966 if final_count > 0 :
6067 logging .info (f"✅ [Fanza] 主搜索成功,找到 { initial_count } 个原始结果,筛选后剩余 { final_count } 个游戏。" )
6168 return filtered_results
62-
69+
6370 # --- 后备搜索逻辑 (如果主搜索无结果) ---
6471 logging .warning ("⚠️ [Fanza] 主搜索 (dlsoft) 未找到结果,尝试后备搜索 (mono)..." )
65-
72+
6673 fallback_base_url = "https://www.dmm.co.jp"
6774 url_fallback = f"{ fallback_base_url } /mono/-/search/=/searchstr={ encoded_keyword } /sort=date/"
68-
75+
6976 resp_fallback = await self .get (url_fallback , timeout = 15 , cookies = self .cookies )
7077 if not resp_fallback :
7178 logging .error ("❌ [Fanza] 后备搜索请求失败。" )
@@ -79,24 +86,30 @@ async def search(self, keyword: str, limit=30):
7986 return []
8087
8188 for li in result_list_fallback .find_all ("li" , limit = limit ):
89+ if not isinstance (li , Tag ):
90+ continue
8291 url_tag = li .select_one (".tmb a" )
8392 if not url_tag : continue
84-
93+
8594 title_tag = url_tag .select_one (".txt" )
8695 price_tag = li .select_one (".price" )
8796
88- if not (title_tag and url_tag .has_attr ("href" )): continue
97+ if not (title_tag and url_tag ): continue
98+
99+ href = url_tag .get ("href" )
100+ if not isinstance (href , str ):
101+ continue
89102
90103 title = title_tag .get_text (strip = True )
91104 price_text = price_tag .get_text (strip = True ) if price_tag else "未知"
92105 price = price_text .split ("円" )[0 ].replace ("," , "" ).strip ()
93- full_url = urljoin (fallback_base_url , url_tag [ " href" ] )
106+ full_url = urljoin (fallback_base_url , href )
94107
95108 results_fallback .append ({
96109 "title" : title , "url" : full_url ,
97110 "价格" : price or "未知" , "类型" : "未知(后备)" ,
98111 })
99-
112+
100113 initial_count_fallback = len (results_fallback )
101114 filtered_results_fallback = [
102115 item for item in results_fallback
@@ -117,28 +130,32 @@ async def get_game_detail(self, url: str) -> dict:
117130
118131 try :
119132 soup = BeautifulSoup (resp .text , "lxml" )
120- details = {}
133+ details : Dict [str , Any ] = {}
134+ game_types : List [str ] = []
121135
122136 # ==================================================================
123137 # 智能解析:根据URL判断使用哪套解析逻辑
124138 # ==================================================================
125139 if "/mono/" in url :
126140 # --- 旧版/后备接口 (`/mono/`) 的解析逻辑 ---
127141 logging .info ("🔍 [Fanza] 检测到 /mono/ 链接,使用旧版表格解析器。" )
128-
142+
129143 if title_tag := soup .select_one ("h1#title" ):
130144 details ["标题" ] = title_tag .get_text (strip = True )
131-
145+
132146 if cover_tag := soup .select_one ("#sample-video img, .area-img img" ):
133147 if src := cover_tag .get ("src" ):
134- details ["封面图链接" ] = urljoin (self .base_url , src )
148+ if isinstance (src , str ):
149+ details ["封面图链接" ] = urljoin (self .base_url , src )
135150
136151 if main_table := soup .select_one ("table.mg-b20" ):
137152 rows = main_table .find_all ("tr" )
138153 for row in rows :
154+ if not isinstance (row , Tag ):
155+ continue
139156 cells = row .find_all ("td" )
140157 if len (cells ) < 2 : continue
141-
158+
142159 key = cells [0 ].get_text (strip = True )
143160 value_cell = cells [1 ]
144161
@@ -153,11 +170,9 @@ async def get_game_detail(self, url: str) -> dict:
153170 elif key .startswith ("ジャンル" ):
154171 details ["标签" ] = [a .get_text (strip = True ) for a in value_cell .find_all ("a" )]
155172 elif "ゲームジャンル" in key :
156- game_types = details .get ("作品形式" , [])
157173 genre_text = value_cell .get_text (strip = True ).upper ()
158174 for genre_key , genre_value in self ._genre_reverse_mapping .items ():
159175 if genre_key in genre_text : game_types .append (genre_value )
160- if game_types : details ["作品形式" ] = list (dict .fromkeys (game_types ))
161176 elif "ボイス" in key :
162177 if "あり" in value_cell .get_text (strip = True ):
163178 game_types = details .get ("作品形式" , [])
@@ -168,6 +183,8 @@ async def get_game_detail(self, url: str) -> dict:
168183 logging .info ("🔍 [Fanza] 未检测到 /mono/ 链接,使用新版解析器。" )
169184 if top_table := soup .select_one (".contentsDetailTop__table" ):
170185 for row in top_table .find_all ("div" , class_ = "contentsDetailTop__tableRow" ):
186+ if not isinstance (row , Tag ):
187+ continue
171188 key_tag = row .select_one (".contentsDetailTop__tableDataLeft p" )
172189 value_tag = row .select_one (".contentsDetailTop__tableDataRight" )
173190 if not (key_tag and value_tag ): continue
@@ -200,7 +217,6 @@ def extract_list(value_div: Tag | None) -> list[str]:
200217 for key in details :
201218 if isinstance (details [key ], list ): details [key ] = sorted (list (set (details [key ])))
202219
203- game_types = []
204220 if genre_div := find_row_value ("ゲームジャンル" ):
205221 genre_text = genre_div .get_text (strip = True ).upper ()
206222 for key , value in self ._genre_reverse_mapping .items ():
@@ -220,13 +236,16 @@ def extract_list(value_div: Tag | None) -> list[str]:
220236 cover_selector = (".productPreview__mainImage img, #fn-main_image, .main-visual img" )
221237 if cover_img_tag := soup .select_one (cover_selector ):
222238 if src := cover_img_tag .get ("src" ): details ["封面图链接" ] = urljoin (self .base_url , src )
223-
239+
224240 if title_tag := soup .select_one ("h1.productTitle__txt" ):
225241 details ["标题" ] = title_tag .get_text (strip = True )
226242 if price_tag := soup .select_one (".priceInformation__price" ):
227243 details ["价格" ] = price_tag .get_text (strip = True ).replace ("円" , "" ).replace ("," , "" )
228244
245+ if game_types :
246+ details ["作品形式" ] = sorted (list (dict .fromkeys (game_types )))
247+
229248 return details
230249 except Exception as e :
231250 logging .error (f"❌ [Fanza] 解析详情页失败: { e } " )
232- return {}
251+ return {}
0 commit comments