@@ -51,6 +51,34 @@ def _add_exception(self, name: str):
5151 self ._exceptions .add (name )
5252 logger .info (f"已在内存中将 '{ name } ' 标记为本次运行的例外。" )
5353
54+ def _post_process_parts (self , parts : List [str ]) -> List [str ]:
55+ """
56+ 对分割后的部分进行后处理,自动合并 "J・さいろー" 或 "神・无月" 这样的模式。
57+ """
58+ if len (parts ) < 2 :
59+ return parts
60+
61+ new_parts = []
62+ i = 0
63+ while i < len (parts ):
64+ current_part = parts [i ]
65+ # --- 核心改进:检查是否为任意类型的单个字符 ---
66+ if len (current_part ) == 1 :
67+ # 如果后面还有部分,则合并
68+ if i + 1 < len (parts ):
69+ next_part = parts [i + 1 ]
70+ merged_part = f"{ current_part } ・{ next_part } "
71+ new_parts .append (merged_part )
72+ i += 2 # 跳过下一个部分,因为它已经被合并
73+ else :
74+ # 这是最后一部分,无法合并,照常添加
75+ new_parts .append (current_part )
76+ i += 1
77+ else :
78+ new_parts .append (current_part )
79+ i += 1
80+ return new_parts
81+
5482 async def smart_split (self , text : str , interaction_provider : InteractionProvider ) -> List [str ]:
5583 """
5684 智能分割名称字符串。
@@ -59,8 +87,6 @@ async def smart_split(self, text: str, interaction_provider: InteractionProvider
5987 if not text :
6088 return []
6189
62- # --- [核心升级 3] 名称标准化 ---
63- # 将所有内部空白(包括全角空格)统一替换为单个标准空格
6490 def normalize (name : str ) -> str :
6591 return re .sub (r'\s+' , ' ' , name ).strip ()
6692
@@ -70,36 +96,42 @@ def normalize(name: str) -> str:
7096 parts = SPLIT_REGEX .split (text )
7197 cleaned_parts = [normalize (p ) for p in parts if p .strip ()]
7298
73- if len (cleaned_parts ) <= 1 :
74- return cleaned_parts
99+ # --- [核心升级 2] 启发式识别:处理 '名字A・名字B' 模式 ---
100+ # 如果分割结果为三部分,且中间部分为单个字符,则极有可能是完整的姓名
101+ if len (cleaned_parts ) == 3 and len (cleaned_parts [1 ]) == 1 and (len (cleaned_parts [0 ]) > 1 or len (cleaned_parts [2 ]) > 1 ):
102+ logger .info (f"检测到 '名字・首字母・名字' 模式,自动合并: { text } " )
103+ return [normalize (text )]
104+
105+ # 在风险识别前,先进行智能后处理
106+ processed_parts = self ._post_process_parts (cleaned_parts )
107+
108+ if len (processed_parts ) <= 1 :
109+ return processed_parts
75110
76- # --- [核心升级 2] 增强风险识别 ---
77- # 规则1: 分割后出现过短的部分 (例如: 'S')
78- is_dangerous = any (len (p ) <= 1 for p in cleaned_parts )
111+ # 增强风险识别 (现在基于后处理的结果)
112+ is_dangerous = any (len (p ) <= 1 for p in processed_parts )
79113
80- # 规则2: 由'・'分割的全英文名称 (例如: 'Ryo・Lion')
81114 is_alpha_dot_split = False
82- if not is_dangerous and '・' in text and len (cleaned_parts ) > 1 :
83- if all (re .fullmatch (r'[a-zA-Z]+' , p ) for p in cleaned_parts ):
115+ if not is_dangerous and '・' in text and len (processed_parts ) > 1 :
116+ if all (re .fullmatch (r'[a-zA-Z]+' , p ) for p in processed_parts ):
84117 is_alpha_dot_split = True
85118
86119 if not is_dangerous and not is_alpha_dot_split :
87- return cleaned_parts
120+ return processed_parts
88121
89122 # --- Interactive part ---
90- choice = "keep" # Default action
123+ choice = "keep"
91124 save_exception = False
92125
93126 if interaction_provider :
94- # TODO: 将增强的风险原因传递给GUI
95- decision = await interaction_provider .get_name_split_decision (text , cleaned_parts )
127+ decision = await interaction_provider .get_name_split_decision (text , processed_parts )
96128 choice = decision .get ("action" , "keep" )
97129 save_exception = decision .get ("save_exception" , False )
98130 else :
99131 # CLI Fallback
100132 def _get_input ():
101133 logger .warn (f"检测到【高风险】的名称分割: '{ text } '" )
102- print (f" 初步分割为: { cleaned_parts } " )
134+ print (f" 初步分割为: { processed_parts } " )
103135 if is_alpha_dot_split :
104136 print (" 原因: 检测到由'・'分割的纯英文名称,这可能是一个完整的名字。" )
105137 else :
@@ -128,7 +160,7 @@ def _get_save_confirmation():
128160
129161 # --- Process decision ---
130162 if choice == "split" :
131- return cleaned_parts
163+ return processed_parts
132164 else : # "keep"
133165 logger .info (f"用户选择不分割 '{ text } '。" )
134166 if save_exception :
0 commit comments