@@ -238,18 +238,31 @@ def _replace_answer_with_prompt(self, content: str) -> str:
238
238
The modified content with the answer replaced by the thinking prompt
239
239
"""
240
240
# Pattern to match <answer>...</answer> with optional EOS token
241
+ # Use non-greedy matching and be more specific about the end
241
242
answer_pattern = r"<answer>.*?</answer>(?:\s*<\|im_end\|>)?"
242
243
243
244
# Check if there's an answer tag
244
245
if "<answer>" in content :
245
246
# Replace the answer section with the thinking prompt
246
247
prompt = self .prompt
247
248
248
- # Replace the answer section
249
+ # Replace the answer section, but preserve the EOS token if it exists
249
250
modified_content = re .sub (answer_pattern , prompt , content , flags = re .DOTALL )
250
251
251
252
# Clean up any trailing whitespace
252
253
modified_content = modified_content .rstrip ()
254
+
255
+ # Ensure we end with the EOS token if the original content had it
256
+ if content .endswith ("<|im_end|>" ):
257
+ modified_content = modified_content .rstrip () + "<|im_end|>"
258
+
259
+ # Ensure proper spacing around the prompt
260
+ if not modified_content .endswith (prompt ):
261
+ # If the prompt wasn't properly inserted, append it
262
+ modified_content = content .rstrip ()
263
+ if modified_content .endswith ("<|im_end|>" ):
264
+ modified_content = modified_content [:- len ("<|im_end|>" )].rstrip ()
265
+ modified_content = modified_content + "\n \n " + prompt + "<|im_end|>"
253
266
254
267
else :
255
268
# No answer tag found, just append the prompt
0 commit comments