Skip to content

Commit 9cb08e7

Browse files
add support QWQ enable_thinking (#2706)
* add support QWQ enable_thinking * add stream=True * fix stream=true * fix qwen --------- Co-authored-by: lizexu <lizexu@baidu.com>
1 parent dacc46f commit 9cb08e7

File tree

1 file changed

+64
-48
lines changed

1 file changed

+64
-48
lines changed

fastdeploy/reasoning/qwen3_reasoning_parsers.py

Lines changed: 64 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from typing import Optional, Union
1818

1919
from fastdeploy.entrypoints.openai.protocol import (ChatCompletionRequest,
20-
DeltaMessage)
20+
DeltaMessage)
2121
from fastdeploy.reasoning import ReasoningParser, ReasoningParserManager
2222

2323

@@ -67,79 +67,95 @@ def extract_reasoning_content_streaming(
6767
- 'abc' goes to reasoning_content
6868
- 'xyz' goes to content
6969
"""
70-
# Skip single special tokens
7170
if len(delta_token_ids) == 1 and (delta_token_ids[0] in [
7271
self.think_start_token_id, self.think_end_token_id
7372
]):
7473
return "", ""
7574

76-
if self.think_start_token_id in previous_token_ids:
77-
if self.think_end_token_id in delta_token_ids:
78-
# <think> in previous, </think> in delta,
79-
# extract reasoning content
80-
end_index = delta_text.find(self.think_end_token)
81-
reasoning_content = delta_text[:end_index]
82-
content = delta_text[end_index + len(self.think_end_token):]
83-
content = content if content else None
84-
return reasoning_content, content
85-
elif self.think_end_token_id in previous_token_ids:
86-
# <think> in previous, </think> in previous,
87-
# reasoning content continues
88-
return "", delta_text
89-
else:
90-
# <think> in previous, no </think> in previous or delta,
91-
# reasoning content continues
92-
return delta_text, ""
93-
elif self.think_start_token_id in delta_token_ids:
94-
if self.think_end_token_id in delta_token_ids:
95-
# <think> in delta, </think> in delta, extract reasoning content
75+
# </think> in delta
76+
if self.think_end_token_id in delta_token_ids:
77+
#<think> in delta, </think> in delta, extract reasoning content
78+
if self.think_start_token_id in delta_token_ids:
9679
start_index = delta_text.find(self.think_start_token)
97-
end_index = delta_text.find(self.think_end_token)
80+
end_index = delta_text.find(self.think_end_token)
9881
reasoning_content = delta_text[start_index +
9982
len(self.think_start_token
10083
):end_index]
84+
content = delta_text[end_index+len(self.think_end_token):]
85+
return reasoning_content, content
86+
# <think> in previous, </think> in delta,
87+
else:
88+
end_index = delta_text.find(self.think_end_token)
89+
reasoning_content = delta_text[:end_index]
10190
content = delta_text[end_index + len(self.think_end_token):]
10291
content = content if content else None
10392
return reasoning_content, content
104-
else:
105-
# <think> in delta, no </think> in delta,
106-
# reasoning content continues
107-
return delta_text, ""
108-
else:
109-
# thinking is disabled, just content
93+
# </think> in previous reasoning content continues
94+
elif self.think_end_token_id in previous_token_ids:
11095
return "", delta_text
96+
# <think> in previous
97+
elif self.think_start_token_id in previous_token_ids:
98+
return delta_text,""
99+
# <think> in delta
100+
elif self.think_start_token_id in delta_token_ids:
101+
start_index=delta_text.find(self.think_start_token)
102+
reasoning_content=delta_text[start_index + len(self.think_start_token):]
103+
content = ""
104+
return reasoning_content, content
105+
else:
106+
return delta_text, ""
107+
108+
109+
110+
111111

112112
def extract_reasoning_content(
        self, model_output: str, request: ChatCompletionRequest
) -> tuple[Optional[str], Optional[str]]:
    """
    Extract reasoning content from the model output.

    Two formats are supported:
    1. ``<think>abc</think>xyz`` — standard format with both tags
    2. ``abc</think>xyz``        — format missing the opening tag
       (QwQ-style output where reasoning starts immediately)

    Returns:
        tuple[Optional[str], Optional[str]]: reasoning content and content
    """
    # Without a closing tag there is no reasoning section at all.
    if self.think_end_token not in model_output:
        return None, model_output

    if self.think_start_token in model_output:
        # Standard format: <think>reasoning</think>answer.
        # Drop everything up to and including the opening tag.
        _, sep, remainder = model_output.partition(self.think_start_token)
        model_output = remainder if sep else model_output
        # The closing tag may have appeared only *before* the opening
        # tag; in that case there is no reasoning content to extract.
        if self.think_end_token not in model_output:
            return None, model_output
        # Split reasoning from the final answer at the closing tag.
        reasoning_content, _, content = model_output.partition(
            self.think_end_token)
        # Empty answer text is normalized to None for the API layer.
        return reasoning_content, content or None

    # Opening tag missing: reasoning</think>answer.  Whitespace is
    # trimmed on both sides of the closing tag in this format.
    parts = model_output.split(self.think_end_token, 1)
    if len(parts) == 2:
        reasoning_content = parts[0].strip()
        stripped_content = parts[1].strip()
        return reasoning_content, stripped_content or None

    # Defensive fallback (unreachable: the end token is present above).
    return None, model_output

0 commit comments

Comments
 (0)