Skip to content

Commit 58485a5

Browse files
committed
[Model] Add reason parser for Hunyuan A13B Model.
- A new `hunyuan_a13b` reasoning parser was added. Because the upcoming model will use a special token for the think state, this parser is only for the A13B model. - For non-streaming mode, a regex is used to extract the reasoning part and the response part. - For streaming mode, a token-ID-based state machine controls the state changes. - Add test cases. Signed-off-by: Asher Zhang <asherszhang@tencent.com>
1 parent 71d1d75 commit 58485a5

File tree

3 files changed

+411
-0
lines changed

3 files changed

+411
-0
lines changed
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
import pytest
3+
from transformers import AutoTokenizer
4+
5+
from tests.reasoning.utils import run_reasoning_extraction
6+
from vllm.reasoning import ReasoningParser, ReasoningParserManager
7+
8+
# Name under which the parser is looked up in ReasoningParserManager.
parser_name = "hunyuan_a13b"

# Literal markers that delimit the reasoning and answer sections in the
# raw model output used by the fixtures below.
START_REASONING = "<think>\n"
START_RESPONSE = "\n</think>\n<answer>\n"
END_RESPONSE = "\n</answer>"

# Every fixture maps a raw "output" string to the "reasoning_content" and
# "content" values the test expects to get back (None means "nothing").

# Reasoning section is opened and closed immediately, then a full answer.
NO_REASONING_QUICK_THROUGHT = dict(
    output=START_REASONING + START_RESPONSE + "This is the rest" +
    END_RESPONSE,
    reasoning_content=None,
    content="This is the rest",
)

# Reasoning followed by a fully terminated answer.
SIMPLE_REASONING = dict(
    output=START_REASONING + "This is a reasoning section" + START_RESPONSE +
    "This is the rest" + END_RESPONSE,
    reasoning_content="This is a reasoning section",
    content="This is the rest",
)
# Reasoning only: the answer section is opened but left empty.
COMPLETE_REASONING = dict(
    output=START_REASONING + "This is a reasoning section" + START_RESPONSE,
    reasoning_content="This is a reasoning section",
    content=None,
)
# Plain text with none of the markers.
NO_REASONING = dict(
    output="This is content",
    reasoning_content=None,
    content="This is content",
)
# Newlines inside both sections; the answer is not terminated.
MULTIPLE_LINES = dict(
    output=START_REASONING + "This\nThat" + START_RESPONSE +
    "This is the rest\nThat",
    reasoning_content="This\nThat",
    content="This is the rest\nThat",
)
# Same as SIMPLE_REASONING but without the closing answer marker.
REASONING_WITH_THINK = dict(
    output=START_REASONING + "This is a reasoning section" + START_RESPONSE +
    "This is the rest",
    reasoning_content="This is a reasoning section",
    content="This is the rest",
)
# Same values as COMPLETE_REASONING, kept as a separate fixture object.
COMPLETE_REASONING_WITH_THINK = dict(
    output=START_REASONING + "This is a reasoning section" + START_RESPONSE,
    reasoning_content="This is a reasoning section",
    content=None,
)
# Same values as MULTIPLE_LINES, kept as a separate fixture object.
MULTIPLE_LINES_WITH_THINK = dict(
    output=START_REASONING + "This\nThat" + START_RESPONSE +
    "This is the rest\nThat",
    reasoning_content="This\nThat",
    content="This is the rest\nThat",
)
59+
60+
# (fixture, pytest id) pairs that are run with streaming=False.
_NON_STREAMING_CASES = [
    (SIMPLE_REASONING, "simple_reasoning"),
    (COMPLETE_REASONING, "complete_reasoning"),
    (NO_REASONING, "no_reasoning"),
    (NO_REASONING_QUICK_THROUGHT, "no_reasoning_quick"),
    (MULTIPLE_LINES, "multiple_lines"),
    (REASONING_WITH_THINK, "reasoning_with_think"),
    (COMPLETE_REASONING_WITH_THINK, "complete_reasoning_with_think"),
    (MULTIPLE_LINES_WITH_THINK, "multiple_lines_with_think"),
]

# The same fixtures, in the same order, run with streaming=True. The ids
# are spelled out rather than derived because one of them
# ("no_reasoning_quick_stream") does not follow the "_streaming" suffix.
_STREAMING_CASES = [
    (SIMPLE_REASONING, "simple_reasoning_streaming"),
    (COMPLETE_REASONING, "complete_reasoning_streaming"),
    (NO_REASONING, "no_reasoning_streaming"),
    (NO_REASONING_QUICK_THROUGHT, "no_reasoning_quick_stream"),
    (MULTIPLE_LINES, "multiple_lines_streaming"),
    (REASONING_WITH_THINK, "reasoning_with_think_streaming"),
    (COMPLETE_REASONING_WITH_THINK, "complete_reasoning_with_think_streaming"),
    (MULTIPLE_LINES_WITH_THINK, "multiple_lines_with_think_streaming"),
]

# Parametrization table for test_reasoning: (streaming, param_dict) values,
# all non-streaming entries first, then all streaming entries.
TEST_CASES = [
    pytest.param(False, case, id=case_id)
    for case, case_id in _NON_STREAMING_CASES
] + [
    pytest.param(True, case, id=case_id)
    for case, case_id in _STREAMING_CASES
]
136+
137+
# The tokenizer is created once at module import and shared by every
# parametrized test case, so it is not reloaded for each case.
_TOKENIZER_MODEL_ID = "tencent/Hunyuan-A13B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(
    _TOKENIZER_MODEL_ID,
    trust_remote_code=True,
)
140+
141+
142+
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
):
    """Check the parser's reasoning/content split for one fixture.

    Args:
        streaming: Forwarded to ``run_reasoning_extraction`` as its
            ``streaming`` keyword.
        param_dict: Fixture with the raw ``"output"`` text and the expected
            ``"reasoning_content"`` and ``"content"`` values.
    """
    # Tokenize the raw output, then turn every token back into its own
    # text piece; the extraction helper is given this list of pieces.
    token_texts: list[str] = []
    for token in tokenizer.tokenize(param_dict["output"]):
        token_texts.append(tokenizer.convert_tokens_to_string([token]))

    # Look up the parser class by its registered name and instantiate it
    # with the shared tokenizer.
    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(tokenizer)

    reasoning, content = run_reasoning_extraction(
        parser,
        token_texts,
        streaming=streaming,
    )

    assert reasoning == param_dict["reasoning_content"]
    assert content == param_dict["content"]

vllm/reasoning/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
from .abs_reasoning_parsers import ReasoningParser, ReasoningParserManager
55
from .deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
66
from .granite_reasoning_parser import GraniteReasoningParser
7+
from .hunyuan_a13b_reasoning_parser import HunyuanA13BReasoningParser
78
from .qwen3_reasoning_parser import Qwen3ReasoningParser
89

910
# Names re-exported by this package.
__all__ = [
    # Imported from .abs_reasoning_parsers.
    "ReasoningParser",
    "ReasoningParserManager",
    # One parser class per model-specific module.
    "DeepSeekR1ReasoningParser",
    "GraniteReasoningParser",
    "HunyuanA13BReasoningParser",
    "Qwen3ReasoningParser",
]

0 commit comments

Comments
 (0)