Skip to content

Commit bd6cf37

Browse files
authored
[Fix] Fix deepseek r1 stop tokens (#3098)
This PR removes the `deepseek_v3` conversation template and adds `deepseek_r1_qwen` and `deepseek_r1_llama`, which differ only in their stop token. Note that DeepSeek V3 has its own stop token ID, which is different from the ones used by R1.
1 parent a175d44 commit bd6cf37

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

python/mlc_llm/conversation_template/deepseek.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@
3636
)
3737
)
3838

39-
# Deepseek V3
39+
# DeepSeek-R1-Distill-Qwen
4040
ConvTemplateRegistry.register_conv_template(
4141
Conversation(
42-
name="deepseek_v3",
42+
name="deepseek_r1_qwen",
4343
system_template=f"<|begin▁of▁sentence|>{MessagePlaceholders.SYSTEM.value}",
4444
system_message="You are a helpful assistant.",
4545
roles={"user": "<|User|>", "assistant": "<|Assistant|>"},
@@ -49,3 +49,17 @@
4949
stop_token_ids=[151643],
5050
)
5151
)
52+
53+
# DeepSeek-R1-Distill-Llama: exactly the same as DeepSeek-R1-Distill-Qwen, but with a different stop token
54+
ConvTemplateRegistry.register_conv_template(
55+
Conversation(
56+
name="deepseek_r1_llama",
57+
system_template=f"<|begin▁of▁sentence|>{MessagePlaceholders.SYSTEM.value}",
58+
system_message="You are a helpful assistant.",
59+
roles={"user": "<|User|>", "assistant": "<|Assistant|>"},
60+
seps=["", "<|end▁of▁sentence|>"],
61+
role_content_sep="",
62+
role_empty_sep="",
63+
stop_token_ids=[128001],
64+
)
65+
)

python/mlc_llm/interface/gen_config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,8 @@ def gen_config( # pylint: disable=too-many-locals,too-many-arguments,too-many-b
309309
"aya-23",
310310
"deepseek",
311311
"deepseek_v2",
312-
"deepseek_v3",
312+
"deepseek_r1_qwen",
313+
"deepseek_r1_llama",
313314
"olmo",
314315
"nemotron",
315316
}

0 commit comments

Comments
 (0)