Commit e89a484

[Fix] Fix gemma 3 conv template stop token (#3191)
Currently, all the Gemma 3 instruction models reuse the previous Gemma conv template. However, the stop tokens differ:

- In Gemma 2, the stop tokens are 1, 107: https://huggingface.co/google/gemma-2-2b-it/blob/main/generation_config.json
- In Gemma 3, the stop tokens are 1, 106: https://huggingface.co/google/gemma-3-1b-it/blob/main/generation_config.json

This PR adds a new `gemma3_instruction` conv template; the already-uploaded Gemma 3 models on HF will be updated manually. In Gemma 3, token 107 is `\n`, so with the old template the model ends generation whenever it tries to move to a new line.
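The effect of the wrong stop-token list can be sketched with a minimal check like the one a generation loop performs. The `should_stop` helper below is illustrative, not MLC-LLM's actual code; the id-to-string mapping in the comments follows the commit message:

```python
# Illustrative sketch (not MLC-LLM code): per the commit message, in the
# Gemma 3 tokenizer id 106 is "<end_of_turn>" and id 107 is "\n".

GEMMA2_STOP_IDS = {1, 107}  # correct for Gemma 2
GEMMA3_STOP_IDS = {1, 106}  # correct for Gemma 3 (this commit's fix)

def should_stop(token_id: int, stop_token_ids: set) -> bool:
    """Return True when the sampled token id is a configured stop token."""
    return token_id in stop_token_ids

# With the Gemma 2 list applied to Gemma 3, a newline (id 107) is
# wrongly treated as a stop token, ending generation at the first
# line break; with the fixed list, only "<end_of_turn>" (id 106) stops.
assert should_stop(107, GEMMA2_STOP_IDS)      # the bug: stops on "\n"
assert not should_stop(107, GEMMA3_STOP_IDS)  # fixed: "\n" continues
assert should_stop(106, GEMMA3_STOP_IDS)      # "<end_of_turn>" stops
```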
1 parent 2538b87 commit e89a484

File tree (1 file changed, +16 −0 lines):

  • python/mlc_llm/conversation_template


python/mlc_llm/conversation_template/gemma.py

Lines changed: 16 additions & 0 deletions
@@ -19,3 +19,19 @@
         system_prefix_token_ids=[2],
     )
 )
+
+# Gemma 3 Instruction. Same as gemma_instruction but with different stop token id
+ConvTemplateRegistry.register_conv_template(
+    Conversation(
+        name="gemma3_instruction",
+        system_template=f"{MessagePlaceholders.SYSTEM.value}",
+        system_message="",
+        roles={"user": "<start_of_turn>user", "assistant": "<start_of_turn>model"},
+        seps=["<end_of_turn>\n"],
+        role_content_sep="\n",
+        role_empty_sep="\n",
+        stop_str=["<end_of_turn>"],
+        stop_token_ids=[1, 106],
+        system_prefix_token_ids=[2],
+    )
+)
