
[Bugfix] Fix Qwen2 audio chat template for old version transformers compatibility #20826

Open · wants to merge 3 commits into base: main
38 changes: 24 additions & 14 deletions vllm/entrypoints/chat_utils.py
@@ -42,8 +42,7 @@
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
from vllm.multimodal.utils import MediaConnector
# yapf: disable
from vllm.transformers_utils.chat_templates import (
get_chat_template_fallback_path)
from vllm.transformers_utils.chat_templates import get_chat_template_fallback
# yapf: enable
from vllm.transformers_utils.processor import cached_get_processor
from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
@@ -385,29 +384,40 @@ def resolve_hf_chat_template(
ProcessorMixin),
trust_remote_code=model_config.trust_remote_code,
)
if isinstance(processor, ProcessorMixin) and \
hasattr(processor, 'chat_template') and \
processor.chat_template is not None:
return processor.chat_template
if (isinstance(processor, ProcessorMixin) and
hasattr(processor, 'chat_template') and
processor.chat_template is not None):
chat_template = processor.chat_template
except Exception:
logger.debug("Failed to load AutoProcessor chat template for %s", tokenizer.name_or_path, exc_info=True) # noqa: E501

# 3rd priority: AutoTokenizer chat template
try:
return tokenizer.get_chat_template(chat_template, tools=tools)
except Exception:
logger.debug("Failed to load AutoTokenizer chat template for %s",
tokenizer.name_or_path, exc_info=True)
if chat_template is None:
try:
chat_template = tokenizer.get_chat_template(chat_template,
tools=tools)
except Exception:
logger.debug("Failed to load AutoTokenizer chat template for %s",
tokenizer.name_or_path, exc_info=True)

# 4th priority: Predefined fallbacks
path = get_chat_template_fallback_path(
fallback_info = get_chat_template_fallback(
model_type=model_config.hf_config.model_type,
tokenizer_name_or_path=model_config.tokenizer,
)
if path is not None:
path = (fallback_info.get_path(model_config.tokenizer)
if fallback_info is not None else None)
if path is not None and chat_template is None:
logger.info("Loading chat template fallback for %s as there isn't one "
"defined on HF Hub.", tokenizer.name_or_path)
chat_template = load_chat_template(path)
elif fallback_info is not None and fallback_info.override_exists:
chat_template_to_override = load_chat_template(path)
if chat_template_to_override != chat_template:
chat_template = chat_template_to_override
logger.warning("Override existing chat template for %s as "
"its defined ones on HF Hub is not compatible "
"with OpenAI server.",
tokenizer.name_or_path)
Comment on lines +409 to +420
Contributor

high

To prevent a potential TypeError, ensure path is not None before calling load_chat_template(path). If fallback_info.get_path() returns None, load_chat_template(None) would raise an error.

Suggested change
if path is not None and chat_template is None:
logger.info("Loading chat template fallback for %s as there isn't one "
"defined on HF Hub.", tokenizer.name_or_path)
chat_template = load_chat_template(path)
elif fallback_info is not None and fallback_info.override_exists:
chat_template_to_override = load_chat_template(path)
if chat_template_to_override != chat_template:
chat_template = chat_template_to_override
logger.warning("Override existing chat template for %s as "
"its defined ones on HF Hub is not compatible "
"with OpenAI server.",
tokenizer.name_or_path)
# 4th priority: Predefined fallbacks
fallback_info = get_chat_template_fallback(
model_type=model_config.hf_config.model_type,
)
path = (fallback_info.get_path(model_config.tokenizer)
if fallback_info is not None else None)
if path is not None and chat_template is None:
logger.info("Loading chat template fallback for %s as there isn't one "
"defined on HF Hub.", tokenizer.name_or_path)
chat_template = load_chat_template(path)
elif fallback_info is not None and fallback_info.override_exists and path is not None:
chat_template_to_override = load_chat_template(path)
if chat_template_to_override != chat_template:
chat_template = chat_template_to_override
logger.warning("Override existing chat template for %s as "
"its defined ones on HF Hub is not compatible "
"with OpenAI server.",
tokenizer.name_or_path)

else:
logger.debug("There is no chat template fallback for %s",
tokenizer.name_or_path)
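
To make the new control flow easier to follow, here is a self-contained sketch of the fallback/override resolution this hunk introduces (an illustration only, not the exact vLLM code; load stands in for load_chat_template, and it includes the path-is-not-None guard suggested in the review comment above):

from typing import Callable, Optional

def resolve_with_fallback(chat_template: Optional[str],
                          fallback_path: Optional[str],
                          override_exists: bool,
                          load: Callable[[str], str]) -> Optional[str]:
    # The fallback only fires when no template was resolved from HF Hub.
    if fallback_path is not None and chat_template is None:
        return load(fallback_path)
    # An override-style fallback replaces a Hub-defined template that is
    # known to be incompatible with the OpenAI-compatible server.
    if fallback_path is not None and override_exists:
        candidate = load(fallback_path)
        if candidate != chat_template:
            return candidate
    return chat_template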
5 changes: 3 additions & 2 deletions vllm/transformers_utils/chat_templates/__init__.py
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from .registry import get_chat_template_fallback_path
from .registry import (get_chat_template_fallback,
get_chat_template_fallback_path)

__all__ = ["get_chat_template_fallback_path"]
__all__ = ["get_chat_template_fallback", "get_chat_template_fallback_path"]
52 changes: 38 additions & 14 deletions vllm/transformers_utils/chat_templates/registry.py
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Optional, Union

@@ -20,41 +21,64 @@ def _get_qwen_chat_template_fallback(
return CHAT_TEMPLATES_DIR / "template_basic.jinja"


@dataclass(frozen=True)
class TemplateInfo:
path: ChatTemplatePath
override_exists: bool = False

def get_path(self, tokenizer_name_or_path: str) -> Optional[Path]:
if callable(self.path):
return self.path(tokenizer_name_or_path)

return self.path


# yapf: disable
_MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK: dict[str, ChatTemplatePath] = {
"blip-2": CHAT_TEMPLATES_DIR / "template_blip2.jinja",
"chameleon": CHAT_TEMPLATES_DIR / "template_basic.jinja",
"deepseek_vl_v2": CHAT_TEMPLATES_DIR / "template_deepseek_vl2.jinja",
"florence2": CHAT_TEMPLATES_DIR / "template_basic.jinja",
"fuyu": CHAT_TEMPLATES_DIR / "template_fuyu.jinja",
"paligemma": CHAT_TEMPLATES_DIR / "template_basic.jinja",
"qwen": _get_qwen_chat_template_fallback,
_MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK: dict[str, TemplateInfo] = {
"blip-2": TemplateInfo(CHAT_TEMPLATES_DIR / "template_blip2.jinja"),
"chameleon": TemplateInfo(CHAT_TEMPLATES_DIR / "template_basic.jinja"),
"deepseek_vl_v2": TemplateInfo(CHAT_TEMPLATES_DIR / "template_deepseek_vl2.jinja"), # noqa: E501
"florence2": TemplateInfo(CHAT_TEMPLATES_DIR / "template_basic.jinja"),
"fuyu": TemplateInfo(CHAT_TEMPLATES_DIR / "template_fuyu.jinja"),
"paligemma": TemplateInfo(CHAT_TEMPLATES_DIR / "template_basic.jinja"),
"qwen": TemplateInfo(_get_qwen_chat_template_fallback),
"qwen2_audio": TemplateInfo(CHAT_TEMPLATES_DIR / "template_qwen2_audio.jinja", # noqa: E501
override_exists=True),
}
# yapf: enable


def register_chat_template_fallback_path(
model_type: str,
chat_template: ChatTemplatePath,
override_exists: bool = False,
) -> None:
if model_type in _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK:
logger.warning(
"Model type %s already has a chat template registered. "
"It will be overwritten by the new chat template %s.", model_type,
chat_template)

_MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK[model_type] = chat_template
_MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK[model_type] = TemplateInfo(
chat_template, override_exists=override_exists)


def get_chat_template_fallback(model_type: str) -> Optional[TemplateInfo]:
chat_template_info = _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK.get(model_type)

if chat_template_info is None:
return None

return chat_template_info


def get_chat_template_fallback_path(
model_type: str,
tokenizer_name_or_path: str,
) -> Optional[Path]:
chat_template = _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK.get(model_type)
if callable(chat_template):
chat_template = chat_template(tokenizer_name_or_path)
chat_template_info = get_chat_template_fallback(model_type)

if chat_template is None:
if chat_template_info is None:
return None

return chat_template
return chat_template_info.get_path(tokenizer_name_or_path)
Comment on lines +79 to +84
Contributor

medium

Consider simplifying this function by directly returning the result of get_chat_template_fallback(model_type). This eliminates the need for the chat_template_info variable and the subsequent conditional checks, making the code more concise.

Suggested change
chat_template_info = get_chat_template_fallback(model_type)
if chat_template is None:
if chat_template_info is None:
return None
return chat_template
return chat_template_info.get_path(tokenizer_name_or_path)
def get_chat_template_fallback_path(
model_type: str,
tokenizer_name_or_path: str,
) -> Optional[Path]:
return get_chat_template_fallback(model_type).get_path(tokenizer_name_or_path) if get_chat_template_fallback(model_type) else None
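
For context, a minimal usage sketch of the registry API after this change (the model type "my_model" and the template path are hypothetical placeholders):

from pathlib import Path

from vllm.transformers_utils.chat_templates.registry import (
    get_chat_template_fallback, get_chat_template_fallback_path,
    register_chat_template_fallback_path)

# Register a fallback that should override an incompatible Hub template.
register_chat_template_fallback_path(
    "my_model", Path("/tmp/template_my_model.jinja"), override_exists=True)

info = get_chat_template_fallback("my_model")
assert info is not None and info.override_exists

# The path-based helper now resolves through TemplateInfo.get_path().
path = get_chat_template_fallback_path("my_model", "my-org/my-model")
assert path == Path("/tmp/template_my_model.jinja")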

24 changes: 24 additions & 0 deletions vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja
@@ -0,0 +1,24 @@
{%- set audio_count = namespace(value=0) -%}
{%- for message in messages -%}
{%- if loop.first and message['role'] != 'system' -%}
{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
{%- endif -%}
{{ '<|im_start|>' + message['role'] + '\n' }}
{%- if message['content'] is string -%}
{{ message['content'] + '<|im_end|>\n' }}
{%- else -%}
{%- for content in message['content'] -%}
{%- if 'audio' in content or 'audio_url' in content or message['type'] == 'audio' or content['type'] == 'audio' -%}
Contributor

high

The condition message['type'] == 'audio' is likely incorrect, as message refers to the entire message object (e.g., {'role': 'user', 'content': [...]}), which typically doesn't have a type key. The check should be on the content part instead: use content.get('type') == 'audio' and drop the message['type'] check.

            {%- if 'audio' in content or 'audio_url' in content or content.get('type') == 'audio' -%}

{%- set audio_count.value = audio_count.value + 1 -%}
{{ 'Audio ' + audio_count.value|string + ': <|audio_bos|><|AUDIO|><|audio_eos|>\n' }}
{%- elif 'text' in content -%}
{{ content['text'] }}
{%- endif -%}
{%- endfor -%}
{{ '<|im_end|>\n' }}
{%- endif -%}
{%- endfor -%}

{%- if add_generation_prompt -%}
{{ '<|im_start|>assistant\n' }}
{%- endif -%}
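
As a quick sanity check, the new template can be rendered standalone with jinja2 (a sketch; the sample message uses the OpenAI-style content parts the template expects):

import jinja2

with open("vllm/transformers_utils/chat_templates/"
          "template_qwen2_audio.jinja") as f:
    template = jinja2.Template(f.read())

messages = [{
    "role": "user",
    "content": [
        {"type": "audio", "audio_url": "https://example.com/clip.wav"},
        {"type": "text", "text": "What is said in this audio?"},
    ],
}]

prompt = template.render(messages=messages, add_generation_prompt=True)
print(prompt)
# The output should contain the injected system message,
# "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", the user text, and a
# trailing "<|im_start|>assistant\n" generation prompt.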