From 9bff87c8a1b0909263d486392ec61858a6436320 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Fri, 11 Jul 2025 23:14:31 +0800 Subject: [PATCH 1/3] fix qwen2-audio backward compatibility Signed-off-by: Isotr0py <2037008807@qq.com> --- vllm/entrypoints/chat_utils.py | 35 ++++++++----- .../chat_templates/__init__.py | 5 +- .../chat_templates/registry.py | 52 ++++++++++++++----- .../chat_templates/template_qwen2_audio.jinja | 23 ++++++++ 4 files changed, 85 insertions(+), 30 deletions(-) create mode 100644 vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index f5b7239cb30..28a3e2730ae 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -42,8 +42,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict from vllm.multimodal.utils import MediaConnector # yapf: disable -from vllm.transformers_utils.chat_templates import ( - get_chat_template_fallback_path) +from vllm.transformers_utils.chat_templates import get_chat_template_fallback # yapf: enable from vllm.transformers_utils.processor import cached_get_processor from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer @@ -385,29 +384,37 @@ def resolve_hf_chat_template( ProcessorMixin), trust_remote_code=model_config.trust_remote_code, ) - if isinstance(processor, ProcessorMixin) and \ - hasattr(processor, 'chat_template') and \ - processor.chat_template is not None: - return processor.chat_template + if (isinstance(processor, ProcessorMixin) and + hasattr(processor, 'chat_template') and + processor.chat_template is not None): + chat_template = processor.chat_template except Exception: logger.debug("Failed to load AutoProcessor chat template for %s", tokenizer.name_or_path, exc_info=True) # noqa: E501 # 3rd priority: AutoTokenizer chat template - try: - return tokenizer.get_chat_template(chat_template, tools=tools) - except Exception: - logger.debug("Failed to load AutoTokenizer chat template for %s", - tokenizer.name_or_path, exc_info=True) + if chat_template is None: + try: + chat_template = tokenizer.get_chat_template(chat_template, + tools=tools) + except Exception: + logger.debug("Failed to load AutoTokenizer chat template for %s", + tokenizer.name_or_path, exc_info=True) # 4th priority: Predefined fallbacks - path = get_chat_template_fallback_path( + fallback_info = get_chat_template_fallback( model_type=model_config.hf_config.model_type, - tokenizer_name_or_path=model_config.tokenizer, ) - if path is not None: + path = (fallback_info.get_path(model_config.tokenizer) + if fallback_info is not None else None) + if path is not None and chat_template is None: logger.info("Loading chat template fallback for %s as there isn't one " "defined on HF Hub.", tokenizer.name_or_path) chat_template = load_chat_template(path) + elif fallback_info is not None and fallback_info.override_exists: + logger.warning("Override existing template for %s as its defined ones " + "on HF Hub is not compatible with OpenAI server.", + tokenizer.name_or_path) + chat_template = load_chat_template(path) else: logger.debug("There is no chat template fallback for %s", tokenizer.name_or_path) diff --git a/vllm/transformers_utils/chat_templates/__init__.py b/vllm/transformers_utils/chat_templates/__init__.py index 2783d12a221..56be0fa4eb6 100644 --- a/vllm/transformers_utils/chat_templates/__init__.py +++ b/vllm/transformers_utils/chat_templates/__init__.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from .registry import get_chat_template_fallback_path +from .registry import (get_chat_template_fallback, + get_chat_template_fallback_path) -__all__ = ["get_chat_template_fallback_path"] +__all__ = ["get_chat_template_fallback", "get_chat_template_fallback_path"] \ No newline at end of file diff --git a/vllm/transformers_utils/chat_templates/registry.py b/vllm/transformers_utils/chat_templates/registry.py index e0ef7f0999d..198d364f04d 100644 --- a/vllm/transformers_utils/chat_templates/registry.py +++ b/vllm/transformers_utils/chat_templates/registry.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from dataclasses import dataclass from pathlib import Path from typing import Callable, Optional, Union @@ -20,15 +21,29 @@ def _get_qwen_chat_template_fallback( return CHAT_TEMPLATES_DIR / "template_basic.jinja" +@dataclass(frozen=True) +class TemplateInfo: + path: ChatTemplatePath + override_exists: bool = False + + def get_path(self, tokenizer_name_or_path: str) -> Optional[Path]: + if callable(self.path): + return self.path(tokenizer_name_or_path) + + return self.path + + # yapf: disable -_MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK: dict[str, ChatTemplatePath] = { - "blip-2": CHAT_TEMPLATES_DIR / "template_blip2.jinja", - "chameleon": CHAT_TEMPLATES_DIR / "template_basic.jinja", - "deepseek_vl_v2": CHAT_TEMPLATES_DIR / "template_deepseek_vl2.jinja", - "florence2": CHAT_TEMPLATES_DIR / "template_basic.jinja", - "fuyu": CHAT_TEMPLATES_DIR / "template_fuyu.jinja", - "paligemma": CHAT_TEMPLATES_DIR / "template_basic.jinja", - "qwen": _get_qwen_chat_template_fallback, +_MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK: dict[str, TemplateInfo] = { + "blip-2": TemplateInfo(CHAT_TEMPLATES_DIR / "template_blip2.jinja"), + "chameleon": TemplateInfo(CHAT_TEMPLATES_DIR / "template_basic.jinja"), + "deepseek_vl_v2": TemplateInfo(CHAT_TEMPLATES_DIR / "template_deepseek_vl2.jinja"), # noqa: E501 + "florence2": TemplateInfo(CHAT_TEMPLATES_DIR / "template_basic.jinja"), + "fuyu": TemplateInfo(CHAT_TEMPLATES_DIR / "template_fuyu.jinja"), + "paligemma": TemplateInfo(CHAT_TEMPLATES_DIR / "template_basic.jinja"), + "qwen": TemplateInfo(_get_qwen_chat_template_fallback), + "qwen2_audio": TemplateInfo(CHAT_TEMPLATES_DIR / "template_qwen2_audio.jinja", # noqa: E501 + override_exists=True), } # yapf: enable @@ -36,6 +51,7 @@ def _get_qwen_chat_template_fallback( def register_chat_template_fallback_path( model_type: str, chat_template: ChatTemplatePath, + override_exists: bool = False, ) -> None: if model_type in _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK: logger.warning( @@ -43,18 +59,26 @@ def register_chat_template_fallback_path( "It will be overwritten by the new chat template %s.", model_type, chat_template) - _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK[model_type] = chat_template + _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK[model_type] = TemplateInfo( + chat_template, override_exists=override_exists) + + +def get_chat_template_fallback(model_type: str) -> Optional[TemplateInfo]: + chat_template_info = _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK.get(model_type) + + if chat_template_info is None: + return None + + return chat_template_info def get_chat_template_fallback_path( model_type: str, tokenizer_name_or_path: str, ) -> Optional[Path]: - chat_template = _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK.get(model_type) - if callable(chat_template): - chat_template = chat_template(tokenizer_name_or_path) + chat_template_info = get_chat_template_fallback(model_type) - if chat_template is None: + if chat_template_info is None: return None - return chat_template + return chat_template_info.get_path(tokenizer_name_or_path) \ No newline at end of file diff --git a/vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja b/vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja new file mode 100644 index 00000000000..5cea6d98c6d --- /dev/null +++ b/vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja @@ -0,0 +1,23 @@ +{% set audio_count = namespace(value=0) %} +{% for message in messages %} + {% if loop.first and message['role'] != 'system' %} + <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n + {% endif %} + <|im_start|>{{ message['role'] }}\n + {% if message['content'] is string %} + {{ message['content'] }}<|im_end|>\n + {% else %} + {% for content in message['content'] %} + {% if 'audio' in content or 'audio_url' in content or message['type'] == 'audio' or content['type'] == 'audio' %} + {% set audio_count.value = audio_count.value + 1 %} + Audio {{ audio_count.value }}: <|audio_bos|><|AUDIO|><|audio_eos|>\n + {% elif 'text' in content %} + {{ content['text'] }} + {% endif %} + {% endfor %} + <|im_end|>\n + {% endif %} +{% endfor %} +{% if add_generation_prompt %} + <|im_start|>assistant\n +{% endif %} \ No newline at end of file From 3b40a27aed0234dc9685083e03ac6c8cda486179 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Fri, 11 Jul 2025 23:31:51 +0800 Subject: [PATCH 2/3] fix Signed-off-by: Isotr0py <2037008807@qq.com> --- vllm/entrypoints/chat_utils.py | 11 +++++---- .../chat_templates/template_qwen2_audio.jinja | 24 +------------------ 2 files changed, 8 insertions(+), 27 deletions(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 28a3e2730ae..dd12ab18884 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -411,10 +411,13 @@ def resolve_hf_chat_template( "defined on HF Hub.", tokenizer.name_or_path) chat_template = load_chat_template(path) elif fallback_info is not None and fallback_info.override_exists: - logger.warning("Override existing template for %s as its defined ones " - "on HF Hub is not compatible with OpenAI server.", - tokenizer.name_or_path) - chat_template = load_chat_template(path) + chat_template_to_override = load_chat_template(path) + if chat_template_to_override != chat_template: + chat_template = chat_template_to_override + logger.warning("Override existing chat template for %s as " + "its defined ones on HF Hub is not compatible " + "with OpenAI server.", + tokenizer.name_or_path) else: logger.debug("There is no chat template fallback for %s", tokenizer.name_or_path) diff --git a/vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja b/vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja index 5cea6d98c6d..53ff909fb3a 100644 --- a/vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja +++ b/vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja @@ -1,23 +1 @@ -{% set audio_count = namespace(value=0) %} -{% for message in messages %} - {% if loop.first and message['role'] != 'system' %} - <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n - {% endif %} - <|im_start|>{{ message['role'] }}\n - {% if message['content'] is string %} - {{ message['content'] }}<|im_end|>\n - {% else %} - {% for content in message['content'] %} - {% if 'audio' in content or 'audio_url' in content or message['type'] == 'audio' or content['type'] == 'audio' %} - {% set audio_count.value = audio_count.value + 1 %} - Audio {{ audio_count.value }}: <|audio_bos|><|AUDIO|><|audio_eos|>\n - {% elif 'text' in content %} - {{ content['text'] }} - {% endif %} - {% endfor %} - <|im_end|>\n - {% endif %} -{% endfor %} -{% if add_generation_prompt %} - <|im_start|>assistant\n -{% endif %} \ No newline at end of file +{% set audio_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if 'audio' in content or 'audio_url' in content or message['type'] == 'audio' %}{% set audio_count.value = audio_count.value + 1 %}Audio {{ audio_count.value }}: <|audio_bos|><|AUDIO|><|audio_eos|>\n{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %} \ No newline at end of file From 37a35cc254cc870d162cfeeaec7c8ed7ce18da6e Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sat, 12 Jul 2025 00:02:31 +0800 Subject: [PATCH 3/3] fix chat template Signed-off-by: Isotr0py <2037008807@qq.com> --- .../chat_templates/template_qwen2_audio.jinja | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja b/vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja index 53ff909fb3a..4e31d14097c 100644 --- a/vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja +++ b/vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja @@ -1 +1,24 @@ -{% set audio_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if 'audio' in content or 'audio_url' in content or message['type'] == 'audio' %}{% set audio_count.value = audio_count.value + 1 %}Audio {{ audio_count.value }}: <|audio_bos|><|AUDIO|><|audio_eos|>\n{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %} \ No newline at end of file +{%- set audio_count = namespace(value=0) -%} +{%- for message in messages -%} + {%- if loop.first and message['role'] != 'system' -%} + {{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }} + {%- endif -%} + {{ '<|im_start|>' + message['role'] + '\n' }} + {%- if message['content'] is string -%} + {{ message['content'] + '<|im_end|>\n' }} + {%- else -%} + {%- for content in message['content'] -%} + {%- if 'audio' in content or 'audio_url' in content or message['type'] == 'audio' or content['type'] == 'audio' -%} + {%- set audio_count.value = audio_count.value + 1 -%} + {{ 'Audio ' + audio_count.value|string + ': <|audio_bos|><|AUDIO|><|audio_eos|>\n' }} + {%- elif 'text' in content -%} + {{ content['text'] }} + {%- endif -%} + {%- endfor -%} + {{ '<|im_end|>\n' }} + {%- endif -%} +{%- endfor -%} + +{%- if add_generation_prompt -%} + {{ '<|im_start|>assistant\n' }} +{%- endif -%} \ No newline at end of file