
[Bugfix] Fix Qwen2 audio chat template for old version transformers compatibility #20826

Open · wants to merge 3 commits into base: main
38 changes: 24 additions & 14 deletions vllm/entrypoints/chat_utils.py
@@ -42,8 +42,7 @@
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
from vllm.multimodal.utils import MediaConnector
# yapf: disable
from vllm.transformers_utils.chat_templates import (
get_chat_template_fallback_path)
from vllm.transformers_utils.chat_templates import get_chat_template_fallback
# yapf: enable
from vllm.transformers_utils.processor import cached_get_processor
from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
@@ -385,29 +384,40 @@ def resolve_hf_chat_template(
ProcessorMixin),
trust_remote_code=model_config.trust_remote_code,
)
if isinstance(processor, ProcessorMixin) and \
hasattr(processor, 'chat_template') and \
processor.chat_template is not None:
return processor.chat_template
if (isinstance(processor, ProcessorMixin) and
hasattr(processor, 'chat_template') and
processor.chat_template is not None):
chat_template = processor.chat_template
except Exception:
logger.debug("Failed to load AutoProcessor chat template for %s", tokenizer.name_or_path, exc_info=True) # noqa: E501

# 3rd priority: AutoTokenizer chat template
try:
return tokenizer.get_chat_template(chat_template, tools=tools)
except Exception:
logger.debug("Failed to load AutoTokenizer chat template for %s",
tokenizer.name_or_path, exc_info=True)
if chat_template is None:
try:
chat_template = tokenizer.get_chat_template(chat_template,
tools=tools)
except Exception:
logger.debug("Failed to load AutoTokenizer chat template for %s",
tokenizer.name_or_path, exc_info=True)

# 4th priority: Predefined fallbacks
path = get_chat_template_fallback_path(
fallback_info = get_chat_template_fallback(
model_type=model_config.hf_config.model_type,
tokenizer_name_or_path=model_config.tokenizer,
)
if path is not None:
path = (fallback_info.get_path(model_config.tokenizer)
if fallback_info is not None else None)
if path is not None and chat_template is None:
logger.info("Loading chat template fallback for %s as there isn't one "
"defined on HF Hub.", tokenizer.name_or_path)
chat_template = load_chat_template(path)
elif fallback_info is not None and fallback_info.override_exists:
chat_template_to_override = load_chat_template(path)
if chat_template_to_override != chat_template:
chat_template = chat_template_to_override
logger.warning("Override existing chat template for %s as "
"its defined ones on HF Hub is not compatible "
"with OpenAI server.",
tokenizer.name_or_path)
Comment on lines +409 to +420
Contributor

high

To prevent a potential TypeError, ensure path is not None before calling load_chat_template(path). If fallback_info.get_path() returns None, load_chat_template(None) would raise an error.

Suggested change
if path is not None and chat_template is None:
logger.info("Loading chat template fallback for %s as there isn't one "
"defined on HF Hub.", tokenizer.name_or_path)
chat_template = load_chat_template(path)
elif fallback_info is not None and fallback_info.override_exists:
chat_template_to_override = load_chat_template(path)
if chat_template_to_override != chat_template:
chat_template = chat_template_to_override
logger.warning("Override existing chat template for %s as "
"its defined ones on HF Hub is not compatible "
"with OpenAI server.",
tokenizer.name_or_path)
# 4th priority: Predefined fallbacks
fallback_info = get_chat_template_fallback(
model_type=model_config.hf_config.model_type,
)
path = (fallback_info.get_path(model_config.tokenizer)
if fallback_info is not None else None)
if path is not None and chat_template is None:
logger.info("Loading chat template fallback for %s as there isn't one "
"defined on HF Hub.", tokenizer.name_or_path)
chat_template = load_chat_template(path)
elif fallback_info is not None and fallback_info.override_exists and path is not None:
chat_template_to_override = load_chat_template(path)
if chat_template_to_override != chat_template:
chat_template = chat_template_to_override
logger.warning("Override existing chat template for %s as "
"its defined ones on HF Hub is not compatible "
"with OpenAI server.",
tokenizer.name_or_path)

else:
logger.debug("There is no chat template fallback for %s",
tokenizer.name_or_path)
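
To make the new control flow easier to follow, here is a self-contained sketch of the fallback/override resolution this hunk introduces (an illustration only, not the exact vLLM code; load stands in for load_chat_template, and it includes the path-is-not-None guard suggested in the review comment above):

from typing import Callable, Optional

def resolve_with_fallback(chat_template: Optional[str],
                          fallback_path: Optional[str],
                          override_exists: bool,
                          load: Callable[[str], str]) -> Optional[str]:
    # The fallback only fires when no template was resolved from HF Hub.
    if fallback_path is not None and chat_template is None:
        return load(fallback_path)
    # An override-style fallback replaces a Hub-defined template that is
    # known to be incompatible with the OpenAI-compatible server.
    if fallback_path is not None and override_exists:
        candidate = load(fallback_path)
        if candidate != chat_template:
            return candidate
    return chat_template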
5 changes: 3 additions & 2 deletions vllm/transformers_utils/chat_templates/__init__.py
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from .registry import get_chat_template_fallback_path
from .registry import (get_chat_template_fallback,
get_chat_template_fallback_path)

__all__ = ["get_chat_template_fallback_path"]
__all__ = ["get_chat_template_fallback", "get_chat_template_fallback_path"]
52 changes: 38 additions & 14 deletions vllm/transformers_utils/chat_templates/registry.py
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Optional, Union

@@ -20,41 +21,64 @@ def _get_qwen_chat_template_fallback(
return CHAT_TEMPLATES_DIR / "template_basic.jinja"


@dataclass(frozen=True)
class TemplateInfo:
path: ChatTemplatePath
override_exists: bool = False

def get_path(self, tokenizer_name_or_path: str) -> Optional[Path]:
if callable(self.path):
return self.path(tokenizer_name_or_path)

return self.path


# yapf: disable
_MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK: dict[str, ChatTemplatePath] = {
"blip-2": CHAT_TEMPLATES_DIR / "template_blip2.jinja",
"chameleon": CHAT_TEMPLATES_DIR / "template_basic.jinja",
"deepseek_vl_v2": CHAT_TEMPLATES_DIR / "template_deepseek_vl2.jinja",
"florence2": CHAT_TEMPLATES_DIR / "template_basic.jinja",
"fuyu": CHAT_TEMPLATES_DIR / "template_fuyu.jinja",
"paligemma": CHAT_TEMPLATES_DIR / "template_basic.jinja",
"qwen": _get_qwen_chat_template_fallback,
_MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK: dict[str, TemplateInfo] = {
"blip-2": TemplateInfo(CHAT_TEMPLATES_DIR / "template_blip2.jinja"),
"chameleon": TemplateInfo(CHAT_TEMPLATES_DIR / "template_basic.jinja"),
"deepseek_vl_v2": TemplateInfo(CHAT_TEMPLATES_DIR / "template_deepseek_vl2.jinja"), # noqa: E501
"florence2": TemplateInfo(CHAT_TEMPLATES_DIR / "template_basic.jinja"),
"fuyu": TemplateInfo(CHAT_TEMPLATES_DIR / "template_fuyu.jinja"),
"paligemma": TemplateInfo(CHAT_TEMPLATES_DIR / "template_basic.jinja"),
"qwen": TemplateInfo(_get_qwen_chat_template_fallback),
"qwen2_audio": TemplateInfo(CHAT_TEMPLATES_DIR / "template_qwen2_audio.jinja", # noqa: E501
override_exists=True),
}
# yapf: enable


def register_chat_template_fallback_path(
model_type: str,
chat_template: ChatTemplatePath,
override_exists: bool = False,
) -> None:
if model_type in _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK:
logger.warning(
"Model type %s already has a chat template registered. "
"It will be overwritten by the new chat template %s.", model_type,
chat_template)

_MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK[model_type] = chat_template
_MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK[model_type] = TemplateInfo(
chat_template, override_exists=override_exists)


def get_chat_template_fallback(model_type: str) -> Optional[TemplateInfo]:
chat_template_info = _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK.get(model_type)

if chat_template_info is None:
return None

return chat_template_info


def get_chat_template_fallback_path(
model_type: str,
tokenizer_name_or_path: str,
) -> Optional[Path]:
chat_template = _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK.get(model_type)
if callable(chat_template):
chat_template = chat_template(tokenizer_name_or_path)
chat_template_info = get_chat_template_fallback(model_type)

if chat_template is None:
if chat_template_info is None:
return None

return chat_template
return chat_template_info.get_path(tokenizer_name_or_path)
Comment on lines +79 to +84
Contributor

medium

Consider simplifying this function by directly returning the result of get_chat_template_fallback(model_type). This eliminates the need for the chat_template_info variable and the subsequent conditional checks, making the code more concise.

Suggested change
chat_template_info = get_chat_template_fallback(model_type)
if chat_template is None:
if chat_template_info is None:
return None
return chat_template
return chat_template_info.get_path(tokenizer_name_or_path)
def get_chat_template_fallback_path(
model_type: str,
tokenizer_name_or_path: str,
) -> Optional[Path]:
return get_chat_template_fallback(model_type).get_path(tokenizer_name_or_path) if get_chat_template_fallback(model_type) else None
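
For context, a minimal usage sketch of the registry API after this change (the model type "my_model" and the template path are hypothetical placeholders):

from pathlib import Path

from vllm.transformers_utils.chat_templates.registry import (
    get_chat_template_fallback, get_chat_template_fallback_path,
    register_chat_template_fallback_path)

# Register a fallback that should override an incompatible Hub template.
register_chat_template_fallback_path(
    "my_model", Path("/tmp/template_my_model.jinja"), override_exists=True)

info = get_chat_template_fallback("my_model")
assert info is not None and info.override_exists

# The path-based helper now resolves through TemplateInfo.get_path().
path = get_chat_template_fallback_path("my_model", "my-org/my-model")
assert path == Path("/tmp/template_my_model.jinja")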

24 changes: 24 additions & 0 deletions vllm/transformers_utils/chat_templates/template_qwen2_audio.jinja
@@ -0,0 +1,24 @@
{%- set audio_count = namespace(value=0) -%}
{%- for message in messages -%}
{%- if loop.first and message['role'] != 'system' -%}
{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
{%- endif -%}
{{ '<|im_start|>' + message['role'] + '\n' }}
{%- if message['content'] is string -%}
{{ message['content'] + '<|im_end|>\n' }}
{%- else -%}
{%- for content in message['content'] -%}
{%- if 'audio' in content or 'audio_url' in content or message['type'] == 'audio' or content['type'] == 'audio' -%}
Contributor

high

The condition message['type'] == 'audio' is likely incorrect, as message refers to the entire message object (e.g., {'role': 'user', 'content': [...]}), which typically doesn't have a type key. The check should be on the content part instead: use content.get('type') == 'audio' and drop the message['type'] check.

            {%- if 'audio' in content or 'audio_url' in content or content.get('type') == 'audio' -%}

{%- set audio_count.value = audio_count.value + 1 -%}
{{ 'Audio ' + audio_count.value|string + ': <|audio_bos|><|AUDIO|><|audio_eos|>\n' }}
{%- elif 'text' in content -%}
{{ content['text'] }}
{%- endif -%}
{%- endfor -%}
{{ '<|im_end|>\n' }}
{%- endif -%}
{%- endfor -%}

{%- if add_generation_prompt -%}
{{ '<|im_start|>assistant\n' }}
{%- endif -%}
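
As a quick sanity check, the new template can be rendered standalone with jinja2 (a sketch; the sample message uses the OpenAI-style content parts the template expects):

import jinja2

with open("vllm/transformers_utils/chat_templates/"
          "template_qwen2_audio.jinja") as f:
    template = jinja2.Template(f.read())

messages = [{
    "role": "user",
    "content": [
        {"type": "audio", "audio_url": "https://example.com/clip.wav"},
        {"type": "text", "text": "What is said in this audio?"},
    ],
}]

prompt = template.render(messages=messages, add_generation_prompt=True)
print(prompt)
# The output should contain the injected system message,
# "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", the user text, and a
# trailing "<|im_start|>assistant\n" generation prompt.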