
Commit 99f536f

Authored by wwl2755
[Misc] Enhance warning information to user-defined chat template (#15408)
Signed-off-by: wwl2755 <wangwenlong2755@gmail.com>
1 parent 5ebf667 commit 99f536f

File tree: 3 files changed, +55 −22 lines

tests/entrypoints/test_chat_utils.py

Lines changed: 5 additions & 5 deletions
@@ -9,11 +9,11 @@
 from vllm.assets.image import ImageAsset
 from vllm.config import ModelConfig
-from vllm.entrypoints.chat_utils import (_resolve_hf_chat_template,
-                                         _try_extract_ast, load_chat_template,
+from vllm.entrypoints.chat_utils import (_try_extract_ast, load_chat_template,
                                          parse_chat_messages,
                                          parse_chat_messages_futures,
-                                         resolve_chat_template_content_format)
+                                         resolve_chat_template_content_format,
+                                         resolve_hf_chat_template)
 from vllm.entrypoints.llm import apply_hf_chat_template
 from vllm.multimodal import MultiModalDataDict
 from vllm.multimodal.utils import encode_image_base64
@@ -747,7 +747,7 @@ def test_resolve_hf_chat_template(sample_json_schema, model, use_tools):
     }] if use_tools else None
 
     # Test detecting the tokenizer's chat_template
-    chat_template = _resolve_hf_chat_template(
+    chat_template = resolve_hf_chat_template(
         tokenizer,
         chat_template=None,
         tools=tools,
@@ -781,7 +781,7 @@ def test_resolve_content_format_hf_defined(model, expected_format):
     tokenizer = tokenizer_group.tokenizer
 
     # Test detecting the tokenizer's chat_template
-    chat_template = _resolve_hf_chat_template(
+    chat_template = resolve_hf_chat_template(
         tokenizer,
         chat_template=None,
         tools=None,
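
The tests now import the helper under its new public name. For reference, a minimal standalone sketch of the same call, assuming any HF model whose tokenizer bundles a chat template (the model name here is only illustrative, not from the commit):

    from transformers import AutoTokenizer

    from vllm.entrypoints.chat_utils import resolve_hf_chat_template

    # Illustrative model; any instruct-tuned checkpoint with a bundled
    # chat template behaves the same way.
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
    chat_template = resolve_hf_chat_template(
        tokenizer,
        chat_template=None,  # None -> fall back to the tokenizer's template
        tools=None,
        trust_remote_code=False,
    )
    assert chat_template is not None  # the tokenizer's own template was found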

vllm/entrypoints/chat_utils.py

Lines changed: 26 additions & 14 deletions
@@ -306,7 +306,24 @@ def _detect_content_format(
     return "openai"
 
 
-def _resolve_hf_chat_template(
+def resolve_mistral_chat_template(
+    chat_template: Optional[str],
+    **kwargs: Any,
+) -> Optional[str]:
+    if chat_template is not None:
+        logger.warning_once(
+            "'chat_template' cannot be overridden for mistral tokenizer.")
+    if "add_generation_prompt" in kwargs:
+        logger.warning_once(
+            "'add_generation_prompt' is not supported for mistral tokenizer, "
+            "so it will be ignored.")
+    if "continue_final_message" in kwargs:
+        logger.warning_once(
+            "'continue_final_message' is not supported for mistral tokenizer, "
+            "so it will be ignored.")
+    return None
+
+def resolve_hf_chat_template(
     tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
     chat_template: Optional[str],
     tools: Optional[list[dict[str, Any]]],
@@ -352,7 +369,7 @@ def _resolve_chat_template_content_format(
     trust_remote_code: bool,
 ) -> _ChatTemplateContentFormat:
     if isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
-        hf_chat_template = _resolve_hf_chat_template(
+        hf_chat_template = resolve_hf_chat_template(
             tokenizer,
             chat_template=chat_template,
             trust_remote_code=trust_remote_code,
@@ -1140,7 +1157,7 @@ def apply_hf_chat_template(
     tokenize: bool = False,  # Different from HF's default
     **kwargs: Any,
 ) -> str:
-    hf_chat_template = _resolve_hf_chat_template(
+    hf_chat_template = resolve_hf_chat_template(
         tokenizer,
         chat_template=chat_template,
         tools=tools,
@@ -1169,17 +1186,12 @@ def apply_mistral_chat_template(
     tools: Optional[list[dict[str, Any]]],
     **kwargs: Any,
 ) -> list[int]:
-    if chat_template is not None:
-        logger.warning_once(
-            "'chat_template' cannot be overridden for mistral tokenizer.")
-    if "add_generation_prompt" in kwargs:
-        logger.warning_once(
-            "'add_generation_prompt' is not supported for mistral tokenizer, "
-            "so it will be ignored.")
-    if "continue_final_message" in kwargs:
-        logger.warning_once(
-            "'continue_final_message' is not supported for mistral tokenizer, "
-            "so it will be ignored.")
+    # The return value of resolve_mistral_chat_template is always None,
+    # and we won't use it.
+    resolve_mistral_chat_template(
+        chat_template=chat_template,
+        **kwargs,
+    )
 
     return tokenizer.apply_chat_template(
         messages=messages,
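
Since resolve_mistral_chat_template now owns these warnings and always returns None, apply_mistral_chat_template can delegate both the logging and the discarding of the unsupported override. A small sketch of the observable behavior, following the function body above:

    from vllm.entrypoints.chat_utils import resolve_mistral_chat_template

    # Each unsupported option triggers a one-time warning; the result is
    # always None, i.e. the mistral tokenizer's built-in template wins.
    # Note the checks test key presence in kwargs, so even a False value
    # still warns.
    resolved = resolve_mistral_chat_template(
        chat_template="{{ messages }}",  # warns: cannot be overridden
        add_generation_prompt=True,      # warns: not supported, ignored
        continue_final_message=False,    # warns: not supported, ignored
    )
    assert resolved is None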

vllm/entrypoints/openai/api_server.py

Lines changed: 24 additions & 3 deletions
@@ -35,7 +35,9 @@
 from vllm.engine.multiprocessing.client import MQLLMEngineClient
 from vllm.engine.multiprocessing.engine import run_mp_engine
 from vllm.engine.protocol import EngineClient
-from vllm.entrypoints.chat_utils import load_chat_template
+from vllm.entrypoints.chat_utils import (load_chat_template,
+                                         resolve_hf_chat_template,
+                                         resolve_mistral_chat_template)
 from vllm.entrypoints.launcher import serve_http
 from vllm.entrypoints.logger import RequestLogger
 from vllm.entrypoints.openai.cli_args import (make_arg_parser,
@@ -84,6 +86,7 @@
 from vllm.logger import init_logger
 from vllm.transformers_utils.config import (
     maybe_register_config_serialize_by_value)
+from vllm.transformers_utils.tokenizer import MistralTokenizer
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import (Device, FlexibleArgumentParser, get_open_zmq_ipc_path,
                         is_valid_ipv6_address, set_ulimit)
@@ -883,8 +886,26 @@ async def init_app_state(
 
     resolved_chat_template = load_chat_template(args.chat_template)
     if resolved_chat_template is not None:
-        logger.info("Using supplied chat template:\n%s",
-                    resolved_chat_template)
+        # Get the tokenizer to check official template
+        tokenizer = await engine_client.get_tokenizer()
+
+        if isinstance(tokenizer, MistralTokenizer):
+            # The warning is logged in resolve_mistral_chat_template.
+            resolved_chat_template = resolve_mistral_chat_template(
+                chat_template=resolved_chat_template)
+        else:
+            hf_chat_template = resolve_hf_chat_template(
+                tokenizer,
+                chat_template=None,
+                tools=None,
+                trust_remote_code=model_config.trust_remote_code)
+
+            if hf_chat_template != resolved_chat_template:
+                logger.warning(
+                    "Using supplied chat template: %s\n"
+                    "It is different from official chat template '%s'. "
+                    "This discrepancy may lead to performance degradation.",
+                    resolved_chat_template, args.model)
 
     state.openai_serving_models = OpenAIServingModels(
         engine_client=engine_client,
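
The same now-public helpers can be used outside the server to vet a custom template before deploying it. A hypothetical offline sketch, not part of the commit; the model name and template path are placeholders:

    from transformers import AutoTokenizer

    from vllm.entrypoints.chat_utils import (load_chat_template,
                                             resolve_hf_chat_template)

    # Placeholder model and path; substitute your own.
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
    supplied = load_chat_template("./my_template.jinja")
    official = resolve_hf_chat_template(
        tokenizer, chat_template=None, tools=None, trust_remote_code=False)
    if official != supplied:
        # Mirrors the server-side check: a mismatch is worth flagging, as
        # the new warning notes it may degrade output quality.
        print("Supplied template differs from the tokenizer's official one.")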
