 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 import asyncio
+import json
 import time
 from collections.abc import AsyncGenerator, AsyncIterator
 from http import HTTPStatus
 from typing import Callable, Final, Optional, Union

 import jinja2
 from fastapi import Request
-from openai.types.responses import ResponseOutputMessage, ResponseOutputText
+from openai.types.responses import (ResponseFunctionToolCall,
+                                    ResponseOutputMessage, ResponseOutputText,
+                                    ToolChoiceFunction)
+from pydantic import TypeAdapter

 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.logger import RequestLogger
 # yapf conflicts with isort for this block
 # yapf: disable
-from vllm.entrypoints.openai.protocol import (ErrorResponse,
+from vllm.entrypoints.openai.protocol import (ErrorResponse, FunctionCall,
+                                              FunctionDefinition,
                                               PromptTokenUsageInfo,
                                               RequestResponseMetadata,
                                               ResponseReasoningItem,
 # yapf: enable
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
+from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
 from vllm.logger import init_logger
 from vllm.outputs import RequestOutput
 from vllm.reasoning import ReasoningParser, ReasoningParserManager
 from vllm.sampling_params import SamplingParams
 from vllm.transformers_utils.tokenizer import AnyTokenizer
-from vllm.utils import random_uuid
+from vllm.utils import random_fc_uuid, random_uuid

 logger = init_logger(__name__)

@@ -63,7 +69,18 @@ def __init__(
             return_tokens_as_token_ids=return_tokens_as_token_ids,
             enable_force_include_usage=enable_force_include_usage,
         )
-
+        self.enable_auto_tools = enable_auto_tools
+        self.expand_tools_even_if_tool_choice_none = (
+            expand_tools_even_if_tool_choice_none)
+        self.tool_parser: Optional[Callable[[AnyTokenizer], ToolParser]] = None
+        if self.enable_auto_tools:
+            try:
+                self.tool_parser = ToolParserManager.get_tool_parser(
+                    tool_parser)
+            except Exception as e:
+                raise TypeError("Error: --enable-auto-tool-choice requires "
+                                f"tool_parser:'{tool_parser}' which has not "
+                                "been registered") from e
         self.chat_template = chat_template
         self.chat_template_content_format: Final = chat_template_content_format
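The constructor resolves the tool parser class once at startup, so a misspelled parser name fails fast instead of erroring on every request. Below is a minimal sketch of that registry lookup, not taken from this commit; it assumes vLLM is installed and uses the illustrative parser name "hermes".

# Sketch only: the lookup pattern __init__ relies on.
from vllm.entrypoints.openai.tool_parsers import ToolParserManager

try:
    # get_tool_parser returns the parser *class*; the diff instantiates it
    # later, per request, with the tokenizer (see the tool_choice == "auto"
    # branch further down).
    parser_cls = ToolParserManager.get_tool_parser("hermes")
except Exception as exc:
    # The same failure mode __init__ surfaces as a TypeError at startup.
    raise SystemExit(f"Tool parser not registered: {exc}")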
@@ -139,11 +156,30 @@ async def create_responses(
         ) = self._maybe_get_adapters(request)
         model_name = self._get_model_name(request.model, lora_request)
         tokenizer = await self.engine_client.get_tokenizer(lora_request)
-
+        if request.tools is None:
+            tool_dicts = None
+        elif (request.tool_choice == "none"
+              and not self.expand_tools_even_if_tool_choice_none):
+            if len(request.tools) > 0:
+                logger.warning_once(
+                    "Tools are specified but tool_choice is set to 'none' "
+                    "and --expand-tools-even-if-tool-choice-none is not "
+                    "enabled. Tool definitions will be excluded from the "
+                    "prompt. This behavior will change in vLLM v0.10 where "
+                    "tool definitions will be included by default even "
+                    "with tool_choice='none'. To adopt the new behavior "
+                    "now, use --expand-tools-even-if-tool-choice-none. "
+                    "To suppress this warning, either remove tools from "
+                    "the request or set tool_choice to a different value.")
+            tool_dicts = None
+        else:
+            tool_dicts = [tool.model_dump() for tool in request.tools]
         _, request_prompts, engine_prompts = await self._preprocess_chat(
             request,
             tokenizer,
             messages,
+            tool_dicts=tool_dicts,
+            tool_parser=self.tool_parser,
             chat_template=self.chat_template,
             chat_template_content_format=self.chat_template_content_format,
         )
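Client-side, the new tool_dicts handling means tools attached to a request are dropped from the prompt when tool_choice="none", unless the server was launched with --expand-tools-even-if-tool-choice-none. A hedged sketch of such a request with the openai client follows; the base URL, model name, and tool schema are illustrative and assume a locally running vLLM server with tool calling enabled.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Responses-API function tools are flat (no nested "function" object).
tools = [{
    "type": "function",
    "name": "get_weather",
    "description": "Look up current weather for a city.",
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}]

# With tool_choice="none", this server logs the warning above and omits
# the tool definitions from the prompt, unless it was started with
# --expand-tools-even-if-tool-choice-none.
response = client.responses.create(
    model="Qwen/Qwen2.5-7B-Instruct",
    input="What's the weather in Paris?",
    tools=tools,
    tool_choice="none",
)
print(response.output)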
@@ -287,28 +323,82 @@ async def responses_full_generator(
             reasoning_content = None
             content = final_output.text

-        output = []
-        if reasoning_content:
-            reasoning_item = ResponseReasoningItem(
+        outputs = []
+        output = None
+        if self.tool_parser:
+            function_calls: list[FunctionCall] = []
+            if request.tool_choice and \
+                    isinstance(request.tool_choice,
+                               ToolChoiceFunction):
+                # Forced Function Call
+                function_calls.append(
+                    FunctionCall(name=request.tool_choice.name,
+                                 arguments=content))
+            elif request.tool_choice is None or request.tool_choice == "none":
+                pass
+            elif request.tool_choice == "required":
+                tool_calls = TypeAdapter(
+                    list[FunctionDefinition]).validate_json(content)
+                function_calls.extend([
+                    FunctionCall(name=tool_call.name,
+                                 arguments=json.dumps(tool_call.parameters,
+                                                      ensure_ascii=False))
+                    for tool_call in tool_calls
+                ])
+            elif request.tool_choice == "auto":
+                try:
+                    tool_parser = self.tool_parser(tokenizer)
+                except RuntimeError as e:
+                    logger.exception("Error in tool parser creation.")
+                    return self.create_error_response(str(e))
+                tool_call_info = tool_parser.extract_tool_calls(
+                    content if content is not None else "", request=request)
+                if tool_call_info is not None and tool_call_info.tools_called:
+                    function_calls.extend(
+                        FunctionCall(
+                            name=tool_call.function.name,
+                            arguments=tool_call.function.arguments,
+                        ) for tool_call in tool_call_info.tool_calls)
+            else:
+                logger.warning(
+                    "Unknown tool choice: %s. "
+                    "Using 'none' as the default tool choice.",
+                    request.tool_choice)
+            output = [
+                ResponseFunctionToolCall(
+                    id=f"fc_{random_fc_uuid()}",
+                    call_id=f"call_{random_uuid()}",
+                    type="function_call",
+                    status="completed",
+                    name=tool_call.name,
+                    arguments=tool_call.arguments,
+                ) for tool_call in function_calls
+            ]
+        # If no tool call is generated, we still need to return an output.
+        if reasoning_content and output is None:
+            output = ResponseReasoningItem(
                 text=reasoning_content,
                 status=None,  # NOTE: Only the last output item has status.
             )
-            output.append(reasoning_item)
-        if content:
+        # If no tool call is generated, we still need to return an output.
+        if content and output is None:
             output_text = ResponseOutputText(
                 text=content,
                 annotations=[],  # TODO
                 type="output_text",
                 logprobs=None,  # TODO
             )
-            message = ResponseOutputMessage(
+            output = ResponseOutputMessage(
                 id=f"msg_{random_uuid()}",
                 content=[output_text],
                 role="assistant",
                 status="completed",
                 type="message",
             )
-            output.append(message)
+        if isinstance(output, list):
+            outputs.extend(output)
+        else:
+            outputs.append(output)

         # Calculate usage.
         assert final_res.prompt_token_ids is not None
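The tool_choice == "required" branch assumes the engine constrained generation so that content is a JSON array of {name, parameters} objects, which a single TypeAdapter call validates. Here is a self-contained sketch of just that parsing step, not from this commit; the FunctionDefinition stand-in assumes only the two fields the diff reads (vLLM's real model lives in vllm.entrypoints.openai.protocol).

import json
from typing import Any, Optional

from pydantic import BaseModel, TypeAdapter


class FunctionDefinition(BaseModel):
    # Stand-in with only the fields the diff touches.
    name: str
    parameters: Optional[dict[str, Any]] = None


# What a constrained model is expected to emit for tool_choice="required".
content = '[{"name": "get_weather", "parameters": {"city": "Paris"}}]'

tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
for call in tool_calls:
    # Mirrors FunctionCall(name=..., arguments=json.dumps(...)) above.
    print(call.name, json.dumps(call.parameters, ensure_ascii=False))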
@@ -329,7 +419,7 @@ async def responses_full_generator(
             sampling_params,
             model_name=model_name,
             created_time=created_time,
-            output=output,
+            output=outputs,
             status="completed",
             usage=usage,
         )
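End to end, each parsed call now surfaces in the final response as a completed function_call output item rather than assistant text. The item built by the list comprehension in responses_full_generator has the following shape; the ids below are placeholders for the random_fc_uuid()/random_uuid() values the code generates.

from openai.types.responses import ResponseFunctionToolCall

item = ResponseFunctionToolCall(
    id="fc_0123456789abcdef",         # f"fc_{random_fc_uuid()}" in the diff
    call_id="call_0123456789abcdef",  # f"call_{random_uuid()}"
    type="function_call",
    status="completed",
    name="get_weather",
    arguments='{"city": "Paris"}',
)
print(item.model_dump_json(indent=2))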