@@ -2,14 +2,18 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import asyncio
+import json
 import time
 from collections.abc import AsyncGenerator, AsyncIterator
 from http import HTTPStatus
 from typing import Callable, Final, Optional, Union
 
 import jinja2
 from fastapi import Request
-from openai.types.responses import ResponseOutputMessage, ResponseOutputText
+from openai.types.responses import (ResponseFunctionToolCall,
+                                    ResponseOutputMessage, ResponseOutputText,
+                                    ToolChoiceFunction)
+from pydantic import TypeAdapter
 
 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
@@ -18,7 +22,8 @@
 from vllm.entrypoints.logger import RequestLogger
 # yapf conflicts with isort for this block
 # yapf: disable
-from vllm.entrypoints.openai.protocol import (ErrorResponse,
+from vllm.entrypoints.openai.protocol import (ErrorResponse, FunctionCall,
+                                              FunctionDefinition,
                                               PromptTokenUsageInfo,
                                               RequestResponseMetadata,
                                               ResponseReasoningItem,
@@ -27,12 +32,13 @@
 # yapf: enable
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
+from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
 from vllm.logger import init_logger
 from vllm.outputs import RequestOutput
 from vllm.reasoning import ReasoningParser, ReasoningParserManager
 from vllm.sampling_params import SamplingParams
 from vllm.transformers_utils.tokenizer import AnyTokenizer
-from vllm.utils import random_uuid
+from vllm.utils import random_fc_uuid, random_uuid
 
 
 logger = init_logger(__name__)
@@ -64,7 +70,18 @@ def __init__(
             return_tokens_as_token_ids=return_tokens_as_token_ids,
             enable_force_include_usage=enable_force_include_usage,
         )
-
+        self.enable_auto_tools = enable_auto_tools
+        self.expand_tools_even_if_tool_choice_none = (
+            expand_tools_even_if_tool_choice_none)
+        self.tool_parser: Optional[Callable[[AnyTokenizer], ToolParser]] = None
+        if self.enable_auto_tools:
+            try:
+                self.tool_parser = ToolParserManager.get_tool_parser(
+                    tool_parser)
+            except Exception as e:
+                raise TypeError("Error: --enable-auto-tool-choice requires "
+                                f"tool_parser:'{tool_parser}' which has not "
+                                "been registered") from e
         self.chat_template = chat_template
         self.chat_template_content_format: Final = chat_template_content_format
 
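Note: `__init__` stores the parser *class* and instantiates it per request, mirroring how the chat-completions path resolves tool parsers. A minimal standalone sketch of that lookup, using only the calls the diff itself relies on (the helper name `make_tool_parser` is invented for illustration):

```python
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.transformers_utils.tokenizer import AnyTokenizer


def make_tool_parser(name: str, tokenizer: AnyTokenizer) -> ToolParser:
    # get_tool_parser resolves the registered parser class by name and
    # raises for unknown names (hence the TypeError wrapper in __init__).
    parser_cls = ToolParserManager.get_tool_parser(name)
    # The class is kept unconstructed and built per request with the
    # model's tokenizer, as create_responses does below.
    return parser_cls(tokenizer)
```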
@@ -140,11 +157,30 @@ async def create_responses(
         ) = self._maybe_get_adapters(request)
         model_name = self._get_model_name(request.model, lora_request)
         tokenizer = await self.engine_client.get_tokenizer(lora_request)
-
+        if request.tools is None:
+            tool_dicts = None
+        elif (request.tool_choice == "none"
+              and not self.expand_tools_even_if_tool_choice_none):
+            if len(request.tools) > 0:
+                logger.warning_once(
+                    "Tools are specified but tool_choice is set to 'none' "
+                    "and --expand-tools-even-if-tool-choice-none is not "
+                    "enabled. Tool definitions will be excluded from the "
+                    "prompt. This behavior will change in vLLM v0.10 where "
+                    "tool definitions will be included by default even "
+                    "with tool_choice='none'. To adopt the new behavior "
+                    "now, use --expand-tools-even-if-tool-choice-none. "
+                    "To suppress this warning, either remove tools from "
+                    "the request or set tool_choice to a different value.")
+            tool_dicts = None
+        else:
+            tool_dicts = [tool.model_dump() for tool in request.tools]
         _, request_prompts, engine_prompts = await self._preprocess_chat(
             request,
             tokenizer,
             messages,
+            tool_dicts=tool_dicts,
+            tool_parser=self.tool_parser,
             chat_template=self.chat_template,
             chat_template_content_format=self.chat_template_content_format,
         )
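For reference, `tool_dicts` is just the request's tool definitions serialized to plain dicts for the chat template. A hypothetical example of its shape for one function tool, based on the Responses API's flat tool format (exact field set depends on the request model):

```python
# Hypothetical value of `tool_dicts` for a single function tool.
tool_dicts = [{
    "type": "function",
    "name": "get_weather",
    "description": "Look up the current weather for a city.",
    "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}]
```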
@@ -288,28 +324,82 @@ async def responses_full_generator(
             reasoning_content = None
             content = final_output.text
 
-        output = []
-        if reasoning_content:
-            reasoning_item = ResponseReasoningItem(
+        outputs = []
+        output = None
+        if self.tool_parser:
+            function_calls: list[FunctionCall] = []
+            if request.tool_choice and \
+                isinstance(request.tool_choice,
+                           ToolChoiceFunction):
+                # Forced function call.
+                function_calls.append(
+                    FunctionCall(name=request.tool_choice.name,
+                                 arguments=content))
+            elif request.tool_choice is None or request.tool_choice == "none":
+                pass
+            elif request.tool_choice == "required":
+                tool_calls = TypeAdapter(
+                    list[FunctionDefinition]).validate_json(content)
+                function_calls.extend([
+                    FunctionCall(name=tool_call.name,
+                                 arguments=json.dumps(tool_call.parameters,
+                                                      ensure_ascii=False))
+                    for tool_call in tool_calls
+                ])
+            elif request.tool_choice == "auto":
+                try:
+                    tool_parser = self.tool_parser(tokenizer)
+                except RuntimeError as e:
+                    logger.exception("Error in tool parser creation.")
+                    return self.create_error_response(str(e))
+                tool_call_info = tool_parser.extract_tool_calls(
+                    content if content is not None else "", request=request)
+                if tool_call_info is not None and tool_call_info.tools_called:
+                    function_calls.extend(
+                        FunctionCall(
+                            name=tool_call.function.name,
+                            arguments=tool_call.function.arguments,
+                        ) for tool_call in tool_call_info.tool_calls)
+            else:
+                logger.warning(
+                    "Unknown tool choice: %s. "
+                    "Using 'none' as the default tool choice.",
+                    request.tool_choice)
+            output = [
+                ResponseFunctionToolCall(
+                    id=f"fc_{random_fc_uuid()}",
+                    call_id=f"call_{random_uuid()}",
+                    type="function_call",
+                    status="completed",
+                    name=tool_call.name,
+                    arguments=tool_call.arguments,
+                ) for tool_call in function_calls
+            ] or None  # An empty list would mask the fallbacks below.
+        # If no tool call is generated, we still need to return an output.
+        if reasoning_content and output is None:
+            output = ResponseReasoningItem(
                 text=reasoning_content,
                 status=None,  # NOTE: Only the last output item has status.
             )
-            output.append(reasoning_item)
-        if content:
+        # If no tool call is generated, we still need to return an output.
+        if content and output is None:
             output_text = ResponseOutputText(
                 text=content,
                 annotations=[],  # TODO
                 type="output_text",
                 logprobs=None,  # TODO
             )
-            message = ResponseOutputMessage(
+            output = ResponseOutputMessage(
                 id=f"msg_{random_uuid()}",
                 content=[output_text],
                 role="assistant",
                 status="completed",
                 type="message",
             )
-            output.append(message)
+        if isinstance(output, list):
+            outputs.extend(output)
+        else:
+            outputs.append(output)
 
         # Calculate usage.
         assert final_res.prompt_token_ids is not None
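The `tool_choice == "required"` branch above expects the model to emit a bare JSON array of `{name, parameters}` objects, validated against `list[FunctionDefinition]`. A standalone sketch of that round trip, using the same imports the diff adds (the tool name and arguments are made up):

```python
import json

from pydantic import TypeAdapter

from vllm.entrypoints.openai.protocol import FunctionCall, FunctionDefinition

# Example model output under tool_choice="required" (hypothetical tool).
content = '[{"name": "get_weather", "parameters": {"city": "Paris"}}]'

tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
function_calls = [
    FunctionCall(name=tc.name,
                 arguments=json.dumps(tc.parameters, ensure_ascii=False))
    for tc in tool_calls
]
assert function_calls[0].arguments == '{"city": "Paris"}'
```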
@@ -330,7 +420,7 @@ async def responses_full_generator(
             sampling_params,
             model_name=model_name,
             created_time=created_time,
-            output=output,
+            output=outputs,
             status="completed",
             usage=usage,
         )
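Taken together, the change lets the Responses endpoint surface tool invocations as `function_call` output items instead of plain text. A sketch of exercising it end to end with the OpenAI client; the base URL, model name, and tool are placeholders, and the server is assumed to have been launched with tool calling enabled (e.g. `--enable-auto-tool-choice` plus a registered tool parser):

```python
from openai import OpenAI

# Placeholder endpoint and model; adjust for your deployment.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.responses.create(
    model="Qwen/Qwen2.5-7B-Instruct",  # example model with tool-call support
    input="What's the weather in Paris?",
    tools=[{
        "type": "function",
        "name": "get_weather",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }],
    tool_choice="auto",
)

# With this diff, tool calls arrive as ResponseFunctionToolCall items
# (ids prefixed fc_/call_) in response.output.
for item in response.output:
    if item.type == "function_call":
        print(item.name, item.arguments)
```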