
Commit dfac70c

[Frontend] OpenAI Responses API supports Tool/Function calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>

1 parent: 5bac613

File tree

- vllm/entrypoints/openai/protocol.py
- vllm/entrypoints/openai/serving_engine.py
- vllm/entrypoints/openai/serving_responses.py
- vllm/utils/__init__.py

4 files changed: +211 −28 lines

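With this commit, a Responses API request may carry `tools`, and vLLM will either constrain generation to a tool-call schema (`tool_choice` set to a named function or `"required"`) or parse tool calls out of free-form output (`tool_choice="auto"`). A minimal client-side sketch of the new behavior; the model name and endpoint are illustrative, and the server is assumed to be started with `--enable-auto-tool-choice` and a registered `--tool-call-parser`:

```python
# Hedged sketch: exercising Responses API tool calling against a local
# vLLM server. Model name and base_url are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.responses.create(
    model="Qwen/Qwen3-8B",  # any tool-capable model served by vLLM
    input="What is the weather like in Paris today?",
    tools=[{
        "type": "function",
        "name": "get_weather",
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"],
        },
    }],
    tool_choice="auto",
)

# Output items may now include function calls alongside messages.
for item in response.output:
    if item.type == "function_call":
        print(item.name, item.arguments)
```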
vllm/entrypoints/openai/protocol.py

Lines changed: 98 additions & 13 deletions
```diff
@@ -17,9 +17,11 @@
 from openai.types.chat.chat_completion_message import (
     Annotation as OpenAIAnnotation)
 # yapf: enable
-from openai.types.responses import (ResponseInputParam, ResponseOutputItem,
+from openai.types.responses import (ResponseFunctionToolCall,
+                                    ResponseInputParam, ResponseOutputItem,
                                     ResponseOutputMessage, ResponsePrompt,
-                                    ResponseStatus, ResponseTextConfig)
+                                    ResponseStatus, ResponseTextConfig,
+                                    ToolChoiceFunction)
 from openai.types.responses.response import ToolChoice
 from openai.types.responses.tool import Tool
 from openai.types.shared import Metadata, Reasoning
```
```diff
@@ -315,16 +317,7 @@ def to_sampling_params(
         top_p = default_sampling_params.get(
             "top_p", self._DEFAULT_SAMPLING_PARAMS["top_p"])
 
-        # Structured output
-        guided_decoding = None
-        if self.text is not None and self.text.format is not None:
-            response_format = self.text.format
-            if response_format.type == "json_schema":
-                guided_decoding = GuidedDecodingParams.from_optional(
-                    json=response_format.schema_)
-            elif response_format.type == "json_object":
-                raise NotImplementedError("json_object is not supported")
-
+        guided_decoding = self._get_guided_decoding()
         # TODO: add more parameters
         return SamplingParams.from_optional(
             temperature=temperature,
```
```diff
@@ -351,6 +344,97 @@ def validate_prompt(cls, data):
             raise ValueError("prompt template is not supported")
         return data
 
+    def _get_guided_json_from_tool(
+            self) -> Optional[Union[str, dict, BaseModel]]:
+        print(
+            f"Tool choice: {self.tool_choice}, type: {type(self.tool_choice)}")
+        # user has chosen to use a named tool
+        if type(self.tool_choice) is ToolChoiceFunction:
+            tool_name = self.tool_choice.name
+            tools = {tool.name: tool for tool in \
+                     self.tools if tool.type == "function"}
+            if tool_name not in tools:
+                raise ValueError(
+                    f"Tool '{tool_name}' has not been passed in `tools`.")
+            tool = tools[tool_name]
+            print(f"Using tool '{tool_name}' for guided json decoding.")
+            print(f"Tool parameters: {tool.parameters}")
+            return tool.parameters
+
+        if self.tool_choice == "required":
+            # Pydantic schema generation cannot be used since the JSON schema
+            # has to be constructed for a specific instantiation of a tool list
+            # so that parameters of a function are correctly generated
+            # based on the chosen function name
+            def get_tool_schema(tool: ToolChoiceFunction) -> dict:
+                return {
+                    "properties": {
+                        "name": {
+                            "type": "string",
+                            "enum": [tool.name]
+                        },
+                        # parameters are always generated as '{}' in the final
+                        # output if they are missing from the request
+                        # (i.e. are None or '{}') so the schema is
+                        # updated to produce an empty object in that case
+                        "parameters": tool.parameters if tool.parameters else {
+                            "type": "object",
+                            "properties": {}
+                        }
+                    },
+                    "required": ["name", "parameters"]
+                }
+
+            def get_tool_schema_defs(tools: list[ToolChoiceFunction]) -> dict:
+                all_defs = dict[str, dict[str, Any]]()
+                for tool in tools:
+                    if tool.parameters is None:
+                        continue
+                    defs = tool.parameters.pop("$defs", {})
+                    for def_name, def_schema in defs.items():
+                        if def_name in all_defs and all_defs[
+                                def_name] != def_schema:
+                            raise ValueError(
+                                f"Tool definition '{def_name}' has "
+                                "multiple schemas, which is not "
+                                "supported.")
+                        else:
+                            all_defs[def_name] = def_schema
+                return all_defs
+
+            json_schema = {
+                "type": "array",
+                "minItems": 1,
+                "items": {
+                    "type": "object",
+                    "anyOf": [get_tool_schema(tool) for tool in self.tools]
+                }
+            }
+            json_schema_defs = get_tool_schema_defs(self.tools)
+            if json_schema_defs:
+                json_schema["$defs"] = json_schema_defs
+            print("Using tool choice 'required' for guided json decoding.")
+            print(f"JSON schema: {json_schema}")
+            return json_schema
+
+        return None
+
+    def _get_guided_decoding(self) -> Optional[GuidedDecodingParams]:
+        # Structured output
+        guided_decoding = None
+        if self.text is not None and self.text.format is not None:
+            response_format = self.text.format
+            if response_format.type == "json_schema":
+                guided_decoding = GuidedDecodingParams.from_optional(
+                    json=response_format.schema_)
+            elif response_format.type == "json_object":
+                raise NotImplementedError("json_object is not supported")
+        # Function call
+        elif self.tool_choice != "none" or self.tools is not None:
+            guided_decoding = GuidedDecodingParams.from_optional(
+                json=self._get_guided_json_from_tool())
+        return guided_decoding
+
 
 class ChatCompletionRequest(OpenAIBaseModel):
     # Ordered by official OpenAI API documentation
```
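To make the `"required"` branch concrete: the generated guided-decoding schema forces the model to emit a non-empty JSON array of `{name, parameters}` objects, where `name` is pinned to one of the declared tools and `parameters` must match that tool's schema. A standalone sketch of the construction (plain dicts stand in for the request's tool objects; not part of the diff):

```python
import json

# Two illustrative tools in the flat Responses-API function-tool shape;
# the second has no parameters to show the empty-object fallback.
tools = [
    {"name": "get_weather",
     "parameters": {"type": "object",
                    "properties": {"location": {"type": "string"}},
                    "required": ["location"]}},
    {"name": "get_time", "parameters": None},
]


def get_tool_schema(tool: dict) -> dict:
    # Mirrors the nested helper above: pin "name" to the tool's name and
    # fall back to an empty object schema when parameters are missing.
    return {
        "properties": {
            "name": {"type": "string", "enum": [tool["name"]]},
            "parameters": tool["parameters"]
            or {"type": "object", "properties": {}},
        },
        "required": ["name", "parameters"],
    }


json_schema = {
    "type": "array",
    "minItems": 1,
    "items": {"type": "object",
              "anyOf": [get_tool_schema(t) for t in tools]},
}
print(json.dumps(json_schema, indent=2))
```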
```diff
@@ -1673,7 +1757,8 @@ class ResponsesResponse(OpenAIBaseModel):
     metadata: Optional[Metadata] = None
     model: str
     object: Literal["response"] = "response"
-    output: list[Union[ResponseOutputMessage, ResponseReasoningItem]]
+    output: list[Union[ResponseOutputMessage, ResponseReasoningItem,
+                       ResponseFunctionToolCall]]
     parallel_tool_calls: bool
     temperature: float
     tool_choice: ToolChoice
```

vllm/entrypoints/openai/serving_engine.py

Lines changed: 5 additions & 2 deletions
```diff
@@ -899,8 +899,11 @@ async def _preprocess_chat(
                 request, "tool_choice") and request.tool_choice != "none")
 
             if should_parse_tools:
-                if not isinstance(request, ChatCompletionRequest):
-                    msg = "Tool usage is only supported for Chat Completions API"
+                if not isinstance(request,
+                                  ChatCompletionRequest) and not isinstance(
+                                      request, ResponsesRequest):
+                    msg = "Tool usage is only supported for Chat Completions API " \
+                          "and Responses API requests."
                     raise NotImplementedError(msg)
 
                 request = tool_parser(tokenizer).adjust_request(  # type: ignore
```

vllm/entrypoints/openai/serving_responses.py

Lines changed: 103 additions & 13 deletions
```diff
@@ -2,14 +2,18 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import asyncio
+import json
 import time
 from collections.abc import AsyncGenerator, AsyncIterator
 from http import HTTPStatus
 from typing import Callable, Final, Optional, Union
 
 import jinja2
 from fastapi import Request
-from openai.types.responses import ResponseOutputMessage, ResponseOutputText
+from openai.types.responses import (ResponseFunctionToolCall,
+                                    ResponseOutputMessage, ResponseOutputText,
+                                    ToolChoiceFunction)
+from pydantic import TypeAdapter
 
 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
```
```diff
@@ -18,7 +22,8 @@
 from vllm.entrypoints.logger import RequestLogger
 # yapf conflicts with isort for this block
 # yapf: disable
-from vllm.entrypoints.openai.protocol import (ErrorResponse,
+from vllm.entrypoints.openai.protocol import (ErrorResponse, FunctionCall,
+                                              FunctionDefinition,
                                               PromptTokenUsageInfo,
                                               RequestResponseMetadata,
                                               ResponseReasoningItem,
```
```diff
@@ -27,12 +32,13 @@
 # yapf: enable
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
+from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
 from vllm.logger import init_logger
 from vllm.outputs import RequestOutput
 from vllm.reasoning import ReasoningParser, ReasoningParserManager
 from vllm.sampling_params import SamplingParams
 from vllm.transformers_utils.tokenizer import AnyTokenizer
-from vllm.utils import random_uuid
+from vllm.utils import random_fc_uuid, random_uuid
 
 logger = init_logger(__name__)
 
```
```diff
@@ -63,7 +69,18 @@ def __init__(
             return_tokens_as_token_ids=return_tokens_as_token_ids,
             enable_force_include_usage=enable_force_include_usage,
         )
-
+        self.enable_auto_tools = enable_auto_tools
+        self.expand_tools_even_if_tool_choice_none = (
+            expand_tools_even_if_tool_choice_none)
+        self.tool_parser: Optional[Callable[[AnyTokenizer], ToolParser]] = None
+        if self.enable_auto_tools:
+            try:
+                self.tool_parser = ToolParserManager.get_tool_parser(
+                    tool_parser)
+            except Exception as e:
+                raise TypeError("Error: --enable-auto-tool-choice requires "
+                                f"tool_parser:'{tool_parser}' which has not "
+                                "been registered") from e
         self.chat_template = chat_template
         self.chat_template_content_format: Final = chat_template_content_format
```

```diff
@@ -139,11 +156,30 @@ async def create_responses(
             ) = self._maybe_get_adapters(request)
             model_name = self._get_model_name(request.model, lora_request)
             tokenizer = await self.engine_client.get_tokenizer(lora_request)
-
+            if request.tools is None:
+                tool_dicts = None
+            elif (request.tool_choice == "none"
+                  and not self.expand_tools_even_if_tool_choice_none):
+                if len(request.tools) > 0:
+                    logger.warning_once(
+                        "Tools are specified but tool_choice is set to 'none' "
+                        "and --expand-tools-even-if-tool-choice-none is not "
+                        "enabled. Tool definitions will be excluded from the "
+                        "prompt. This behavior will change in vLLM v0.10 where "
+                        "tool definitions will be included by default even "
+                        "with tool_choice='none'. To adopt the new behavior "
+                        "now, use --expand-tools-even-if-tool-choice-none. "
+                        "To suppress this warning, either remove tools from "
+                        "the request or set tool_choice to a different value.")
+                tool_dicts = None
+            else:
+                tool_dicts = [tool.model_dump() for tool in request.tools]
             _, request_prompts, engine_prompts = await self._preprocess_chat(
                 request,
                 tokenizer,
                 messages,
+                tool_dicts=tool_dicts,
+                tool_parser=self.tool_parser,
                 chat_template=self.chat_template,
                 chat_template_content_format=self.chat_template_content_format,
             )
```
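For reference, the `tool_dicts` built here are plain serializations of the request's tool objects, which `_preprocess_chat` renders into the chat template. A hedged sketch of that serialization, assuming openai-python's `FunctionTool` type (values illustrative):

```python
# Hedged sketch: how a Responses-API function tool serializes before
# being rendered into the chat template.
from openai.types.responses import FunctionTool

tool = FunctionTool(
    type="function",
    name="get_weather",
    parameters={
        "type": "object",
        "properties": {"location": {"type": "string"}},
        "required": ["location"],
    },
    strict=True,
)
print(tool.model_dump())
# -> {'name': 'get_weather', 'parameters': {...}, 'strict': True,
#     'type': 'function', 'description': None}
```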
```diff
@@ -287,28 +323,82 @@ async def responses_full_generator(
             reasoning_content = None
             content = final_output.text
 
-        output = []
-        if reasoning_content:
-            reasoning_item = ResponseReasoningItem(
+        outputs = []
+        output = None
+        if self.tool_parser:
+            function_calls: list[FunctionCall] = []
+            if request.tool_choice and \
+                isinstance(request.tool_choice,
+                           ToolChoiceFunction):
+                # Forced Function Call
+                function_calls.append(
+                    FunctionCall(name=request.tool_choice.name,
+                                 arguments=content))
+            elif request.tool_choice is None or request.tool_choice == "none":
+                pass
+            elif request.tool_choice == "required":
+                tool_calls = TypeAdapter(
+                    list[FunctionDefinition]).validate_json(content)
+                function_calls.extend([
+                    FunctionCall(name=tool_call.name,
+                                 arguments=json.dumps(tool_call.parameters,
+                                                      ensure_ascii=False))
+                    for tool_call in tool_calls
+                ])
+            elif request.tool_choice == "auto":
+                try:
+                    tool_parser = self.tool_parser(tokenizer)
+                except RuntimeError as e:
+                    logger.exception("Error in tool parser creation.")
+                    return self.create_error_response(str(e))
+                tool_call_info = tool_parser.extract_tool_calls(
+                    content if content is not None else "", request=request)
+                if tool_call_info is not None and tool_call_info.tools_called:
+                    function_calls.extend(
+                        FunctionCall(
+                            name=tool_call.function.name,
+                            arguments=tool_call.function.arguments,
+                        ) for tool_call in tool_call_info.tool_calls)
+            else:
+                logger.warning(
+                    "Unknown tool choice: %s. "
+                    "Using 'none' as the default tool choice.",
+                    request.tool_choice)
+            output = [
+                ResponseFunctionToolCall(
+                    id=f"fc_{random_fc_uuid()}",
+                    call_id=f"call_{random_uuid()}",
+                    type="function_call",
+                    status="completed",
+                    name=tool_call.name,
+                    arguments=tool_call.arguments,
+                ) for tool_call in function_calls
+            ]
+        # If no tool call is generated, we still need to return an output.
+        if reasoning_content and output is None:
+            output = ResponseReasoningItem(
                 text=reasoning_content,
                 status=None,  # NOTE: Only the last output item has status.
             )
-            output.append(reasoning_item)
-        if content:
+        # If no tool call is generated, we still need to return an output.
+        if content and output is None:
             output_text = ResponseOutputText(
                 text=content,
                 annotations=[],  # TODO
                 type="output_text",
                 logprobs=None,  # TODO
             )
-            message = ResponseOutputMessage(
+            output = ResponseOutputMessage(
                 id=f"msg_{random_uuid()}",
                 content=[output_text],
                 role="assistant",
                 status="completed",
                 type="message",
             )
-            output.append(message)
+        if isinstance(output, list):
+            outputs.extend(output)
+        else:
+            outputs.append(output)
 
         # Calculate usage.
         assert final_res.prompt_token_ids is not None
```
```diff
@@ -329,7 +419,7 @@ async def responses_full_generator(
             sampling_params,
             model_name=model_name,
             created_time=created_time,
-            output=output,
+            output=outputs,
             status="completed",
             usage=usage,
         )
```
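In the `tool_choice == "required"` branch above, the constrained output is exactly the JSON array produced by the guided-decoding schema in protocol.py, so it can be parsed with a pydantic `TypeAdapter`. A standalone sketch of that round trip, with a stand-in `FunctionDefinition` and an illustrative payload:

```python
import json
from typing import Any, Optional

from pydantic import BaseModel, TypeAdapter


class FunctionDefinition(BaseModel):
    """Stand-in for vllm.entrypoints.openai.protocol.FunctionDefinition."""
    name: str
    parameters: Optional[dict[str, Any]] = None


# Illustrative model output under the 'required' guided-decoding schema.
content = '[{"name": "get_weather", "parameters": {"location": "Paris"}}]'

tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
for tool_call in tool_calls:
    arguments = json.dumps(tool_call.parameters, ensure_ascii=False)
    print(tool_call.name, arguments)  # get_weather {"location": "Paris"}
```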

vllm/utils/__init__.py

Lines changed: 5 additions & 0 deletions
```diff
@@ -510,6 +510,11 @@ def random_uuid() -> str:
     return str(uuid.uuid4().hex)
 
 
+def random_fc_uuid() -> str:
+    """Generates a random UUID for function call tool outputs."""
+    return str(os.urandom(24).hex())
+
+
 class AsyncMicrobatchTokenizer:
     """Asynchronous tokenizer with micro-batching.
 
```
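The two helpers produce differently sized identifiers for the two ID fields on a function-call output item (`id=f"fc_{random_fc_uuid()}"` versus `call_id=f"call_{random_uuid()}"`). Inlined for illustration:

```python
import os
import uuid

print(f"fc_{os.urandom(24).hex()}")  # id: "fc_" + 48 hex chars
print(f"call_{uuid.uuid4().hex}")    # call_id: "call_" + 32 hex chars
```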