From c7efd014f9713e7c9d4b4a099a56161a341bace2 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Sun, 13 Jul 2025 07:59:15 +0000 Subject: [PATCH 01/14] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/protocol.py | 113 +++++++++++++++--- vllm/entrypoints/openai/serving_engine.py | 7 +- vllm/entrypoints/openai/serving_responses.py | 116 ++++++++++++++++--- vllm/utils/__init__.py | 5 + 4 files changed, 212 insertions(+), 29 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index f17faa23d01..8c3da89f8ad 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -17,9 +17,11 @@ from openai.types.chat.chat_completion_message import ( Annotation as OpenAIAnnotation) # yapf: enable -from openai.types.responses import (ResponseInputParam, ResponseOutputItem, +from openai.types.responses import (ResponseFunctionToolCall, + ResponseInputParam, ResponseOutputItem, ResponseOutputMessage, ResponsePrompt, - ResponseStatus, ResponseTextConfig) + ResponseStatus, ResponseTextConfig, + ToolChoiceFunction) from openai.types.responses.response import ToolChoice from openai.types.responses.tool import Tool from openai.types.shared import Metadata, Reasoning @@ -324,16 +326,7 @@ def to_sampling_params( top_p = default_sampling_params.get( "top_p", self._DEFAULT_SAMPLING_PARAMS["top_p"]) - # Structured output - guided_decoding = None - if self.text is not None and self.text.format is not None: - response_format = self.text.format - if response_format.type == "json_schema": - guided_decoding = GuidedDecodingParams.from_optional( - json=response_format.schema_) - elif response_format.type == "json_object": - raise NotImplementedError("json_object is not supported") - + guided_decoding = self._get_guided_decoding() # TODO: add more parameters return SamplingParams.from_optional( temperature=temperature, @@ -360,7 +353,7 @@ def validate_prompt(cls, data): raise ValueError("prompt template is not supported") return data - @model_validator(mode="before") + @model_validator(mode="before") def check_cache_salt_support(cls, data): if data.get("cache_salt") is not None: if not envs.VLLM_USE_V1: @@ -373,6 +366,97 @@ def check_cache_salt_support(cls, data): "non-empty string if provided.") return data + def _get_guided_json_from_tool( + self) -> Optional[Union[str, dict, BaseModel]]: + print( + f"Tool choice: {self.tool_choice}, type: {type(self.tool_choice)}") + # user has chosen to use a named tool + if type(self.tool_choice) is ToolChoiceFunction: + tool_name = self.tool_choice.name + tools = {tool.name: tool for tool in \ + self.tools if tool.type == "function"} + if tool_name not in tools: + raise ValueError( + f"Tool '{tool_name}' has not been passed in `tools`.") + tool = tools[tool_name] + print(f"Using tool '{tool_name}' for guided json decoding.") + print(f"Tool parameters: {tool.parameters}") + return tool.parameters + + if self.tool_choice == "required": + # Pydantic schema generation cannot be used since the JSON schema + # has to be constructed for a specific instantiation of a tool list + # so that parameters of a function are correctly generated + # based on the chosen function name + def get_tool_schema(tool: ToolChoiceFunction) -> dict: + return { + "properties": { + "name": { + "type": "string", + "enum": [tool.name] + }, + # parameters are always generated as '{}' in the final + # output if they are missing from the request + # (i.e. 
are None or '{}') so the schema is + # updated to produce an empty object in that case + "parameters": tool.parameters if tool.parameters else { + "type": "object", + "properties": {} + } + }, + "required": ["name", "parameters"] + } + + def get_tool_schema_defs(tools: list[ToolChoiceFunction]) -> dict: + all_defs = dict[str, dict[str, Any]]() + for tool in tools: + if tool.parameters is None: + continue + defs = tool.parameters.pop("$defs", {}) + for def_name, def_schema in defs.items(): + if def_name in all_defs and all_defs[ + def_name] != def_schema: + raise ValueError( + f"Tool definition '{def_name}' has " + "multiple schemas, which is not " + "supported.") + else: + all_defs[def_name] = def_schema + return all_defs + + json_schema = { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "anyOf": [get_tool_schema(tool) for tool in self.tools] + } + } + json_schema_defs = get_tool_schema_defs(self.tools) + if json_schema_defs: + json_schema["$defs"] = json_schema_defs + print("Using tool choice 'required' for guided json decoding.") + print(f"JSON schema: {json_schema}") + return json_schema + + return None + + def _get_guided_decoding(self) -> Optional[GuidedDecodingParams]: + # Structured output + guided_decoding = None + if self.text is not None and self.text.format is not None: + response_format = self.text.format + if response_format.type == "json_schema": + guided_decoding = GuidedDecodingParams.from_optional( + json=response_format.schema_) + elif response_format.type == "json_object": + raise NotImplementedError("json_object is not supported") + # Function call + elif self.tool_choice != "none" or self.tools is not None: + guided_decoding = GuidedDecodingParams.from_optional( + json=self._get_guided_json_from_tool()) + return guided_decoding + class ChatCompletionRequest(OpenAIBaseModel): # Ordered by official OpenAI API documentation @@ -1719,7 +1803,8 @@ class ResponsesResponse(OpenAIBaseModel): metadata: Optional[Metadata] = None model: str object: Literal["response"] = "response" - output: list[Union[ResponseOutputMessage, ResponseReasoningItem]] + output: list[Union[ResponseOutputMessage, ResponseReasoningItem, + ResponseFunctionToolCall]] parallel_tool_calls: bool temperature: float tool_choice: ToolChoice diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 462317a0878..4530a79f747 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -908,8 +908,11 @@ async def _preprocess_chat( request, "tool_choice") and request.tool_choice != "none") if should_parse_tools: - if not isinstance(request, ChatCompletionRequest): - msg = "Tool usage is only supported for Chat Completions API" + if not isinstance(request, + ChatCompletionRequest) and not isinstance( + request, ResponsesRequest): + msg = "Tool usage is only supported for Chat Completions API " \ + "and Responses API requests." 
raise NotImplementedError(msg) request = tool_parser(tokenizer).adjust_request( # type: ignore diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index f7bde6e243b..f95ce28eab7 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import asyncio +import json import time from collections.abc import AsyncGenerator, AsyncIterator from http import HTTPStatus @@ -9,7 +10,10 @@ import jinja2 from fastapi import Request -from openai.types.responses import ResponseOutputMessage, ResponseOutputText +from openai.types.responses import (ResponseFunctionToolCall, + ResponseOutputMessage, ResponseOutputText, + ToolChoiceFunction) +from pydantic import TypeAdapter from vllm.config import ModelConfig from vllm.engine.protocol import EngineClient @@ -18,7 +22,8 @@ from vllm.entrypoints.logger import RequestLogger # yapf conflicts with isort for this block # yapf: disable -from vllm.entrypoints.openai.protocol import (ErrorResponse, +from vllm.entrypoints.openai.protocol import (ErrorResponse, FunctionCall, + FunctionDefinition, PromptTokenUsageInfo, RequestResponseMetadata, ResponseReasoningItem, @@ -27,12 +32,13 @@ # yapf: enable from vllm.entrypoints.openai.serving_engine import OpenAIServing from vllm.entrypoints.openai.serving_models import OpenAIServingModels +from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager from vllm.logger import init_logger from vllm.outputs import RequestOutput from vllm.reasoning import ReasoningParser, ReasoningParserManager from vllm.sampling_params import SamplingParams from vllm.transformers_utils.tokenizer import AnyTokenizer -from vllm.utils import random_uuid +from vllm.utils import random_fc_uuid, random_uuid logger = init_logger(__name__) @@ -63,7 +69,18 @@ def __init__( return_tokens_as_token_ids=return_tokens_as_token_ids, enable_force_include_usage=enable_force_include_usage, ) - + self.enable_auto_tools = enable_auto_tools + self.expand_tools_even_if_tool_choice_none = ( + expand_tools_even_if_tool_choice_none) + self.tool_parser: Optional[Callable[[AnyTokenizer], ToolParser]] = None + if self.enable_auto_tools: + try: + self.tool_parser = ToolParserManager.get_tool_parser( + tool_parser) + except Exception as e: + raise TypeError("Error: --enable-auto-tool-choice requires " + f"tool_parser:'{tool_parser}' which has not " + "been registered") from e self.chat_template = chat_template self.chat_template_content_format: Final = chat_template_content_format @@ -139,11 +156,30 @@ async def create_responses( ) = self._maybe_get_adapters(request) model_name = self._get_model_name(request.model, lora_request) tokenizer = await self.engine_client.get_tokenizer(lora_request) - + if request.tools is None: + tool_dicts = None + elif (request.tool_choice == "none" + and not self.expand_tools_even_if_tool_choice_none): + if len(request.tools) > 0: + logger.warning_once( + "Tools are specified but tool_choice is set to 'none' " + "and --expand-tools-even-if-tool-choice-none is not " + "enabled. Tool definitions will be excluded from the " + "prompt. This behavior will change in vLLM v0.10 where " + "tool definitions will be included by default even " + "with tool_choice='none'. To adopt the new behavior " + "now, use --expand-tools-even-if-tool-choice-none. 
" + "To suppress this warning, either remove tools from " + "the request or set tool_choice to a different value.") + tool_dicts = None + else: + tool_dicts = [tool.model_dump() for tool in request.tools] _, request_prompts, engine_prompts = await self._preprocess_chat( request, tokenizer, messages, + tool_dicts=tool_dicts, + tool_parser=self.tool_parser, chat_template=self.chat_template, chat_template_content_format=self.chat_template_content_format, ) @@ -287,28 +323,82 @@ async def responses_full_generator( reasoning_content = None content = final_output.text - output = [] - if reasoning_content: - reasoning_item = ResponseReasoningItem( + outputs = [] + output = None + if self.tool_parser: + function_calls: list[FunctionCall] = [] + if request.tool_choice and \ + isinstance(request.tool_choice, + ToolChoiceFunction): + # Forced Function Call + function_calls.append( + FunctionCall(name=request.tool_choice.name, + arguments=content)) + elif request.tool_choice is None or request.tool_choice == "none": + pass + elif request.tool_choice == "required": + tool_calls = TypeAdapter( + list[FunctionDefinition]).validate_json(content) + function_calls.extend([ + FunctionCall(name=tool_call.name, + arguments=json.dumps(tool_call.parameters, + ensure_ascii=False)) + for tool_call in tool_calls + ]) + elif request.tool_choice == "auto": + try: + tool_parser = self.tool_parser(tokenizer) + except RuntimeError as e: + logger.exception("Error in tool parser creation.") + return self.create_error_response(str(e)) + tool_call_info = tool_parser.extract_tool_calls( + content if content is not None else "", request=request) + if tool_call_info is not None and tool_call_info.tools_called: + function_calls.extend( + FunctionCall( + name=tool_call.function.name, + arguments=tool_call.function.arguments, + ) for tool_call in tool_call_info.tool_calls) + else: + logger.warning( + "Unknown tool choice: %s. " + "Using 'none' as the default tool choice.", + request.tool_choice) + output = [ + ResponseFunctionToolCall( + id=f"fc_{random_fc_uuid()}", + call_id=f"call_{random_uuid()}", + type="function_call", + status="completed", + name=tool_call.name, + arguments=tool_call.arguments, + ) for tool_call in function_calls + ] + # If no tool call is generated, we still need to return an output. + if reasoning_content and output is None: + output = ResponseReasoningItem( text=reasoning_content, status=None, # NOTE: Only the last output item has status. ) - output.append(reasoning_item) - if content: + # If no tool call is generated, we still need to return an output. + if content and output is None: output_text = ResponseOutputText( text=content, annotations=[], # TODO type="output_text", logprobs=None, # TODO ) - message = ResponseOutputMessage( + output = ResponseOutputMessage( id=f"msg_{random_uuid()}", content=[output_text], role="assistant", status="completed", type="message", ) - output.append(message) + if isinstance(output, list): + outputs.extend(output) + else: + outputs.append(output) # Calculate usage. 
assert final_res.prompt_token_ids is not None @@ -329,7 +419,7 @@ async def responses_full_generator( sampling_params, model_name=model_name, created_time=created_time, - output=output, + output=outputs, status="completed", usage=usage, ) diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index c18f1d12ba9..d09edfb5efe 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -510,6 +510,11 @@ def random_uuid() -> str: return str(uuid.uuid4().hex) +def random_fc_uuid() -> str: + """Generates a random UUID for function call tool outputs.""" + return str(os.urandom(24).hex()) + + class AsyncMicrobatchTokenizer: """Asynchronous tokenizer with micro-batching. From ad0994ebd3348e290cc21215e1d86a5cc79f8c26 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Sun, 13 Jul 2025 08:11:25 +0000 Subject: [PATCH 02/14] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/protocol.py | 6 +---- vllm/entrypoints/openai/serving_responses.py | 22 ++++++++++--------- .../tool_parsers/abstract_tool_parser.py | 13 ++++++----- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 8c3da89f8ad..3ed59ea6400 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -368,10 +368,8 @@ def check_cache_salt_support(cls, data): def _get_guided_json_from_tool( self) -> Optional[Union[str, dict, BaseModel]]: - print( - f"Tool choice: {self.tool_choice}, type: {type(self.tool_choice)}") # user has chosen to use a named tool - if type(self.tool_choice) is ToolChoiceFunction: + if isinstance(self.tool_choice, ToolChoiceFunction): tool_name = self.tool_choice.name tools = {tool.name: tool for tool in \ self.tools if tool.type == "function"} @@ -435,8 +433,6 @@ def get_tool_schema_defs(tools: list[ToolChoiceFunction]) -> dict: json_schema_defs = get_tool_schema_defs(self.tools) if json_schema_defs: json_schema["$defs"] = json_schema_defs - print("Using tool choice 'required' for guided json decoding.") - print(f"JSON schema: {json_schema}") return json_schema return None diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index f95ce28eab7..dad551115d9 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -337,6 +337,7 @@ async def responses_full_generator( elif request.tool_choice is None or request.tool_choice == "none": pass elif request.tool_choice == "required": + assert content is not None tool_calls = TypeAdapter( list[FunctionDefinition]).validate_json(content) function_calls.extend([ @@ -364,16 +365,17 @@ async def responses_full_generator( "Unknown tool choice: %s. " "Using 'none' as the default tool choice.", request.tool_choice) - output = [ - ResponseFunctionToolCall( - id=f"fc_{random_fc_uuid()}", - call_id=f"call_{random_uuid()}", - type="function_call", - status="completed", - name=tool_call.name, - arguments=tool_call.arguments, - ) for tool_call in function_calls - ] + if function_calls: + output = [ + ResponseFunctionToolCall( + id=f"fc_{random_fc_uuid()}", + call_id=f"call_{random_uuid()}", + type="function_call", + status="completed", + name=tool_call.name, + arguments=tool_call.arguments, + ) for tool_call in function_calls + ] # If no tool call is generated, we still need to return an output. 
if reasoning_content and output is None: output = ResponseReasoningItem( diff --git a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py index 02aeab61363..a781d1bb282 100644 --- a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py @@ -8,7 +8,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, DeltaMessage, - ExtractedToolCallInformation) + ExtractedToolCallInformation, + ResponsesRequest) from vllm.logger import init_logger from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.utils import import_from_path, is_list_of @@ -39,15 +40,17 @@ def vocab(self) -> dict[str, int]: return self.model_tokenizer.get_vocab() def adjust_request( - self, request: ChatCompletionRequest) -> ChatCompletionRequest: + self, request: Union[ChatCompletionRequest, ResponsesRequest] + ) -> ChatCompletionRequest: """ Static method that used to adjust the request parameters. """ return request def extract_tool_calls( - self, model_output: str, - request: ChatCompletionRequest) -> ExtractedToolCallInformation: + self, model_output: str, request: Union[ChatCompletionRequest, + ResponsesRequest] + ) -> ExtractedToolCallInformation: """ Static method that should be implemented for extracting tool calls from a complete model-generated string. @@ -66,7 +69,7 @@ def extract_tool_calls_streaming( previous_token_ids: Sequence[int], current_token_ids: Sequence[int], delta_token_ids: Sequence[int], - request: ChatCompletionRequest, + request: Union[ChatCompletionRequest, ResponsesRequest], ) -> Union[DeltaMessage, None]: """ Instance method that should be implemented for extracting tool calls From c5e7d3105892a132969e883af330cfa4bd7f9bec Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Sun, 13 Jul 2025 08:37:48 +0000 Subject: [PATCH 03/14] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- .../entrypoints/openai/responses/conftest.py | 7 +- .../openai/responses/test_function_call.py | 145 ++++++++++++++++++ vllm/entrypoints/openai/serving_engine.py | 2 +- vllm/entrypoints/openai/serving_responses.py | 26 +++- .../tool_parsers/abstract_tool_parser.py | 4 +- .../tool_parsers/deepseekv3_tool_parser.py | 10 +- .../granite_20b_fc_tool_parser.py | 13 +- .../tool_parsers/granite_tool_parser.py | 13 +- .../openai/tool_parsers/hermes_tool_parser.py | 10 +- .../tool_parsers/internlm2_tool_parser.py | 10 +- .../openai/tool_parsers/jamba_tool_parser.py | 16 +- .../tool_parsers/kimi_k2_tool_parser.py | 12 +- .../llama4_pythonic_tool_parser.py | 13 +- .../openai/tool_parsers/llama_tool_parser.py | 13 +- .../tool_parsers/minimax_tool_parser.py | 10 +- .../tool_parsers/mistral_tool_parser.py | 11 +- .../tool_parsers/phi4mini_tool_parser.py | 10 +- .../tool_parsers/pythonic_tool_parser.py | 13 +- .../openai/tool_parsers/xlam_tool_parser.py | 13 +- 19 files changed, 254 insertions(+), 97 deletions(-) create mode 100644 tests/v1/entrypoints/openai/responses/test_function_call.py diff --git a/tests/v1/entrypoints/openai/responses/conftest.py b/tests/v1/entrypoints/openai/responses/conftest.py index 2dcdda04ecb..471f016227a 100644 --- a/tests/v1/entrypoints/openai/responses/conftest.py +++ b/tests/v1/entrypoints/openai/responses/conftest.py @@ -15,8 +15,13 @@ def default_server_args(): "--max-model-len", "8192", "--enforce-eager", # For faster startup. 
+ "--enable-auto-tool-choice", + "--guided-decoding-backend", + "xgrammar", + "--tool-call-parser", + "hermes", "--reasoning-parser", - "deepseek_r1", + "qwen3", ] diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py new file mode 100644 index 00000000000..afa2b2823c3 --- /dev/null +++ b/tests/v1/entrypoints/openai/responses/test_function_call.py @@ -0,0 +1,145 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import json + +import openai # use the official client for correctness check +import pytest + + +MODEL_NAME = "Qwen/Qwen3-0.6B" + +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.parametrize("tool_choice", ["auto", "required"]) +async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str, + tool_choice: str): + tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": + "The city to find the weather for, e.g. 'Vienna'", + "default": "Vienna", + }, + "country": { + "type": + "string", + "description": + "The country that the city is in, e.g. 'Austria'", + }, + "unit": { + "type": "string", + "description": + "The unit to fetch the temperature in", + "enum": ["celsius", "fahrenheit"], + }, + "options": { + "$ref": "#/$defs/WeatherOptions", + "description": + "Optional parameters for weather query", + }, + }, + "required": ["country", "unit"], + "$defs": { + "WeatherOptions": { + "title": "WeatherOptions", + "type": "object", + "additionalProperties": False, + "properties": { + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "default": "celsius", + "description": "Temperature unit", + "title": "Temperature Unit", + }, + "include_forecast": { + "type": "boolean", + "default": False, + "description": + "Whether to include a 24-hour forecast", + "title": "Include Forecast", + }, + "language": { + "type": "string", + "default": "zh-CN", + "description": "Language of the response", + "title": "Language", + "enum": ["zh-CN", "en-US", "ja-JP"], + }, + }, + }, + }, + }, + }, + }, + { + "type": "function", + "function": { + "name": "get_forecast", + "description": "Get the weather forecast for a given location", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": + "The city to get the forecast for, e.g. 'Vienna'", + "default": "Vienna", + }, + "country": { + "type": + "string", + "description": + "The country that the city is in, e.g. 
'Austria'",
+                        },
+                        "days": {
+                            "type":
+                            "integer",
+                            "description":
+                            "Number of days to get the forecast for (1-7)",
+                        },
+                        "unit": {
+                            "type": "string",
+                            "description":
+                            "The unit to fetch the temperature in",
+                            "enum": ["celsius", "fahrenheit"],
+                        },
+                    },
+                    "required": ["country", "days", "unit"],
+                },
+            },
+        },
+    ]
+
+    prompt = [{
+        "role":
+        "user",
+        "content":
+        "Can you tell me what the current weather is in Berlin and the "\
+        "forecast for the next 5 days, in fahrenheit?",
+    },]
+    response = await client.responses.create(
+        model=model_name,
+        input=prompt,
+        tools=tools,
+        tool_choice=tool_choice,
+    )
+
+    assert len(response.output) >= 1
+    tool_call = response.output[0]
+
+    assert tool_call.type == "function_call"
+    assert json.loads(tool_call.arguments) is not None
+
+@pytest.mark.asyncio
+async def test_named_tool_use(client: openai.AsyncOpenAI, sample_json_schema):
+    pass
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 4530a79f747..8a1a3b80da7 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -898,7 +898,7 @@ async def _preprocess_chat(
             model_config=model_config,
             **_chat_template_kwargs,
         )
-
+        print(f"Request prompt: {request_prompt}")
         mm_data = await mm_data_future
 
         # tool parsing is done only if a tool_parser has been set and if
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index dad551115d9..a7bc94691e7 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -308,7 +308,8 @@ async def responses_full_generator(
         assert final_res is not None
         assert len(final_res.outputs) == 1
         final_output = final_res.outputs[0]
-
+        print("-"*70)
+        print(f"Final output: {final_output}")
         if self.reasoning_parser:
             try:
                 reasoning_parser = self.reasoning_parser(tokenizer)
@@ -468,7 +469,28 @@ def _construct_input_messages(
         if isinstance(request.input, str):
             messages.append({"role": "user", "content": request.input})
         else:
-            messages.extend(request.input)  # type: ignore
+            for item in request.input:
+                if item.get("type") == "function_call":
+                    messages.append({
+                        "role":
+                        "assistant",
+                        "tool_calls": [{
+                            "id": item.get("call_id"),
+                            "function": {
+                                "name": item.get("name"),
+                                "arguments": item.get("arguments", "{}"),
+                            },
+                            "type": "function",
+                        }]
+                    })
+                elif item.get("type") == "function_call_output":
+                    messages.append({
+                        "role": "tool",
+                        "content": item.get("output", ""),
+                        "tool_call_id": item.get("call_id"),
+                    })
+                else:
+                    messages.append(item)  # type: ignore
         return messages
 
     async def _run_background_request(
diff --git a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
index a781d1bb282..a0744152102 100644
--- a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
@@ -41,7 +41,7 @@ def vocab(self) -> dict[str, int]:
     def adjust_request(
             self, request: Union[ChatCompletionRequest, ResponsesRequest]
-    ) -> ChatCompletionRequest:
+    ) -> Union[ChatCompletionRequest, ResponsesRequest]:
         """
         Static method that used to adjust the request parameters.
""" @@ -69,7 +69,7 @@ def extract_tool_calls_streaming( previous_token_ids: Sequence[int], current_token_ids: Sequence[int], delta_token_ids: Sequence[int], - request: Union[ChatCompletionRequest, ResponsesRequest], + request: ChatCompletionRequest, ) -> Union[DeltaMessage, None]: """ Instance method that should be implemented for extracting tool calls diff --git a/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py index da4760ad1b6..1822f1d3f06 100644 --- a/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py @@ -7,11 +7,9 @@ import regex as re from vllm.entrypoints.chat_utils import random_tool_call_id -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.logger import init_logger @@ -71,7 +69,7 @@ def __init__(self, tokenizer: AnyTokenizer): def extract_tool_calls( self, model_output: str, - request: ChatCompletionRequest, + request: Union[ChatCompletionRequest, ResponsesRequest], ) -> ExtractedToolCallInformation: # sanity check; avoid unnecessary processing diff --git a/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py index 5508ba6a394..4490a724aa9 100644 --- a/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py @@ -11,11 +11,9 @@ from partial_json_parser.core.options import Allow from vllm.entrypoints.chat_utils import random_tool_call_id -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.entrypoints.openai.tool_parsers.utils import (consume_space, @@ -47,8 +45,9 @@ def __init__(self, tokenizer: AnyTokenizer): self.tool_call_regex = re.compile(r"\s*") def extract_tool_calls( - self, model_output: str, - request: ChatCompletionRequest) -> ExtractedToolCallInformation: + self, model_output: str, request: Union[ChatCompletionRequest, + ResponsesRequest] + ) -> ExtractedToolCallInformation: if self.tool_start_token not in model_output: return ExtractedToolCallInformation(tools_called=False, tool_calls=[], diff --git a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py index fcc5b7edda8..44d79047028 100644 --- a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py @@ -9,11 +9,9 @@ from partial_json_parser.core.options import Allow from vllm.entrypoints.chat_utils import random_tool_call_id -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - 
DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.entrypoints.openai.tool_parsers.utils import (consume_space, @@ -45,8 +43,9 @@ def __init__(self, tokenizer: AnyTokenizer): self.bot_string = "" def extract_tool_calls( - self, model_output: str, - request: ChatCompletionRequest) -> ExtractedToolCallInformation: + self, model_output: str, request: Union[ChatCompletionRequest, + ResponsesRequest] + ) -> ExtractedToolCallInformation: stripped = model_output.strip()\ .removeprefix(self.bot_token)\ .removeprefix(self.bot_string)\ diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py index c7030d34d45..879443bb4ad 100644 --- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py @@ -10,11 +10,9 @@ from partial_json_parser.core.options import Allow from vllm.entrypoints.chat_utils import random_tool_call_id -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.logger import init_logger @@ -64,7 +62,7 @@ def __init__(self, tokenizer: AnyTokenizer): def extract_tool_calls( self, model_output: str, - request: ChatCompletionRequest, + request: Union[ChatCompletionRequest, ResponsesRequest], ) -> ExtractedToolCallInformation: # sanity check; avoid unnecessary processing diff --git a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py index 92004de030d..b92fe40e2bc 100644 --- a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py @@ -9,11 +9,9 @@ from partial_json_parser.core.options import Allow from vllm.entrypoints.chat_utils import random_tool_call_id -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.entrypoints.openai.tool_parsers.utils import ( @@ -183,7 +181,7 @@ def extract_tool_calls_streaming( def extract_tool_calls( self, model_output: str, - request: ChatCompletionRequest, + request: Union[ChatCompletionRequest, ResponsesRequest], ) -> ExtractedToolCallInformation: text = model_output tools = request.tools diff --git a/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py index 66b483d8b0f..2cb7696ea64 100644 --- 
a/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py @@ -10,11 +10,9 @@ from partial_json_parser.core.options import Allow from vllm.entrypoints.chat_utils import random_tool_call_id -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager from vllm.entrypoints.openai.tool_parsers.utils import ( extract_intermediate_diff) @@ -64,7 +62,8 @@ def __init__(self, tokenizer: AnyTokenizer): "tokens in the tokenizer!") def adjust_request( - self, request: ChatCompletionRequest) -> ChatCompletionRequest: + self, request: Union[ChatCompletionRequest | ResponsesRequest] + ) -> Union[ChatCompletionRequest | ResponsesRequest]: if request.tools and request.tool_choice != 'none': # do not skip special tokens because jamba use the special # tokens to indicate the start and end of the tool calls @@ -73,8 +72,9 @@ def adjust_request( return request def extract_tool_calls( - self, model_output: str, - request: ChatCompletionRequest) -> ExtractedToolCallInformation: + self, model_output: str, + request: Union[ChatCompletionRequest | ResponsesRequest] + ) -> ExtractedToolCallInformation: # sanity check; avoid unnecessary processing if self.tool_calls_start_token not in model_output: diff --git a/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py index b0df442dd86..77e7c4d8422 100644 --- a/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py @@ -7,11 +7,9 @@ import regex as re -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.logger import init_logger @@ -70,7 +68,7 @@ def __init__(self, tokenizer: AnyTokenizer): def extract_tool_calls( self, model_output: str, - request: ChatCompletionRequest, + request: Union[ChatCompletionRequest, ResponsesRequest], ) -> ExtractedToolCallInformation: # sanity check; avoid unnecessary processing @@ -374,4 +372,4 @@ def extract_tool_calls_streaming( except Exception: logger.exception("Error trying to handle streaming tool call.") - return None # do not stream a delta. skip this token ID. \ No newline at end of file + return None # do not stream a delta. skip this token ID. 
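
A note on the guided decoding path introduced in PATCH 01/14: with
tool_choice="required", _get_guided_json_from_tool compiles the request's
tool list into a JSON schema that constrains the model to emit an array of
{name, parameters} objects, which serving_responses.py then parses back via
TypeAdapter(list[FunctionDefinition]).validate_json(content). The sketch
below shows roughly what the compiled schema looks like for a single
hypothetical get_weather tool; it is a Python dict literal worked out by
hand from get_tool_schema, not output captured from a running server:

# Hypothetical tool in the request (illustrative only):
#   {"type": "function", "name": "get_weather",
#    "parameters": {"type": "object",
#                   "properties": {"city": {"type": "string"}},
#                   "required": ["city"]}}
guided_json = {
    "type": "array",
    "minItems": 1,  # tool_choice="required" demands at least one call
    "items": {
        "type": "object",
        # One anyOf branch per function tool in the request.
        "anyOf": [{
            "properties": {
                "name": {"type": "string", "enum": ["get_weather"]},
                # A tool that declares no parameters falls back to
                # {"type": "object", "properties": {}}.
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
            "required": ["name", "parameters"],
        }],
    },
}
# Any "$defs" inside tool parameter schemas are hoisted to a top-level
# "$defs" key; two tools defining the same $def name with different
# schemas raise a ValueError.
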
diff --git a/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py index 6bf44a4345a..5b51d6fed8e 100644 --- a/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py @@ -9,11 +9,9 @@ from transformers import PreTrainedTokenizerBase import vllm.envs as envs -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.logger import init_logger @@ -55,8 +53,9 @@ def current_tool_index(self, value: int) -> None: self.current_tool_id = value def extract_tool_calls( - self, model_output: str, - request: ChatCompletionRequest) -> ExtractedToolCallInformation: + self, model_output: str, request: Union[ChatCompletionRequest, + ResponsesRequest] + ) -> ExtractedToolCallInformation: """ Extract the tool calls from a complete model response. """ diff --git a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py index 5698bc70af2..67fb22473b4 100644 --- a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py @@ -12,11 +12,9 @@ from transformers import PreTrainedTokenizerBase from vllm.entrypoints.chat_utils import random_tool_call_id -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.entrypoints.openai.tool_parsers.utils import (find_common_prefix, @@ -54,8 +52,9 @@ def __init__(self, tokenizer: PreTrainedTokenizerBase): self.tool_call_regex = re.compile(r"\[{.*?}\]", re.DOTALL) def extract_tool_calls( - self, model_output: str, - request: ChatCompletionRequest) -> ExtractedToolCallInformation: + self, model_output: str, request: Union[ChatCompletionRequest, + ResponsesRequest] + ) -> ExtractedToolCallInformation: """ Extract the tool calls from a complete model response. 
""" diff --git a/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py index 6ba32e38fcd..519865fa781 100644 --- a/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py @@ -10,11 +10,9 @@ from partial_json_parser.core.options import Allow from vllm.entrypoints.chat_utils import random_tool_call_id -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.logger import init_logger @@ -83,7 +81,7 @@ def remove_tool_calls_from_think(match): def extract_tool_calls( self, model_output: str, - request: ChatCompletionRequest, + request: Union[ChatCompletionRequest, ResponsesRequest], ) -> ExtractedToolCallInformation: # Preprocess to remove tool calls from thinking tags diff --git a/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py index c0691f12290..f634dbb736c 100644 --- a/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py @@ -12,11 +12,9 @@ from partial_json_parser.core.options import Allow from pydantic import Field -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.entrypoints.openai.tool_parsers.utils import ( @@ -88,7 +86,8 @@ def __init__(self, tokenizer: AnyTokenizer): "the tokenizer!") def adjust_request( - self, request: ChatCompletionRequest) -> ChatCompletionRequest: + self, request: Union[ChatCompletionRequest, ResponsesRequest] + ) -> Union[ChatCompletionRequest, ResponsesRequest]: if not isinstance( self.model_tokenizer, MistralTokenizer ) and request.tools and request.tool_choice != 'none': diff --git a/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py index 5501028cf36..9e6f310c688 100644 --- a/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py @@ -3,7 +3,7 @@ import json from collections.abc import Sequence -from typing import Any, Optional +from typing import Any, Optional, Union import regex as re from transformers import PreTrainedTokenizerBase @@ -12,7 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, DeltaMessage, ExtractedToolCallInformation, - FunctionCall, ToolCall) + FunctionCall, ResponsesRequest, + ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.logger import init_logger @@ -43,8 +44,9 @@ def __init__(self, tokenizer: PreTrainedTokenizerBase) -> None: self.bot_token: str = "functools" 
def extract_tool_calls( - self, model_output: str, - request: ChatCompletionRequest) -> ExtractedToolCallInformation: + self, model_output: str, request: Union[ChatCompletionRequest, + ResponsesRequest] + ) -> ExtractedToolCallInformation: """ Extract the tool calls from a complete model response. """ diff --git a/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py index 73329cdf701..0322408ead7 100644 --- a/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py @@ -10,11 +10,9 @@ from transformers import PreTrainedTokenizerBase import vllm.envs as envs -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.logger import init_logger @@ -58,8 +56,9 @@ def current_tool_index(self, value: int) -> None: self.current_tool_id = value def extract_tool_calls( - self, model_output: str, - request: ChatCompletionRequest) -> ExtractedToolCallInformation: + self, model_output: str, request: Union[ChatCompletionRequest, + ResponsesRequest] + ) -> ExtractedToolCallInformation: """ Extract the tool calls from a complete model response. """ diff --git a/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py index 321718b1c95..697f4e3fcd8 100644 --- a/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py @@ -8,11 +8,9 @@ import regex as re from vllm.entrypoints.chat_utils import random_tool_call_id -from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaFunctionCall, DeltaMessage, - DeltaToolCall, - ExtractedToolCallInformation, - FunctionCall, ToolCall) +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall, + ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) from vllm.logger import init_logger @@ -115,8 +113,9 @@ def preprocess_model_output( return model_output, None def extract_tool_calls( - self, model_output: str, - request: ChatCompletionRequest) -> ExtractedToolCallInformation: + self, model_output: str, request: Union[ChatCompletionRequest, + ResponsesRequest] + ) -> ExtractedToolCallInformation: """ Extract tool calls from a complete model output. 
""" From 4f15211c9a1ecabe10cd4e741b8ff272bb824000 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Mon, 14 Jul 2025 02:53:09 +0000 Subject: [PATCH 04/14] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/serving_responses.py | 135 ++++++++++--------- 1 file changed, 70 insertions(+), 65 deletions(-) diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index a7bc94691e7..637da61656d 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -308,8 +308,6 @@ async def responses_full_generator( assert final_res is not None assert len(final_res.outputs) == 1 final_output = final_res.outputs[0] - print("-"*70) - print(f"Final output: {final_output}") if self.reasoning_parser: try: reasoning_parser = self.reasoning_parser(tokenizer) @@ -323,85 +321,92 @@ async def responses_full_generator( else: reasoning_content = None content = final_output.text - - outputs = [] - output = None - if self.tool_parser: - function_calls: list[FunctionCall] = [] - if request.tool_choice and \ - isinstance(request.tool_choice, - ToolChoiceFunction): - # Forced Function Call - function_calls.append( - FunctionCall(name=request.tool_choice.name, - arguments=content)) - elif request.tool_choice is None or request.tool_choice == "none": - pass - elif request.tool_choice == "required": - assert content is not None - tool_calls = TypeAdapter( - list[FunctionDefinition]).validate_json(content) - function_calls.extend([ - FunctionCall(name=tool_call.name, - arguments=json.dumps(tool_call.parameters, - ensure_ascii=False)) - for tool_call in tool_calls - ]) - elif request.tool_choice == "auto": - try: - tool_parser = self.tool_parser(tokenizer) - except RuntimeError as e: - logger.exception("Error in tool parser creation.") - return self.create_error_response(str(e)) - tool_call_info = tool_parser.extract_tool_calls( - content if content is not None else "", request=request) - if tool_call_info is not None and tool_call_info.tools_called: - function_calls.extend( - FunctionCall( - name=tool_call.function.name, - arguments=tool_call.function.arguments, - ) for tool_call in tool_call_info.tool_calls) - else: - logger.warning( - "Unknown tool choice: %s. " - "Using 'none' as the default tool choice.", - request.tool_choice) - if function_calls: - output = [ - ResponseFunctionToolCall( - id=f"fc_{random_fc_uuid()}", - call_id=f"call_{random_uuid()}", - type="function_call", - status="completed", - name=tool_call.name, - arguments=tool_call.arguments, - ) for tool_call in function_calls - ] - # If no tool call is generated, we still need to return an output. - if reasoning_content and output is None: - output = ResponseReasoningItem( + reasoning_item = None + message_item = None + if reasoning_content: + reasoning_item = ResponseReasoningItem( text=reasoning_content, status=None, # NOTE: Only the last output item has status. ) - # If no tool call is generated, we still need to return an output. 
- if content and output is None: + if content: output_text = ResponseOutputText( text=content, annotations=[], # TODO type="output_text", logprobs=None, # TODO ) - output = ResponseOutputMessage( + message_item = ResponseOutputMessage( id=f"msg_{random_uuid()}", content=[output_text], role="assistant", status="completed", type="message", ) - if isinstance(output, list): - outputs.extend(output) + outputs = [] + function_calls: list[FunctionCall] = [] + if (not self.enable_auto_tools or not self.tool_parser): + # Tools are not enabled + if reasoning_item: + outputs.append(reasoning_item) + if message_item: + outputs.append(message_item) + elif request.tool_choice is None or request.tool_choice == "none": + # No tool calls. + if reasoning_item: + outputs.append(reasoning_item) + if message_item: + outputs.append(message_item) + elif request.tool_choice and \ + isinstance(request.tool_choice, + ToolChoiceFunction): + # Forced Function Call + function_calls.append( + FunctionCall(name=request.tool_choice.name, arguments=content)) + elif request.tool_choice == "required": + assert content is not None + tool_calls = TypeAdapter( + list[FunctionDefinition]).validate_json(content) + function_calls.extend([ + FunctionCall(name=tool_call.name, + arguments=json.dumps(tool_call.parameters, + ensure_ascii=False)) + for tool_call in tool_calls + ]) + elif request.tool_choice == "auto": + try: + tool_parser = self.tool_parser(tokenizer) + except RuntimeError as e: + logger.exception("Error in tool parser creation.") + return self.create_error_response(str(e)) + tool_call_info = tool_parser.extract_tool_calls( + content if content is not None else "", request=request) + if tool_call_info is not None and tool_call_info.tools_called: + function_calls.extend( + FunctionCall( + name=tool_call.function.name, + arguments=tool_call.function.arguments, + ) for tool_call in tool_call_info.tool_calls) + else: + # No tool calls. + if reasoning_item: + outputs.append(reasoning_item) + if message_item: + outputs.append(message_item) else: - outputs.append(output) + return self.create_error_response( + f"Invalid tool_choice: {request.tool_choice}") + + if function_calls: + outputs.extend([ + ResponseFunctionToolCall( + id=f"fc_{random_fc_uuid()}", + call_id=f"call_{random_uuid()}", + type="function_call", + status="completed", + name=tool_call.name, + arguments=tool_call.arguments, + ) for tool_call in function_calls + ]) # Calculate usage. 
assert final_res.prompt_token_ids is not None From 04026985b0f78a33e901000d4a8b0abc0eee7204 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Mon, 14 Jul 2025 03:17:41 +0000 Subject: [PATCH 05/14] [Frontend] OpenAI Responses API supports Tool/Function calling Signed-off-by: chaunceyjiang --- .../openai_responses_client_with_tools.py | 88 +++++++++++++++++++ .../tool_parsers/deepseekv3_tool_parser.py | 9 +- .../granite_20b_fc_tool_parser.py | 9 +- .../tool_parsers/granite_tool_parser.py | 9 +- .../openai/tool_parsers/hermes_tool_parser.py | 9 +- .../tool_parsers/internlm2_tool_parser.py | 14 ++- .../openai/tool_parsers/jamba_tool_parser.py | 19 ++-- .../tool_parsers/kimi_k2_tool_parser.py | 9 +- .../llama4_pythonic_tool_parser.py | 9 +- .../openai/tool_parsers/llama_tool_parser.py | 9 +- .../tool_parsers/minimax_tool_parser.py | 9 +- .../tool_parsers/mistral_tool_parser.py | 16 ++-- .../tool_parsers/pythonic_tool_parser.py | 9 +- .../openai/tool_parsers/xlam_tool_parser.py | 9 +- 14 files changed, 180 insertions(+), 47 deletions(-) create mode 100644 examples/online_serving/openai_responses_client_with_tools.py diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/online_serving/openai_responses_client_with_tools.py new file mode 100644 index 00000000000..35f34e7c5bb --- /dev/null +++ b/examples/online_serving/openai_responses_client_with_tools.py @@ -0,0 +1,88 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +Set up this example by starting a vLLM OpenAI-compatible server with tool call +options enabled. +Reasoning models can be used through the Responses API as seen here +https://platform.openai.com/docs/api-reference/responses + +For example: + +vllm serve Qwen/Qwen3-1.7B --reasoning-parser qwen3\ + --guided-decoding-backend xgrammar \ + --enable-auto-tool-choice --tool-call-parser hermes +""" + +import json + +from openai import OpenAI + + +def get_weather(latitude: float, longitude: float) -> str: + """ + Mock function to simulate getting weather data. + In a real application, this would call an external weather API. + """ + return f"Current temperature at ({latitude}, {longitude}) is 20°C." + + +tools = [{ + "type": "function", + "name": "get_weather", + "description": + "Get current temperature for provided coordinates in celsius.", + "parameters": { + "type": "object", + "properties": { + "latitude": { + "type": "number" + }, + "longitude": { + "type": "number" + } + }, + "required": ["latitude", "longitude"], + "additionalProperties": False + }, + "strict": True +}] + +input_messages = [{ + "role": "user", + "content": "What's the weather like in Paris today?" 
+}]
+
+
+def main():
+    base_url = "http://0.0.0.0:8000/v1"
+    model = "Qwen/Qwen3-1.7B"
+    client = OpenAI(base_url=base_url,
+                    api_key="empty")
+    response = client.responses.create(
+        model=model,
+        input=input_messages,
+        tools=tools,
+        tool_choice="required"
+)
+    tool_call = response.output[0]
+    args = json.loads(tool_call.arguments)
+
+    result = get_weather(args["latitude"], args["longitude"])
+
+
+    input_messages.append(tool_call)  # append model's function call message
+    input_messages.append({  # append result message
+        "type": "function_call_output",
+        "call_id": tool_call.call_id,
+        "output": str(result)
+    })
+    response_2 = client.responses.create(
+        model=model,
+        input=input_messages,
+        tools=tools,
+)
+    print(response_2.output_text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py
index 1822f1d3f06..8819fb6acaf 100644
--- a/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py
@@ -7,9 +7,12 @@
 import regex as re
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py
index 4490a724aa9..563596ab803 100644
--- a/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py
@@ -11,9 +11,12 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (consume_space,
diff --git a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
index 44d79047028..69bd0c84a60 100644
--- a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
@@ -9,9 +9,12 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (consume_space,
diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
index 879443bb4ad..03e0e4eabd8 100644
--- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
@@ -10,9 +10,12 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
index b92fe40e2bc..38e26e256cb 100644
--- a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
@@ -9,9 +9,12 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (
@@ -30,7 +33,10 @@ def __init__(self, tokenizer: AnyTokenizer):
         self.position = 0
 
     def adjust_request(
-            self, request: ChatCompletionRequest) -> ChatCompletionRequest:
+        self, request: Union[ChatCompletionRequest, ResponsesRequest]
+    ) -> Union[ChatCompletionRequest, ResponsesRequest]:
+        if not isinstance(request, ChatCompletionRequest):
+            return request
         if request.tools and request.tool_choice != 'none':
             # do not skip special tokens because internlm use the special
             # tokens to indicated the start and end of the tool calls
diff --git a/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
index 2cb7696ea64..4cb089a2044 100644
--- a/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
@@ -10,9 +10,12 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
 from vllm.entrypoints.openai.tool_parsers.utils import (
     extract_intermediate_diff)
@@ -62,8 +65,10 @@ def __init__(self, tokenizer: AnyTokenizer):
                 "tokens in the tokenizer!")
 
     def adjust_request(
-            self, request: Union[ChatCompletionRequest | ResponsesRequest]
-    ) -> Union[ChatCompletionRequest | ResponsesRequest]:
+        self, request: Union[ChatCompletionRequest, ResponsesRequest]
+    ) -> Union[ChatCompletionRequest, ResponsesRequest]:
+        if not isinstance(request, ChatCompletionRequest):
+            return request
         if request.tools and request.tool_choice != 'none':
             # do not skip special tokens because jamba use the special
             # tokens to indicate the start and end of the tool calls
@@ -72,8 +77,8 @@ def adjust_request(
         return request
 
     def extract_tool_calls(
-        self, model_output: str,
-        request: Union[ChatCompletionRequest | ResponsesRequest]
+        self, model_output: str, request: Union[ChatCompletionRequest,
+                                                ResponsesRequest]
     ) -> ExtractedToolCallInformation:
 
         # sanity check; avoid unnecessary processing
diff --git a/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
index 77e7c4d8422..2082941fec6 100644
--- a/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
@@ -7,9 +7,12 @@
 
 import regex as re
 
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py
index 5b51d6fed8e..3e7a5496046 100644
--- a/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py
@@ -9,9 +9,12 @@
 from transformers import PreTrainedTokenizerBase
 
 import vllm.envs as envs
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
index 67fb22473b4..a71ecee5f3d 100644
--- a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
@@ -12,9 +12,12 @@
 from transformers import PreTrainedTokenizerBase
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (find_common_prefix,
diff --git a/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
index 519865fa781..dfa3defb074 100644
--- a/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
@@ -10,9 +10,12 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
index f634dbb736c..53c24c03b93 100644
--- a/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
@@ -12,9 +12,12 @@
 from partial_json_parser.core.options import Allow
 from pydantic import Field
 
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (
@@ -88,6 +91,8 @@ def __init__(self, tokenizer: AnyTokenizer):
     def adjust_request(
             self, request: Union[ChatCompletionRequest, ResponsesRequest]
     ) -> Union[ChatCompletionRequest, ResponsesRequest]:
+        if not isinstance(request, ChatCompletionRequest):
+            return request
         if not isinstance(
                 self.model_tokenizer, MistralTokenizer
         ) and request.tools and request.tool_choice != 'none':
@@ -100,9 +105,8 @@ def adjust_request(
         return request
 
     def extract_tool_calls(
-        self,
-        model_output: str,
-        request: ChatCompletionRequest,
+        self, model_output: str, request: Union[ChatCompletionRequest,
+                                                ResponsesRequest]
     ) -> ExtractedToolCallInformation:
         """
         Extract the tool calls from a complete model response. Requires
diff --git a/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
index 0322408ead7..351774ca1f4 100644
--- a/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
@@ -10,9 +10,12 @@
 from transformers import PreTrainedTokenizerBase
 
 import vllm.envs as envs
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
index 697f4e3fcd8..166b39603c0 100644
--- a/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
@@ -8,9 +8,12 @@
 import regex as re
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
-    ExtractedToolCallInformation, FunctionCall, ResponsesRequest, ToolCall)
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
+                                              DeltaToolCall,
+                                              ExtractedToolCallInformation,
+                                              FunctionCall, ResponsesRequest,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger

From 9cfd8e8f1db8fdf1b5d241b223051ad39c33ee19 Mon Sep 17 00:00:00 2001
From: chaunceyjiang
Date: Mon, 14 Jul 2025 03:53:26 +0000
Subject: [PATCH 06/14] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang
---
 .../openai_responses_client_with_tools.py          |  43 ++-
 tests/v1/entrypoints/openai/responses/conftest.py  |   2 +-
 .../openai/responses/test_function_call.py         | 259 +++++++++++-------
 vllm/entrypoints/openai/serving_engine.py          |   1 -
 4 files changed, 177 insertions(+), 128 deletions(-)

diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/online_serving/openai_responses_client_with_tools.py
index 35f34e7c5bb..13c789c9d79 100644
--- a/examples/online_serving/openai_responses_client_with_tools.py
+++ b/examples/online_serving/openai_responses_client_with_tools.py
@@ -26,7 +26,8 @@ def get_weather(latitude: float, longitude: float) -> str:
     return f"Current temperature at ({latitude}, {longitude}) is 20°C."
 
 
-tools = [{
+tools = [
+    {
     "type": "function",
     "name": "get_weather",
     "description":
@@ -45,25 +46,21 @@ def get_weather(latitude: float, longitude: float) -> str:
         "additionalProperties": False
     },
     "strict": True
-}]
+    }
+]
 
-input_messages = [{
-    "role": "user",
-    "content": "What's the weather like in Paris today?"
-}]
+input_messages = [
+    {"role": "user", "content": "What's the weather like in Paris today?"}
+]
 
 
 def main():
     base_url = "http://0.0.0.0:8000/v1"
     model = "Qwen/Qwen3-1.7B"
-    client = OpenAI(base_url=base_url,
-                    api_key="empty")
+    client = OpenAI(base_url=base_url, api_key="empty")
     response = client.responses.create(
-        model=model,
-        input=input_messages,
-        tools=tools,
-        tool_choice="required"
-)
+        model=model, input=input_messages, tools=tools, tool_choice="required"
+    )
     tool_call = response.output[0]
     args = json.loads(tool_call.arguments)
@@ -71,16 +68,18 @@ def main():
 
     result = get_weather(args["latitude"], args["longitude"])
 
-
     input_messages.append(tool_call)  # append model's function call message
-    input_messages.append({  # append result message
-        "type": "function_call_output",
-        "call_id": tool_call.call_id,
-        "output": str(result)
-    })
+    input_messages.append(
+        {  # append result message
+            "type": "function_call_output",
+            "call_id": tool_call.call_id,
+            "output": str(result),
+        }
+    )
     response_2 = client.responses.create(
-        model=model,
-        input=input_messages,
-        tools=tools,
-)
+        model=model,
+        input=input_messages,
+        tools=tools,
+    )
     print(response_2.output_text)
diff --git a/tests/v1/entrypoints/openai/responses/conftest.py b/tests/v1/entrypoints/openai/responses/conftest.py
index 471f016227a..20d71c13052 100644
--- a/tests/v1/entrypoints/openai/responses/conftest.py
+++ b/tests/v1/entrypoints/openai/responses/conftest.py
@@ -6,7 +6,7 @@
 from tests.utils import RemoteOpenAIServer
 
 # Use a small reasoning model to test the responses API.
-MODEL_NAME = "Qwen/Qwen3-0.6B"
+MODEL_NAME = "Qwen/Qwen3-1.7B"
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py
index afa2b2823c3..4f88a49ba44 100644
--- a/tests/v1/entrypoints/openai/responses/test_function_call.py
+++ b/tests/v1/entrypoints/openai/responses/test_function_call.py
@@ -6,140 +6,191 @@
 import openai  # use the official client for correctness check
 import pytest
 
-
-MODEL_NAME = "Qwen/Qwen3-0.6B"
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("model_name", [MODEL_NAME])
-@pytest.mark.parametrize("tool_choice", ["auto", "required"])
-async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
-                                 tool_choice: str):
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "get_current_weather",
-                "description": "Get the current weather in a given location",
-                "parameters": {
+MODEL_NAME = "Qwen/Qwen3-1.7B"
+tools = [
+    {
+        "type": "function",
+        "name": "get_current_weather",
+        "description": "Get the current weather in a given location",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "city": {
+                    "type": "string",
+                    "description":
+                    "The city to find the weather for, e.g. 'Vienna'",
+                    "default": "Vienna",
+                },
+                "country": {
+                    "type":
+                    "string",
+                    "description":
+                    "The country that the city is in, e.g. 'Austria'",
+                },
+                "unit": {
+                    "type": "string",
+                    "description": "The unit to fetch the temperature in",
+                    "enum": ["celsius", "fahrenheit"],
+                },
+                "options": {
+                    "$ref": "#/$defs/WeatherOptions",
+                    "description": "Optional parameters for weather query",
+                },
+            },
+            "required": ["country", "unit"],
+            "$defs": {
+                "WeatherOptions": {
+                    "title": "WeatherOptions",
                     "type": "object",
+                    "additionalProperties": False,
                     "properties": {
-                        "city": {
-                            "type": "string",
-                            "description":
-                            "The city to find the weather for, e.g. 'Vienna'",
-                            "default": "Vienna",
-                        },
-                        "country": {
-                            "type":
-                            "string",
-                            "description":
-                            "The country that the city is in, e.g. 'Austria'",
-                        },
                         "unit": {
                             "type": "string",
-                            "description":
-                            "The unit to fetch the temperature in",
                             "enum": ["celsius", "fahrenheit"],
+                            "default": "celsius",
+                            "description": "Temperature unit",
+                            "title": "Temperature Unit",
                         },
-                        "options": {
-                            "$ref": "#/$defs/WeatherOptions",
+                        "include_forecast": {
+                            "type": "boolean",
+                            "default": False,
                             "description":
-                            "Optional parameters for weather query",
+                            "Whether to include a 24-hour forecast",
+                            "title": "Include Forecast",
                         },
-                    },
-                    "required": ["country", "unit"],
-                    "$defs": {
-                        "WeatherOptions": {
-                            "title": "WeatherOptions",
-                            "type": "object",
-                            "additionalProperties": False,
-                            "properties": {
-                                "unit": {
-                                    "type": "string",
-                                    "enum": ["celsius", "fahrenheit"],
-                                    "default": "celsius",
-                                    "description": "Temperature unit",
-                                    "title": "Temperature Unit",
-                                },
-                                "include_forecast": {
-                                    "type": "boolean",
-                                    "default": False,
-                                    "description":
-                                    "Whether to include a 24-hour forecast",
-                                    "title": "Include Forecast",
-                                },
-                                "language": {
-                                    "type": "string",
-                                    "default": "zh-CN",
-                                    "description": "Language of the response",
-                                    "title": "Language",
-                                    "enum": ["zh-CN", "en-US", "ja-JP"],
-                                },
-                            },
+                        "language": {
+                            "type": "string",
+                            "default": "zh-CN",
+                            "description": "Language of the response",
+                            "title": "Language",
+                            "enum": ["zh-CN", "en-US", "ja-JP"],
                         },
                     },
                 },
             },
         },
-        {
-            "type": "function",
-            "function": {
-                "name": "get_forecast",
-                "description": "Get the weather forecast for a given location",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "city": {
-                            "type": "string",
-                            "description":
-                            "The city to get the forecast for, e.g. 'Vienna'",
-                            "default": "Vienna",
-                        },
-                        "country": {
-                            "type":
-                            "string",
-                            "description":
-                            "The country that the city is in, e.g. 'Austria'",
-                        },
-                        "days": {
-                            "type":
-                            "integer",
-                            "description":
-                            "Number of days to get the forecast for (1-7)",
-                        },
-                        "unit": {
-                            "type": "string",
-                            "description":
-                            "The unit to fetch the temperature in",
-                            "enum": ["celsius", "fahrenheit"],
-                        },
-                    },
-                    "required": ["country", "days", "unit"],
+    },
+    {
+        "type": "function",
+        "name": "get_forecast",
+        "description": "Get the weather forecast for a given location",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "city": {
+                    "type": "string",
+                    "description":
+                    "The city to get the forecast for, e.g. 'Vienna'",
+                    "default": "Vienna",
+                },
+                "country": {
+                    "type":
+                    "string",
+                    "description":
+                    "The country that the city is in, e.g. 'Austria'",
+                },
+                "days": {
+                    "type": "integer",
+                    "description":
+                    "Number of days to get the forecast for (1-7)",
+                },
+                "unit": {
+                    "type": "string",
+                    "description": "The unit to fetch the temperature in",
+                    "enum": ["celsius", "fahrenheit"],
+                },
+            },
+            "required": ["country", "days", "unit"],
         },
-    ]
+    },
+]
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize("tool_choice", ["auto", "required"])
+async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
+                                 tool_choice: str):
     prompt = [{
-        "role":
-        "user",
+        "role": "user",
         "content":
         "Can you tell me what the current weather is in Berlin and the "\
         "forecast for the next 5 days, in fahrenheit?",
     },]
-    response = client.responses.create(
+    response = await client.responses.create(
         model=model_name,
         input=prompt,
         tools=tools,
         tool_choice=tool_choice,
     )
-
+    assert len(response.output) >= 1
     tool_call = response.output[0]
-
+    assert tool_call.type == "function_call"
     assert json.loads(tool_call.arguments) is not None
 
+
 @pytest.mark.asyncio
-async def test_named_tool_use(client: openai.AsyncOpenAI, sample_json_schema):
-    pass
+async def test_named_tool_use(client: openai.AsyncOpenAI):
+
+    def get_weather(latitude: float, longitude: float) -> str:
+        """
+        Mock function to simulate getting weather data.
+        In a real application, this would call an external weather API.
+        """
+        return f"Current temperature at ({latitude}, {longitude}) is 20°C."
+
+    tools = [{
+        "type": "function",
+        "name": "get_weather",
+        "description":
+        "Get current temperature for provided coordinates in celsius.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "latitude": {
+                    "type": "number"
+                },
+                "longitude": {
+                    "type": "number"
+                }
+            },
+            "required": ["latitude", "longitude"],
+            "additionalProperties": False
+        },
+        "strict": True
+    }]
+
+    input_messages = [{
+        "role": "user",
+        "content": "What's the weather like in Paris today?"
+    }]
+
+    response = await client.responses.create(model=MODEL_NAME,
+                                             input=input_messages,
+                                             tools=tools,
+                                             tool_choice={
+                                                 "type": "function",
+                                                 "name": "get_weather"
+                                             })
+    assert len(response.output) == 1
+    tool_call = response.output[0]
+    assert tool_call.type == "function_call"
+    assert tool_call.name == "get_weather"
+    args = json.loads(tool_call.arguments)
+    assert args["latitude"] is not None
+    assert args["longitude"] is not None
+    # call the tool
+    result = get_weather(args["latitude"], args["longitude"])
+    input_messages.append(tool_call)  # append model's function call message
+    input_messages.append({  # append result message
+        "type": "function_call_output",
+        "call_id": tool_call.call_id,
+        "output": str(result)
+    })
+    # create a new response with the tool call result
+    response_2 = await client.responses.create(model=MODEL_NAME,
+                                               input=input_messages)
+    # check the output
+    assert len(response_2.output_text) > 0
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 8a1a3b80da7..9e643aa3252 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -898,7 +898,6 @@ async def _preprocess_chat(
             model_config=model_config,
             **_chat_template_kwargs,
         )
-        print(f"Request prompt: {request_prompt}")
         mm_data = await mm_data_future
 
         # tool parsing is done only if a tool_parser has been set and if

From d0e081abb4ece2095dae6317cba1828540807d5f Mon Sep 17 00:00:00 2001
From: chaunceyjiang
Date: Mon, 14 Jul 2025 05:25:45 +0000
Subject: [PATCH 07/14] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang
---
 .../openai_responses_client_with_tools.py | 44 ++++++++-----------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/online_serving/openai_responses_client_with_tools.py
index 13c789c9d79..ec3dd3866f8 100644
--- a/examples/online_serving/openai_responses_client_with_tools.py
+++ b/examples/online_serving/openai_responses_client_with_tools.py
@@ -27,26 +27,21 @@ def get_weather(latitude: float, longitude: float) -> str:
 
 
 tools = [
-    {
-    "type": "function",
-    "name": "get_weather",
-    "description":
-    "Get current temperature for provided coordinates in celsius.",
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "latitude": {
-                "type": "number"
+    {
+        "type": "function",
+        "name": "get_weather",
+        "description": "Get current temperature for provided coordinates in celsius.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "latitude": {"type": "number"},
+                "longitude": {"type": "number"},
             },
-            "longitude": {
-                "type": "number"
-            }
+            "required": ["latitude", "longitude"],
+            "additionalProperties": False,
         },
-        "required": ["latitude", "longitude"],
-        "additionalProperties": False
-    },
-    "strict": True
-}
+        "strict": True,
+    }
 ]
 
 input_messages = [
@@ -59,14 +54,13 @@ def main():
     base_url = "http://0.0.0.0:8000/v1"
     model = "Qwen/Qwen3-1.7B"
     client = OpenAI(base_url=base_url, api_key="empty")
     response = client.responses.create(
-        model=model, input=input_messages, tools=tools, tool_choice="required"
+        model=model, input=input_messages, tools=tools, tool_choice="required"
     )
     tool_call = response.output[0]
     args = json.loads(tool_call.arguments)
-
+
     result = get_weather(args["latitude"], args["longitude"])
-
-
+
     input_messages.append(tool_call)  # append model's function call message
     input_messages.append(
         {  # append result message
             "type": "function_call_output",
             "call_id": tool_call.call_id,
             "output": str(result),
         }
     )
     response_2 = client.responses.create(
-        model=model,
-        input=input_messages,
-        tools=tools,
+        model=model,
+        input=input_messages,
+        tools=tools,
     )
     print(response_2.output_text)

From 0a6b8d724d03cd7612252ad56f4a10d28a729f3e Mon Sep 17 00:00:00 2001
From: chaunceyjiang
Date: Mon, 14 Jul 2025 05:50:50 +0000
Subject: [PATCH 08/14] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang
---
 examples/online_serving/openai_responses_client_with_tools.py | 2 +-
 vllm/entrypoints/openai/protocol.py                           | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/examples/online_serving/openai_responses_client_with_tools.py b/examples/online_serving/openai_responses_client_with_tools.py
index ec3dd3866f8..95072c5bddf 100644
--- a/examples/online_serving/openai_responses_client_with_tools.py
+++ b/examples/online_serving/openai_responses_client_with_tools.py
@@ -8,7 +8,7 @@
 
 For example:
 
-vllm serve Qwen/Qwen3-1.7B --reasoning-parser qwen3\
+vllm serve Qwen/Qwen3-1.7B --reasoning-parser qwen3 \
      --guided-decoding-backend xgrammar \
      --enable-auto-tool-choice --tool-call-parser hermes
 """
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 3ed59ea6400..2667b9da36e 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -377,8 +377,6 @@ def _get_guided_json_from_tool(
                 raise ValueError(
                     f"Tool '{tool_name}' has not been passed in `tools`.")
             tool = tools[tool_name]
-            print(f"Using tool '{tool_name}' for guided json decoding.")
-            print(f"Tool parameters: {tool.parameters}")
             return tool.parameters
 
         if self.tool_choice == "required":
@@ -448,7 +446,7 @@ def _get_guided_decoding(self) -> Optional[GuidedDecodingParams]:
             elif response_format.type == "json_object":
                 raise NotImplementedError("json_object is not supported")
             # Function call
-            elif self.tool_choice != "none" or self.tools is not None:
+            elif not (self.tool_choice == "none" or self.tools is None):
                 guided_decoding = GuidedDecodingParams.from_optional(
                     json=self._get_guided_json_from_tool())
             return guided_decoding

From 230295ba68832f2b246aa4c2911a9cf270216207 Mon Sep 17 00:00:00 2001
From: chaunceyjiang
Date: Mon, 14 Jul 2025 06:28:05 +0000
Subject: [PATCH 09/14] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang
---
 vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py | 3 +++
 .../openai/tool_parsers/granite_20b_fc_tool_parser.py          | 3 +++
 vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py    | 3 +++
 vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py     | 3 +++
 vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py  | 3 +++
 vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py      | 3 +++
 vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py    | 3 +++
 .../openai/tool_parsers/llama4_pythonic_tool_parser.py         | 3 +++
 vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py      | 3 +++
 vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py    | 3 +++
 vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py    | 3 +++
 vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py   | 3 +++
 vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py   | 3 +++
 vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py       | 3 +++
 14 files changed, 42 insertions(+)

diff --git a/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py
index 8819fb6acaf..8c1850ef407 100644
--- a/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py
@@ -7,12 +7,15 @@
 import regex as re
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py
index 563596ab803..85dc3940223 100644
--- a/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py
@@ -11,12 +11,15 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (consume_space,
diff --git a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
index 69bd0c84a60..887401031c5 100644
--- a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
@@ -9,12 +9,15 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (consume_space,
diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
index 03e0e4eabd8..b3e23fa89fd 100644
--- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
@@ -10,12 +10,15 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
index 38e26e256cb..701c7c78bf8 100644
--- a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
@@ -9,12 +9,15 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (
diff --git a/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
index 4cb089a2044..9fd67445b79 100644
--- a/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
@@ -10,12 +10,15 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
 from vllm.entrypoints.openai.tool_parsers.utils import (
     extract_intermediate_diff)
diff --git a/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
index 2082941fec6..05bf42e7013 100644
--- a/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
@@ -7,12 +7,15 @@
 
 import regex as re
 
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py
index 3e7a5496046..e0db7090081 100644
--- a/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py
@@ -9,12 +9,15 @@
 from transformers import PreTrainedTokenizerBase
 
 import vllm.envs as envs
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
index a71ecee5f3d..115424c5b4f 100644
--- a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
@@ -12,12 +12,15 @@
 from transformers import PreTrainedTokenizerBase
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (find_common_prefix,
diff --git a/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
index dfa3defb074..0a5ca0f4a89 100644
--- a/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
@@ -10,12 +10,15 @@
 from partial_json_parser.core.options import Allow
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
index 53c24c03b93..706a352951d 100644
--- a/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
@@ -12,12 +12,15 @@
 from partial_json_parser.core.options import Allow
 from pydantic import Field
 
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (
diff --git a/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py
index 9e6f310c688..6159c89f43a 100644
--- a/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py
@@ -9,11 +9,14 @@
 from transformers import PreTrainedTokenizerBase
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaMessage,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
index 351774ca1f4..041bd6efcc9 100644
--- a/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
@@ -10,12 +10,15 @@
 from transformers import PreTrainedTokenizerBase
 
 import vllm.envs as envs
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger
diff --git a/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
index 166b39603c0..a2b2d456ed8 100644
--- a/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
@@ -8,12 +8,15 @@
 import regex as re
 
 from vllm.entrypoints.chat_utils import random_tool_call_id
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
                                               FunctionCall, ResponsesRequest,
                                               ToolCall)
+# yapf: enable
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger

From 5a9392c1151c1720629498f6bbf1d4ea1247a588 Mon Sep 17 00:00:00 2001
From: chaunceyjiang
Date: Mon, 14 Jul 2025 06:38:02 +0000
Subject: [PATCH 10/14] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang
---
 tests/v1/entrypoints/openai/responses/conftest.py       | 2 +-
 .../entrypoints/openai/responses/test_function_call.py  | 2 +-
 vllm/entrypoints/openai/serving_responses.py            | 9 +++++++--
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/tests/v1/entrypoints/openai/responses/conftest.py b/tests/v1/entrypoints/openai/responses/conftest.py
index 20d71c13052..471f016227a 100644
--- a/tests/v1/entrypoints/openai/responses/conftest.py
+++ b/tests/v1/entrypoints/openai/responses/conftest.py
@@ -6,7 +6,7 @@
 from tests.utils import RemoteOpenAIServer
 
 # Use a small reasoning model to test the responses API.
-MODEL_NAME = "Qwen/Qwen3-1.7B"
+MODEL_NAME = "Qwen/Qwen3-0.6B"
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/v1/entrypoints/openai/responses/test_function_call.py b/tests/v1/entrypoints/openai/responses/test_function_call.py
index 4f88a49ba44..9cea8c56d48 100644
--- a/tests/v1/entrypoints/openai/responses/test_function_call.py
+++ b/tests/v1/entrypoints/openai/responses/test_function_call.py
@@ -6,7 +6,7 @@
 import openai  # use the official client for correctness check
 import pytest
 
-MODEL_NAME = "Qwen/Qwen3-1.7B"
+MODEL_NAME = "Qwen/Qwen3-0.6B"
 tools = [
     {
         "type": "function",
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 637da61656d..f46a66efd4a 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -350,7 +350,9 @@ async def responses_full_generator(
                 outputs.append(reasoning_item)
             if message_item:
                 outputs.append(message_item)
-        elif request.tool_choice is None or request.tool_choice == "none":
+        elif (request.tool_choice == "none" and \
+            not self.expand_tools_even_if_tool_choice_none) or \
+            request.tool_choice is None:
             # No tool calls.
             if reasoning_item:
                 outputs.append(reasoning_item)
@@ -372,7 +374,7 @@ async def responses_full_generator(
                                    ensure_ascii=False))
                 for tool_call in tool_calls
             ])
-        elif request.tool_choice == "auto":
+        elif request.tool_choice == "auto" or request.tool_choice == "none":
             try:
                 tool_parser = self.tool_parser(tokenizer)
             except RuntimeError as e:
@@ -381,6 +383,7 @@ async def responses_full_generator(
             tool_call_info = tool_parser.extract_tool_calls(
                 content if content is not None else "", request=request)
             if tool_call_info is not None and tool_call_info.tools_called:
+                # extract_tool_calls() returns a list of tool calls.
                 function_calls.extend(
                     FunctionCall(
                         name=tool_call.function.name,
@@ -476,6 +479,7 @@ def _construct_input_messages(
         else:
             for item in request.input:
                 if item.get("type") == "function_call":
+                    # Append the function call as a tool call.
                     messages.append({
                         "role":
                         "assistant",
                         "tool_calls": [{
                             "id": item.get("call_id"),
                             "function": {
                                 "name": item.get("name"),
                                 "arguments": item.get("arguments", "{}"),
                             },
                             "type": "function",
                         }]
                     })
                 elif item.get("type") == "function_call_output":
+                    # Append the function call output as a tool message.
                     messages.append({
                         "role": "tool",
                         "content": item.get("output", ""),

From 42d98f9efa28f6253ecbf7ab61852b712eab4e77 Mon Sep 17 00:00:00 2001
From: chaunceyjiang
Date: Tue, 15 Jul 2025 07:00:08 +0000
Subject: [PATCH 11/14] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang
---
 vllm/entrypoints/openai/serving_chat.py      | 46 +++----------
 vllm/entrypoints/openai/serving_engine.py    | 48 +++++++++++++-
 vllm/entrypoints/openai/serving_responses.py | 70 +++++++++-----------
 3 files changed, 88 insertions(+), 76 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index b902166a25b..f2786305235 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -6,7 +6,7 @@
 import time
 from collections.abc import AsyncGenerator, AsyncIterator
 from collections.abc import Sequence as GenericSequence
-from typing import Callable, Final, Optional, Union
+from typing import Final, Optional, Union
 
 import jinja2
 import partial_json_parser
@@ -31,13 +31,12 @@
 from vllm.entrypoints.openai.serving_engine import (OpenAIServing,
                                                     clamp_prompt_logprobs)
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
-from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
+from vllm.entrypoints.openai.tool_parsers import ToolParser
 from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import (
     MistralToolCall)
 from vllm.entrypoints.utils import get_max_tokens
 from vllm.logger import init_logger
 from vllm.outputs import CompletionOutput, RequestOutput
-from vllm.reasoning import ReasoningParser, ReasoningParserManager
 from vllm.sampling_params import BeamSearchParams, SamplingParams
 from vllm.sequence import Logprob
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
@@ -78,39 +77,16 @@ def __init__(
         self.chat_template = chat_template
         self.chat_template_content_format: Final = chat_template_content_format
 
-        # set up tool use
         self.enable_auto_tools: bool = enable_auto_tools
-        if self.enable_auto_tools:
-            logger.info(
-                "\"auto\" tool choice has been enabled please note that while"
-                " the parallel_tool_calls client option is preset for "
-                "compatibility reasons, it will be ignored.")
-
-        self.reasoning_parser: Optional[Callable[[AnyTokenizer],
-                                                 ReasoningParser]] = None
-        if reasoning_parser:
-            try:
-                self.reasoning_parser = (
-                    ReasoningParserManager.get_reasoning_parser(
-                        reasoning_parser))
-                assert self.reasoning_parser is not None
-            except Exception as e:
-                raise TypeError(
-                    f"{reasoning_parser=} has not been registered") from e
-        self.tool_parser: Optional[Callable[[AnyTokenizer], ToolParser]] = None
-        if self.enable_auto_tools:
-            try:
-                if (tool_parser == "pythonic" and
-                        model_config.model.startswith("meta-llama/Llama-3.2")):
-                    logger.warning(
-                        "Llama3.2 models may struggle to emit valid pythonic"
-                        " tool calls")
-                self.tool_parser = ToolParserManager.get_tool_parser(
-                    tool_parser)
-            except Exception as e:
-                raise TypeError("Error: --enable-auto-tool-choice requires "
-                                f"tool_parser:'{tool_parser}' which has not "
-                                "been registered") from e
+        # set up reasoning parser
+        self.reasoning_parser = self._get_reasoning_parser(
+            reasoning_parser_name=reasoning_parser)
+        # set up tool use
+        self.tool_parser = self._get_tool_parser(
+            tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools)
+
+        self.expand_tools_even_if_tool_choice_none = (
+            expand_tools_even_if_tool_choice_none)
 
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
         self.enable_force_include_usage = enable_force_include_usage
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 9e643aa3252..b9a07a542d8 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -57,7 +57,7 @@
                                               TranscriptionResponse,
                                               TranslationRequest)
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
-from vllm.entrypoints.openai.tool_parsers import ToolParser
+from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
 # yapf: enable
 from vllm.inputs.data import EmbedsPrompt as EngineEmbedsPrompt
 from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
@@ -69,6 +69,7 @@
 from vllm.outputs import PoolingRequestOutput, RequestOutput
 from vllm.pooling_params import PoolingParams
 from vllm.prompt_adapter.request import PromptAdapterRequest
+from vllm.reasoning import ReasoningParser, ReasoningParserManager
 from vllm.sampling_params import BeamSearchParams, SamplingParams
 from vllm.sequence import Logprob, PromptLogprobs
 from vllm.tracing import (contains_trace_headers, extract_trace_headers,
@@ -1056,6 +1057,50 @@ def _get_model_name(self,
             return self.models.base_model_paths[0].name
         return model_name
 
+    def _get_tool_parser(
+        self,
+        tool_parser_name: str,
+        enable_auto_tools: bool = False
+    ) -> Optional[Callable[[AnyTokenizer], ToolParser]]:
+        """Get the tool parser based on the name."""
+        parser = None
+        if not enable_auto_tools:
+            return parser
+        logger.info(
+            "\"auto\" tool choice has been enabled please note that while"
+            " the parallel_tool_calls client option is preset for "
+            "compatibility reasons, it will be ignored.")
+        try:
+            if (tool_parser_name == "pythonic"
+                    and self.model_config.model.startswith(
+                        "meta-llama/Llama-3.2")):
+                logger.warning(
+                    "Llama3.2 models may struggle to emit valid pythonic"
+                    " tool calls")
+            parser = ToolParserManager.get_tool_parser(tool_parser_name)
+        except Exception as e:
+            raise TypeError("Error: --enable-auto-tool-choice requires "
+                            f"tool_parser:'{tool_parser_name}' which has not "
+                            "been registered") from e
+        return parser
+
+    def _get_reasoning_parser(
+        self,
+        reasoning_parser_name: str,
+    ) -> Optional[Callable[[AnyTokenizer], ReasoningParser]]:
+        """Get the reasoning parser based on the name."""
+        parser = None
+        if not reasoning_parser_name:
+            return None
+        try:
+            parser = (ReasoningParserManager.get_reasoning_parser(
+                reasoning_parser_name))
+            assert parser is not None
+        except Exception as e:
+            raise TypeError(
+                f"{reasoning_parser_name=} has not been registered") from e
+        return parser
+
 
 def clamp_prompt_logprobs(
     prompt_logprobs: Union[PromptLogprobs,
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index f46a66efd4a..e8a00883fb3 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -6,10 +6,15 @@
 import time
 from collections.abc import AsyncGenerator, AsyncIterator
 from http import HTTPStatus
-from typing import Callable, Final, Optional, Union
+from typing import Final, Optional, Union
 
 import jinja2
 from fastapi import Request
+from openai.types.chat import (ChatCompletionAssistantMessageParam,
+                               ChatCompletionMessageToolCallParam,
+                               ChatCompletionToolMessageParam)
+from openai.types.chat.chat_completion_message_tool_call_param import (
+    Function as FunctionCallTool)
 from openai.types.responses import (ResponseFunctionToolCall,
                                     ResponseOutputMessage, ResponseOutputText,
                                     ToolChoiceFunction)
@@ -32,10 +37,8 @@
 # yapf: enable
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
-from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
 from vllm.logger import init_logger
 from vllm.outputs import RequestOutput
-from vllm.reasoning import ReasoningParser, ReasoningParserManager
 from vllm.sampling_params import SamplingParams
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.utils import random_fc_uuid, random_uuid
@@ -72,30 +75,14 @@ def __init__(
         self.enable_auto_tools = enable_auto_tools
         self.expand_tools_even_if_tool_choice_none = (
             expand_tools_even_if_tool_choice_none)
-        self.tool_parser: Optional[Callable[[AnyTokenizer], ToolParser]] = None
-        if self.enable_auto_tools:
-            try:
-                self.tool_parser = ToolParserManager.get_tool_parser(
-                    tool_parser)
-            except Exception as e:
-                raise TypeError("Error: --enable-auto-tool-choice requires "
-                                f"tool_parser:'{tool_parser}' which has not "
-                                "been registered") from e
         self.chat_template = chat_template
         self.chat_template_content_format: Final = chat_template_content_format
 
-        self.reasoning_parser: Optional[Callable[[AnyTokenizer],
-                                                 ReasoningParser]] = None
-        if reasoning_parser:
-            try:
-                self.reasoning_parser = (
-                    ReasoningParserManager.get_reasoning_parser(
-                        reasoning_parser))
-                assert self.reasoning_parser is not None
-            except Exception as e:
-                raise TypeError(
-                    f"{reasoning_parser=} has not been registered") from e
+        self.reasoning_parser = self._get_reasoning_parser(
+            reasoning_parser_name=reasoning_parser)
+        self.tool_parser = self._get_tool_parser(
+            tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools)
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
         self.enable_force_include_usage = enable_force_include_usage
         self.default_sampling_params = (
@@ -480,25 +467,28 @@ def _construct_input_messages(
             for item in request.input:
                 if item.get("type") == "function_call":
                     # Append the function call as a tool call.
-                    messages.append({
-                        "role":
-                        "assistant",
-                        "tool_calls": [{
-                            "id": item.get("call_id"),
-                            "function": {
-                                "name": item.get("name"),
-                                "arguments": item.get("arguments", "{}"),
-                            },
-                            "type": "function",
-                        }]
-                    })
+                    messages.append(
+                        ChatCompletionAssistantMessageParam(
+                            role="assistant",
+                            tool_calls=[
+                                ChatCompletionMessageToolCallParam(
+                                    id=item.get("call_id"),
+                                    function=FunctionCallTool(
+                                        name=item.get("name"),
+                                        arguments=item.get("arguments", "{}"),
+                                    ),
+                                    type="function",
+                                )
+                            ],
+                        ))
                 elif item.get("type") == "function_call_output":
                     # Append the function call output as a tool message.
-                    messages.append({
-                        "role": "tool",
-                        "content": item.get("output", ""),
-                        "tool_call_id": item.get("call_id"),
-                    })
+                    messages.append(
+                        ChatCompletionToolMessageParam(
+                            role="tool",
+                            content=item.get("output", ""),
+                            tool_call_id=item.get("call_id"),
+                        ))
                 else:
                     messages.append(item)  # type: ignore
         return messages

From a981a72c160f3feeca854ec9d819fa809ca730ff Mon Sep 17 00:00:00 2001
From: chaunceyjiang
Date: Tue, 15 Jul 2025 07:06:00 +0000
Subject: [PATCH 12/14] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang
---
 vllm/entrypoints/openai/serving_engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index b9a07a542d8..12b8dd4f8a8 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -1059,12 +1059,12 @@ def _get_model_name(self,
 
     def _get_tool_parser(
         self,
-        tool_parser_name: str,
+        tool_parser_name: Optional[str] = None,
         enable_auto_tools: bool = False
     ) -> Optional[Callable[[AnyTokenizer], ToolParser]]:
         """Get the tool parser based on the name."""
         parser = None
-        if not enable_auto_tools:
+        if not enable_auto_tools or tool_parser_name is None:
             return parser

From 9d9cd2a40980bc7d3951ed60c0165813ba934fcf Mon Sep 17 00:00:00 2001
From: chaunceyjiang
Date: Tue, 15 Jul 2025 16:51:49 +0000
Subject: [PATCH 13/14] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang
---
 vllm/entrypoints/openai/serving_chat.py      |  3 ---
 vllm/entrypoints/openai/serving_responses.py | 22 ++------------------
 2 files changed, 2 insertions(+), 23 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index f2786305235..70a6b3b8982 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -85,9 +85,6 @@ def __init__(
         self.tool_parser = self._get_tool_parser(
             tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools)
 
-        self.expand_tools_even_if_tool_choice_none = (
-            expand_tools_even_if_tool_choice_none)
-
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
         self.enable_force_include_usage = enable_force_include_usage
         self.default_sampling_params = (
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index e8a00883fb3..ec2ab9db0ca 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -73,8 +73,6 @@ def __init__(
             enable_force_include_usage=enable_force_include_usage,
         )
         self.enable_auto_tools = enable_auto_tools
-        self.expand_tools_even_if_tool_choice_none = (
-            expand_tools_even_if_tool_choice_none)
         self.chat_template = chat_template
         self.chat_template_content_format: Final = chat_template_content_format
 
@@ -145,20 +143,6 @@ async def create_responses(
         tokenizer = await self.engine_client.get_tokenizer(lora_request)
         if request.tools is None:
             tool_dicts = None
-        elif (request.tool_choice == "none"
-              and not self.expand_tools_even_if_tool_choice_none):
-            if len(request.tools) > 0:
-                logger.warning_once(
-                    "Tools are specified but tool_choice is set to 'none' "
-                    "and --expand-tools-even-if-tool-choice-none is not "
-                    "enabled. Tool definitions will be excluded from the "
-                    "prompt. This behavior will change in vLLM v0.10 where "
-                    "tool definitions will be included by default even "
-                    "with tool_choice='none'. To adopt the new behavior "
-                    "now, use --expand-tools-even-if-tool-choice-none. "
-                    "To suppress this warning, either remove tools from "
-                    "the request or set tool_choice to a different value.")
-            tool_dicts = None
         else:
             tool_dicts = [tool.model_dump() for tool in request.tools]
         _, request_prompts, engine_prompts = await self._preprocess_chat(
@@ -331,15 +315,13 @@ async def responses_full_generator(
         )
         outputs = []
         function_calls: list[FunctionCall] = []
-        if (not self.enable_auto_tools or not self.tool_parser):
+        if not self.enable_auto_tools or not self.tool_parser:
             # Tools are not enabled
             if reasoning_item:
                 outputs.append(reasoning_item)
             if message_item:
                 outputs.append(message_item)
-        elif (request.tool_choice == "none" and \
-            not self.expand_tools_even_if_tool_choice_none) or \
-            request.tool_choice is None:
+        elif request.tool_choice is None:
             # No tool calls.
             if reasoning_item:
                 outputs.append(reasoning_item)

From a4a8d9269116c215bcdf9db45846819a096a0a2c Mon Sep 17 00:00:00 2001
From: chaunceyjiang
Date: Wed, 16 Jul 2025 03:12:22 +0000
Subject: [PATCH 14/14] [Frontend] OpenAI Responses API supports Tool/Function
 calling

Signed-off-by: chaunceyjiang
---
 vllm/entrypoints/openai/protocol.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 2667b9da36e..f5bea4ec897 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -353,7 +353,7 @@ def validate_prompt(cls, data):
             raise ValueError("prompt template is not supported")
         return data
 
-    @model_validator(mode="before") 
+    @model_validator(mode="before")
     def check_cache_salt_support(cls, data):
         if data.get("cache_salt") is not None:
             if not envs.VLLM_USE_V1: