Skip to content

Commit eda843d

Browse files
committed
[Frontend] OpenAI Responses API supports Tool/Function calling
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
1 parent dfac70c commit eda843d

File tree

3 files changed

+21
-20
lines changed

3 files changed

+21
-20
lines changed

vllm/entrypoints/openai/protocol.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,8 @@ def validate_prompt(cls, data):
346346

347347
def _get_guided_json_from_tool(
348348
self) -> Optional[Union[str, dict, BaseModel]]:
349-
print(
350-
f"Tool choice: {self.tool_choice}, type: {type(self.tool_choice)}")
351349
# user has chosen to use a named tool
352-
if type(self.tool_choice) is ToolChoiceFunction:
350+
if isinstance(self.tool_choice, ToolChoiceFunction):
353351
tool_name = self.tool_choice.name
354352
tools = {tool.name: tool for tool in \
355353
self.tools if tool.type == "function"}
@@ -413,8 +411,6 @@ def get_tool_schema_defs(tools: list[ToolChoiceFunction]) -> dict:
413411
json_schema_defs = get_tool_schema_defs(self.tools)
414412
if json_schema_defs:
415413
json_schema["$defs"] = json_schema_defs
416-
print("Using tool choice 'required' for guided json decoding.")
417-
print(f"JSON schema: {json_schema}")
418414
return json_schema
419415

420416
return None

vllm/entrypoints/openai/serving_responses.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ async def responses_full_generator(
337337
elif request.tool_choice is None or request.tool_choice == "none":
338338
pass
339339
elif request.tool_choice == "required":
340+
assert content is not None
340341
tool_calls = TypeAdapter(
341342
list[FunctionDefinition]).validate_json(content)
342343
function_calls.extend([
@@ -364,16 +365,17 @@ async def responses_full_generator(
364365
"Unknown tool choice: %s. "
365366
"Using 'none' as the default tool choice.",
366367
request.tool_choice)
367-
output = [
368-
ResponseFunctionToolCall(
369-
id=f"fc_{random_fc_uuid()}",
370-
call_id=f"call_{random_uuid()}",
371-
type="function_call",
372-
status="completed",
373-
name=tool_call.name,
374-
arguments=tool_call.arguments,
375-
) for tool_call in function_calls
376-
]
368+
if function_calls:
369+
output = [
370+
ResponseFunctionToolCall(
371+
id=f"fc_{random_fc_uuid()}",
372+
call_id=f"call_{random_uuid()}",
373+
type="function_call",
374+
status="completed",
375+
name=tool_call.name,
376+
arguments=tool_call.arguments,
377+
) for tool_call in function_calls
378+
]
377379
# If no tool call is generated, we still need to return an output.
378380
if reasoning_content and output is None:
379381
output = ResponseReasoningItem(

vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
1010
DeltaMessage,
11-
ExtractedToolCallInformation)
11+
ExtractedToolCallInformation,
12+
ResponsesRequest)
1213
from vllm.logger import init_logger
1314
from vllm.transformers_utils.tokenizer import AnyTokenizer
1415
from vllm.utils import import_from_path, is_list_of
@@ -39,15 +40,17 @@ def vocab(self) -> dict[str, int]:
3940
return self.model_tokenizer.get_vocab()
4041

4142
def adjust_request(
42-
self, request: ChatCompletionRequest) -> ChatCompletionRequest:
43+
self, request: Union[ChatCompletionRequest, ResponsesRequest]
44+
) -> ChatCompletionRequest:
4345
"""
4446
Static method that used to adjust the request parameters.
4547
"""
4648
return request
4749

4850
def extract_tool_calls(
49-
self, model_output: str,
50-
request: ChatCompletionRequest) -> ExtractedToolCallInformation:
51+
self, model_output: str, request: Union[ChatCompletionRequest,
52+
ResponsesRequest]
53+
) -> ExtractedToolCallInformation:
5154
"""
5255
Static method that should be implemented for extracting tool calls from
5356
a complete model-generated string.
@@ -66,7 +69,7 @@ def extract_tool_calls_streaming(
6669
previous_token_ids: Sequence[int],
6770
current_token_ids: Sequence[int],
6871
delta_token_ids: Sequence[int],
69-
request: ChatCompletionRequest,
72+
request: Union[ChatCompletionRequest, ResponsesRequest],
7073
) -> Union[DeltaMessage, None]:
7174
"""
7275
Instance method that should be implemented for extracting tool calls

0 commit comments

Comments (0)