Commit 773466f

[Frontend] OpenAI Responses API supports Tool/Function calling

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>

1 parent: 613fb38

17 files changed: +223 −90 lines changed
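End to end, the change lets a client send tool definitions to the Responses endpoint and get `function_call` output items back, mirroring what the new test below exercises. A minimal sketch, assuming a vLLM OpenAI-compatible server is already running with the tool-calling flags this commit adds to the test fixture; the base URL, API key, and model name are illustrative:

import json

import openai

client = openai.OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string"
                },
            },
            "required": ["city"],
        },
    },
}]

response = client.responses.create(
    model="Qwen/Qwen3-0.6B",
    input="What is the current weather in Berlin?",
    tools=tools,
    tool_choice="auto",
)

# With tool_choice="auto" the model may answer in text or call a tool; when
# it calls one, the output item is a function_call with JSON arguments.
for item in response.output:
    if item.type == "function_call":
        print(item.name, json.loads(item.arguments))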

tests/v1/entrypoints/openai/responses/conftest.py

Lines changed: 6 additions & 1 deletion

@@ -15,8 +15,13 @@ def default_server_args():
         "--max-model-len",
         "8192",
         "--enforce-eager",  # For faster startup.
+        "--enable-auto-tool-choice",
+        "--guided-decoding-backend",
+        "xgrammar",
+        "--tool-call-parser",
+        "hermes",
         "--reasoning-parser",
-        "deepseek_r1",
+        "qwen3",
     ]
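Outside the test harness, the fixture's flag set maps directly onto the standard server launcher. A sketch, assuming the usual vllm serve CLI, the model the new test targets, and the default port 8000:

# Launch a server with the same flags the fixture above now passes.
# --enable-auto-tool-choice plus --tool-call-parser enable tool calling;
# the hermes parser and qwen3 reasoning parser match Qwen3-style output.
import subprocess

server = subprocess.Popen([
    "vllm", "serve", "Qwen/Qwen3-0.6B",
    "--max-model-len", "8192",
    "--enforce-eager",
    "--enable-auto-tool-choice",
    "--guided-decoding-backend", "xgrammar",
    "--tool-call-parser", "hermes",
    "--reasoning-parser", "qwen3",
])
# ... issue requests against http://localhost:8000/v1, then shut down:
server.terminate()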

Lines changed: 145 additions & 0 deletions

@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+
+import openai  # use the official client for correctness check
+import pytest
+
+MODEL_NAME = "Qwen/Qwen3-0.6B"
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize("tool_choice", ["auto", "required"])
+async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
+                                 tool_choice: str):
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {
+                            "type": "string",
+                            "description":
+                            "The city to find the weather for, e.g. 'Vienna'",
+                            "default": "Vienna",
+                        },
+                        "country": {
+                            "type":
+                            "string",
+                            "description":
+                            "The country that the city is in, e.g. 'Austria'",
+                        },
+                        "unit": {
+                            "type": "string",
+                            "description":
+                            "The unit to fetch the temperature in",
+                            "enum": ["celsius", "fahrenheit"],
+                        },
+                        "options": {
+                            "$ref": "#/$defs/WeatherOptions",
+                            "description":
+                            "Optional parameters for weather query",
+                        },
+                    },
+                    "required": ["country", "unit"],
+                    "$defs": {
+                        "WeatherOptions": {
+                            "title": "WeatherOptions",
+                            "type": "object",
+                            "additionalProperties": False,
+                            "properties": {
+                                "unit": {
+                                    "type": "string",
+                                    "enum": ["celsius", "fahrenheit"],
+                                    "default": "celsius",
+                                    "description": "Temperature unit",
+                                    "title": "Temperature Unit",
+                                },
+                                "include_forecast": {
+                                    "type": "boolean",
+                                    "default": False,
+                                    "description":
+                                    "Whether to include a 24-hour forecast",
+                                    "title": "Include Forecast",
+                                },
+                                "language": {
+                                    "type": "string",
+                                    "default": "zh-CN",
+                                    "description": "Language of the response",
+                                    "title": "Language",
+                                    "enum": ["zh-CN", "en-US", "ja-JP"],
+                                },
+                            },
+                        },
+                    },
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "get_forecast",
+                "description": "Get the weather forecast for a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {
+                            "type": "string",
+                            "description":
+                            "The city to get the forecast for, e.g. 'Vienna'",
+                            "default": "Vienna",
+                        },
+                        "country": {
+                            "type":
+                            "string",
+                            "description":
+                            "The country that the city is in, e.g. 'Austria'",
+                        },
+                        "days": {
+                            "type":
+                            "integer",
+                            "description":
+                            "Number of days to get the forecast for (1-7)",
+                        },
+                        "unit": {
+                            "type": "string",
+                            "description":
+                            "The unit to fetch the temperature in",
+                            "enum": ["celsius", "fahrenheit"],
+                        },
+                    },
+                    "required": ["country", "days", "unit"],
+                },
+            },
+        },
+    ]
+
+    prompt = [{
+        "role":
+        "user",
+        "content":
+        "Can you tell me what the current weather is in Berlin and the "
+        "forecast for the next 5 days, in fahrenheit?",
+    }]
+    response = await client.responses.create(
+        model=model_name,
+        input=prompt,
+        tools=tools,
+        tool_choice=tool_choice,
+    )
+
+    assert len(response.output) >= 1
+    tool_call = response.output[0]
+
+    assert tool_call.type == "function_call"
+    assert json.loads(tool_call.arguments) is not None
+
+
+@pytest.mark.asyncio
+async def test_named_tool_use(client: openai.AsyncOpenAI, sample_json_schema):
+    pass
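test_named_tool_use is left as a stub in this commit. One possible shape for its body, not part of the commit and with the named tool_choice payload format an assumption, pins the call to a single function and asserts the model used it:

    # Hypothetical continuation of the stub: force one named tool and check
    # that exactly that function is called with parseable JSON arguments.
    response = await client.responses.create(
        model=MODEL_NAME,
        input="What is the current weather in Vienna?",
        tools=tools,  # the same tool list used in test_function_tool_use
        tool_choice={
            "type": "function",
            "function": {
                "name": "get_current_weather"
            },
        },
    )
    tool_call = response.output[0]
    assert tool_call.type == "function_call"
    assert tool_call.name == "get_current_weather"
    assert json.loads(tool_call.arguments) is not None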

vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py

Lines changed: 1 addition & 1 deletion

@@ -41,7 +41,7 @@ def vocab(self) -> dict[str, int]:

     def adjust_request(
             self, request: Union[ChatCompletionRequest, ResponsesRequest]
-    ) -> ChatCompletionRequest:
+    ) -> Union[ChatCompletionRequest, ResponsesRequest]:
        """
        Static method used to adjust the request parameters.
        """

vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py

Lines changed: 4 additions & 6 deletions

@@ -7,11 +7,9 @@
 import regex as re

 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
-                                              DeltaFunctionCall, DeltaMessage,
-                                              DeltaToolCall,
-                                              ExtractedToolCallInformation,
-                                              FunctionCall, ToolCall)
+from vllm.entrypoints.openai.protocol import (
+    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
+    ExtractedToolCallInformation, FunctionCall, ToolCall, ResponsesRequest)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger

@@ -71,7 +69,7 @@ def __init__(self, tokenizer: AnyTokenizer):
     def extract_tool_calls(
         self,
         model_output: str,
-        request: ChatCompletionRequest,
+        request: Union[ChatCompletionRequest, ResponsesRequest],
     ) -> ExtractedToolCallInformation:

         # sanity check; avoid unnecessary processing

vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py

Lines changed: 6 additions & 7 deletions

@@ -11,11 +11,9 @@
 from partial_json_parser.core.options import Allow

 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
-                                              DeltaFunctionCall, DeltaMessage,
-                                              DeltaToolCall,
-                                              ExtractedToolCallInformation,
-                                              FunctionCall, ToolCall)
+from vllm.entrypoints.openai.protocol import (
+    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
+    ExtractedToolCallInformation, FunctionCall, ToolCall, ResponsesRequest)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (consume_space,

@@ -47,8 +45,9 @@ def __init__(self, tokenizer: AnyTokenizer):
         self.tool_call_regex = re.compile(r"<function_call>\s*")

     def extract_tool_calls(
-            self, model_output: str,
-            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
+            self, model_output: str, request: Union[ChatCompletionRequest,
+                                                    ResponsesRequest]
+    ) -> ExtractedToolCallInformation:
         if self.tool_start_token not in model_output:
             return ExtractedToolCallInformation(tools_called=False,
                                                 tool_calls=[],

vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py

Lines changed: 6 additions & 7 deletions

@@ -9,11 +9,9 @@
 from partial_json_parser.core.options import Allow

 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
-                                              DeltaFunctionCall, DeltaMessage,
-                                              DeltaToolCall,
-                                              ExtractedToolCallInformation,
-                                              FunctionCall, ToolCall)
+from vllm.entrypoints.openai.protocol import (
+    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
+    ExtractedToolCallInformation, FunctionCall, ToolCall, ResponsesRequest)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (consume_space,

@@ -45,8 +43,9 @@ def __init__(self, tokenizer: AnyTokenizer):
         self.bot_string = "<tool_call>"

     def extract_tool_calls(
-            self, model_output: str,
-            request: ChatCompletionRequest) -> ExtractedToolCallInformation:
+            self, model_output: str, request: Union[ChatCompletionRequest,
+                                                    ResponsesRequest]
+    ) -> ExtractedToolCallInformation:
         stripped = model_output.strip()\
             .removeprefix(self.bot_token)\
             .removeprefix(self.bot_string)\

vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py

Lines changed: 4 additions & 6 deletions

@@ -10,11 +10,9 @@
 from partial_json_parser.core.options import Allow

 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
-                                              DeltaFunctionCall, DeltaMessage,
-                                              DeltaToolCall,
-                                              ExtractedToolCallInformation,
-                                              FunctionCall, ToolCall)
+from vllm.entrypoints.openai.protocol import (
+    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
+    ExtractedToolCallInformation, FunctionCall, ToolCall, ResponsesRequest)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger

@@ -64,7 +62,7 @@ def __init__(self, tokenizer: AnyTokenizer):
     def extract_tool_calls(
         self,
         model_output: str,
-        request: ChatCompletionRequest,
+        request: Union[ChatCompletionRequest, ResponsesRequest],
     ) -> ExtractedToolCallInformation:

         # sanity check; avoid unnecessary processing

vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py

Lines changed: 4 additions & 6 deletions

@@ -9,11 +9,9 @@
 from partial_json_parser.core.options import Allow

 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
-                                              DeltaFunctionCall, DeltaMessage,
-                                              DeltaToolCall,
-                                              ExtractedToolCallInformation,
-                                              FunctionCall, ToolCall)
+from vllm.entrypoints.openai.protocol import (
+    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
+    ExtractedToolCallInformation, FunctionCall, ToolCall, ResponsesRequest)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.entrypoints.openai.tool_parsers.utils import (

@@ -183,7 +181,7 @@ def extract_tool_calls_streaming(
     def extract_tool_calls(
         self,
         model_output: str,
-        request: ChatCompletionRequest,
+        request: Union[ChatCompletionRequest, ResponsesRequest],
     ) -> ExtractedToolCallInformation:
         text = model_output
         tools = request.tools

vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py

Lines changed: 3 additions & 5 deletions

@@ -10,11 +10,9 @@
 from partial_json_parser.core.options import Allow

 from vllm.entrypoints.chat_utils import random_tool_call_id
-from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
-                                              DeltaFunctionCall, DeltaMessage,
-                                              DeltaToolCall,
-                                              ExtractedToolCallInformation,
-                                              FunctionCall, ToolCall)
+from vllm.entrypoints.openai.protocol import (
+    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
+    ExtractedToolCallInformation, FunctionCall, ToolCall, ResponsesRequest)
 from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
 from vllm.entrypoints.openai.tool_parsers.utils import (
     extract_intermediate_diff)

vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py

Lines changed: 5 additions & 7 deletions

@@ -7,11 +7,9 @@

 import regex as re

-from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
-                                              DeltaFunctionCall, DeltaMessage,
-                                              DeltaToolCall,
-                                              ExtractedToolCallInformation,
-                                              FunctionCall, ToolCall)
+from vllm.entrypoints.openai.protocol import (
+    ChatCompletionRequest, DeltaFunctionCall, DeltaMessage, DeltaToolCall,
+    ExtractedToolCallInformation, FunctionCall, ToolCall, ResponsesRequest)
 from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
     ToolParser, ToolParserManager)
 from vllm.logger import init_logger

@@ -70,7 +68,7 @@ def __init__(self, tokenizer: AnyTokenizer):
     def extract_tool_calls(
         self,
         model_output: str,
-        request: ChatCompletionRequest,
+        request: Union[ChatCompletionRequest, ResponsesRequest],
     ) -> ExtractedToolCallInformation:

         # sanity check; avoid unnecessary processing

@@ -374,4 +372,4 @@ def extract_tool_calls_streaming(

         except Exception:
             logger.exception("Error trying to handle streaming tool call.")
-            return None  # do not stream a delta. skip this token ID.
\ No newline at end of file
+            return None  # do not stream a delta. skip this token ID.
