Skip to content

Commit 613fb38

Browse files
committed
[Frontend] OpenAI Responses API supports Tool/Function calling
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
1 parent f9d100b commit 613fb38

File tree

3 files changed

+21
-20
lines changed

3 files changed

+21
-20
lines changed

vllm/entrypoints/openai/protocol.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -345,10 +345,8 @@ def validate_prompt(cls, data):
345345

346346
def _get_guided_json_from_tool(
347347
self) -> Optional[Union[str, dict, BaseModel]]:
348-
print(
349-
f"Tool choice: {self.tool_choice}, type: {type(self.tool_choice)}")
350348
# user has chosen to use a named tool
351-
if type(self.tool_choice) is ToolChoiceFunction:
349+
if isinstance(self.tool_choice, ToolChoiceFunction):
352350
tool_name = self.tool_choice.name
353351
tools = {tool.name: tool for tool in \
354352
self.tools if tool.type == "function"}
@@ -412,8 +410,6 @@ def get_tool_schema_defs(tools: list[ToolChoiceFunction]) -> dict:
412410
json_schema_defs = get_tool_schema_defs(self.tools)
413411
if json_schema_defs:
414412
json_schema["$defs"] = json_schema_defs
415-
print("Using tool choice 'required' for guided json decoding.")
416-
print(f"JSON schema: {json_schema}")
417413
return json_schema
418414

419415
return None

vllm/entrypoints/openai/serving_responses.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ async def responses_full_generator(
338338
elif request.tool_choice is None or request.tool_choice == "none":
339339
pass
340340
elif request.tool_choice == "required":
341+
assert content is not None
341342
tool_calls = TypeAdapter(
342343
list[FunctionDefinition]).validate_json(content)
343344
function_calls.extend([
@@ -365,16 +366,17 @@ async def responses_full_generator(
365366
"Unknown tool choice: %s. "
366367
"Using 'none' as the default tool choice.",
367368
request.tool_choice)
368-
output = [
369-
ResponseFunctionToolCall(
370-
id=f"fc_{random_fc_uuid()}",
371-
call_id=f"call_{random_uuid()}",
372-
type="function_call",
373-
status="completed",
374-
name=tool_call.name,
375-
arguments=tool_call.arguments,
376-
) for tool_call in function_calls
377-
]
369+
if function_calls:
370+
output = [
371+
ResponseFunctionToolCall(
372+
id=f"fc_{random_fc_uuid()}",
373+
call_id=f"call_{random_uuid()}",
374+
type="function_call",
375+
status="completed",
376+
name=tool_call.name,
377+
arguments=tool_call.arguments,
378+
) for tool_call in function_calls
379+
]
378380
# If no tool call is generated, we still need to return an output.
379381
if reasoning_content and output is None:
380382
output = ResponseReasoningItem(

vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
1010
DeltaMessage,
11-
ExtractedToolCallInformation)
11+
ExtractedToolCallInformation,
12+
ResponsesRequest)
1213
from vllm.logger import init_logger
1314
from vllm.transformers_utils.tokenizer import AnyTokenizer
1415
from vllm.utils import import_from_path, is_list_of
@@ -39,15 +40,17 @@ def vocab(self) -> dict[str, int]:
3940
return self.model_tokenizer.get_vocab()
4041

4142
def adjust_request(
42-
self, request: ChatCompletionRequest) -> ChatCompletionRequest:
43+
self, request: Union[ChatCompletionRequest, ResponsesRequest]
44+
) -> ChatCompletionRequest:
4345
"""
4446
Static method that used to adjust the request parameters.
4547
"""
4648
return request
4749

4850
def extract_tool_calls(
49-
self, model_output: str,
50-
request: ChatCompletionRequest) -> ExtractedToolCallInformation:
51+
self, model_output: str, request: Union[ChatCompletionRequest,
52+
ResponsesRequest]
53+
) -> ExtractedToolCallInformation:
5154
"""
5255
Static method that should be implemented for extracting tool calls from
5356
a complete model-generated string.
@@ -66,7 +69,7 @@ def extract_tool_calls_streaming(
6669
previous_token_ids: Sequence[int],
6770
current_token_ids: Sequence[int],
6871
delta_token_ids: Sequence[int],
69-
request: ChatCompletionRequest,
72+
request: Union[ChatCompletionRequest, ResponsesRequest],
7073
) -> Union[DeltaMessage, None]:
7174
"""
7275
Instance method that should be implemented for extracting tool calls

0 commit comments

Comments (0)