Skip to content

Commit eda843d

Browse files
committed
[Frontend] OpenAI Responses API supports Tool/Function calling
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
1 parent dfac70c commit eda843d

File tree

3 files changed

+21
-20
lines changed

3 files changed

+21
-20
lines changed

vllm/entrypoints/openai/protocol.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,8 @@ def validate_prompt(cls, data):
346346

347347
def _get_guided_json_from_tool(
348348
self) -> Optional[Union[str, dict, BaseModel]]:
349-
print(
350-
f"Tool choice: {self.tool_choice}, type: {type(self.tool_choice)}")
351349
# user has chosen to use a named tool
352-
if type(self.tool_choice) is ToolChoiceFunction:
350+
if isinstance(self.tool_choice, ToolChoiceFunction):
353351
tool_name = self.tool_choice.name
354352
tools = {tool.name: tool for tool in \
355353
self.tools if tool.type == "function"}
@@ -413,8 +411,6 @@ def get_tool_schema_defs(tools: list[ToolChoiceFunction]) -> dict:
413411
json_schema_defs = get_tool_schema_defs(self.tools)
414412
if json_schema_defs:
415413
json_schema["$defs"] = json_schema_defs
416-
print("Using tool choice 'required' for guided json decoding.")
417-
print(f"JSON schema: {json_schema}")
418414
return json_schema
419415

420416
return None

vllm/entrypoints/openai/serving_responses.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ async def responses_full_generator(
337337
elif request.tool_choice is None or request.tool_choice == "none":
338338
pass
339339
elif request.tool_choice == "required":
340+
assert content is not None
340341
tool_calls = TypeAdapter(
341342
list[FunctionDefinition]).validate_json(content)
342343
function_calls.extend([
@@ -364,16 +365,17 @@ async def responses_full_generator(
364365
"Unknown tool choice: %s. "
365366
"Using 'none' as the default tool choice.",
366367
request.tool_choice)
367-
output = [
368-
ResponseFunctionToolCall(
369-
id=f"fc_{random_fc_uuid()}",
370-
call_id=f"call_{random_uuid()}",
371-
type="function_call",
372-
status="completed",
373-
name=tool_call.name,
374-
arguments=tool_call.arguments,
375-
) for tool_call in function_calls
376-
]
368+
if function_calls:
369+
output = [
370+
ResponseFunctionToolCall(
371+
id=f"fc_{random_fc_uuid()}",
372+
call_id=f"call_{random_uuid()}",
373+
type="function_call",
374+
status="completed",
375+
name=tool_call.name,
376+
arguments=tool_call.arguments,
377+
) for tool_call in function_calls
378+
]
377379
# If no tool call is generated, we still need to return an output.
378380
if reasoning_content and output is None:
379381
output = ResponseReasoningItem(

vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
1010
DeltaMessage,
11-
ExtractedToolCallInformation)
11+
ExtractedToolCallInformation,
12+
ResponsesRequest)
1213
from vllm.logger import init_logger
1314
from vllm.transformers_utils.tokenizer import AnyTokenizer
1415
from vllm.utils import import_from_path, is_list_of
@@ -39,15 +40,17 @@ def vocab(self) -> dict[str, int]:
3940
return self.model_tokenizer.get_vocab()
4041

4142
def adjust_request(
42-
self, request: ChatCompletionRequest) -> ChatCompletionRequest:
43+
self, request: Union[ChatCompletionRequest, ResponsesRequest]
44+
) -> ChatCompletionRequest:
4345
"""
4446
Static method that used to adjust the request parameters.
4547
"""
4648
return request
4749

4850
def extract_tool_calls(
49-
self, model_output: str,
50-
request: ChatCompletionRequest) -> ExtractedToolCallInformation:
51+
self, model_output: str, request: Union[ChatCompletionRequest,
52+
ResponsesRequest]
53+
) -> ExtractedToolCallInformation:
5154
"""
5255
Static method that should be implemented for extracting tool calls from
5356
a complete model-generated string.
@@ -66,7 +69,7 @@ def extract_tool_calls_streaming(
6669
previous_token_ids: Sequence[int],
6770
current_token_ids: Sequence[int],
6871
delta_token_ids: Sequence[int],
69-
request: ChatCompletionRequest,
72+
request: Union[ChatCompletionRequest, ResponsesRequest],
7073
) -> Union[DeltaMessage, None]:
7174
"""
7275
Instance method that should be implemented for extracting tool calls

0 commit comments

Comments (0)