Skip to content

Commit cd7b3d0

Browse files
committed
[Frontend] OpenAI Responses API supports Tool/Function calling
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
1 parent c06373d commit cd7b3d0

File tree

3 files changed

+86
-76
lines changed

3 files changed

+86
-76
lines changed

vllm/entrypoints/openai/serving_chat.py

Lines changed: 9 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import time
77
from collections.abc import AsyncGenerator, AsyncIterator
88
from collections.abc import Sequence as GenericSequence
9-
from typing import Callable, Final, Optional, Union
9+
from typing import Final, Optional, Union
1010

1111
import jinja2
1212
import partial_json_parser
@@ -31,13 +31,12 @@
3131
from vllm.entrypoints.openai.serving_engine import (OpenAIServing,
3232
clamp_prompt_logprobs)
3333
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
34-
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
34+
from vllm.entrypoints.openai.tool_parsers import ToolParser
3535
from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import (
3636
MistralToolCall)
3737
from vllm.entrypoints.utils import get_max_tokens
3838
from vllm.logger import init_logger
3939
from vllm.outputs import CompletionOutput, RequestOutput
40-
from vllm.reasoning import ReasoningParser, ReasoningParserManager
4140
from vllm.sampling_params import BeamSearchParams, SamplingParams
4241
from vllm.sequence import Logprob
4342
from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
@@ -79,39 +78,14 @@ def __init__(
7978
self.chat_template = chat_template
8079
self.chat_template_content_format: Final = chat_template_content_format
8180

82-
# set up tool use
8381
self.enable_auto_tools: bool = enable_auto_tools
84-
if self.enable_auto_tools:
85-
logger.info(
86-
"\"auto\" tool choice has been enabled please note that while"
87-
" the parallel_tool_calls client option is preset for "
88-
"compatibility reasons, it will be ignored.")
89-
90-
self.reasoning_parser: Optional[Callable[[AnyTokenizer],
91-
ReasoningParser]] = None
92-
if reasoning_parser:
93-
try:
94-
self.reasoning_parser = (
95-
ReasoningParserManager.get_reasoning_parser(
96-
reasoning_parser))
97-
assert self.reasoning_parser is not None
98-
except Exception as e:
99-
raise TypeError(
100-
f"{reasoning_parser=} has not been registered") from e
101-
self.tool_parser: Optional[Callable[[AnyTokenizer], ToolParser]] = None
102-
if self.enable_auto_tools:
103-
try:
104-
if (tool_parser == "pythonic" and
105-
model_config.model.startswith("meta-llama/Llama-3.2")):
106-
logger.warning(
107-
"Llama3.2 models may struggle to emit valid pythonic"
108-
" tool calls")
109-
self.tool_parser = ToolParserManager.get_tool_parser(
110-
tool_parser)
111-
except Exception as e:
112-
raise TypeError("Error: --enable-auto-tool-choice requires "
113-
f"tool_parser:'{tool_parser}' which has not "
114-
"been registered") from e
82+
# set up reasoning parser
83+
self.reasoning_parser = self._get_reasoning_parser(
84+
reasoning_parser_name=reasoning_parser)
85+
# set up tool use
86+
self.tool_parser = self._get_tool_parser(
87+
tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools)
88+
11589
self.expand_tools_even_if_tool_choice_none = (
11690
expand_tools_even_if_tool_choice_none)
11791

vllm/entrypoints/openai/serving_engine.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
TranscriptionResponse,
6363
TranslationRequest)
6464
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
65-
from vllm.entrypoints.openai.tool_parsers import ToolParser
65+
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
6666
# yapf: enable
6767
from vllm.inputs.data import EmbedsPrompt as EngineEmbedsPrompt
6868
from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
@@ -74,6 +74,7 @@
7474
from vllm.outputs import PoolingRequestOutput, RequestOutput
7575
from vllm.pooling_params import PoolingParams
7676
from vllm.prompt_adapter.request import PromptAdapterRequest
77+
from vllm.reasoning import ReasoningParser, ReasoningParserManager
7778
from vllm.sampling_params import BeamSearchParams, SamplingParams
7879
from vllm.sequence import Logprob, PromptLogprobs
7980
from vllm.tracing import (contains_trace_headers, extract_trace_headers,
@@ -1052,6 +1053,51 @@ def _get_model_name(self,
10521053
return self.models.base_model_paths[0].name
10531054
return model_name
10541055

1056+
def _get_tool_parser(
1057+
self,
1058+
tool_parser_name: str,
1059+
enable_auto_tools: bool = False
1060+
) -> Optional[Callable[[AnyTokenizer], ToolParser]]:
1061+
"""Get the tool parser based on the name."""
1062+
parser = None
1063+
if not enable_auto_tools:
1064+
return parser
1065+
logger.info(
1066+
"\"auto\" tool choice has been enabled please note that while"
1067+
" the parallel_tool_calls client option is preset for "
1068+
"compatibility reasons, it will be ignored.")
1069+
"""Get the tool parser based on the name."""
1070+
try:
1071+
if (tool_parser_name == "pythonic"
1072+
and self.model_config.model.startswith(
1073+
"meta-llama/Llama-3.2")):
1074+
logger.warning(
1075+
"Llama3.2 models may struggle to emit valid pythonic"
1076+
" tool calls")
1077+
parser = ToolParserManager.get_tool_parser(tool_parser_name)
1078+
except Exception as e:
1079+
raise TypeError("Error: --enable-auto-tool-choice requires "
1080+
f"tool_parser:'{tool_parser_name}' which has not "
1081+
"been registered") from e
1082+
return parser
1083+
1084+
def _get_reasoning_parser(
1085+
self,
1086+
reasoning_parser_name: str,
1087+
) -> Optional[Callable[[AnyTokenizer], ReasoningParser]]:
1088+
"""Get the reasoning parser based on the name."""
1089+
parser = None
1090+
if not reasoning_parser_name:
1091+
return None
1092+
try:
1093+
parser = (ReasoningParserManager.get_reasoning_parser(
1094+
reasoning_parser_name))
1095+
assert parser is not None
1096+
except Exception as e:
1097+
raise TypeError(
1098+
f"{reasoning_parser_name=} has not been registered") from e
1099+
return parser
1100+
10551101

10561102
def clamp_prompt_logprobs(
10571103
prompt_logprobs: Union[PromptLogprobs,

vllm/entrypoints/openai/serving_responses.py

Lines changed: 30 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,15 @@
66
import time
77
from collections.abc import AsyncGenerator, AsyncIterator
88
from http import HTTPStatus
9-
from typing import Callable, Final, Optional, Union
9+
from typing import Final, Optional, Union
1010

1111
import jinja2
1212
from fastapi import Request
13+
from openai.types.chat import (ChatCompletionAssistantMessageParam,
14+
ChatCompletionMessageToolCallParam,
15+
ChatCompletionToolMessageParam)
16+
from openai.types.chat.chat_completion_message_tool_call_param import (
17+
Function as FunctionCallTool)
1318
from openai.types.responses import (ResponseFunctionToolCall,
1419
ResponseOutputMessage, ResponseOutputText,
1520
ToolChoiceFunction)
@@ -32,10 +37,8 @@
3237
# yapf: enable
3338
from vllm.entrypoints.openai.serving_engine import OpenAIServing
3439
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
35-
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
3640
from vllm.logger import init_logger
3741
from vllm.outputs import RequestOutput
38-
from vllm.reasoning import ReasoningParser, ReasoningParserManager
3942
from vllm.sampling_params import SamplingParams
4043
from vllm.transformers_utils.tokenizer import AnyTokenizer
4144
from vllm.utils import random_fc_uuid, random_uuid
@@ -73,30 +76,14 @@ def __init__(
7376
self.enable_auto_tools = enable_auto_tools
7477
self.expand_tools_even_if_tool_choice_none = (
7578
expand_tools_even_if_tool_choice_none)
76-
self.tool_parser: Optional[Callable[[AnyTokenizer], ToolParser]] = None
77-
if self.enable_auto_tools:
78-
try:
79-
self.tool_parser = ToolParserManager.get_tool_parser(
80-
tool_parser)
81-
except Exception as e:
82-
raise TypeError("Error: --enable-auto-tool-choice requires "
83-
f"tool_parser:'{tool_parser}' which has not "
84-
"been registered") from e
8579
self.chat_template = chat_template
8680
self.chat_template_content_format: Final = chat_template_content_format
8781

88-
self.reasoning_parser: Optional[Callable[[AnyTokenizer],
89-
ReasoningParser]] = None
90-
if reasoning_parser:
91-
try:
92-
self.reasoning_parser = (
93-
ReasoningParserManager.get_reasoning_parser(
94-
reasoning_parser))
95-
assert self.reasoning_parser is not None
96-
except Exception as e:
97-
raise TypeError(
98-
f"{reasoning_parser=} has not been registered") from e
82+
self.reasoning_parser = self._get_reasoning_parser(
83+
reasoning_parser_name=reasoning_parser)
9984

85+
self.tool_parser = self._get_tool_parser(
86+
tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools)
10087
self.enable_prompt_tokens_details = enable_prompt_tokens_details
10188
self.enable_force_include_usage = enable_force_include_usage
10289
self.default_sampling_params = (
@@ -481,25 +468,28 @@ def _construct_input_messages(
481468
for item in request.input:
482469
if item.get("type") == "function_call":
483470
# Append the function call as a tool call.
484-
messages.append({
485-
"role":
486-
"assistant",
487-
"tool_calls": [{
488-
"id": item.get("call_id"),
489-
"function": {
490-
"name": item.get("name"),
491-
"arguments": item.get("arguments", "{}"),
492-
},
493-
"type": "function",
494-
}]
495-
})
471+
messages.append(
472+
ChatCompletionAssistantMessageParam(
473+
role="assistant",
474+
tool_calls=[
475+
ChatCompletionMessageToolCallParam(
476+
id=item.get("call_id"),
477+
function=FunctionCallTool(
478+
name=item.get("name"),
479+
arguments=item.get("arguments", "{}"),
480+
),
481+
type="function",
482+
)
483+
],
484+
))
496485
elif item.get("type") == "function_call_output":
497486
# Append the function call output as a tool message.
498-
messages.append({
499-
"role": "tool",
500-
"content": item.get("output", ""),
501-
"tool_call_id": item.get("call_id"),
502-
})
487+
messages.append(
488+
ChatCompletionToolMessageParam(
489+
role="tool",
490+
content=item.get("output", ""),
491+
tool_call_id=item.get("call_id"),
492+
))
503493
else:
504494
messages.append(item) # type: ignore
505495
return messages

0 commit comments

Comments (0)