Skip to content

Commit 42d98f9

Browse files
committed
[Frontend] OpenAI Responses API supports Tool/Function calling
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
1 parent 5a9392c commit 42d98f9

File tree

3 files changed

+88
-76
lines changed

3 files changed

+88
-76
lines changed

vllm/entrypoints/openai/serving_chat.py

Lines changed: 11 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import time
77
from collections.abc import AsyncGenerator, AsyncIterator
88
from collections.abc import Sequence as GenericSequence
9-
from typing import Callable, Final, Optional, Union
9+
from typing import Final, Optional, Union
1010

1111
import jinja2
1212
import partial_json_parser
@@ -31,13 +31,12 @@
3131
from vllm.entrypoints.openai.serving_engine import (OpenAIServing,
3232
clamp_prompt_logprobs)
3333
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
34-
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
34+
from vllm.entrypoints.openai.tool_parsers import ToolParser
3535
from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import (
3636
MistralToolCall)
3737
from vllm.entrypoints.utils import get_max_tokens
3838
from vllm.logger import init_logger
3939
from vllm.outputs import CompletionOutput, RequestOutput
40-
from vllm.reasoning import ReasoningParser, ReasoningParserManager
4140
from vllm.sampling_params import BeamSearchParams, SamplingParams
4241
from vllm.sequence import Logprob
4342
from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
@@ -78,39 +77,16 @@ def __init__(
7877
self.chat_template = chat_template
7978
self.chat_template_content_format: Final = chat_template_content_format
8079

81-
# set up tool use
8280
self.enable_auto_tools: bool = enable_auto_tools
83-
if self.enable_auto_tools:
84-
logger.info(
85-
"\"auto\" tool choice has been enabled please note that while"
86-
" the parallel_tool_calls client option is preset for "
87-
"compatibility reasons, it will be ignored.")
88-
89-
self.reasoning_parser: Optional[Callable[[AnyTokenizer],
90-
ReasoningParser]] = None
91-
if reasoning_parser:
92-
try:
93-
self.reasoning_parser = (
94-
ReasoningParserManager.get_reasoning_parser(
95-
reasoning_parser))
96-
assert self.reasoning_parser is not None
97-
except Exception as e:
98-
raise TypeError(
99-
f"{reasoning_parser=} has not been registered") from e
100-
self.tool_parser: Optional[Callable[[AnyTokenizer], ToolParser]] = None
101-
if self.enable_auto_tools:
102-
try:
103-
if (tool_parser == "pythonic" and
104-
model_config.model.startswith("meta-llama/Llama-3.2")):
105-
logger.warning(
106-
"Llama3.2 models may struggle to emit valid pythonic"
107-
" tool calls")
108-
self.tool_parser = ToolParserManager.get_tool_parser(
109-
tool_parser)
110-
except Exception as e:
111-
raise TypeError("Error: --enable-auto-tool-choice requires "
112-
f"tool_parser:'{tool_parser}' which has not "
113-
"been registered") from e
81+
# set up reasoning parser
82+
self.reasoning_parser = self._get_reasoning_parser(
83+
reasoning_parser_name=reasoning_parser)
84+
# set up tool use
85+
self.tool_parser = self._get_tool_parser(
86+
tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools)
87+
88+
self.expand_tools_even_if_tool_choice_none = (
89+
expand_tools_even_if_tool_choice_none)
11490

11591
self.enable_prompt_tokens_details = enable_prompt_tokens_details
11692
self.enable_force_include_usage = enable_force_include_usage

vllm/entrypoints/openai/serving_engine.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
TranscriptionResponse,
5858
TranslationRequest)
5959
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
60-
from vllm.entrypoints.openai.tool_parsers import ToolParser
60+
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
6161
# yapf: enable
6262
from vllm.inputs.data import EmbedsPrompt as EngineEmbedsPrompt
6363
from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
@@ -69,6 +69,7 @@
6969
from vllm.outputs import PoolingRequestOutput, RequestOutput
7070
from vllm.pooling_params import PoolingParams
7171
from vllm.prompt_adapter.request import PromptAdapterRequest
72+
from vllm.reasoning import ReasoningParser, ReasoningParserManager
7273
from vllm.sampling_params import BeamSearchParams, SamplingParams
7374
from vllm.sequence import Logprob, PromptLogprobs
7475
from vllm.tracing import (contains_trace_headers, extract_trace_headers,
@@ -1056,6 +1057,51 @@ def _get_model_name(self,
10561057
return self.models.base_model_paths[0].name
10571058
return model_name
10581059

1060+
def _get_tool_parser(
1061+
self,
1062+
tool_parser_name: str,
1063+
enable_auto_tools: bool = False
1064+
) -> Optional[Callable[[AnyTokenizer], ToolParser]]:
1065+
"""Get the tool parser based on the name."""
1066+
parser = None
1067+
if not enable_auto_tools:
1068+
return parser
1069+
logger.info(
1070+
"\"auto\" tool choice has been enabled please note that while"
1071+
" the parallel_tool_calls client option is preset for "
1072+
"compatibility reasons, it will be ignored.")
1073+
"""Get the tool parser based on the name."""
1074+
try:
1075+
if (tool_parser_name == "pythonic"
1076+
and self.model_config.model.startswith(
1077+
"meta-llama/Llama-3.2")):
1078+
logger.warning(
1079+
"Llama3.2 models may struggle to emit valid pythonic"
1080+
" tool calls")
1081+
parser = ToolParserManager.get_tool_parser(tool_parser_name)
1082+
except Exception as e:
1083+
raise TypeError("Error: --enable-auto-tool-choice requires "
1084+
f"tool_parser:'{tool_parser_name}' which has not "
1085+
"been registered") from e
1086+
return parser
1087+
1088+
def _get_reasoning_parser(
1089+
self,
1090+
reasoning_parser_name: str,
1091+
) -> Optional[Callable[[AnyTokenizer], ReasoningParser]]:
1092+
"""Get the reasoning parser based on the name."""
1093+
parser = None
1094+
if not reasoning_parser_name:
1095+
return None
1096+
try:
1097+
parser = (ReasoningParserManager.get_reasoning_parser(
1098+
reasoning_parser_name))
1099+
assert parser is not None
1100+
except Exception as e:
1101+
raise TypeError(
1102+
f"{reasoning_parser_name=} has not been registered") from e
1103+
return parser
1104+
10591105

10601106
def clamp_prompt_logprobs(
10611107
prompt_logprobs: Union[PromptLogprobs,

vllm/entrypoints/openai/serving_responses.py

Lines changed: 30 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,15 @@
66
import time
77
from collections.abc import AsyncGenerator, AsyncIterator
88
from http import HTTPStatus
9-
from typing import Callable, Final, Optional, Union
9+
from typing import Final, Optional, Union
1010

1111
import jinja2
1212
from fastapi import Request
13+
from openai.types.chat import (ChatCompletionAssistantMessageParam,
14+
ChatCompletionMessageToolCallParam,
15+
ChatCompletionToolMessageParam)
16+
from openai.types.chat.chat_completion_message_tool_call_param import (
17+
Function as FunctionCallTool)
1318
from openai.types.responses import (ResponseFunctionToolCall,
1419
ResponseOutputMessage, ResponseOutputText,
1520
ToolChoiceFunction)
@@ -32,10 +37,8 @@
3237
# yapf: enable
3338
from vllm.entrypoints.openai.serving_engine import OpenAIServing
3439
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
35-
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
3640
from vllm.logger import init_logger
3741
from vllm.outputs import RequestOutput
38-
from vllm.reasoning import ReasoningParser, ReasoningParserManager
3942
from vllm.sampling_params import SamplingParams
4043
from vllm.transformers_utils.tokenizer import AnyTokenizer
4144
from vllm.utils import random_fc_uuid, random_uuid
@@ -72,30 +75,14 @@ def __init__(
7275
self.enable_auto_tools = enable_auto_tools
7376
self.expand_tools_even_if_tool_choice_none = (
7477
expand_tools_even_if_tool_choice_none)
75-
self.tool_parser: Optional[Callable[[AnyTokenizer], ToolParser]] = None
76-
if self.enable_auto_tools:
77-
try:
78-
self.tool_parser = ToolParserManager.get_tool_parser(
79-
tool_parser)
80-
except Exception as e:
81-
raise TypeError("Error: --enable-auto-tool-choice requires "
82-
f"tool_parser:'{tool_parser}' which has not "
83-
"been registered") from e
8478
self.chat_template = chat_template
8579
self.chat_template_content_format: Final = chat_template_content_format
8680

87-
self.reasoning_parser: Optional[Callable[[AnyTokenizer],
88-
ReasoningParser]] = None
89-
if reasoning_parser:
90-
try:
91-
self.reasoning_parser = (
92-
ReasoningParserManager.get_reasoning_parser(
93-
reasoning_parser))
94-
assert self.reasoning_parser is not None
95-
except Exception as e:
96-
raise TypeError(
97-
f"{reasoning_parser=} has not been registered") from e
81+
self.reasoning_parser = self._get_reasoning_parser(
82+
reasoning_parser_name=reasoning_parser)
9883

84+
self.tool_parser = self._get_tool_parser(
85+
tool_parser_name=tool_parser, enable_auto_tools=enable_auto_tools)
9986
self.enable_prompt_tokens_details = enable_prompt_tokens_details
10087
self.enable_force_include_usage = enable_force_include_usage
10188
self.default_sampling_params = (
@@ -480,25 +467,28 @@ def _construct_input_messages(
480467
for item in request.input:
481468
if item.get("type") == "function_call":
482469
# Append the function call as a tool call.
483-
messages.append({
484-
"role":
485-
"assistant",
486-
"tool_calls": [{
487-
"id": item.get("call_id"),
488-
"function": {
489-
"name": item.get("name"),
490-
"arguments": item.get("arguments", "{}"),
491-
},
492-
"type": "function",
493-
}]
494-
})
470+
messages.append(
471+
ChatCompletionAssistantMessageParam(
472+
role="assistant",
473+
tool_calls=[
474+
ChatCompletionMessageToolCallParam(
475+
id=item.get("call_id"),
476+
function=FunctionCallTool(
477+
name=item.get("name"),
478+
arguments=item.get("arguments", "{}"),
479+
),
480+
type="function",
481+
)
482+
],
483+
))
495484
elif item.get("type") == "function_call_output":
496485
# Append the function call output as a tool message.
497-
messages.append({
498-
"role": "tool",
499-
"content": item.get("output", ""),
500-
"tool_call_id": item.get("call_id"),
501-
})
486+
messages.append(
487+
ChatCompletionToolMessageParam(
488+
role="tool",
489+
content=item.get("output", ""),
490+
tool_call_id=item.get("call_id"),
491+
))
502492
else:
503493
messages.append(item) # type: ignore
504494
return messages

0 commit comments

Comments (0)