
Commit 313ae8c

[Deprecation] Remove everything scheduled for removal in v0.10.0 (#20979)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

1 parent c847e34 · commit 313ae8c

8 files changed: +2 additions, −120 deletions

docs/features/tool_calling.md

Lines changed: 1 addition & 3 deletions

@@ -103,9 +103,7 @@ When tool_choice='required' is set, the model is guaranteed to generate one or m
 
 vLLM supports the `tool_choice='none'` option in the chat completion API. When this option is set, the model will not generate any tool calls and will respond with regular text content only, even if tools are defined in the request.
 
-By default, when `tool_choice='none'` is specified, vLLM excludes tool definitions from the prompt to optimize context usage. To include tool definitions even with `tool_choice='none'`, use the `--expand-tools-even-if-tool-choice-none` option.
-
-Note: This behavior will change in v0.10.0, where tool definitions will be included by default even with `tool_choice='none'`.
+However, when `tool_choice='none'` is specified, vLLM includes tool definitions in the prompt.
 
 ## Automatic Function Calling
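For users of the chat completion API, the practical effect is that `tool_choice='none'` now only suppresses tool calls; it no longer strips tool definitions from the prompt. A minimal client-side sketch, assuming a vLLM OpenAI-compatible server on localhost and an illustrative model and tool (neither is taken from this commit):

```python
# Sketch under assumptions: server at localhost:8000, example model/tool names.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool, for illustration only
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
        },
    },
}]

response = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # assumed example model
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="none",  # definitions stay in the prompt; no tool call is made
)
print(response.choices[0].message.content)  # plain-text answer
```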

vllm/config.py

Lines changed: 1 addition & 34 deletions

@@ -26,7 +26,7 @@
 from pydantic.dataclasses import dataclass
 from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE
 from torch.distributed import ProcessGroup, ReduceOp
-from typing_extensions import Self, deprecated, runtime_checkable
+from typing_extensions import Self, runtime_checkable
 
 import vllm.envs as envs
 from vllm import version
@@ -3659,18 +3659,6 @@ def get_served_model_name(model: str,
 class DecodingConfig:
     """Dataclass which contains the decoding strategy of the engine."""
 
-    @property
-    @deprecated(
-        "`guided_decoding_backend` is deprecated and has been renamed to "
-        "`backend`. This will be removed in v0.10.0. Please use the "
-        "`backend` argument instead.")
-    def guided_decoding_backend(self) -> GuidedDecodingBackend:
-        return self.backend
-
-    @guided_decoding_backend.setter
-    def guided_decoding_backend(self, value: GuidedDecodingBackend):
-        self.backend = value
-
     backend: GuidedDecodingBackend = "auto" if envs.VLLM_USE_V1 else "xgrammar"
     """Which engine will be used for guided decoding (JSON schema / regex etc)
     by default. With "auto", we will make opinionated choices based on request
@@ -3713,9 +3701,6 @@ def compute_hash(self) -> str:
         return hash_str
 
     def __post_init__(self):
-        if ":" in self.backend:
-            self._extract_backend_options()
-
         if envs.VLLM_USE_V1:
             valid_guided_backends = get_args(GuidedDecodingBackendV1)
         else:
@@ -3731,24 +3716,6 @@ def __post_init__(self):
             raise ValueError("disable_additional_properties is only supported "
                              "for the guidance backend.")
 
-    @deprecated(
-        "Passing guided decoding backend options inside backend in the format "
-        "'backend:...' is deprecated. This will be removed in v0.10.0. Please "
-        "use the dedicated arguments '--disable-fallback', "
-        "'--disable-any-whitespace' and '--disable-additional-properties' "
-        "instead.")
-    def _extract_backend_options(self):
-        """Extract backend options from the backend string."""
-        backend, options = self.backend.split(":")
-        self.backend = cast(GuidedDecodingBackend, backend)
-        options_set = set(options.strip().split(","))
-        if "no-fallback" in options_set:
-            self.disable_fallback = True
-        if "disable-any-whitespace" in options_set:
-            self.disable_any_whitespace = True
-        if "no-additional-properties" in options_set:
-            self.disable_additional_properties = True
-
 
 DetailedTraceModules = Literal["model", "worker", "all"]
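Two migrations follow from this file: `guided_decoding_backend` is now spelled `backend`, and `"backend:option,..."` strings no longer parse. A hedged before/after sketch (the boolean field names are taken from the removed helper above; constructor-style usage is an assumption):

```python
from vllm.config import DecodingConfig

# Before (both forms removed in this commit):
#   cfg = DecodingConfig(backend="xgrammar:no-fallback,disable-any-whitespace")
#   cfg.guided_decoding_backend  # deprecated alias for cfg.backend

# After: a plain backend name plus dedicated boolean fields.
cfg = DecodingConfig(
    backend="xgrammar",
    disable_fallback=True,        # was "no-fallback" in the option string
    disable_any_whitespace=True,  # was "disable-any-whitespace"
)
print(cfg.backend)  # read via `backend`; the alias property is gone
```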

vllm/engine/arg_utils.py

Lines changed: 0 additions & 27 deletions

@@ -9,7 +9,6 @@
 import json
 import sys
 import threading
-import warnings
 from dataclasses import MISSING, dataclass, fields, is_dataclass
 from itertools import permutations
 from typing import (TYPE_CHECKING, Annotated, Any, Callable, Dict, List,
@@ -434,7 +433,6 @@ class EngineArgs:
 
     speculative_config: Optional[Dict[str, Any]] = None
 
-    qlora_adapter_name_or_path: Optional[str] = None
     show_hidden_metrics_for_version: Optional[str] = \
         ObservabilityConfig.show_hidden_metrics_for_version
     otlp_traces_endpoint: Optional[str] = \
@@ -468,7 +466,6 @@ class EngineArgs:
 
     additional_config: dict[str, Any] = \
         get_field(VllmConfig, "additional_config")
-    enable_reasoning: Optional[bool] = None  # DEPRECATED
     reasoning_parser: str = DecodingConfig.reasoning_backend
 
     use_tqdm_on_load: bool = LoadConfig.use_tqdm_on_load
@@ -486,13 +483,6 @@ def __post_init__(self):
         if isinstance(self.compilation_config, (int, dict)):
             self.compilation_config = CompilationConfig.from_cli(
                 str(self.compilation_config))
-        if self.qlora_adapter_name_or_path is not None:
-            warnings.warn(
-                "The `qlora_adapter_name_or_path` is deprecated "
-                "and will be removed in v0.10.0. ",
-                DeprecationWarning,
-                stacklevel=2,
-            )
         # Setup plugins
         from vllm.plugins import load_general_plugins
         load_general_plugins()
@@ -605,14 +595,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
                                 **load_kwargs["ignore_patterns"])
         load_group.add_argument("--use-tqdm-on-load",
                                 **load_kwargs["use_tqdm_on_load"])
-        load_group.add_argument(
-            "--qlora-adapter-name-or-path",
-            type=str,
-            default=None,
-            help="The `--qlora-adapter-name-or-path` has no effect, do not set"
-            " it, and it will be removed in v0.10.0.",
-            deprecated=True,
-        )
         load_group.add_argument('--pt-load-map-location',
                                 **load_kwargs["pt_load_map_location"])
 
@@ -633,15 +615,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         guided_decoding_group.add_argument(
             "--guided-decoding-disable-additional-properties",
             **guided_decoding_kwargs["disable_additional_properties"])
-        guided_decoding_group.add_argument(
-            "--enable-reasoning",
-            action=argparse.BooleanOptionalAction,
-            deprecated=True,
-            help="[DEPRECATED] The `--enable-reasoning` flag is deprecated as "
-            "of v0.9.0. Use `--reasoning-parser` to specify the reasoning "
-            "parser backend instead. This flag (`--enable-reasoning`) will be "
-            "removed in v0.10.0. When `--reasoning-parser` is specified, "
-            "reasoning mode is automatically enabled.")
         guided_decoding_group.add_argument(
             "--reasoning-parser",
             # This choices is a special case because it's not static
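Two CLI surfaces disappear here: `--qlora-adapter-name-or-path` (which already had no effect) is dropped outright, and `--enable-reasoning` is replaced by simply specifying `--reasoning-parser`, which enables reasoning on its own. A hedged sketch of the programmatic equivalent (model and parser names are illustrative assumptions, not from this commit):

```python
from vllm.engine.arg_utils import EngineArgs

# Before (removed): EngineArgs(..., enable_reasoning=True, reasoning_parser="...")
args = EngineArgs(
    model="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",  # assumed example model
    reasoning_parser="deepseek_r1",  # choosing a parser now enables reasoning
)
# CLI equivalent: vllm serve <model> --reasoning-parser deepseek_r1
# (`--enable-reasoning` and `--qlora-adapter-name-or-path` are no longer accepted)
```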

vllm/entrypoints/openai/api_server.py

Lines changed: 0 additions & 4 deletions

@@ -1514,8 +1514,6 @@ async def init_app_state(
         chat_template_content_format=args.chat_template_content_format,
         return_tokens_as_token_ids=args.return_tokens_as_token_ids,
         enable_auto_tools=args.enable_auto_tool_choice,
-        expand_tools_even_if_tool_choice_none=args.
-        expand_tools_even_if_tool_choice_none,
         tool_parser=args.tool_call_parser,
         reasoning_parser=args.reasoning_parser,
         enable_prompt_tokens_details=args.enable_prompt_tokens_details,
@@ -1531,8 +1529,6 @@ async def init_app_state(
         chat_template_content_format=args.chat_template_content_format,
         return_tokens_as_token_ids=args.return_tokens_as_token_ids,
         enable_auto_tools=args.enable_auto_tool_choice,
-        expand_tools_even_if_tool_choice_none=args.
-        expand_tools_even_if_tool_choice_none,
         tool_parser=args.tool_call_parser,
         reasoning_parser=args.reasoning_parser,
         enable_prompt_tokens_details=args.enable_prompt_tokens_details,

vllm/entrypoints/openai/cli_args.py

Lines changed: 0 additions & 12 deletions

@@ -182,13 +182,6 @@ class FrontendArgs:
     """If set to True, enable tracking server_load_metrics in the app state."""
     enable_force_include_usage: bool = False
     """If set to True, including usage on every request."""
-    expand_tools_even_if_tool_choice_none: bool = False
-    """Include tool definitions in prompts even when `tool_choice='none'`.
-
-    This is a transitional option that will be removed in v0.10.0. In
-    v0.10.0, tool definitions will always be included regardless of
-    `tool_choice` setting. Use this flag to test the upcoming behavior
-    before the breaking change."""
 
     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
@@ -225,11 +218,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         valid_tool_parsers = list(ToolParserManager.tool_parsers.keys())
         frontend_kwargs["tool_call_parser"]["choices"] = valid_tool_parsers
 
-        # Special case for expand-tools-even-if-tool-choice-none because of
-        # the deprecation field
-        frontend_kwargs["expand_tools_even_if_tool_choice_none"]\
-            ["deprecated"] = True
-
         frontend_group = parser.add_argument_group(
             title="Frontend",
             description=FrontendArgs.__doc__,

vllm/entrypoints/openai/serving_chat.py

Lines changed: 0 additions & 17 deletions

@@ -63,7 +63,6 @@ def __init__(
         return_tokens_as_token_ids: bool = False,
         reasoning_parser: str = "",
         enable_auto_tools: bool = False,
-        expand_tools_even_if_tool_choice_none: bool = False,
         tool_parser: Optional[str] = None,
         enable_prompt_tokens_details: bool = False,
         enable_force_include_usage: bool = False,
@@ -112,8 +111,6 @@ def __init__(
                 raise TypeError("Error: --enable-auto-tool-choice requires "
                                 f"tool_parser:'{tool_parser}' which has not "
                                 "been registered") from e
-        self.expand_tools_even_if_tool_choice_none = (
-            expand_tools_even_if_tool_choice_none)
 
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
         self.enable_force_include_usage = enable_force_include_usage
@@ -182,20 +179,6 @@ async def create_chat_completion(
 
         if request.tools is None:
             tool_dicts = None
-        elif (request.tool_choice == "none"
-              and not self.expand_tools_even_if_tool_choice_none):
-            if len(request.tools) > 0:
-                logger.warning_once(
-                    "Tools are specified but tool_choice is set to 'none' "
-                    "and --expand-tools-even-if-tool-choice-none is not "
-                    "enabled. Tool definitions will be excluded from the "
-                    "prompt. This behavior will change in vLLM v0.10 where "
-                    "tool definitions will be included by default even "
-                    "with tool_choice='none'. To adopt the new behavior "
-                    "now, use --expand-tools-even-if-tool-choice-none. "
-                    "To suppress this warning, either remove tools from "
-                    "the request or set tool_choice to a different value.")
-            tool_dicts = None
         else:
             tool_dicts = [tool.model_dump() for tool in request.tools]
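With the transitional branch and its warning gone, the tool-dict construction in `create_chat_completion` reduces to the two surviving cases, reconstructed here from the context lines of the hunk above:

```python
# Post-commit control flow: tool definitions are converted for the prompt
# whenever tools are present, regardless of the tool_choice setting.
if request.tools is None:
    tool_dicts = None
else:
    tool_dicts = [tool.model_dump() for tool in request.tools]
```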

vllm/entrypoints/openai/serving_responses.py

Lines changed: 0 additions & 1 deletion

@@ -51,7 +51,6 @@ def __init__(
         return_tokens_as_token_ids: bool = False,
         reasoning_parser: str = "",
         enable_auto_tools: bool = False,
-        expand_tools_even_if_tool_choice_none: bool = False,
         tool_parser: Optional[str] = None,
         enable_prompt_tokens_details: bool = False,
         enable_force_include_usage: bool = False,

vllm/sampling_params.py

Lines changed: 0 additions & 22 deletions

@@ -9,7 +9,6 @@
 
 import msgspec
 from pydantic import BaseModel
-from typing_extensions import deprecated
 
 from vllm.logger import init_logger
 from vllm.logits_process import LogitsProcessor
@@ -84,27 +83,6 @@ def __post_init__(self):
                 "You can only use one kind of guided decoding but multiple are "
                 f"specified: {self.__dict__}")
 
-        if self.backend is not None and ":" in self.backend:
-            self._extract_backend_options()
-
-    @deprecated(
-        "Passing guided decoding backend options inside backend in the format "
-        "'backend:...' is deprecated. This will be removed in v0.10.0. Please "
-        "use the dedicated arguments '--disable-fallback', "
-        "'--disable-any-whitespace' and '--disable-additional-properties' "
-        "instead.")
-    def _extract_backend_options(self):
-        """Extract backend options from the backend string."""
-        assert isinstance(self.backend, str)
-        self.backend, options = self.backend.split(":")
-        options_set = set(options.strip().split(","))
-        if "no-fallback" in options_set:
-            self.disable_fallback = True
-        if "disable-any-whitespace" in options_set:
-            self.disable_any_whitespace = True
-        if "no-additional-properties" in options_set:
-            self.disable_additional_properties = True
-
 
 class RequestOutputKind(Enum):
     # Return entire output so far in every RequestOutput
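The same string-to-flags migration applies to per-request guided decoding in this module. A hedged sketch, assuming the `__post_init__` in the hunk above belongs to `GuidedDecodingParams` and that the boolean fields set by the removed helper are public:

```python
from vllm.sampling_params import GuidedDecodingParams

# Before (removed in this commit):
#   GuidedDecodingParams(json=schema, backend="guidance:no-additional-properties")
schema = {"type": "object"}  # illustrative JSON schema
params = GuidedDecodingParams(
    json=schema,                         # exactly one kind of guided decoding
    backend="guidance",
    disable_additional_properties=True,  # replaces "no-additional-properties"
)
```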
