Skip to content

Commit 45f68a8

Browse files
aarnphm authored and njhill committed
revert: remove dataclass initialization and update warning messages
Co-authored-by: Nick Hill <nhill@redhat.com>
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
1 parent 8a99739 commit 45f68a8

File tree

2 files changed

+16
-35
lines changed

2 files changed

+16
-35
lines changed

vllm/v1/engine/processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def _validate_structured_output(self, params: SamplingParams) -> None:
154154

155155
if self.model_config.skip_tokenizer_init and self.decoding_config:
156156
raise ValueError(
157-
"'skip_tokenizer_init' is specified during engine startup. This implies that the model doesn't contain sufficient files to setup tokenizers, which structured outputs requires tokenizers to work. Specifying structured outputs parameters will not be supported in conjunction with 'skip_tokenizer_init'." # noqa: E501
157+
"Structured outputs requires a tokenizer so it can't be used with 'skip_tokenizer_init'" # noqa: E501
158158
)
159159

160160
engine_level_backend = self.decoding_config.backend

vllm/v1/structured_output/__init__.py

Lines changed: 15 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
from concurrent.futures import ThreadPoolExecutor
77
from typing import TYPE_CHECKING, Optional
88

9-
from pydantic import ConfigDict, Field
10-
from pydantic.dataclasses import dataclass
11-
129
from vllm.config import VllmConfig
1310
from vllm.logger import init_logger
1411
from vllm.reasoning import ReasoningParserManager
@@ -32,39 +29,23 @@
3229
logger = init_logger(__name__)
3330

3431

35-
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
3632
class StructuredOutputManager:
3733
"""Engine-level manager for structured output requests."""
38-
vllm_config: VllmConfig
39-
40-
backend: Optional[StructuredOutputBackend] = Field(
41-
default=None,
42-
init=False,
43-
repr=False,
44-
)
45-
reasoner: Optional[ReasoningParser] = Field(
46-
default=None,
47-
init=False,
48-
repr=False,
49-
)
50-
_grammar_bitmask: Optional[torch.Tensor] = Field(
51-
default=None,
52-
init=False,
53-
repr=False,
54-
)
55-
_full_mask: torch.Tensor = Field(
56-
default_factory=lambda: torch.tensor(-1, dtype=torch.int32),
57-
init=False,
58-
repr=False,
59-
)
60-
61-
def __post_init__(self):
34+
35+
def __init__(self, vllm_config: VllmConfig):
36+
self.backend: Optional[StructuredOutputBackend] = None
37+
self.reasoner: Optional[ReasoningParser] = None
38+
self.vllm_config = vllm_config
39+
40+
self._grammar_bitmask: Optional[torch.Tensor] = None
41+
self._full_mask = torch.tensor(-1, dtype=torch.int32)
42+
6243
if not self.vllm_config.model_config.skip_tokenizer_init:
63-
# The default max_workers if not specified is the number
64-
# of CPUs * 5, which is way too high since these tasks are
65-
# CPU-bound, not I/O bound. We also know we would never dominate
66-
# CPU usage with just grammar compilation, so we set it to half
67-
# the number of CPUs.
44+
# The default max_workers if not specified is the number of
45+
# CPUs * 5, which is way too high since these tasks are CPU-bound,
46+
# not I/O bound. We also know we would never dominate CPU usage
47+
# with just grammar compilation, so we set it to half the number
48+
# of CPUs.
6849
max_workers = max(1, (multiprocessing.cpu_count() + 1) // 2)
6950
self.executor = ThreadPoolExecutor(max_workers=max_workers)
7051
self.tokenizer = init_tokenizer_from_configs(
@@ -73,7 +54,7 @@ def __post_init__(self):
7354
lora_config=self.vllm_config.lora_config,
7455
).get_lora_tokenizer(None)
7556
reasoning_backend = \
76-
self.vllm_config.decoding_config.reasoning_backend
57+
self.vllm_config.decoding_config.reasoning_backend
7758
if reasoning_backend:
7859
reasoner_cls = ReasoningParserManager.get_reasoning_parser(
7960
reasoning_backend)

0 commit comments

Comments (0)