Skip to content

Commit 45f68a8

Browse files
aarnphm authored and njhill committed
revert: remove dataclass initialization and update warning messages
Co-authored-by: Nick Hill <nhill@redhat.com>
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
1 parent 8a99739 commit 45f68a8

File tree

2 files changed

+16
-35
lines changed

2 files changed

+16
-35
lines changed

vllm/v1/engine/processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def _validate_structured_output(self, params: SamplingParams) -> None:
154154

155155
if self.model_config.skip_tokenizer_init and self.decoding_config:
156156
raise ValueError(
157-
"'skip_tokenizer_init' is specified during engine startup. This implies that the model doesn't contain sufficient files to setup tokenizers, which structured outputs requires tokenizers to work. Specifying structured outputs parameters will not be supported in conjunction with 'skip_tokenizer_init'." # noqa: E501
157+
"Structured outputs requires a tokenizer so it can't be used with 'skip_tokenizer_init'" # noqa: E501
158158
)
159159

160160
engine_level_backend = self.decoding_config.backend

vllm/v1/structured_output/__init__.py

Lines changed: 15 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
from concurrent.futures import ThreadPoolExecutor
77
from typing import TYPE_CHECKING, Optional
88

9-
from pydantic import ConfigDict, Field
10-
from pydantic.dataclasses import dataclass
11-
129
from vllm.config import VllmConfig
1310
from vllm.logger import init_logger
1411
from vllm.reasoning import ReasoningParserManager
@@ -32,39 +29,23 @@
3229
logger = init_logger(__name__)
3330

3431

35-
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
3632
class StructuredOutputManager:
3733
"""Engine-level manager for structured output requests."""
38-
vllm_config: VllmConfig
39-
40-
backend: Optional[StructuredOutputBackend] = Field(
41-
default=None,
42-
init=False,
43-
repr=False,
44-
)
45-
reasoner: Optional[ReasoningParser] = Field(
46-
default=None,
47-
init=False,
48-
repr=False,
49-
)
50-
_grammar_bitmask: Optional[torch.Tensor] = Field(
51-
default=None,
52-
init=False,
53-
repr=False,
54-
)
55-
_full_mask: torch.Tensor = Field(
56-
default_factory=lambda: torch.tensor(-1, dtype=torch.int32),
57-
init=False,
58-
repr=False,
59-
)
60-
61-
def __post_init__(self):
34+
35+
def __init__(self, vllm_config: VllmConfig):
36+
self.backend: Optional[StructuredOutputBackend] = None
37+
self.reasoner: Optional[ReasoningParser] = None
38+
self.vllm_config = vllm_config
39+
40+
self._grammar_bitmask: Optional[torch.Tensor] = None
41+
self._full_mask = torch.tensor(-1, dtype=torch.int32)
42+
6243
if not self.vllm_config.model_config.skip_tokenizer_init:
63-
# The default max_workers if not specified is the number
64-
# of CPUs * 5, which is way too high since these tasks are
65-
# CPU-bound, not I/O bound. We also know we would never dominate
66-
# CPU usage with just grammar compilation, so we set it to half
67-
# the number of CPUs.
44+
# The default max_workers if not specified is the number of
45+
# CPUs * 5, which is way too high since these tasks are CPU-bound,
46+
# not I/O bound. We also know we would never dominate CPU usage
47+
# with just grammar compilation, so we set it to half the number
48+
# of CPUs.
6849
max_workers = max(1, (multiprocessing.cpu_count() + 1) // 2)
6950
self.executor = ThreadPoolExecutor(max_workers=max_workers)
7051
self.tokenizer = init_tokenizer_from_configs(
@@ -73,7 +54,7 @@ def __post_init__(self):
7354
lora_config=self.vllm_config.lora_config,
7455
).get_lora_tokenizer(None)
7556
reasoning_backend = \
76-
self.vllm_config.decoding_config.reasoning_backend
57+
self.vllm_config.decoding_config.reasoning_backend
7758
if reasoning_backend:
7859
reasoner_cls = ReasoningParserManager.get_reasoning_parser(
7960
reasoning_backend)

0 commit comments

Comments (0)