[Misc] Do not print async output warning for v1 (#21151)

WoosukKwon · web-flow · commit 54cf1cae6222 · 2025-07-17T21:57:02.000-07:00
Signed-off-by: Woosuk Kwon &lt;woosuk.kwon@berkeley.edu&gt;
diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
@@ -99,7 +99,7 @@ def get_device_total_memory(cls, device_id: int = 0) -> int:
 
     @classmethod
     def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
-        if enforce_eager:
+        if enforce_eager and not envs.VLLM_USE_V1:
             logger.warning(
                 "To see benefits of async output processing, enable CUDA "
                 "graph. Since, enforce-eager is enabled, async output "
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
@@ -299,7 +299,7 @@ def get_device_total_memory(cls, device_id: int = 0) -> int:
 
     @classmethod
     def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
-        if enforce_eager:
+        if enforce_eager and not envs.VLLM_USE_V1:
             logger.warning(
                 "To see benefits of async output processing, enable CUDA "
                 "graph. Since, enforce-eager is enabled, async output "