Merge commit d5e35a9 (2 parents: 1c450a5 + 91a5600)
vllm/model_executor/models/llama.py
@@ -203,6 +203,7 @@ def __init__(
             quant_config, Fp8Config) or (isinstance(quant_config, QuarkConfig)
                                          and quant_config.is_fp8_w8a8())
         self.attn_fp8_out = (envs.VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT
+                             and envs.VLLM_USE_TRITON_FLASH_ATTN
                              and current_platform.is_fp8_fnuz() and use_fp8)
         if envs.VLLM_USE_V1 and not envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION:
             self.attn_fp8_out = False
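
For context, the change adds envs.VLLM_USE_TRITON_FLASH_ATTN as a fourth required condition before fp8 attention output is enabled. A minimal standalone sketch of the resulting gating logic follows; the function name compute_attn_fp8_out and its boolean parameters are hypothetical stand-ins for values that, in vLLM, come from the envs module, current_platform, and the quantization config.

# Sketch of the attn_fp8_out gating after this commit. All inputs are
# stubbed as plain booleans; names in comments map to the real sources.

def compute_attn_fp8_out(
    rocm_custom_paged_attn_fp8_out: bool,   # envs.VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT
    triton_flash_attn: bool,                # envs.VLLM_USE_TRITON_FLASH_ATTN (newly required)
    is_fp8_fnuz: bool,                      # current_platform.is_fp8_fnuz()
    use_fp8: bool,                          # Fp8Config, or QuarkConfig with fp8 w8a8
    use_v1: bool,                           # envs.VLLM_USE_V1
    v1_prefill_decode_attn: bool,           # envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION
) -> bool:
    # All four conditions must hold for fp8 attention output.
    attn_fp8_out = (rocm_custom_paged_attn_fp8_out
                    and triton_flash_attn
                    and is_fp8_fnuz and use_fp8)
    # The V1 engine disables it unless the prefill/decode attention path is on.
    if use_v1 and not v1_prefill_decode_attn:
        attn_fp8_out = False
    return attn_fp8_out

# Example: fp8-fnuz platform, Triton flash attention enabled, V0 engine.
assert compute_attn_fp8_out(True, True, True, True, False, False) is True
# The new requirement: without VLLM_USE_TRITON_FLASH_ATTN the flag stays off.
assert compute_attn_fp8_out(True, False, True, True, False, False) is False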