1 parent a31e5d8 commit 91a5600
vllm/model_executor/models/llama.py
@@ -214,6 +214,7 @@ def __init__(
             quant_config, Fp8Config) or (isinstance(quant_config, QuarkConfig)
                                          and quant_config.is_fp8_w8a8())
         self.attn_fp8_out = (envs.VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT
+                             and envs.VLLM_USE_TRITON_FLASH_ATTN
                              and current_platform.is_fp8_fnuz() and use_fp8)
         if envs.VLLM_USE_V1 and not envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION:
             self.attn_fp8_out = False
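
In effect, the commit makes envs.VLLM_USE_TRITON_FLASH_ATTN an additional precondition for emitting FP8 attention output on ROCm. A minimal sketch of the resulting gating logic, with the environment flags and platform check replaced by plain booleans for illustration (the helper name and its parameters are hypothetical, not part of vLLM):

def attn_fp8_out_enabled(
    rocm_custom_paged_attn_fp8_out: bool,  # envs.VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT
    triton_flash_attn: bool,               # envs.VLLM_USE_TRITON_FLASH_ATTN (newly required)
    is_fp8_fnuz: bool,                     # current_platform.is_fp8_fnuz()
    use_fp8: bool,                         # Fp8Config, or QuarkConfig with fp8 w8a8
    use_v1: bool,                          # envs.VLLM_USE_V1
    v1_prefill_decode_attn: bool,          # envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION
) -> bool:
    # All four conditions must hold before FP8 attention output is used.
    enabled = (rocm_custom_paged_attn_fp8_out and triton_flash_attn
               and is_fp8_fnuz and use_fp8)
    # On the V1 engine, FP8 output is kept only when the
    # prefill/decode attention path is also enabled.
    if use_v1 and not v1_prefill_decode_attn:
        enabled = False
    return enabled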