Merge commit d5e35a9 (2 parents: 1c450a5 + 91a5600)
vllm/model_executor/models/llama.py
@@ -203,6 +203,7 @@ def __init__(
             quant_config, Fp8Config) or (isinstance(quant_config, QuarkConfig)
                                          and quant_config.is_fp8_w8a8())
         self.attn_fp8_out = (envs.VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT
+                             and envs.VLLM_USE_TRITON_FLASH_ATTN
                              and current_platform.is_fp8_fnuz() and use_fp8)
         if envs.VLLM_USE_V1 and not envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION:
             self.attn_fp8_out = False
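
For context, the change adds envs.VLLM_USE_TRITON_FLASH_ATTN as a fourth required condition before fp8 attention output is enabled. A minimal standalone sketch of the resulting gating logic follows; the function name compute_attn_fp8_out and its boolean parameters are hypothetical stand-ins for values that, in vLLM, come from the envs module, current_platform, and the quantization config.

# Sketch of the attn_fp8_out gating after this commit. All inputs are
# stubbed as plain booleans; names in comments map to the real sources.

def compute_attn_fp8_out(
    rocm_custom_paged_attn_fp8_out: bool,   # envs.VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT
    triton_flash_attn: bool,                # envs.VLLM_USE_TRITON_FLASH_ATTN (newly required)
    is_fp8_fnuz: bool,                      # current_platform.is_fp8_fnuz()
    use_fp8: bool,                          # Fp8Config, or QuarkConfig with fp8 w8a8
    use_v1: bool,                           # envs.VLLM_USE_V1
    v1_prefill_decode_attn: bool,           # envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION
) -> bool:
    # All four conditions must hold for fp8 attention output.
    attn_fp8_out = (rocm_custom_paged_attn_fp8_out
                    and triton_flash_attn
                    and is_fp8_fnuz and use_fp8)
    # The V1 engine disables it unless the prefill/decode attention path is on.
    if use_v1 and not v1_prefill_decode_attn:
        attn_fp8_out = False
    return attn_fp8_out

# Example: fp8-fnuz platform, Triton flash attention enabled, V0 engine.
assert compute_attn_fp8_out(True, True, True, True, False, False) is True
# The new requirement: without VLLM_USE_TRITON_FLASH_ATTN the flag stays off.
assert compute_attn_fp8_out(True, False, True, True, False, False) is False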