We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 662127a · commit 9b131ae — Copy full SHA for 9b131ae
vllm/v1/attention/backends/triton_attn.py
@@ -127,6 +127,7 @@ def __init__(
127
"TritonAttentionImpl")
128
129
self.fp8_dtype = current_platform.fp8_dtype()
130
+ self.use_prefill_decode_attn = envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION
131
132
def forward(
133
self,
@@ -168,7 +169,7 @@ def forward(
168
169
# performance to make sure it does not introduce any overhead.
170
171
num_queries_per_kv = query.shape[1] // key.shape[1]
- use_prefill_decode_attn = envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION or (
172
+ use_prefill_decode_attn = self.use_prefill_decode_attn or (
173
(num_queries_per_kv & (num_queries_per_kv - 1)) != 0)
174
175
num_actual_tokens = attn_metadata.num_actual_tokens
0 commit comments