
Commit d835d3d

y-sq authored and facebook-github-bot committed
fix flash attention benchmark failures
Reviewed By: xuzhao9
Differential Revision: D74487012
fbshipit-source-id: 4e5114f56b5115959a50b56b212a142adeef6b5d
1 parent 8d6e8b5 commit d835d3d

File tree

1 file changed (+5, -1)


tritonbench/operators/flash_attention/operator.py

Lines changed: 5 additions & 1 deletion
@@ -545,7 +545,11 @@ def get_ctx_vals():
         shapes = ctx_vals
         requires_grad = True
         for shape in shapes:
-            BATCH, H, N_CTX, N_CTX_KV, D_HEAD = shape
+            if len(shape) == 5:
+                BATCH, H, N_CTX, N_CTX_KV, D_HEAD = shape
+            else:
+                BATCH, H, N_CTX, D_HEAD = shape
+                N_CTX_KV = N_CTX
             q = torch.randn(
                 (BATCH, H, N_CTX, D_HEAD),
                 dtype=self.dtype,
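The fix makes the benchmark accept both 4-element shapes (no separate KV context length) and 5-element shapes. A minimal standalone sketch of the same unpacking logic, with a hypothetical helper name (`unpack_shape` is not in the patched file):

```python
def unpack_shape(shape):
    """Unpack a benchmark shape tuple into (BATCH, H, N_CTX, N_CTX_KV, D_HEAD).

    Mirrors the patched logic: a 5-tuple carries an explicit KV context
    length; a 4-tuple omits it, so the KV length defaults to the query
    context length (self-attention).
    """
    if len(shape) == 5:
        batch, h, n_ctx, n_ctx_kv, d_head = shape
    else:
        batch, h, n_ctx, d_head = shape
        n_ctx_kv = n_ctx  # self-attention: KV length equals query length
    return batch, h, n_ctx, n_ctx_kv, d_head


# 4-element shape: KV context defaults to the query context length.
print(unpack_shape((4, 8, 128, 64)))        # (4, 8, 128, 128, 64)
# 5-element shape: explicit KV context length is preserved.
print(unpack_shape((4, 8, 128, 256, 64)))   # (4, 8, 128, 256, 64)
```

This is why the original one-line unpacking failed: any configuration that supplied a 4-tuple raised a `ValueError` ("not enough values to unpack") before the benchmark could allocate its tensors.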
