2 files changed: +6 -6 lines changed

@@ -187,10 +187,10 @@ def create_and_prepopulate_kv_cache(
 class MockAttentionLayer:
     """A mock attention layer for testing."""

-    def __init__(self):
-        self._q_scale = torch.tensor(1.0)
-        self._k_scale = torch.tensor(1.0)
-        self._v_scale = torch.tensor(1.0)
+    def __init__(self, device: torch.device):
+        self._q_scale = torch.tensor(1.0, device=device)
+        self._k_scale = torch.tensor(1.0, device=device)
+        self._v_scale = torch.tensor(1.0, device=device)
         # Add float versions for flashinfer
         self._k_scale_float = 1.0
         self._v_scale_float = 1.0
@@ -258,7 +258,7 @@ def mock_get_per_layer_parameters(vllm_config):
     )

     # Create mock layer and output buffer
-    mock_layer = MockAttentionLayer()
+    mock_layer = MockAttentionLayer(device)
     output = torch.empty_like(query)

     # Run forward pass
@@ -114,7 +114,7 @@ def get_attention_backend(backend_name: _Backend):
         _Backend.FLEX_ATTENTION:
         "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend",
         _Backend.TRITON_ATTN_VLLM_V1:
-        "vllm.v1.attention.backends.triton_attn.TritonAttnBackend",
+        "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend",
     }

     if backend_name not in backend_map:
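The mapped value is a dotted path that must name the class actually exported by vllm.v1.attention.backends.triton_attn, which is why the stale TritonAttnBackend string is corrected here. A rough sketch (not the repository's actual resolver) of how such a dotted path is typically turned into a class:

    # Rough sketch, not vLLM's actual resolver: an out-of-date class name such
    # as TritonAttnBackend would raise AttributeError at the getattr() below,
    # while TritonAttentionBackend resolves to the real backend class.
    import importlib

    def resolve_backend_class(qualname: str):
        module_name, _, class_name = qualname.rpartition(".")
        module = importlib.import_module(module_name)
        return getattr(module, class_name)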