Skip to content

Commit eaba4fc

Browse files
Update tests/v1/worker/test_gpu_model_runner.py
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent b38a35b commit eaba4fc

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

tests/v1/worker/test_gpu_model_runner.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -825,7 +825,12 @@ def test_hybrid_attention_mamba_tensor_shapes(monkeypatch):
825825
vllm_ctx[layer].kv_cache[0][1][
826826
blocks1, :] = ssm_blocks_constant.detach().clone()
827827

828-
# verify attention contents is unchanged
828+
# verify attention and mamba contents are correct
829829
for layer in [layer_0, layer_1]:
830830
assert torch.equal(vllm_ctx[layer].kv_cache[0][blocks0, :],
831831
attn_blocks_constant)
832+
for layer in [layer_2, layer_3, layer_4, layer_5]:
833+
assert torch.equal(vllm_ctx[layer].kv_cache[0][0][blocks1, :],
834+
conv_blocks_constant)
835+
assert torch.equal(vllm_ctx[layer].kv_cache[0][1][blocks1, :],
836+
ssm_blocks_constant)

0 commit comments

Comments (0)