1 parent 4ac9c33 commit eb58f59
tests/v1/tpu/test_pallas.py
@@ -50,6 +50,7 @@ class FakeAttentionLayer:
     slot_mapping = torch.zeros((3, num_tokens), dtype=torch.int64)
     max_num_reqs = 8
     max_num_blocks_per_req = 8
+    num_kv_update_slices = torch.tensor([num_tokens], dtype=torch.int32)
     block_tables = torch.zeros((max_num_reqs, max_num_blocks_per_req),
                                dtype=torch.int32)
     context_lens = torch.ones((max_num_reqs, ), dtype=torch.int32)
@@ -65,6 +66,7 @@ class FakeAttentionLayer:
         context_lens=context_lens,
         query_start_loc=query_start_loc,
         num_seqs=num_seqs,
+        num_kv_update_slices=num_kv_update_slices,
         num_slices_per_kv_cache_update_block=8,
     )