1 parent 4ac9c33 commit eb58f59
tests/v1/tpu/test_pallas.py
@@ -50,6 +50,7 @@ class FakeAttentionLayer:
     slot_mapping = torch.zeros((3, num_tokens), dtype=torch.int64)
     max_num_reqs = 8
     max_num_blocks_per_req = 8
+    num_kv_update_slices = torch.tensor([num_tokens], dtype=torch.int32)
     block_tables = torch.zeros((max_num_reqs, max_num_blocks_per_req),
                                dtype=torch.int32)
     context_lens = torch.ones((max_num_reqs, ), dtype=torch.int32)
@@ -65,6 +66,7 @@ class FakeAttentionLayer:
         context_lens=context_lens,
         query_start_loc=query_start_loc,
         num_seqs=num_seqs,
+        num_kv_update_slices=num_kv_update_slices,
         num_slices_per_kv_cache_update_block=8,
     )