Skip to content

Commit 5d65c32

Browse files
fix
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
1 parent e8418ab commit 5d65c32

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -707,7 +707,7 @@ def _prepare_inputs(
707 707
708 708   # Fill unused with -1. Needed for reshape_and_cache in full cuda
709 709   # graph mode.
710     - slot_mapping[total_num_scheduled_tokens:].fill_(-1)
    710 + blk_table.slot_mapping[total_num_scheduled_tokens:].fill_(-1)
711 711
712 712   common_attn_metadata = CommonAttentionMetadata(
713 713   query_start_loc=self.query_start_loc[:num_reqs + 1],

0 commit comments

Comments
 (0)