Fix unused-slot fill to write -1 into blk_table.slot_mapping

LucasWilkinson · LucasWilkinson · commit 5d65c327e509 · 2025-07-18T14:14:26.000-04:00
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
@@ -707,7 +707,7 @@ def _prepare_inputs(
 
             # Fill unused with -1. Needed for reshape_and_cache in full cuda
             # graph mode.
-            slot_mapping[total_num_scheduled_tokens:].fill_(-1)
+            blk_table.slot_mapping[total_num_scheduled_tokens:].fill_(-1)
 
             common_attn_metadata = CommonAttentionMetadata(
                 query_start_loc=self.query_start_loc[:num_reqs + 1],