Commit 96e95c7

fix token selection indexing

Signed-off-by: Leo Tian <leo.tian@centml.ai>

1 parent 65a7e0d commit 96e95c7

File tree

1 file changed: +3 −2 lines


vllm/v1/worker/gpu_model_runner.py

Lines changed: 3 additions & 2 deletions

```diff
@@ -1438,8 +1438,9 @@ def execute_model(

             # Fill with -1 first (or PLACEHOLDER_ID)
             # tokens selected for every row (valid or not)
-            selected_tokens = valid_sampled_token_ids_gpu[:batch,
-                                                          last_valid_indices]
+            selected_tokens = torch.gather(
+                valid_sampled_token_ids_gpu, 1,
+                last_valid_indices.unsqueeze(1)).squeeze(1)

             next_token_ids_gpu = torch.where(
                 last_valid_indices != -1, selected_tokens,
```
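The bug the hunk fixes can be reproduced on small tensors: advanced indexing with a row slice and a 1-D index tensor broadcasts the index over every row, producing a full cross product instead of one token per row, while `torch.gather` along dim 1 picks `last_valid_indices[i]` from row `i`. A minimal sketch with hypothetical shapes and values (the tensor contents below are illustrative, not from the commit):

```python
import torch

# Hypothetical 2-row batch of sampled token ids, one row per request.
valid_sampled_token_ids = torch.tensor([[10, 11, 12],
                                        [20, 21, 22]])
# Per-row position of the last valid token.
last_valid_indices = torch.tensor([2, 0])

# Buggy form: the index tensor is applied to every row of the slice,
# yielding a (2, 2) matrix rather than one token per row.
wrong = valid_sampled_token_ids[:2, last_valid_indices]
assert wrong.shape == (2, 2)  # [[12, 10], [22, 20]]

# Fixed form: gather selects element last_valid_indices[i] from row i.
right = torch.gather(valid_sampled_token_ids, 1,
                     last_valid_indices.unsqueeze(1)).squeeze(1)
assert right.tolist() == [12, 20]  # one token per row
```

`gather` needs its index tensor to match the input's rank, hence the `unsqueeze(1)` to shape `(batch, 1)` and the trailing `squeeze(1)` back to a flat vector.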
