include all state updates

leo-cf-tian · leo-cf-tian · commit c89f9ca2fe30 · 2025-05-15T19:47:45.000Z
Signed-off-by: Leo Tian <leo.tian@centml.ai>
diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
@@ -171,12 +171,6 @@ def propose(
                                            hidden_states, attn_metadata,
                                            batch_size)
 
-            # Increment the sequence lengths.
-            attn_metadata.max_seq_len += 1
-            # Consider max model length.
-            attn_metadata.max_seq_len = min(attn_metadata.max_seq_len,
-                                            self.max_model_len)
-
             # copy inputs to buffer for cudagraph
             # Run the model.
             with set_forward_context(attn_metadata,
@@ -231,6 +225,12 @@ def advance_speculative_state(self, draft_token_ids: torch.Tensor,
             BLOCK_SIZE=1024,
             PADDING_SLOT_ID=PADDING_SLOT_ID)
 
+        # Increment the sequence lengths.
+        attn_metadata.max_seq_len += 1
+        # Consider max model length.
+        attn_metadata.max_seq_len = min(attn_metadata.max_seq_len,
+                                        self.max_model_len)
+
     @staticmethod
     def prepare_inputs(
         # [batch_size + 1]