Skip to content

Commit c89f9ca

Browse files
committed
include all state updates
Signed-off-by: Leo Tian <leo.tian@centml.ai>
1 parent 61c0b12 commit c89f9ca

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

vllm/v1/spec_decode/eagle.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -171,12 +171,6 @@ def propose(
171 171
hidden_states, attn_metadata,
172 172
batch_size)
173 173

174-
# Increment the sequence lengths.
175-
attn_metadata.max_seq_len += 1
176-
# Consider max model length.
177-
attn_metadata.max_seq_len = min(attn_metadata.max_seq_len,
178-
self.max_model_len)
179-
180 174
# copy inputs to buffer for cudagraph
181 175
# Run the model.
182 176
with set_forward_context(attn_metadata,
@@ -231,6 +225,12 @@ def advance_speculative_state(self, draft_token_ids: torch.Tensor,
231 225
BLOCK_SIZE=1024,
232 226
PADDING_SLOT_ID=PADDING_SLOT_ID)
233 227

228+
# Increment the sequence lengths.
229+
attn_metadata.max_seq_len += 1
230+
# Consider max model length.
231+
attn_metadata.max_seq_len = min(attn_metadata.max_seq_len,
232+
self.max_model_len)
233+
234 234
@staticmethod
235 235
def prepare_inputs(
236 236
# [batch_size + 1]

0 commit comments

Comments
 (0)