We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 466166d · commit 433f3d8 (Copy full SHA for 433f3d8)
vllm/model_executor/layers/sampler.py
@@ -1197,8 +1197,7 @@ def _get_next_prompt_tokens(
1197
prompt_tokens = seq_data.prompt_token_ids
1198
# +1 because we are looking for a next prompt token.
1199
next_token_index_start = computed_len + 1
1200
- next_token_index_end = min(computed_len + query_len + 1,
1201
- len(prompt_tokens))
+ next_token_index_end = min(computed_len + query_len, len(prompt_tokens))
1202
next_prompt_tokens = prompt_tokens[
1203
next_token_index_start:next_token_index_end]
1204
return next_prompt_tokens
0 commit comments