We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 466166d · commit b2f069e — Copy full SHA for b2f069e
vllm/model_executor/layers/sampler.py
@@ -1197,7 +1197,7 @@ def _get_next_prompt_tokens(
1197
prompt_tokens = seq_data.prompt_token_ids
1198
# +1 because we are looking for a next prompt token.
1199
next_token_index_start = computed_len + 1
1200
- next_token_index_end = min(computed_len + query_len + 1,
+ next_token_index_end = min(computed_len + query_len,
1201
len(prompt_tokens))
1202
next_prompt_tokens = prompt_tokens[
1203
next_token_index_start:next_token_index_end]
0 commit comments