File tree Expand file tree Collapse file tree 2 files changed +5
-3
lines changed Expand file tree Collapse file tree 2 files changed +5
-3
lines changed Original file line number Diff line number Diff line change @@ -32,7 +32,7 @@ async def pick_best_beam(self, responses: list[
32
32
ranking_scores = self .ranking_computer .compute (
33
33
heads_tensor , debug_info
34
34
)
35
- scores * = ranking_scores
35
+ scores + = ranking_scores
36
36
37
37
for i in range (len (responses )):
38
38
debug_info [i ].final_score = scores [i ]
Original file line number Diff line number Diff line change 2
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
3
4
4
import asyncio
5
+ import math
5
6
import time
6
7
from collections .abc import AsyncGenerator , AsyncIterator
7
8
from collections .abc import Sequence as GenericSequence
13
14
from typing_extensions import assert_never
14
15
15
16
from vllm .beam .beam import BeamScorer
16
- from vllm .beam .filtering import BeamValidator
17
+ from vllm .beam .filtering import _CHUNK_SIZE , BeamValidator
17
18
from vllm .beam .metrics import report_metrics
18
19
from vllm .beam .penalty import MEOW_CLASSI_IDX , PenaltyComputer
19
20
from vllm .config import ModelConfig
@@ -102,13 +103,14 @@ async def _process_prefix(request: CompletionRequest):
102
103
async def _should_stop (final ):
103
104
return final .choices [0 ].finish_reason == "stop" or final .choices [0 ].is_filtered
104
105
106
+ max_chunks = math .ceil (request .max_tokens / _CHUNK_SIZE )
105
107
async def _chunk_generator ():
106
108
num_chunks = 0
107
109
should_stop = False
108
110
output = None
109
111
110
112
# TODO(@tanuj): calc created tokens
111
- while num_chunks < 4 and not should_stop :
113
+ while num_chunks < max_chunks and not should_stop :
112
114
num_chunks += 1
113
115
beams = await self .beam_validator .get_n_valid_beams (create_completion = self .create_completion , request = request , raw_request = raw_request )
114
116
final = await self .beam_scorer .pick_best_beam (beams )
You can’t perform that action at this time.
0 commit comments