File tree Expand file tree Collapse file tree 2 files changed +5
-3
lines changed Expand file tree Collapse file tree 2 files changed +5
-3
lines changed Original file line number Diff line number Diff line change @@ -32,7 +32,7 @@ async def pick_best_beam(self, responses: list[
32
32
ranking_scores = self .ranking_computer .compute (
33
33
heads_tensor , debug_info
34
34
)
35
- scores * = ranking_scores
35
+ scores + = ranking_scores
36
36
37
37
for i in range (len (responses )):
38
38
debug_info [i ].final_score = scores [i ]
Original file line number Diff line number Diff line change 1
1
# SPDX-License-Identifier: Apache-2.0
2
2
3
3
import asyncio
4
+ import math
4
5
import time
5
6
from collections .abc import AsyncGenerator , AsyncIterator
6
7
from collections .abc import Sequence as GenericSequence
11
12
from fastapi import Request
12
13
13
14
from vllm .beam .beam import BeamScorer
14
- from vllm .beam .filtering import BeamValidator
15
+ from vllm .beam .filtering import _CHUNK_SIZE , BeamValidator
15
16
from vllm .beam .metrics import report_metrics
16
17
from vllm .beam .penalty import MEOW_CLASSI_IDX , PenaltyComputer
17
18
from vllm .config import ModelConfig
@@ -97,13 +98,14 @@ async def _process_prefix(request: CompletionRequest):
97
98
async def _should_stop (final ):
98
99
return final .choices [0 ].finish_reason == "stop" or final .choices [0 ].is_filtered
99
100
101
+ max_chunks = math .ceil (request .max_tokens / _CHUNK_SIZE )
100
102
async def _chunk_generator ():
101
103
num_chunks = 0
102
104
should_stop = False
103
105
output = None
104
106
105
107
# TODO(@tanuj): calc created tokens
106
- while num_chunks < 4 and not should_stop :
108
+ while num_chunks < max_chunks and not should_stop :
107
109
num_chunks += 1
108
110
beams = await self .beam_validator .get_n_valid_beams (create_completion = self .create_completion , request = request , raw_request = raw_request )
109
111
final = await self .beam_scorer .pick_best_beam (beams )
You can’t perform that action at this time.
0 commit comments