Skip to content

Commit fb7bed1

Browse files
author
Your Name
committed
fixes
1 parent 722e099 commit fb7bed1

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

vllm/beam/beam.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ async def pick_best_beam(self, responses: list[
 32  32   ranking_scores = self.ranking_computer.compute(
 33  33   heads_tensor, debug_info
 34  34   )
 35     - scores *= ranking_scores
     35 + scores += ranking_scores
 36  36
 37  37   for i in range(len(responses)):
 38  38   debug_info[i].final_score = scores[i]

vllm/entrypoints/openai/serving_completion.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
  2   2   # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
  3   3
  4   4   import asyncio
      5 + import math
  5   6   import time
  6   7   from collections.abc import AsyncGenerator, AsyncIterator
  7   8   from collections.abc import Sequence as GenericSequence
@@ -13,7 +14,7 @@
 13  14   from typing_extensions import assert_never
 14  15
 15  16   from vllm.beam.beam import BeamScorer
 16     - from vllm.beam.filtering import BeamValidator
     17 + from vllm.beam.filtering import _CHUNK_SIZE, BeamValidator
 17  18   from vllm.beam.metrics import report_metrics
 18  19   from vllm.beam.penalty import MEOW_CLASSI_IDX, PenaltyComputer
 19  20   from vllm.config import ModelConfig
@@ -102,13 +103,14 @@ async def _process_prefix(request: CompletionRequest):
102 103   async def _should_stop(final):
103 104   return final.choices[0].finish_reason == "stop" or final.choices[0].is_filtered
104 105
    106 + max_chunks = math.ceil(request.max_tokens / _CHUNK_SIZE)
105 107   async def _chunk_generator():
106 108   num_chunks = 0
107 109   should_stop = False
108 110   output = None
109 111
110 112   # TODO(@tanuj): calc created tokens
111     - while num_chunks < 4 and not should_stop:
    113 + while num_chunks < max_chunks and not should_stop:
112 114   num_chunks += 1
113 115   beams = await self.beam_validator.get_n_valid_beams(create_completion=self.create_completion, request=request, raw_request=raw_request)
114 116   final = await self.beam_scorer.pick_best_beam(beams)

0 commit comments

Comments
 (0)