Skip to content

Commit 373bb61

Browse files
author
Your Name
committed
fix
1 parent 9ffe4e0 commit 373bb61

File tree

2 files changed

+8
-6
lines changed

2 files changed

+8
-6
lines changed

vllm/beam/filtering.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -37,8 +37,7 @@ def __init__(self, classi_idx, classifier_names):
3737
async def get_n_valid_beams(self, create_completion: Callable,
3838
request: CompletionRequest,
3939
chunk_num: int,
40-
raw_request: Optional[Request] = None) -> list[
41-
Union[AsyncGenerator[str, None], CompletionResponseChoice, ErrorResponse]]:
40+
raw_request: Optional[Request] = None) -> CompletionResponse | ErrorResponse:
4241
request.stream = False
4342
original_n = request.n
4443
request.n = request.n if request.n > 1 else _DEFAULT_BEAM_SIZE
@@ -70,10 +69,12 @@ async def get_n_valid_beams(self, create_completion: Callable,
7069

7170
filtered_res = [r for r, valid in zip(res, beam_validator_res) if valid]
7271
logger.debug("Filtered count: %d", len(filtered_res))
72+
73+
raw_res.choices = filtered_res
7374
if len(filtered_res) == 0:
74-
return res
75+
return raw_res
7576

76-
return filtered_res
77+
return raw_res
7778

7879
def validate(self, responses: list[CompletionResponseChoice | ErrorResponse],
7980
debug_infos_G: list[BeamDebugInfo] = None):

vllm/entrypoints/openai/serving_completion.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -131,14 +131,15 @@ async def _chunk_generator():
131131
yield f"data: {beams.model_dump_json()}\n\n"
132132
break
133133

134-
final = await self.beam_scorer.pick_best_beam(beams)
134+
final = await self.beam_scorer.pick_best_beam(beams.choices)
135135
request.prompt = final.text
136136
should_stop = await _should_stop(final)
137137
final.text = final.text[input_str_len:]
138138
output = final.text
139+
beams.choices = [final]
139140
if self.request_logger:
140141
logger.info(f"yielding chunk {num_chunks} text: {final.text}")
141-
yield f"data: {final.model_dump_json()}\n\n"
142+
yield f"data: {beams.model_dump_json()}\n\n"
142143

143144
if should_stop:
144145
break

0 commit comments

Comments
 (0)