
Commit d18c40a: serving completion

Your Name committed
1 parent f0c43e7, commit d18c40a

File tree: 1 file changed (+9, -2 lines)

vllm/entrypoints/openai/serving_completion.py

Lines changed: 9 additions & 2 deletions
@@ -84,21 +84,26 @@ async def create_completion_with_chunkwise_beam(
        """
        Chunkwise beam search hack
        """
+
        async def _process_prefix(request: CompletionRequest):
            og_max_tokens = request.max_tokens
            og_n = request.n
-           request.max_tokens = 1
+           request.max_tokens = 0
            request.n = 1
+           request.echo = True
+           request.stream = False
            res = await self.create_completion(
                request,
                raw_request=raw_request,
            )
            request.max_tokens = og_max_tokens
            request.n = og_n
+           request.echo = False
+           request.stream = True
            return res

        res = await _process_prefix(request)
-       input_str_len = len(request.prompt)
+       input_str_len = len(res.choices[0].text)

        async def _should_stop(final):
            return final.choices[0].finish_reason == "stop" or final.choices[0].is_filtered

@@ -118,6 +123,8 @@ async def _chunk_generator():
            should_stop = await _should_stop(final)
            final.choices[0].text = final.choices[0].text[input_str_len:]
            output = final.choices[0].text
+           if self.request_logger:
+               logger.info(f"yielding chunk {num_chunks} text: {final.choices[0].text}")
            yield f"data: {final.model_dump_json()}\n\n"

            if should_stop:
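
For reference, the prefix-measurement change in the first hunk (max_tokens=0 with echo=True, then taking len(res.choices[0].text) instead of len(request.prompt)) can be sketched from the client side. The snippet below is a minimal sketch against an OpenAI-compatible completions endpoint, not part of this commit; the base URL and model name are assumptions.

# Minimal client-side sketch (not part of this commit) of the prefix-measurement
# trick above, assuming a vLLM OpenAI-compatible server at http://localhost:8000
# and a hypothetical model name.
import requests

BASE_URL = "http://localhost:8000/v1/completions"  # assumed server address


def measure_echoed_prefix(prompt: str, model: str = "my-model") -> int:
    # Request zero new tokens with echo=True so choices[0].text is the server's
    # own detokenized rendering of the prompt. Measuring that text (rather than
    # len(prompt)) matches what later chunks actually echo back, which is why
    # the commit switches input_str_len to len(res.choices[0].text).
    payload = {
        "model": model,      # hypothetical model name
        "prompt": prompt,
        "max_tokens": 0,     # generate nothing, just echo the prompt
        "n": 1,
        "echo": True,
        "stream": False,
    }
    res = requests.post(BASE_URL, json=payload, timeout=30).json()
    return len(res["choices"][0]["text"])


def strip_prefix(chunk_text: str, prefix_len: int) -> str:
    # Drop the echoed prompt prefix from a chunk, mirroring
    # final.choices[0].text[input_str_len:] in the server-side code.
    return chunk_text[prefix_len:]

Typical use would be prefix_len = measure_echoed_prefix("Hello"), then strip_prefix(chunk, prefix_len) on each streamed chunk that still echoes the prompt.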
