
Commit 647d71f

Author: Your Name
Commit message: serving completion
1 parent 409c13b commit 647d71f

File tree: 1 file changed (+9 lines, -2 lines)


vllm/entrypoints/openai/serving_completion.py

Lines changed: 9 additions & 2 deletions
@@ -87,21 +87,26 @@ async def create_completion_with_chunkwise_beam(
         """
         Chunkwise beam search hack
         """
+
         async def _process_prefix(request: CompletionRequest):
             og_max_tokens = request.max_tokens
             og_n = request.n
-            request.max_tokens = 1
+            request.max_tokens = 0
             request.n = 1
+            request.echo = True
+            request.stream = False
             res = await self.create_completion(
                 request,
                 raw_request=raw_request,
             )
             request.max_tokens = og_max_tokens
             request.n = og_n
+            request.echo = False
+            request.stream = True
             return res
 
         res = await _process_prefix(request)
-        input_str_len = len(request.prompt)
+        input_str_len = len(res.choices[0].text)
 
         async def _should_stop(final):
             return final.choices[0].finish_reason == "stop" or final.choices[0].is_filtered
@@ -121,6 +126,8 @@ async def _chunk_generator():
             should_stop = await _should_stop(final)
             final.choices[0].text = final.choices[0].text[input_str_len:]
             output = final.choices[0].text
+            if self.request_logger:
+                logger.info(f"yielding chunk {num_chunks} text: {final.choices[0].text}")
             yield f"data: {final.model_dump_json()}\n\n"
 
             if should_stop:
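To make the intent of the prefix pass easier to follow, below is a minimal, self-contained sketch of the same save/override/restore pattern. It is not the vLLM implementation: SimpleRequest, SimpleChoice, SimpleResponse, FakeCompletionBackend, and process_prefix are hypothetical stand-ins for CompletionRequest and self.create_completion, and the backend only fakes what an echo-only, zero-token completion returns.

from __future__ import annotations

import asyncio
from dataclasses import dataclass, field


@dataclass
class SimpleChoice:
    text: str
    finish_reason: str | None = None


@dataclass
class SimpleResponse:
    choices: list[SimpleChoice] = field(default_factory=list)


@dataclass
class SimpleRequest:
    prompt: str
    max_tokens: int = 16
    n: int = 2
    echo: bool = False
    stream: bool = True


class FakeCompletionBackend:
    async def create_completion(self, request: SimpleRequest) -> SimpleResponse:
        # With echo=True and max_tokens=0 the returned text is just the prompt
        # as the server would render it; no new tokens are generated.
        prefix = request.prompt if request.echo else ""
        generated = "" if request.max_tokens == 0 else " ...generated..."
        return SimpleResponse([SimpleChoice(text=prefix + generated)])


async def process_prefix(backend: FakeCompletionBackend,
                         request: SimpleRequest) -> SimpleResponse:
    # Save the caller's options, force a cheap echo-only non-streaming pass,
    # then restore the options so later chunks use the original settings.
    og_max_tokens, og_n = request.max_tokens, request.n
    request.max_tokens, request.n = 0, 1
    request.echo, request.stream = True, False
    res = await backend.create_completion(request)
    request.max_tokens, request.n = og_max_tokens, og_n
    request.echo, request.stream = False, True
    return res


async def main() -> None:
    request = SimpleRequest(prompt="Once upon a time")
    res = await process_prefix(FakeCompletionBackend(), request)
    # Measure the prefix from the echoed text rather than request.prompt,
    # mirroring `input_str_len = len(res.choices[0].text)` in the commit.
    input_str_len = len(res.choices[0].text)
    print(input_str_len)  # 16 for this prompt


asyncio.run(main())

Measuring the prefix from the echoed response, rather than from request.prompt, presumably keeps the stripped length in sync with what the engine actually prepends to each chunk, even if detokenization does not reproduce the original prompt string exactly.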
