Skip to content

Commit 0e4c87b

Browse files
Author: Your Name (committer)
Commit message: completion
1 parent 9c13f73 commit 0e4c87b

File tree

1 file changed

+7
-12
lines changed

1 file changed

+7
-12
lines changed

vllm/entrypoints/openai/serving_completion.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,10 @@
4444
from vllm.transformers_utils.tokenizer import AnyTokenizer
4545
from vllm.utils import merge_async_iterators
4646

47-
from numba.np.old_arraymath import numpy_unwrap
4847

4948
logger = init_logger(__name__)
5049

5150

52-
_CHUNK_SIZE = 16
53-
5451
class OpenAIServingCompletion(OpenAIServing):
5552

5653
def __init__(
@@ -104,27 +101,25 @@ async def _process_prefix(request: CompletionRequest):
104101
res = await _process_prefix(request)
105102
input_str_len = len(request.prompt)
106103

104+
async def _should_stop(final):
105+
return final.choices[0].finish_reason == "stop" or final.choices[0].is_filtered
106+
107107
async def _chunk_generator():
108108
num_chunks = 0
109-
eom = False
109+
should_stop = False
110110

111-
while num_chunks < 4 and not eom:
111+
while num_chunks < 4 and not should_stop:
112112
num_chunks += 1
113113
beams = await self.beam_validator.get_n_valid_beams(create_completion=self.create_completion, request=request, raw_request=raw_request)
114114
final = await self.beam_scorer.collapse_beams(beams, num_chunks)
115115
request.prompt = final.choices[0].text
116-
eom = final.choices[0].finish_reason == "stop"
116+
should_stop = await _should_stop(final)
117117
final.choices[0].text = final.choices[0].text[input_str_len:]
118118
yield f"data: {final.model_dump_json()}\n\n"
119119

120-
if eom:
120+
if should_stop:
121121
return
122122

123-
# Final chunk with trimmed text
124-
if final:
125-
final.choices[0].text = final.choices[0].text[input_str_len:]
126-
yield f"data: {final.model_dump_json()}\n\n"
127-
128123
yield "data: [DONE]\n\n"
129124

130125
return _chunk_generator()

0 commit comments

Comments (0)