Skip to content

Commit 8a9110d

Browse files
author
Your Name
committed
completion
1 parent edade0d commit 8a9110d

File tree

1 file changed

+7
-12
lines changed

1 file changed

+7
-12
lines changed

vllm/entrypoints/openai/serving_completion.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,10 @@
3838
from vllm.transformers_utils.tokenizer import AnyTokenizer
3939
from vllm.utils import merge_async_iterators
4040

41-
from numba.np.old_arraymath import numpy_unwrap
4241

4342
logger = init_logger(__name__)
4443

4544

46-
_CHUNK_SIZE = 16
47-
4845
class OpenAIServingCompletion(OpenAIServing):
4946

5047
def __init__(
@@ -96,27 +93,25 @@ async def _process_prefix(request: CompletionRequest):
9693
res = await _process_prefix(request)
9794
input_str_len = len(request.prompt)
9895

96+
async def _should_stop(final):
97+
return final.choices[0].finish_reason == "stop" or final.choices[0].is_filtered
98+
9999
async def _chunk_generator():
100100
num_chunks = 0
101-
eom = False
101+
should_stop = False
102102

103-
while num_chunks < 4 and not eom:
103+
while num_chunks < 4 and not should_stop:
104104
num_chunks += 1
105105
beams = await self.beam_validator.get_n_valid_beams(create_completion=self.create_completion, request=request, raw_request=raw_request)
106106
final = await self.beam_scorer.collapse_beams(beams, num_chunks)
107107
request.prompt = final.choices[0].text
108-
eom = final.choices[0].finish_reason == "stop"
108+
should_stop = await _should_stop(final)
109109
final.choices[0].text = final.choices[0].text[input_str_len:]
110110
yield f"data: {final.model_dump_json()}\n\n"
111111

112-
if eom:
112+
if should_stop:
113113
return
114114

115-
# Final chunk with trimmed text
116-
if final:
117-
final.choices[0].text = final.choices[0].text[input_str_len:]
118-
yield f"data: {final.model_dump_json()}\n\n"
119-
120115
yield "data: [DONE]\n\n"
121116

122117
return _chunk_generator()

0 commit comments

Comments (0)