
Commit 55bc634
Author: Your Name
Parent: 6c7451c

yield last chunk if it's usage

1 file changed: vllm/entrypoints/openai/serving_completion.py (7 additions, 0 deletions)
@@ -274,6 +274,7 @@ async def completion_stream_generator(
         else:
             include_usage, include_continuous_usage = False, False
 
+        chunk = None
         try:
             async for prompt_idx, res in result_generator:
                 prompt_token_ids = res.prompt_token_ids
@@ -393,6 +394,12 @@ async def completion_stream_generator(
                     choices=[],
                     usage=final_usage_info,
                 )
+
+                # if accumulate, send the usage info attached to last chunk instead
+                if request.accumulate:
+                    chunk.usage = final_usage_info
+                    final_usage_chunk = chunk
+
             final_usage_data = (final_usage_chunk.model_dump_json(
                 exclude_unset=False, exclude_none=True))
             yield f"data: {final_usage_data}\n\n"
