Skip to content

Commit a992216

Browse files
Merge pull request #13 from character-tech/tanuj/usage
yield last chunk if it's usage
2 parents c65512b + cbd64d4 commit a992216

File tree

1 file changed

+7 -0 lines changed

1 file changed

+7 -0 lines changed

vllm/entrypoints/openai/serving_completion.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -313,6 +313,7 @@ async def completion_stream_generator(
313313
else:
314314
include_usage, include_continuous_usage = False, False
315315

316+
chunk = None
316317
try:
317318
async for prompt_idx, res in result_generator:
318319
prompt_token_ids = res.prompt_token_ids
@@ -441,6 +442,12 @@ async def completion_stream_generator(
441442
choices=[],
442443
usage=final_usage_info,
443444
)
445+
446+
# if accumulate, send the usage info attached to last chunk instead
447+
if request.accumulate and chunk is not None:
448+
chunk.usage = final_usage_info
449+
final_usage_chunk = chunk
450+
444451
final_usage_data = (final_usage_chunk.model_dump_json(
445452
exclude_unset=False, exclude_none=True))
446453
yield f"data: {final_usage_data}\n\n"

0 commit comments

Comments
 (0)