We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6c7451c · commit 55bc634 (copy full SHA: 55bc634)
vllm/entrypoints/openai/serving_completion.py
@@ -274,6 +274,7 @@ async def completion_stream_generator(
274
else:
275
include_usage, include_continuous_usage = False, False
276
277
+ chunk = None
278
try:
279
async for prompt_idx, res in result_generator:
280
prompt_token_ids = res.prompt_token_ids
@@ -393,6 +394,12 @@ async def completion_stream_generator(
393
394
choices=[],
395
usage=final_usage_info,
396
)
397
+
398
+ # if accumulate, send the usage info attached to last chunk instead
399
+ if request.accumulate:
400
+ chunk.usage = final_usage_info
401
+ final_usage_chunk = chunk
402
403
final_usage_data = (final_usage_chunk.model_dump_json(
404
exclude_unset=False, exclude_none=True))
405
yield f"data: {final_usage_data}\n\n"
0 commit comments