From 806a10142b398538b8de1a33098bc4150cc9b0fd Mon Sep 17 00:00:00 2001
From: Your Name
Date: Thu, 3 Jul 2025 00:45:26 +0000
Subject: [PATCH 1/3] yield last chunk if it's usage

---
 vllm/entrypoints/openai/serving_completion.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index 6c9c29b7144..1e5584dab7a 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -323,6 +323,7 @@ async def completion_stream_generator(
         else:
             include_usage, include_continuous_usage = False, False

+        chunk = None
         try:
             async for prompt_idx, res in result_generator:
                 prompt_token_ids = res.prompt_token_ids
@@ -439,6 +440,12 @@ async def completion_stream_generator(
                 choices=[],
                 usage=final_usage_info,
             )
+
+            # if accumulate, send the usage info attached to last chunk instead
+            if request.accumulate:
+                chunk.usage = final_usage_info
+                final_usage_chunk = chunk
+
             final_usage_data = (final_usage_chunk.model_dump_json(
                 exclude_unset=False, exclude_none=True))
             yield f"data: {final_usage_data}\n\n"

From a51fc028ca3f6ab6e034364b96cef3400c2a384f Mon Sep 17 00:00:00 2001
From: Your Name
Date: Thu, 3 Jul 2025 00:46:12 +0000
Subject: [PATCH 2/3] chunk

---
 vllm/entrypoints/openai/serving_completion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index 1e5584dab7a..f8879fa7bf9 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -442,7 +442,7 @@ async def completion_stream_generator(
             )

             # if accumulate, send the usage info attached to last chunk instead
-            if request.accumulate:
+            if request.accumulate and chunk is not None:
                 chunk.usage = final_usage_info
                 final_usage_chunk = chunk

From a663810d10e55d2dff2646c8e91bbe0fbf257f9b Mon Sep 17 00:00:00 2001
From: "tanuj.tiwari"
Date: Mon, 7 Jul 2025 15:28:57 -0700
Subject: [PATCH 3/3] chore: drop whitespace-only changes

---
 vllm/entrypoints/openai/serving_completion.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index f8879fa7bf9..6c9c29b7144 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -323,7 +323,6 @@ async def completion_stream_generator(
         else:
             include_usage, include_continuous_usage = False, False

-        chunk = None
         try:
             async for prompt_idx, res in result_generator:
                 prompt_token_ids = res.prompt_token_ids
@@ -440,12 +439,6 @@ async def completion_stream_generator(
                 choices=[],
                 usage=final_usage_info,
             )
-
-            # if accumulate, send the usage info attached to last chunk instead
-            if request.accumulate and chunk is not None:
-                chunk.usage = final_usage_info
-                final_usage_chunk = chunk
-
             final_usage_data = (final_usage_chunk.model_dump_json(
                 exclude_unset=False, exclude_none=True))
             yield f"data: {final_usage_data}\n\n"
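
The pattern patches 1 and 2 implement: remember the most recent streamed chunk and, when the request asks for accumulated output, re-send that last chunk with the final usage stats attached instead of emitting a separate usage-only chunk; initializing the tracker before the loop and guarding on `chunk is not None` keeps the empty-stream case safe. A minimal, self-contained sketch of that pattern follows. It uses illustrative stand-in types (StreamChunk, UsageInfo) and a hypothetical `accumulate` flag mirroring `request.accumulate`; it is not vLLM's actual CompletionStreamResponse API.

    # Minimal sketch of "attach usage to the last chunk" (patches 1-2).
    # StreamChunk/UsageInfo are illustrative stand-ins, not vLLM's types.
    from dataclasses import dataclass
    from typing import AsyncIterator, Optional

    @dataclass
    class UsageInfo:
        prompt_tokens: int = 0
        completion_tokens: int = 0

    @dataclass
    class StreamChunk:
        text: str
        usage: Optional[UsageInfo] = None

    async def stream_with_usage(
        chunks: AsyncIterator[StreamChunk],
        usage: UsageInfo,
        accumulate: bool,
    ) -> AsyncIterator[StreamChunk]:
        # Initialize before the loop so the post-loop guard is safe even
        # when the stream yields nothing (the `chunk is not None` fix
        # from patch 2).
        chunk: Optional[StreamChunk] = None
        async for chunk in chunks:
            yield chunk
        if accumulate and chunk is not None:
            # Re-send the last data chunk with usage attached (patch 1).
            chunk.usage = usage
            yield chunk
        else:
            # Default OpenAI-style behavior: a trailing usage-only chunk
            # that carries no text.
            yield StreamChunk(text="", usage=usage)

Note the trade-off this sketch mirrors: attaching usage to the final data chunk saves one SSE event and keeps usage adjacent to the accumulated text, at the cost of serializing that chunk twice.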