Commit a03a7c3

Author: Your Name

add support for accumulate in vllm

1 parent 96ee8bb

File tree

1 file changed: +13 -0 lines

vllm/entrypoints/openai/serving_completion.py

Lines changed: 13 additions & 0 deletions
@@ -315,6 +315,9 @@ async def completion_stream_generator(
         previous_num_tokens = [0] * num_choices * num_prompts
         has_echoed = [False] * num_choices * num_prompts
         num_prompt_tokens = [0] * num_prompts
+        accumulated_text = [""] * num_choices * num_prompts
+        accumulated_tokens = [[] for _ in range(num_choices * num_prompts)]
+        accumulated_logprobs = [[] for _ in range(num_choices * num_prompts)]
 
         stream_options = request.stream_options
         if stream_options:
@@ -370,6 +373,16 @@ async def completion_stream_generator(
                         *(output.logprobs or []),
                     ]
                     has_echoed[i] = True
+                elif request.accumulate:
+                    i = output.index + prompt_idx * num_choices
+                    # return the accumulated response
+                    accumulated_text[i] += output.text
+                    accumulated_tokens[i].extend(output.token_ids)
+                    accumulated_logprobs[i].extend(output.logprobs or [])
+
+                    delta_text = accumulated_text[i]
+                    delta_token_ids = accumulated_tokens[i]
+                    out_logprobs = accumulated_logprobs[i]
                 else:
                     # return just the delta
                     delta_text = output.text
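
In effect, request.accumulate switches the stream from per-chunk deltas to a running, per-choice total: each new piece of text, token ids, and logprobs is appended into a flat buffer indexed by output.index + prompt_idx * num_choices, and the whole buffer so far becomes that chunk's payload. A minimal, self-contained sketch of that behavior (illustrative names only; not vllm code):

# Sketch of the accumulate semantics above; names here are illustrative.
num_prompts, num_choices = 2, 2

# One running buffer per (prompt, choice) pair, flattened the same way the
# diff indexes it: i = choice_idx + prompt_idx * num_choices.
accumulated_text = [""] * (num_choices * num_prompts)

def stream_chunk(prompt_idx: int, choice_idx: int, delta: str,
                 accumulate: bool) -> str:
    i = choice_idx + prompt_idx * num_choices
    accumulated_text[i] += delta
    # accumulate=True sends the running total; False sends only the delta.
    return accumulated_text[i] if accumulate else delta

assert stream_chunk(0, 0, "Hel", accumulate=True) == "Hel"
assert stream_chunk(0, 0, "lo", accumulate=True) == "Hello"
assert stream_chunk(1, 1, "Hi", accumulate=False) == "Hi"

This trades bandwidth for client simplicity: the client no longer has to concatenate deltas or track per-choice state itself.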

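The diff only shows the server half. Assuming the new field is exposed as a vllm-specific extra body parameter on the OpenAI-compatible completions endpoint (an assumption, not shown in this commit), opting in from the openai Python client might look like:

# Hypothetical client usage; passing "accumulate" via extra_body is an
# assumption based on the server-side request field added in this commit.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

stream = client.completions.create(
    model="my-model",  # placeholder model name
    prompt="Hello",
    stream=True,
    extra_body={"accumulate": True},
)

for chunk in stream:
    # With accumulate on, each chunk carries the full completion so far,
    # so a client can overwrite its display rather than append to it.
    print(chunk.choices[0].text)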