Skip to content

Commit 4e895ac

Browse files
author
Your Name
committed
Add arrival_time parameter to AsyncLLMEngine and EngineClient for request handling
1 parent 6369426 commit 4e895ac

File tree

2 files changed

+4
-0
lines changed

2 files changed

+4
-0
lines changed

vllm/engine/async_llm_engine.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -905,6 +905,7 @@ async def generate(
905905
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
906906
priority: int = 0,
907907
data_parallel_rank: Optional[int] = None,
908+
arrival_time: Optional[float] = None,
908909
) -> AsyncGenerator[RequestOutput, None]:
909910
"""Generate outputs for a request.
910911
@@ -926,6 +927,7 @@ async def generate(
926927
Only applicable with priority scheduling.
927928
data_parallel_rank: The (global) data parallel rank that must
928929
handle this request. Only applicable if DP is enabled.
930+
arrival_time: The arrival time for the request.
929931
Yields:
930932
The output `RequestOutput` objects from the LLMEngine
931933
for the request.
@@ -986,6 +988,7 @@ async def generate(
986988
prompt_adapter_request=prompt_adapter_request,
987989
priority=priority,
988990
data_parallel_rank=data_parallel_rank,
991+
arrival_time=arrival_time,
989992
):
990993
output = LLMEngine.validate_output(output, RequestOutput)
991994
if buffer is None:

vllm/engine/protocol.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def generate(
5959
trace_headers: Optional[Mapping[str, str]] = None,
6060
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
6161
priority: int = 0,
62+
arrival_time: Optional[float] = None,
6263
) -> AsyncGenerator[RequestOutput, None]:
6364
"""Generate outputs for a request."""
6465
...

0 commit comments

Comments
 (0)