2 files changed: +4 -0 lines changed
Original file line number Diff line number Diff line change
@@ -905,6 +905,7 @@ async def generate(
905
905
prompt_adapter_request : Optional [PromptAdapterRequest ] = None ,
906
906
priority : int = 0 ,
907
907
data_parallel_rank : Optional [int ] = None ,
908
+ arrival_time : Optional [float ] = None ,
908
909
) -> AsyncGenerator [RequestOutput , None ]:
909
910
"""Generate outputs for a request.
910
911
@@ -926,6 +927,7 @@ async def generate(
926
927
Only applicable with priority scheduling.
927
928
data_parallel_rank: The (global) data parallel rank that must
928
929
handle this request. Only applicable if DP is enabled.
930
+ arrival_time: The arrival time for the request
929
931
Yields:
930
932
The output `RequestOutput` objects from the LLMEngine
931
933
for the request.
@@ -986,6 +988,7 @@ async def generate(
986
988
prompt_adapter_request = prompt_adapter_request ,
987
989
priority = priority ,
988
990
data_parallel_rank = data_parallel_rank ,
991
+ arrival_time = arrival_time ,
989
992
):
990
993
output = LLMEngine .validate_output (output , RequestOutput )
991
994
if buffer is None :
Original file line number Diff line number Diff line change @@ -59,6 +59,7 @@ def generate(
59
59
trace_headers : Optional [Mapping [str , str ]] = None ,
60
60
prompt_adapter_request : Optional [PromptAdapterRequest ] = None ,
61
61
priority : int = 0 ,
62
+ arrival_time : Optional [float ] = None ,
62
63
) -> AsyncGenerator [RequestOutput , None ]:
63
64
"""Generate outputs for a request."""
64
65
...
You can’t perform that action at this time.
0 commit comments