4 files changed (+14 -11 lines changed)
Original file line number Diff line number Diff line change 8
8
from fastapi .responses import JSONResponse , Response
9
9
10
10
import vllm .entrypoints .api_server
11
+ import vllm .envs as envs
11
12
from vllm .engine .arg_utils import AsyncEngineArgs
12
13
from vllm .engine .async_llm_engine import AsyncLLMEngine
13
14
from vllm .utils import FlexibleArgumentParser
@@ -46,9 +47,8 @@ def stats() -> Response:
46
47
engine_args = AsyncEngineArgs .from_cli_args (args )
47
48
engine = AsyncLLMEngineWithStats .from_engine_args (engine_args )
48
49
vllm .entrypoints .api_server .engine = engine
49
- uvicorn .run (
50
- app ,
51
- host = args .host ,
52
- port = args .port ,
53
- log_level = "debug" ,
54
- timeout_keep_alive = vllm .entrypoints .api_server .TIMEOUT_KEEP_ALIVE )
50
+ uvicorn .run (app ,
51
+ host = args .host ,
52
+ port = args .port ,
53
+ log_level = "debug" ,
54
+ timeout_keep_alive = envs .VLLM_HTTP_TIMEOUT_KEEP_ALIVE )
Original file line number Diff line number Diff line change 17
17
from fastapi import FastAPI , Request
18
18
from fastapi .responses import JSONResponse , Response , StreamingResponse
19
19
20
+ import vllm .envs as envs
20
21
from vllm .engine .arg_utils import AsyncEngineArgs
21
22
from vllm .engine .async_llm_engine import AsyncLLMEngine
22
23
from vllm .entrypoints .launcher import serve_http
29
30
30
31
logger = init_logger ("vllm.entrypoints.api_server" )
31
32
32
- TIMEOUT_KEEP_ALIVE = 5 # seconds.
33
33
app = FastAPI ()
34
34
engine = None
35
35
@@ -134,7 +134,7 @@ async def run_server(args: Namespace,
134
134
host = args .host ,
135
135
port = args .port ,
136
136
log_level = args .log_level ,
137
- timeout_keep_alive = TIMEOUT_KEEP_ALIVE ,
137
+ timeout_keep_alive = envs . VLLM_HTTP_TIMEOUT_KEEP_ALIVE ,
138
138
ssl_keyfile = args .ssl_keyfile ,
139
139
ssl_certfile = args .ssl_certfile ,
140
140
ssl_ca_certs = args .ssl_ca_certs ,
Original file line number Diff line number Diff line change 103
103
from vllm .v1 .metrics .prometheus import get_prometheus_registry
104
104
from vllm .version import __version__ as VLLM_VERSION
105
105
106
- TIMEOUT_KEEP_ALIVE = 5 # seconds
107
-
108
106
prometheus_multiproc_dir : tempfile .TemporaryDirectory
109
107
110
108
# Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
@@ -1360,7 +1358,7 @@ async def run_server_worker(listen_address,
1360
1358
# NOTE: When the 'disable_uvicorn_access_log' value is True,
1361
1359
# no access log will be output.
1362
1360
access_log = not args .disable_uvicorn_access_log ,
1363
- timeout_keep_alive = TIMEOUT_KEEP_ALIVE ,
1361
+ timeout_keep_alive = envs . VLLM_HTTP_TIMEOUT_KEEP_ALIVE ,
1364
1362
ssl_keyfile = args .ssl_keyfile ,
1365
1363
ssl_certfile = args .ssl_certfile ,
1366
1364
ssl_ca_certs = args .ssl_ca_certs ,
Original file line number Diff line number Diff line change 71
71
VERBOSE : bool = False
72
72
VLLM_ALLOW_LONG_MAX_MODEL_LEN : bool = False
73
73
VLLM_RPC_TIMEOUT : int = 10000 # ms
74
+ VLLM_HTTP_TIMEOUT_KEEP_ALIVE : int = 5 # seconds
74
75
VLLM_PLUGINS : Optional [list [str ]] = None
75
76
VLLM_LORA_RESOLVER_CACHE_DIR : Optional [str ] = None
76
77
VLLM_TORCH_PROFILER_DIR : Optional [str ] = None
@@ -557,6 +558,10 @@ def get_vllm_port() -> Optional[int]:
557
558
"VLLM_RPC_TIMEOUT" :
558
559
lambda : int (os .getenv ("VLLM_RPC_TIMEOUT" , "10000" )),
559
560
561
+ # Seconds an idle keep-alive HTTP connection stays open before the API server closes it
562
+ "VLLM_HTTP_TIMEOUT_KEEP_ALIVE" :
563
+ lambda : int (os .environ .get ("VLLM_HTTP_TIMEOUT_KEEP_ALIVE" , "5" )),
564
+
560
565
# a list of plugin names to load, separated by commas.
561
566
# if this is not set, it means all plugins will be loaded
562
567
# if this is set to an empty string, no plugins will be loaded
You can’t perform that action at this time.
0 commit comments