
Commit 31f58be

[Frontend] Make TIMEOUT_KEEP_ALIVE configurable through env var (#18472)
Signed-off-by: liusiqian <liusiqian@tal.com>
1 parent ebb2f38 commit 31f58be
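
Before this change, the uvicorn keep-alive timeout was a hard-coded 5-second constant in the API server entrypoints; after it, the value can be overridden with the VLLM_HTTP_TIMEOUT_KEEP_ALIVE environment variable. A minimal sketch of how an operator might set it when launching the OpenAI-compatible server, e.g. behind a proxy with a longer idle timeout (the model name and launch method are illustrative, not part of this commit):

# Illustrative launcher: export a longer HTTP keep-alive before starting the
# server; the value is read from the environment when the server starts.
import os
import subprocess

env = dict(os.environ, VLLM_HTTP_TIMEOUT_KEEP_ALIVE="60")  # seconds

# Start the OpenAI-compatible server as a subprocess (placeholder model name).
subprocess.run(
    ["python", "-m", "vllm.entrypoints.openai.api_server",
     "--model", "<your-model>"],
    env=env,
)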

File tree

tests/async_engine/api_server_async_engine.py
vllm/entrypoints/api_server.py
vllm/entrypoints/openai/api_server.py
vllm/envs.py

4 files changed: +14 / -11 lines

tests/async_engine/api_server_async_engine.py

Lines changed: 6 additions & 6 deletions
@@ -8,6 +8,7 @@
 from fastapi.responses import JSONResponse, Response
 
 import vllm.entrypoints.api_server
+import vllm.envs as envs
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.utils import FlexibleArgumentParser
@@ -46,9 +47,8 @@ def stats() -> Response:
     engine_args = AsyncEngineArgs.from_cli_args(args)
     engine = AsyncLLMEngineWithStats.from_engine_args(engine_args)
     vllm.entrypoints.api_server.engine = engine
-    uvicorn.run(
-        app,
-        host=args.host,
-        port=args.port,
-        log_level="debug",
-        timeout_keep_alive=vllm.entrypoints.api_server.TIMEOUT_KEEP_ALIVE)
+    uvicorn.run(app,
+                host=args.host,
+                port=args.port,
+                log_level="debug",
+                timeout_keep_alive=envs.VLLM_HTTP_TIMEOUT_KEEP_ALIVE)
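
For context, timeout_keep_alive is the uvicorn setting that controls how long an idle HTTP keep-alive connection stays open before the server closes it. A standalone sketch of the same pattern the test server uses above (this is not vLLM code; the endpoint and port are made up):

# Minimal FastAPI app run through uvicorn with an env-driven keep-alive,
# mirroring the diff above: idle connections are closed after this many seconds.
import os

import uvicorn
from fastapi import FastAPI

app = FastAPI()


@app.get("/ping")
async def ping():
    return {"ok": True}


if __name__ == "__main__":
    uvicorn.run(app,
                host="127.0.0.1",
                port=8000,
                timeout_keep_alive=int(
                    os.getenv("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")))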

vllm/entrypoints/api_server.py

Lines changed: 2 additions & 2 deletions
@@ -17,6 +17,7 @@
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, Response, StreamingResponse
 
+import vllm.envs as envs
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.launcher import serve_http
@@ -29,7 +30,6 @@
 
 logger = init_logger("vllm.entrypoints.api_server")
 
-TIMEOUT_KEEP_ALIVE = 5  # seconds.
 app = FastAPI()
 engine = None
 
@@ -134,7 +134,7 @@ async def run_server(args: Namespace,
         host=args.host,
         port=args.port,
         log_level=args.log_level,
-        timeout_keep_alive=TIMEOUT_KEEP_ALIVE,
+        timeout_keep_alive=envs.VLLM_HTTP_TIMEOUT_KEEP_ALIVE,
         ssl_keyfile=args.ssl_keyfile,
         ssl_certfile=args.ssl_certfile,
         ssl_ca_certs=args.ssl_ca_certs,

vllm/entrypoints/openai/api_server.py

Lines changed: 1 addition & 3 deletions
@@ -103,8 +103,6 @@
 from vllm.v1.metrics.prometheus import get_prometheus_registry
 from vllm.version import __version__ as VLLM_VERSION
 
-TIMEOUT_KEEP_ALIVE = 5  # seconds
-
 prometheus_multiproc_dir: tempfile.TemporaryDirectory
 
 # Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
@@ -1360,7 +1358,7 @@ async def run_server_worker(listen_address,
         # NOTE: When the 'disable_uvicorn_access_log' value is True,
         # no access log will be output.
         access_log=not args.disable_uvicorn_access_log,
-        timeout_keep_alive=TIMEOUT_KEEP_ALIVE,
+        timeout_keep_alive=envs.VLLM_HTTP_TIMEOUT_KEEP_ALIVE,
         ssl_keyfile=args.ssl_keyfile,
         ssl_certfile=args.ssl_certfile,
         ssl_ca_certs=args.ssl_ca_certs,
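
The practical effect shows up on clients that reuse connections: as long as the pause between requests stays under the server's keep-alive window, a pooled connection is reused instead of re-established. A small client-side sketch (the URL and the /health route are assumptions about a locally running server):

# Illustrative client: requests.Session keeps the TCP connection pooled, so a
# second request within the keep-alive window reuses it; if the gap exceeds
# the server's timeout_keep_alive, the connection is re-established instead.
import time

import requests

with requests.Session() as session:
    session.get("http://localhost:8000/health")  # opens a connection
    time.sleep(3)  # stays under the (default 5 s) keep-alive window
    session.get("http://localhost:8000/health")  # reuses the pooled connection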

vllm/envs.py

Lines changed: 5 additions & 0 deletions
@@ -71,6 +71,7 @@
     VERBOSE: bool = False
     VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
     VLLM_RPC_TIMEOUT: int = 10000  # ms
+    VLLM_HTTP_TIMEOUT_KEEP_ALIVE: int = 5  # seconds
     VLLM_PLUGINS: Optional[list[str]] = None
     VLLM_LORA_RESOLVER_CACHE_DIR: Optional[str] = None
     VLLM_TORCH_PROFILER_DIR: Optional[str] = None
@@ -557,6 +558,10 @@ def get_vllm_port() -> Optional[int]:
     "VLLM_RPC_TIMEOUT":
     lambda: int(os.getenv("VLLM_RPC_TIMEOUT", "10000")),
 
+    # Timeout in seconds for keeping HTTP connections alive in API server
+    "VLLM_HTTP_TIMEOUT_KEEP_ALIVE":
+    lambda: int(os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")),
+
     # a list of plugin names to load, separated by commas.
     # if this is not set, it means all plugins will be loaded
     # if this is set to an empty string, no plugins will be loaded
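
The entries in vllm/envs.py are zero-argument callables, so the value is resolved from the environment when envs.VLLM_HTTP_TIMEOUT_KEEP_ALIVE is accessed rather than once at import time. A paraphrased sketch of that lookup pattern (the module-level __getattr__ below stands in for the real mechanism in envs.py and is not copied from it):

# Sketch of the lazy env-lookup pattern (paraphrased, not the real envs.py):
# names map to thunks, and attribute access on the module calls the thunk,
# so os.environ is consulted at access time.
import os

environment_variables = {
    "VLLM_HTTP_TIMEOUT_KEEP_ALIVE":
    lambda: int(os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")),
}


def __getattr__(name: str):
    if name in environment_variables:
        return environment_variables[name]()
    raise AttributeError(f"module has no attribute {name!r}")

# Usage, assuming this file is importable as a module named `envs`:
#   os.environ["VLLM_HTTP_TIMEOUT_KEEP_ALIVE"] = "60"
#   print(envs.VLLM_HTTP_TIMEOUT_KEEP_ALIVE)  # -> 60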
