1
+
1
2
# The vLLM Dockerfile is used to construct a vLLM image that can be directly used
2
3
# to run the OpenAI compatible server.
3
4
@@ -62,12 +63,16 @@ ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
62
63
ARG PIP_KEYRING_PROVIDER=disabled
63
64
ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER}
64
65
66
+ # Flag that enables building the KV-connector dependency libs into the Docker image
67
+ ARG INSTALL_KV_CONNECTORS=false
68
+
65
69
# ################### BASE BUILD IMAGE ####################
66
70
# prepare basic build environment
67
71
FROM ${BUILD_BASE_IMAGE} AS base
68
72
ARG CUDA_VERSION
69
73
ARG PYTHON_VERSION
70
74
ARG TARGETPLATFORM
75
+ ARG INSTALL_KV_CONNECTORS=false
71
76
ENV DEBIAN_FRONTEND=noninteractive
72
77
73
78
ARG DEADSNAKES_MIRROR_URL
@@ -276,6 +281,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
276
281
FROM ${FINAL_BASE_IMAGE} AS vllm-base
277
282
ARG CUDA_VERSION
278
283
ARG PYTHON_VERSION
284
+ ARG INSTALL_KV_CONNECTORS=false
279
285
WORKDIR /vllm-workspace
280
286
ENV DEBIAN_FRONTEND=noninteractive
281
287
ARG TARGETPLATFORM
@@ -485,6 +491,7 @@ RUN mv mkdocs.yaml test_docs/
485
491
# base openai image with additional requirements, for any subsequent openai-style images
486
492
FROM vllm-base AS vllm-openai-base
487
493
ARG TARGETPLATFORM
494
+ ARG INSTALL_KV_CONNECTORS=false
488
495
489
496
ARG PIP_INDEX_URL UV_INDEX_URL
490
497
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
@@ -493,8 +500,13 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
493
500
# Reference: https://github.com/astral-sh/uv/pull/1694
494
501
ENV UV_HTTP_TIMEOUT=500
495
502
503
+ COPY requirements/kv_connectors.txt requirements/kv_connectors.txt
504
+
496
505
# install additional dependencies for openai api server
497
506
RUN --mount=type=cache,target=/root/.cache/uv \
507
+ if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then \
508
+ uv pip install --system -r requirements/kv_connectors.txt; \
509
+ fi; \
498
510
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
499
511
uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
500
512
else \
0 commit comments