From 56eed9da5b93e4adc422adae8fb9f34f05dfe932 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 6 Jun 2025 13:43:11 -0400 Subject: [PATCH 01/42] ci(vllm-tensorizer): Update vLLM source commit in build pipeline This commit updates the default VLLM_COMMIT_HASH used in the GitHub Actions workflow for the vllm-tensorizer image. This change points the build to a more recent commit of the vLLM project. --- .github/workflows/vllm-tensorizer.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/vllm-tensorizer.yml b/.github/workflows/vllm-tensorizer.yml index 14d2c7db..be87290d 100644 --- a/.github/workflows/vllm-tensorizer.yml +++ b/.github/workflows/vllm-tensorizer.yml @@ -18,6 +18,6 @@ jobs: with: image-name: vllm-tensorizer folder: vllm-tensorizer - tag-suffix: ${{ inputs.commit || '19307ba71ddeb7e1cc6aec3c1baa8b50d59c1beb'}} + tag-suffix: ${{ inputs.commit || '85e2b7bb1380dd7096e0d6b64b0d7633b0a9db4a'}} build-args: | - COMMIT_HASH=${{ inputs.commit || '19307ba71ddeb7e1cc6aec3c1baa8b50d59c1beb'}} \ No newline at end of file + COMMIT_HASH=${{ inputs.commit || '85e2b7bb1380dd7096e0d6b64b0d7633b0a9db4a'}} \ No newline at end of file From 1b8b7bb0b0b2b11508270cdb45fed5b99fbfcf26 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 6 Jun 2025 14:18:09 -0400 Subject: [PATCH 02/42] build(vllm-tensorizer): Update `torch-extras` base image --- vllm-tensorizer/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 9c4df618..c3db4a03 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -1,5 +1,4 @@ -ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-22.04-58a49a2-base-cuda12.1.1-torch2.1.2-vision0.16.2-audio2.1.2-flash_attn2.4.2" - +ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:00b897e-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.0-vision0.22.0-audio2.7.0-abi1" FROM scratch as freezer WORKDIR / COPY --chmod=755 freeze.sh / From face617bf4f923831b1b65fccc934a563953ecf7 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 6 Jun 2025 16:22:33 -0400 Subject: [PATCH 03/42] chore: Add .idea/ to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 9d90afc5..fee2d8d4 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,6 @@ flycheck_*.el .env* .environment .environment* + +# JetBrains Idea files +.idea/ From 0ca82289ab6ee5ad74293df4243803b12789c4e6 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 9 Jun 2025 09:11:15 -0400 Subject: [PATCH 04/42] fix(vllm-tensorizer): Remove redundant CUDA dev package installation --- vllm-tensorizer/Dockerfile | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index c3db4a03..bdab60ed 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -10,23 +10,23 @@ ARG MAX_JOBS="" # Dependencies requiring NVCC are built ahead of time in a separate stage # so that the ~2 GiB dev library installations don't have to be included # in the final image. -RUN export \ - CUDA_MAJOR_VERSION=$(echo $CUDA_VERSION | cut -d. -f1) \ - CUDA_MINOR_VERSION=$(echo $CUDA_VERSION | cut -d. -f2) && \ - export \ - CUDA_PACKAGE_VERSION="${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" && \ - apt-get -qq update && apt-get install -y --no-install-recommends \ - cuda-nvcc-${CUDA_PACKAGE_VERSION} \ - cuda-nvml-dev-${CUDA_PACKAGE_VERSION} \ - libcurand-dev-${CUDA_PACKAGE_VERSION} \ - libcublas-dev-${CUDA_PACKAGE_VERSION} \ - libcusparse-dev-${CUDA_PACKAGE_VERSION} \ - libcusolver-dev-${CUDA_PACKAGE_VERSION} \ - cuda-nvprof-${CUDA_PACKAGE_VERSION} \ - cuda-profiler-api-${CUDA_PACKAGE_VERSION} \ - libaio-dev \ - ninja-build && \ - apt-get clean +#RUN export \ +# CUDA_MAJOR_VERSION=$(echo $CUDA_VERSION | cut -d. -f1) \ +# CUDA_MINOR_VERSION=$(echo $CUDA_VERSION | cut -d. -f2) && \ +# export \ +# CUDA_PACKAGE_VERSION="${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" && \ +# apt-get -qq update && apt-get install -y --no-install-recommends \ +# cuda-nvcc-${CUDA_PACKAGE_VERSION} \ +# cuda-nvml-dev-${CUDA_PACKAGE_VERSION} \ +# libcurand-dev-${CUDA_PACKAGE_VERSION} \ +# libcublas-dev-${CUDA_PACKAGE_VERSION} \ +# libcusparse-dev-${CUDA_PACKAGE_VERSION} \ +# libcusolver-dev-${CUDA_PACKAGE_VERSION} \ +# cuda-nvprof-${CUDA_PACKAGE_VERSION} \ +# cuda-profiler-api-${CUDA_PACKAGE_VERSION} \ +# libaio-dev \ +# ninja-build && \ +# apt-get clean RUN ldconfig From 1512fdf04680843aa6cb9d378a143a09550060c3 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 9 Jun 2025 12:28:52 -0400 Subject: [PATCH 05/42] fix(vllm-tensorizer): install setuptools_scm and cmake for vLLM build --- vllm-tensorizer/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index bdab60ed..3bea5deb 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -32,9 +32,9 @@ RUN ldconfig RUN apt-get -qq update && \ apt-get -qq install -y --no-install-recommends \ - python3-pip git ninja-build && \ + python3-pip git ninja-build cmake && \ apt-get clean && \ - pip3 install -U --no-cache-dir pip packaging setuptools wheel + pip3 install -U --no-cache-dir pip packaging setuptools wheel setuptools_scm FROM alpine/git:2.36.3 as vllm-downloader WORKDIR /git From 1ccf357b530a6138e0b383614c4e3120481189f0 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 9 Jun 2025 13:08:12 -0400 Subject: [PATCH 06/42] fix(vllm-tensorizer): update triton version to 2.58.0 --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 3bea5deb..cb1c38ee 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -69,7 +69,7 @@ RUN --mount=type=bind,from=freezer,target=/tmp/frozen \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/constraints.txt RUN python3 -m pip install --no-cache-dir \ - "fschat[model_worker] == 0.2.30" "triton == 2.1.0" \ + "fschat[model_worker] == 0.2.30" "triton == 2.58.0" \ -c /tmp/constraints.txt RUN --mount=type=bind,from=vllm-builder,source=/wheels,target=/tmp/wheels \ From b32bae5a4b36aabf696256f0e5402caa15d3abd0 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 9 Jun 2025 13:21:22 -0400 Subject: [PATCH 07/42] fix(vllm-tensorizer): update triton version to 3.3.1 --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index cb1c38ee..d3c5dfdb 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -69,7 +69,7 @@ RUN --mount=type=bind,from=freezer,target=/tmp/frozen \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/constraints.txt RUN python3 -m pip install --no-cache-dir \ - "fschat[model_worker] == 0.2.30" "triton == 2.58.0" \ + "fschat[model_worker] == 0.2.30" "triton == 3.3.1" \ -c /tmp/constraints.txt RUN --mount=type=bind,from=vllm-builder,source=/wheels,target=/tmp/wheels \ From 86181c318966a17c0123e16e8090f472d93df83d Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 9 Jun 2025 14:07:04 -0400 Subject: [PATCH 08/42] fix(vllm-tensorizer): update triton version to 2.3.1 --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index d3c5dfdb..78b4d649 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -69,7 +69,7 @@ RUN --mount=type=bind,from=freezer,target=/tmp/frozen \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/constraints.txt RUN python3 -m pip install --no-cache-dir \ - "fschat[model_worker] == 0.2.30" "triton == 3.3.1" \ + "fschat[model_worker] == 0.2.30" "triton == 2.3.1" \ -c /tmp/constraints.txt RUN --mount=type=bind,from=vllm-builder,source=/wheels,target=/tmp/wheels \ From 3228eb79657b1ad46ed84537bb9dcaa99a91781e Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Tue, 10 Jun 2025 09:36:27 -0400 Subject: [PATCH 09/42] fix(vllm-tensorizer): remove explicit triton versioning --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 78b4d649..584bf253 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -69,7 +69,7 @@ RUN --mount=type=bind,from=freezer,target=/tmp/frozen \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/constraints.txt RUN python3 -m pip install --no-cache-dir \ - "fschat[model_worker] == 0.2.30" "triton == 2.3.1" \ + "fschat[model_worker] == 0.2.30" \ -c /tmp/constraints.txt RUN --mount=type=bind,from=vllm-builder,source=/wheels,target=/tmp/wheels \ From 5c52d8e6988f7d371628d1be0d2eaff37cc3301d Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Tue, 10 Jun 2025 11:41:07 -0400 Subject: [PATCH 10/42] feat(vllm-tensorizer): implement custom triton build and install for vllm compilation --- .github/workflows/vllm-tensorizer.yml | 3 ++- vllm-tensorizer/Dockerfile | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/.github/workflows/vllm-tensorizer.yml b/.github/workflows/vllm-tensorizer.yml index be87290d..990561b0 100644 --- a/.github/workflows/vllm-tensorizer.yml +++ b/.github/workflows/vllm-tensorizer.yml @@ -20,4 +20,5 @@ jobs: folder: vllm-tensorizer tag-suffix: ${{ inputs.commit || '85e2b7bb1380dd7096e0d6b64b0d7633b0a9db4a'}} build-args: | - COMMIT_HASH=${{ inputs.commit || '85e2b7bb1380dd7096e0d6b64b0d7633b0a9db4a'}} \ No newline at end of file + COMMIT_HASH=${{ inputs.commit || '85e2b7bb1380dd7096e0d6b64b0d7633b0a9db4a'}} + TRITON_COMMIT=96316ce5 \ No newline at end of file diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 584bf253..000b8fab 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -46,8 +46,24 @@ RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \ git submodule update --init --recursive --jobs 8 \ --depth 1 --filter=blob:none +# Testing +FROM alpine/git:2.36.3 as triton-downloader +WORKDIR /git +ARG TRITON_COMMIT="96316ce5" +RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout https://github.com/openai/triton.git && \ + cd triton && \ + git checkout "${TRITON_COMMIT}" && \ + git submodule update --init --recursive --jobs 8 --depth 1 --filter=blob:none \ +# Testing end + FROM builder-base as vllm-builder WORKDIR /workspace + +# Testing +RUN --mount=type=bind,from=triton-builder,source=/wheels,target=/tmp/triton-wheels \ + python3 -m pip install --no-cache-dir /tmp/triton-wheels/*.whl && \ + rm -rf /tmp/triton-wheels \ +# Testing end RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \ --mount=type=bind,from=freezer,target=/tmp/frozen,rw \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \ @@ -61,6 +77,16 @@ WORKDIR /wheels FROM ${BASE_IMAGE} as base +# Testing +FROM builder-base as triton-builder +WORKDIR /workspace +RUN --mount=type=bind,from=triton-downloader,source=/git/triton,target=/workspace,rw \ + LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}" \ + python3 -m pip wheel -w /wheels \ + -v --no-cache-dir --no-build-isolation --no-deps \ + ./ +# Testing end + WORKDIR /workspace RUN apt-get -qq update && apt-get install -y --no-install-recommends curl && apt-get clean From 3a78a5654b03dfdff1b8ffb1acb4903e9bf2ac29 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Tue, 10 Jun 2025 11:58:04 -0400 Subject: [PATCH 11/42] fix(vllm-tensorizer): reorder build stages to resolve circular dependency --- vllm-tensorizer/Dockerfile | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 000b8fab..5cb89e33 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -36,6 +36,24 @@ RUN apt-get -qq update && \ apt-get clean && \ pip3 install -U --no-cache-dir pip packaging setuptools wheel setuptools_scm +# Testing +FROM alpine/git:2.36.3 as triton-downloader +WORKDIR /git +ARG TRITON_COMMIT="96316ce5" +RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout https://github.com/openai/triton.git && \ + cd triton && \ + git checkout "${TRITON_COMMIT}" && \ + git submodule update --init --recursive --jobs 8 --depth 1 --filter=blob:none \ + +FROM builder-base as triton-builder +WORKDIR /workspace +RUN --mount=type=bind,from=triton-downloader,source=/git/triton,target=/workspace,rw \ + LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}" \ + python3 -m pip wheel -w /wheels \ + -v --no-cache-dir --no-build-isolation --no-deps \ + ./ +# Testing end + FROM alpine/git:2.36.3 as vllm-downloader WORKDIR /git ARG COMMIT_HASH @@ -46,15 +64,6 @@ RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \ git submodule update --init --recursive --jobs 8 \ --depth 1 --filter=blob:none -# Testing -FROM alpine/git:2.36.3 as triton-downloader -WORKDIR /git -ARG TRITON_COMMIT="96316ce5" -RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout https://github.com/openai/triton.git && \ - cd triton && \ - git checkout "${TRITON_COMMIT}" && \ - git submodule update --init --recursive --jobs 8 --depth 1 --filter=blob:none \ -# Testing end FROM builder-base as vllm-builder WORKDIR /workspace @@ -77,16 +86,6 @@ WORKDIR /wheels FROM ${BASE_IMAGE} as base -# Testing -FROM builder-base as triton-builder -WORKDIR /workspace -RUN --mount=type=bind,from=triton-downloader,source=/git/triton,target=/workspace,rw \ - LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}" \ - python3 -m pip wheel -w /wheels \ - -v --no-cache-dir --no-build-isolation --no-deps \ - ./ -# Testing end - WORKDIR /workspace RUN apt-get -qq update && apt-get install -y --no-install-recommends curl && apt-get clean From a7e3e1983ccfc2ea1966bf17a06cc547524368f6 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Tue, 10 Jun 2025 14:37:30 -0400 Subject: [PATCH 12/42] fix(vllm-tensorizer): Remove accidental backslashes --- vllm-tensorizer/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 5cb89e33..9f07c5d5 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -43,7 +43,7 @@ ARG TRITON_COMMIT="96316ce5" RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout https://github.com/openai/triton.git && \ cd triton && \ git checkout "${TRITON_COMMIT}" && \ - git submodule update --init --recursive --jobs 8 --depth 1 --filter=blob:none \ + git submodule update --init --recursive --jobs 8 --depth 1 --filter=blob:none FROM builder-base as triton-builder WORKDIR /workspace @@ -71,7 +71,7 @@ WORKDIR /workspace # Testing RUN --mount=type=bind,from=triton-builder,source=/wheels,target=/tmp/triton-wheels \ python3 -m pip install --no-cache-dir /tmp/triton-wheels/*.whl && \ - rm -rf /tmp/triton-wheels \ + rm -rf /tmp/triton-wheels # Testing end RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \ --mount=type=bind,from=freezer,target=/tmp/frozen,rw \ From 6dcafb51033c603f51a446ff32342d46b7cf7a9b Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Thu, 12 Jun 2025 17:02:25 -0400 Subject: [PATCH 13/42] feat(vllm-tensorizer): Add MAX_JOBS unset logic; remove custom triton build stages --- vllm-tensorizer/Dockerfile | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 9f07c5d5..331fa686 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -36,24 +36,6 @@ RUN apt-get -qq update && \ apt-get clean && \ pip3 install -U --no-cache-dir pip packaging setuptools wheel setuptools_scm -# Testing -FROM alpine/git:2.36.3 as triton-downloader -WORKDIR /git -ARG TRITON_COMMIT="96316ce5" -RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout https://github.com/openai/triton.git && \ - cd triton && \ - git checkout "${TRITON_COMMIT}" && \ - git submodule update --init --recursive --jobs 8 --depth 1 --filter=blob:none - -FROM builder-base as triton-builder -WORKDIR /workspace -RUN --mount=type=bind,from=triton-downloader,source=/git/triton,target=/workspace,rw \ - LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}" \ - python3 -m pip wheel -w /wheels \ - -v --no-cache-dir --no-build-isolation --no-deps \ - ./ -# Testing end - FROM alpine/git:2.36.3 as vllm-downloader WORKDIR /git ARG COMMIT_HASH @@ -68,14 +50,10 @@ RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \ FROM builder-base as vllm-builder WORKDIR /workspace -# Testing -RUN --mount=type=bind,from=triton-builder,source=/wheels,target=/tmp/triton-wheels \ - python3 -m pip install --no-cache-dir /tmp/triton-wheels/*.whl && \ - rm -rf /tmp/triton-wheels -# Testing end RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \ --mount=type=bind,from=freezer,target=/tmp/frozen,rw \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \ + if [ -z "$MAX_JOBS" ]; then unset MAX_JOBS; fi && \ LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}" \ python3 -m pip wheel -w /wheels \ -v --no-cache-dir --no-build-isolation --no-deps \ From 9a1a9c154a260eb53bf40787f27422c0c7df42db Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 13 Jun 2025 09:58:16 -0400 Subject: [PATCH 14/42] fix(vllm-builder): Configure CUDA environment variables for vLLM compilation --- vllm-tensorizer/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 331fa686..39ee62ac 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -54,7 +54,8 @@ RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw --mount=type=bind,from=freezer,target=/tmp/frozen,rw \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \ if [ -z "$MAX_JOBS" ]; then unset MAX_JOBS; fi && \ - LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}" \ + LIBRARY_PATH="/usr/local/cuda/lib64:${LIBRARY_PATH:+:$LIBRARY_PATH}" \ + CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \ python3 -m pip wheel -w /wheels \ -v --no-cache-dir --no-build-isolation --no-deps \ -c /tmp/frozen/constraints.txt \ From af198732b3dac5a9b187f5a7167571a04d5e5445 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 13 Jun 2025 10:34:59 -0400 Subject: [PATCH 15/42] fix(vllm-tensorizer): Update vLLM commit to a newer version for PyTorch 2.7.0 compatibility --- .github/workflows/vllm-tensorizer.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/vllm-tensorizer.yml b/.github/workflows/vllm-tensorizer.yml index 990561b0..1afb6a89 100644 --- a/.github/workflows/vllm-tensorizer.yml +++ b/.github/workflows/vllm-tensorizer.yml @@ -18,7 +18,7 @@ jobs: with: image-name: vllm-tensorizer folder: vllm-tensorizer - tag-suffix: ${{ inputs.commit || '85e2b7bb1380dd7096e0d6b64b0d7633b0a9db4a'}} + tag-suffix: ${{ inputs.commit || '5e4f640d22b0e4b50f506ce913d01fd471ad8086'}} build-args: | - COMMIT_HASH=${{ inputs.commit || '85e2b7bb1380dd7096e0d6b64b0d7633b0a9db4a'}} + COMMIT_HASH=${{ inputs.commit || '5e4f640d22b0e4b50f506ce913d01fd471ad8086'}} TRITON_COMMIT=96316ce5 \ No newline at end of file From 93db31ba3ad4d67f7fbc5bf11f97603d7eee0cba Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 13 Jun 2025 10:50:42 -0400 Subject: [PATCH 16/42] fix(vllm-tensorizer): Install missing `regex` module for vLLM build metadata --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 39ee62ac..1cc3d328 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -34,7 +34,7 @@ RUN apt-get -qq update && \ apt-get -qq install -y --no-install-recommends \ python3-pip git ninja-build cmake && \ apt-get clean && \ - pip3 install -U --no-cache-dir pip packaging setuptools wheel setuptools_scm + pip3 install -U --no-cache-dir pip packaging setuptools wheel setuptools_scm regex FROM alpine/git:2.36.3 as vllm-downloader WORKDIR /git From f865d51fd00ed28fbbc955ca4ece1cf67331eba1 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 13 Jun 2025 11:10:23 -0400 Subject: [PATCH 17/42] feat(vllm-tensorizer): Switch to upstream vLLM for PyTorch 2.7.0 compatibility --- .github/workflows/vllm-tensorizer.yml | 4 ++-- vllm-tensorizer/Dockerfile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/vllm-tensorizer.yml b/.github/workflows/vllm-tensorizer.yml index 1afb6a89..88c0f7f7 100644 --- a/.github/workflows/vllm-tensorizer.yml +++ b/.github/workflows/vllm-tensorizer.yml @@ -18,7 +18,7 @@ jobs: with: image-name: vllm-tensorizer folder: vllm-tensorizer - tag-suffix: ${{ inputs.commit || '5e4f640d22b0e4b50f506ce913d01fd471ad8086'}} + tag-suffix: ${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}} build-args: | - COMMIT_HASH=${{ inputs.commit || '5e4f640d22b0e4b50f506ce913d01fd471ad8086'}} + COMMIT_HASH=${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}} TRITON_COMMIT=96316ce5 \ No newline at end of file diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 1cc3d328..e3902571 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -40,7 +40,7 @@ FROM alpine/git:2.36.3 as vllm-downloader WORKDIR /git ARG COMMIT_HASH RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \ - https://github.com/coreweave/vllm.git && \ + https://github.com/vllm-project/vllm.git && \ cd vllm && \ git checkout "${COMMIT_HASH}" && \ git submodule update --init --recursive --jobs 8 \ From ea0074b3c1d3004bbf29767d23538e8f0a36af69 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 13 Jun 2025 11:22:36 -0400 Subject: [PATCH 18/42] feat(vllm-tensorizer): Downgrade vLLM to v0.9.0 for PyTorch 2.7.0 compatibility --- .github/workflows/vllm-tensorizer.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/vllm-tensorizer.yml b/.github/workflows/vllm-tensorizer.yml index 88c0f7f7..206c9a21 100644 --- a/.github/workflows/vllm-tensorizer.yml +++ b/.github/workflows/vllm-tensorizer.yml @@ -18,7 +18,7 @@ jobs: with: image-name: vllm-tensorizer folder: vllm-tensorizer - tag-suffix: ${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}} + tag-suffix: ${{ inputs.commit || '58738772410c5e0d60b61db39538a9b313d2d7ad'}} build-args: | - COMMIT_HASH=${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}} + COMMIT_HASH=${{ inputs.commit || '58738772410c5e0d60b61db39538a9b313d2d7ad'}} TRITON_COMMIT=96316ce5 \ No newline at end of file From 7631031a6cd153a5e172b9892859111966adbbcd Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 13 Jun 2025 11:57:32 -0400 Subject: [PATCH 19/42] fix(vllm-tensorizer): Apply CMake patch for nvToolsExt linking issue --- .github/workflows/vllm-tensorizer.yml | 4 ++-- vllm-tensorizer/Dockerfile | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/.github/workflows/vllm-tensorizer.yml b/.github/workflows/vllm-tensorizer.yml index 206c9a21..88c0f7f7 100644 --- a/.github/workflows/vllm-tensorizer.yml +++ b/.github/workflows/vllm-tensorizer.yml @@ -18,7 +18,7 @@ jobs: with: image-name: vllm-tensorizer folder: vllm-tensorizer - tag-suffix: ${{ inputs.commit || '58738772410c5e0d60b61db39538a9b313d2d7ad'}} + tag-suffix: ${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}} build-args: | - COMMIT_HASH=${{ inputs.commit || '58738772410c5e0d60b61db39538a9b313d2d7ad'}} + COMMIT_HASH=${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}} TRITON_COMMIT=96316ce5 \ No newline at end of file diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index e3902571..9fe65222 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -54,6 +54,22 @@ RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw --mount=type=bind,from=freezer,target=/tmp/frozen,rw \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \ if [ -z "$MAX_JOBS" ]; then unset MAX_JOBS; fi && \ + sed -i '/^find_package(Torch REQUIRED)/i\ +find_package(CUDA REQUIRED)\n\ +find_library(NVTOOLSEXT_LIBRARY\n\ + NAMES nvToolsExt\n\ + PATH_SUFFIXES lib)\n\ +\n\ +if (NVTOOLSEXT_LIBRARY)\n\ + message(STATUS "Found nvToolsExt library: ${NVTOOLSEXT_LIBRARY}")\n\ +else()\n\ + message(FATAL_ERROR "Could not find nvToolsExt library")\n\ +endif()\n\ +add_library(CUDA::nvToolsExt SHARED IMPORTED)\n\ +set_target_properties(CUDA::nvToolsExt PROPERTIES\n\ + IMPORTED_LOCATION ${NVTOOLSEXT_LIBRARY}\n\ +)\n\ +\n' CMakeLists.txt && \ LIBRARY_PATH="/usr/local/cuda/lib64:${LIBRARY_PATH:+:$LIBRARY_PATH}" \ CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \ python3 -m pip wheel -w /wheels \ From c0b2d0c96bf89330173000f66bf9c151d02d27fb Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 13 Jun 2025 12:12:07 -0400 Subject: [PATCH 20/42] fix(vllm-builder): Simplify find_library call in nvToolsExt CMake patch --- vllm-tensorizer/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 9fe65222..693be4de 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -58,7 +58,6 @@ RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw find_package(CUDA REQUIRED)\n\ find_library(NVTOOLSEXT_LIBRARY\n\ NAMES nvToolsExt\n\ - PATH_SUFFIXES lib)\n\ \n\ if (NVTOOLSEXT_LIBRARY)\n\ message(STATUS "Found nvToolsExt library: ${NVTOOLSEXT_LIBRARY}")\n\ From 17d917b702838e6452ced9dce3615989c2af10c2 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 13 Jun 2025 12:14:23 -0400 Subject: [PATCH 21/42] fix(vllm-tensorizer): Add missing `)` --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 693be4de..81d4de18 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -56,7 +56,7 @@ RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw if [ -z "$MAX_JOBS" ]; then unset MAX_JOBS; fi && \ sed -i '/^find_package(Torch REQUIRED)/i\ find_package(CUDA REQUIRED)\n\ -find_library(NVTOOLSEXT_LIBRARY\n\ +find_library(NVTOOLSEXT_LIBRARY)\n\ NAMES nvToolsExt\n\ \n\ if (NVTOOLSEXT_LIBRARY)\n\ From 3bf996bb2f2b4fda3d35f3aa8c5cf846926144bc Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 13 Jun 2025 12:31:26 -0400 Subject: [PATCH 22/42] fix(vllm-tensorizer): Remove Cmake patch --- vllm-tensorizer/Dockerfile | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 81d4de18..e3902571 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -54,21 +54,6 @@ RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw --mount=type=bind,from=freezer,target=/tmp/frozen,rw \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \ if [ -z "$MAX_JOBS" ]; then unset MAX_JOBS; fi && \ - sed -i '/^find_package(Torch REQUIRED)/i\ -find_package(CUDA REQUIRED)\n\ -find_library(NVTOOLSEXT_LIBRARY)\n\ - NAMES nvToolsExt\n\ -\n\ -if (NVTOOLSEXT_LIBRARY)\n\ - message(STATUS "Found nvToolsExt library: ${NVTOOLSEXT_LIBRARY}")\n\ -else()\n\ - message(FATAL_ERROR "Could not find nvToolsExt library")\n\ -endif()\n\ -add_library(CUDA::nvToolsExt SHARED IMPORTED)\n\ -set_target_properties(CUDA::nvToolsExt PROPERTIES\n\ - IMPORTED_LOCATION ${NVTOOLSEXT_LIBRARY}\n\ -)\n\ -\n' CMakeLists.txt && \ LIBRARY_PATH="/usr/local/cuda/lib64:${LIBRARY_PATH:+:$LIBRARY_PATH}" \ CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \ python3 -m pip wheel -w /wheels \ From ac48891d6efdf0608c98e6654394bba819fb2f36 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 13 Jun 2025 12:55:06 -0400 Subject: [PATCH 23/42] fix(vllm-tensorizer): Update base image to CUDA 12.8.1 to resolve build issues --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index e3902571..1ddf35b2 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:00b897e-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.0-vision0.22.0-audio2.7.0-abi1" +ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:00b897e-nccl-cuda12.8.1-ubuntu22.04-nccl2.27.3-1-torch2.7.0-vision0.22.0-audio2.7.0-abi1" FROM scratch as freezer WORKDIR / COPY --chmod=755 freeze.sh / From ef1ebfcc254c93dbc21c6121d064690533e9ef72 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Fri, 13 Jun 2025 15:27:32 -0400 Subject: [PATCH 24/42] fix(vllm-tensorizer): Set `MAX_JOBS` to 2 to prevent OOM during Flash Attention compilation --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 1ddf35b2..ed3ec11b 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -5,7 +5,7 @@ COPY --chmod=755 freeze.sh / FROM ${BASE_IMAGE} as builder-base -ARG MAX_JOBS="" +ARG MAX_JOBS="2" # Dependencies requiring NVCC are built ahead of time in a separate stage # so that the ~2 GiB dev library installations don't have to be included From 5bf13ccf81dbf711a5a0a99064a54197bb927a39 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 16 Jun 2025 10:26:18 -0400 Subject: [PATCH 25/42] fix(vllm-tensorizer): Increase MAX_JOBS to 8 for faster compilation, balancing OOM risk --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index ed3ec11b..522648b0 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -5,7 +5,7 @@ COPY --chmod=755 freeze.sh / FROM ${BASE_IMAGE} as builder-base -ARG MAX_JOBS="2" +ARG MAX_JOBS="8" # Dependencies requiring NVCC are built ahead of time in a separate stage # so that the ~2 GiB dev library installations don't have to be included From c0f6a0442cb7428765bb366ee310e27374572839 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 16 Jun 2025 10:49:04 -0400 Subject: [PATCH 26/42] fix(vllm-tensorizer): Remove xformers constraint to resolve vLLM dependency conflict --- vllm-tensorizer/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 522648b0..32610b02 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -72,6 +72,8 @@ RUN apt-get -qq update && apt-get install -y --no-install-recommends curl && apt RUN --mount=type=bind,from=freezer,target=/tmp/frozen \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/constraints.txt +RUN sed -i '/xformers==/d' /tmp/constraints.txt + RUN python3 -m pip install --no-cache-dir \ "fschat[model_worker] == 0.2.30" \ -c /tmp/constraints.txt From a932752730dbba4ff8abd26e1620b9ebfafaf395 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 16 Jun 2025 12:21:04 -0400 Subject: [PATCH 27/42] fix(vllm-tensorizer): Remove `fschat` installation to resolve `pydantic` conflict --- vllm-tensorizer/Dockerfile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 32610b02..599dfd3a 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -74,10 +74,6 @@ RUN --mount=type=bind,from=freezer,target=/tmp/frozen \ RUN sed -i '/xformers==/d' /tmp/constraints.txt -RUN python3 -m pip install --no-cache-dir \ - "fschat[model_worker] == 0.2.30" \ - -c /tmp/constraints.txt - RUN --mount=type=bind,from=vllm-builder,source=/wheels,target=/tmp/wheels \ python3 -m pip install --no-cache-dir /tmp/wheels/*.whl -c /tmp/constraints.txt && \ rm /tmp/constraints.txt From b872b3ef7824c77efb2f993934dd6fa5fe43ff4c Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 16 Jun 2025 14:05:55 -0400 Subject: [PATCH 28/42] feat(vllm-tensorizer): Upgrade to PyTorch 2.7.1; Remove commented CUDA dev install; Set MAX_JOBS to 10 --- vllm-tensorizer/Dockerfile | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 599dfd3a..52aea47d 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -1,32 +1,11 @@ -ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:00b897e-nccl-cuda12.8.1-ubuntu22.04-nccl2.27.3-1-torch2.7.0-vision0.22.0-audio2.7.0-abi1" +ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:00b897e-nccl-cuda12.8.1-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.0-audio2.7.0-abi1" FROM scratch as freezer WORKDIR / COPY --chmod=755 freeze.sh / FROM ${BASE_IMAGE} as builder-base -ARG MAX_JOBS="8" - -# Dependencies requiring NVCC are built ahead of time in a separate stage -# so that the ~2 GiB dev library installations don't have to be included -# in the final image. -#RUN export \ -# CUDA_MAJOR_VERSION=$(echo $CUDA_VERSION | cut -d. -f1) \ -# CUDA_MINOR_VERSION=$(echo $CUDA_VERSION | cut -d. -f2) && \ -# export \ -# CUDA_PACKAGE_VERSION="${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" && \ -# apt-get -qq update && apt-get install -y --no-install-recommends \ -# cuda-nvcc-${CUDA_PACKAGE_VERSION} \ -# cuda-nvml-dev-${CUDA_PACKAGE_VERSION} \ -# libcurand-dev-${CUDA_PACKAGE_VERSION} \ -# libcublas-dev-${CUDA_PACKAGE_VERSION} \ -# libcusparse-dev-${CUDA_PACKAGE_VERSION} \ -# libcusolver-dev-${CUDA_PACKAGE_VERSION} \ -# cuda-nvprof-${CUDA_PACKAGE_VERSION} \ -# cuda-profiler-api-${CUDA_PACKAGE_VERSION} \ -# libaio-dev \ -# ninja-build && \ -# apt-get clean +ARG MAX_JOBS="10" RUN ldconfig From c645bd585ab105f2299267a04de946c64116bada Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 16 Jun 2025 14:09:49 -0400 Subject: [PATCH 29/42] fix(vllm-tensorizer): Correct base image tag to align PyTorch, torchvision, and torchaudio versions --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 52aea47d..f73358b1 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:00b897e-nccl-cuda12.8.1-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.0-audio2.7.0-abi1" +ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:00b897e-nccl-cuda12.8.1-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.0-abi1" FROM scratch as freezer WORKDIR / COPY --chmod=755 freeze.sh / From 9f0eaf6c0e2b3d1edaedec70cf519b7fa37ec372 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 16 Jun 2025 14:32:39 -0400 Subject: [PATCH 30/42] fix(vllm-tensorizer): Correct base image tag to align torchaudio version with PyTorch 2.7.1; uppercase all `as` keywords --- vllm-tensorizer/Dockerfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index f73358b1..b0e9872d 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -1,9 +1,9 @@ -ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:00b897e-nccl-cuda12.8.1-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.0-abi1" -FROM scratch as freezer +ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:00b897e-nccl-cuda12.8.1-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1" +FROM scratch AS freezer WORKDIR / COPY --chmod=755 freeze.sh / -FROM ${BASE_IMAGE} as builder-base +FROM ${BASE_IMAGE} AS builder-base ARG MAX_JOBS="10" @@ -15,7 +15,7 @@ RUN apt-get -qq update && \ apt-get clean && \ pip3 install -U --no-cache-dir pip packaging setuptools wheel setuptools_scm regex -FROM alpine/git:2.36.3 as vllm-downloader +FROM alpine/git:2.36.3 AS vllm-downloader WORKDIR /git ARG COMMIT_HASH RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \ @@ -26,7 +26,7 @@ RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \ --depth 1 --filter=blob:none -FROM builder-base as vllm-builder +FROM builder-base AS vllm-builder WORKDIR /workspace RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \ @@ -42,7 +42,7 @@ RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw WORKDIR /wheels -FROM ${BASE_IMAGE} as base +FROM ${BASE_IMAGE} AS base WORKDIR /workspace From ae288f5a1528de93d023de84f46b860e087eb4ab Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 16 Jun 2025 14:45:36 -0400 Subject: [PATCH 31/42] fix(vllm-tensorizer): Use correct and existing base image tag for PyTorch 2.7.1 and CUDA 12.8.1 --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index b0e9872d..bd63b6e8 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:00b897e-nccl-cuda12.8.1-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1" +ARG BASE_IMAGE="FROM ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-2bf6b9c-base-cuda12.8.1-ubuntu22.04-torch2.7.1-vision0.22.1-audio2.7.1-abi1" FROM scratch AS freezer WORKDIR / COPY --chmod=755 freeze.sh / From a1a8a22b73f6b16ffa6a551cb2564718ed52b623 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 16 Jun 2025 14:49:49 -0400 Subject: [PATCH 32/42] fix(vllm-tensorizer): Use correct and existing base image tag for PyTorch 2.7.1 and CUDA 12.8.1 --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index bd63b6e8..526bad5b 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE="FROM ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-2bf6b9c-base-cuda12.8.1-ubuntu22.04-torch2.7.1-vision0.22.1-audio2.7.1-abi1" +ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-2bf6b9c-base-cuda12.8.1-ubuntu22.04-torch2.7.1-vision0.22.1-audio2.7.1-abi1" FROM scratch AS freezer WORKDIR / COPY --chmod=755 freeze.sh / From aea4d46982e5f7e0177bf1597ad02172cf6a1409 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 16 Jun 2025 14:56:38 -0400 Subject: [PATCH 33/42] fix(vllm-tensorizer): Use 'nccl' compute base image to provide nvcc and CUDA dev tools --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 526bad5b..377c98f5 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-2bf6b9c-base-cuda12.8.1-ubuntu22.04-torch2.7.1-vision0.22.1-audio2.7.1-abi1" +ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-2bf6b9c-nccl-cuda12.8.1-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1" FROM scratch AS freezer WORKDIR / COPY --chmod=755 freeze.sh / From ced54a193ca8a215e3f0c462d99a96cba047ffbf Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Mon, 16 Jun 2025 16:37:26 -0400 Subject: [PATCH 34/42] feat(vllm-tensorizer): Add use_existing_torch.py helper and related build flags for PyTorch compatibility; up `MAX_JOBS` to 16 --- vllm-tensorizer/Dockerfile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 377c98f5..675ff9c2 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -5,7 +5,7 @@ COPY --chmod=755 freeze.sh / FROM ${BASE_IMAGE} AS builder-base -ARG MAX_JOBS="10" +ARG MAX_JOBS="16" RUN ldconfig @@ -33,6 +33,15 @@ RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw --mount=type=bind,from=freezer,target=/tmp/frozen,rw \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \ if [ -z "$MAX_JOBS" ]; then unset MAX_JOBS; fi && \ + python3 -m pip install --no-cache-dir py-cpuinfo && \ + if [ -f 'use_existing_torch.py' ]; then \ + python3 use_existing_torch.py; \ + else \ + git cat-file blob \ + e489ad7a210f4234db696d1f2749d5f3662fa65b:use_existing_torch.py \ + | python3 -; \ + fi && \ + USE_CUDNN=1 USE_CUSPARSELT=1 \ LIBRARY_PATH="/usr/local/cuda/lib64:${LIBRARY_PATH:+:$LIBRARY_PATH}" \ CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \ python3 -m pip wheel -w /wheels \ From 9effc1bd09b75d165d0b57081894e2c2a0af50f9 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Tue, 17 Jun 2025 10:41:54 -0400 Subject: [PATCH 35/42] feat(vllm-tensorizer): Update base image to CUDA 12.9.0, PyTorch 2.7.1 variant; revert xformers constraint removal --- vllm-tensorizer/Dockerfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 675ff9c2..b2c84831 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -1,5 +1,4 @@ -ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-2bf6b9c-nccl-cuda12.8.1-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1" -FROM scratch AS freezer +ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-67208ca-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1" WORKDIR / COPY --chmod=755 freeze.sh / @@ -60,8 +59,6 @@ RUN apt-get -qq update && apt-get install -y --no-install-recommends curl && apt RUN --mount=type=bind,from=freezer,target=/tmp/frozen \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/constraints.txt -RUN sed -i '/xformers==/d' /tmp/constraints.txt - RUN --mount=type=bind,from=vllm-builder,source=/wheels,target=/tmp/wheels \ python3 -m pip install --no-cache-dir /tmp/wheels/*.whl -c /tmp/constraints.txt && \ rm /tmp/constraints.txt From 1057644c67f1ee1512205d446cf6d2e8ed1574bc Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Tue, 17 Jun 2025 10:49:44 -0400 Subject: [PATCH 36/42] fix(vllm-tensorizer): Undelete `FROM scratch AS freezer` --- vllm-tensorizer/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index b2c84831..a7777125 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -1,4 +1,5 @@ ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-67208ca-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1" +FROM scratch AS freezer WORKDIR / COPY --chmod=755 freeze.sh / From 092946c887edc37f373a74dd468dad62203e35f4 Mon Sep 17 00:00:00 2001 From: Justin Perlman <69224148+JustinPerlman@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:33:39 -0400 Subject: [PATCH 37/42] fix(vllm-tensorizer): Remove leftover `TRITON_COMMIT` Co-authored-by: Eta <24918963+Eta0@users.noreply.github.com> --- .github/workflows/vllm-tensorizer.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/vllm-tensorizer.yml b/.github/workflows/vllm-tensorizer.yml index 88c0f7f7..478addc5 100644 --- a/.github/workflows/vllm-tensorizer.yml +++ b/.github/workflows/vllm-tensorizer.yml @@ -20,5 +20,4 @@ jobs: folder: vllm-tensorizer tag-suffix: ${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}} build-args: | - COMMIT_HASH=${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}} - TRITON_COMMIT=96316ce5 \ No newline at end of file + COMMIT_HASH=${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}} \ No newline at end of file From 1b60e6110265403c74597a3996283f6c75268169 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Tue, 17 Jun 2025 14:53:18 -0400 Subject: [PATCH 38/42] fix(vllm-tensorizer): Improve Dockerfile ARG passing Co-authored-by: Eta --- .github/configurations/vllm-tensorizer.yml | 2 ++ .github/workflows/vllm-tensorizer.yml | 18 +++++++++++------- vllm-tensorizer/Dockerfile | 6 +++--- 3 files changed, 16 insertions(+), 10 deletions(-) create mode 100644 .github/configurations/vllm-tensorizer.yml diff --git a/.github/configurations/vllm-tensorizer.yml b/.github/configurations/vllm-tensorizer.yml new file mode 100644 index 00000000..5caaad6a --- /dev/null +++ b/.github/configurations/vllm-tensorizer.yml @@ -0,0 +1,2 @@ +vllm-commit: ['b6553be1bc75f046b00046a4ad7576364d03c835'] +base-image: ['ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-67208ca-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1'] diff --git a/.github/workflows/vllm-tensorizer.yml b/.github/workflows/vllm-tensorizer.yml index 478addc5..3615e68f 100644 --- a/.github/workflows/vllm-tensorizer.yml +++ b/.github/workflows/vllm-tensorizer.yml @@ -1,9 +1,4 @@ on: - workflow_dispatch: - inputs: - commit: - description: 'Commit to build' - required: true push: paths: - "vllm-tensorizer/**" @@ -12,12 +7,21 @@ on: jobs: + get-config: + name: Get torch:base Config + uses: ./.github/workflows/read-configuration.yml + with: + path: ./.github/configurations/vllm-tensorizer.yml build: uses: ./.github/workflows/build.yml + needs: get-config + strategy: + matrix: ${{ fromJSON(needs.get-config.outputs.config) }} secrets: inherit with: image-name: vllm-tensorizer folder: vllm-tensorizer - tag-suffix: ${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}} + tag-suffix: ${{ matrix.vllm-commit }} build-args: | - COMMIT_HASH=${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}} \ No newline at end of file + VLLM_COMMIT_HASH=${{ matrix.vllm-commit }} + BASE_IMAGE=${{ matrix.base-image }} diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index a7777125..7b78fa8f 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -17,11 +17,11 @@ RUN apt-get -qq update && \ FROM alpine/git:2.36.3 AS vllm-downloader WORKDIR /git -ARG COMMIT_HASH +ARG VLLM_COMMIT_HASH RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \ https://github.com/vllm-project/vllm.git && \ cd vllm && \ - git checkout "${COMMIT_HASH}" && \ + git checkout "${VLLM_COMMIT_HASH}" && \ git submodule update --init --recursive --jobs 8 \ --depth 1 --filter=blob:none @@ -65,4 +65,4 @@ RUN --mount=type=bind,from=vllm-builder,source=/wheels,target=/tmp/wheels \ rm /tmp/constraints.txt -EXPOSE 8080 \ No newline at end of file +EXPOSE 8080 From 26479bb91b6d77959dde9a4b634698050db0f26d Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Tue, 17 Jun 2025 14:56:39 -0400 Subject: [PATCH 39/42] style(vllm-tensorizer): Rename build stage Co-authored-by: Eta --- .github/workflows/vllm-tensorizer.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/vllm-tensorizer.yml b/.github/workflows/vllm-tensorizer.yml index 3615e68f..fa14adb3 100644 --- a/.github/workflows/vllm-tensorizer.yml +++ b/.github/workflows/vllm-tensorizer.yml @@ -8,7 +8,7 @@ on: jobs: get-config: - name: Get torch:base Config + name: Get vllm-tensorizer config uses: ./.github/workflows/read-configuration.yml with: path: ./.github/configurations/vllm-tensorizer.yml From ee71e132d3fe78172638171f1a95103a40929ff1 Mon Sep 17 00:00:00 2001 From: Justin Perlman Date: Tue, 17 Jun 2025 15:17:53 -0400 Subject: [PATCH 40/42] feat(vllm-tensorizer): Install OpenAI-compatible server dependencies Co-authored-by: Eta --- vllm-tensorizer/Dockerfile | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 7b78fa8f..fafc1382 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -55,14 +55,26 @@ FROM ${BASE_IMAGE} AS base WORKDIR /workspace -RUN apt-get -qq update && apt-get install -y --no-install-recommends curl && apt-get clean +RUN apt-get -qq update && apt-get install -y --no-install-recommends curl libsodium23 && apt-get clean RUN --mount=type=bind,from=freezer,target=/tmp/frozen \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/constraints.txt RUN --mount=type=bind,from=vllm-builder,source=/wheels,target=/tmp/wheels \ - python3 -m pip install --no-cache-dir /tmp/wheels/*.whl -c /tmp/constraints.txt && \ - rm /tmp/constraints.txt + python3 -m pip install --no-cache-dir "$(printf '%s[tensorizer]' /tmp/wheels/*.whl)" -c /tmp/constraints.txt + +# Copied from vLLM's Dockerfile +ARG TARGETPLATFORM +RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ + python3 -m pip install --no-cache-dir \ + accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' \ + boto3 runai-model-streamer runai-model-streamer[s3] -c /tmp/constraints.txt; \ + else \ + python3 -m pip install --no-cache-dir \ + accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.3' 'timm==0.9.10' \ + boto3 runai-model-streamer runai-model-streamer[s3] -c /tmp/constraints.txt; \ + fi && \ + rm /tmp/constraints.txt EXPOSE 8080 From 6ec3cc6aae5261a31701b2d770829d9487855e89 Mon Sep 17 00:00:00 2001 From: Eta Date: Tue, 17 Jun 2025 16:03:57 -0500 Subject: [PATCH 41/42] feat(vllm-tensorizer): Add `flashinfer` build, plus misc. minor changes This commit additionally renames VLLM_COMMIT_HASH to VLLM_COMMIT, makes git clones more efficient, and reformats some YAML lists. --- .github/configurations/vllm-tensorizer.yml | 8 +++- .github/workflows/vllm-tensorizer.yml | 3 +- vllm-tensorizer/Dockerfile | 47 ++++++++++++++++++---- 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/.github/configurations/vllm-tensorizer.yml b/.github/configurations/vllm-tensorizer.yml index 5caaad6a..39f2ad1e 100644 --- a/.github/configurations/vllm-tensorizer.yml +++ b/.github/configurations/vllm-tensorizer.yml @@ -1,2 +1,6 @@ -vllm-commit: ['b6553be1bc75f046b00046a4ad7576364d03c835'] -base-image: ['ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-67208ca-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1'] +vllm-commit: + - 'b6553be1bc75f046b00046a4ad7576364d03c835' +flashinfer-commit: + - 'v0.2.6.post1' +base-image: + - 'ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-67208ca-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1' diff --git a/.github/workflows/vllm-tensorizer.yml b/.github/workflows/vllm-tensorizer.yml index fa14adb3..b73a3fe6 100644 --- a/.github/workflows/vllm-tensorizer.yml +++ b/.github/workflows/vllm-tensorizer.yml @@ -23,5 +23,6 @@ jobs: folder: vllm-tensorizer tag-suffix: ${{ matrix.vllm-commit }} build-args: | - VLLM_COMMIT_HASH=${{ matrix.vllm-commit }} + VLLM_COMMIT=${{ matrix.vllm-commit }} + FLASHINFER_COMMIT=${{ matrix.flashinfer-commit }} BASE_IMAGE=${{ matrix.base-image }} diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index fafc1382..05bc5eb8 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -15,20 +15,35 @@ RUN apt-get -qq update && \ apt-get clean && \ pip3 install -U --no-cache-dir pip packaging setuptools wheel setuptools_scm regex +# Create the /wheels directory +WORKDIR /wheels + +WORKDIR /workspace + + FROM alpine/git:2.36.3 AS vllm-downloader WORKDIR /git -ARG VLLM_COMMIT_HASH -RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \ - https://github.com/vllm-project/vllm.git && \ +ARG VLLM_COMMIT +RUN git clone --filter=tree:0 --no-single-branch --no-checkout \ + https://github.com/vllm-project/vllm && \ cd vllm && \ - git checkout "${VLLM_COMMIT_HASH}" && \ + git checkout "${VLLM_COMMIT}" && \ git submodule update --init --recursive --jobs 8 \ - --depth 1 --filter=blob:none + --depth 1 --filter=tree:0 -FROM builder-base AS vllm-builder -WORKDIR /workspace +FROM alpine/git:2.36.3 AS flashinfer-downloader +WORKDIR /git +ARG FLASHINFER_COMMIT +RUN git clone --filter=tree:0 --no-single-branch --no-checkout \ + https://github.com/flashinfer-ai/flashinfer && \ + cd flashinfer && \ + git checkout "${FLASHINFER_COMMIT}" && \ + git submodule update --init --recursive --jobs 8 \ + --depth 1 --filter=tree:0 + +FROM builder-base AS vllm-builder RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \ --mount=type=bind,from=freezer,target=/tmp/frozen,rw \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \ @@ -51,6 +66,21 @@ RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw WORKDIR /wheels + +FROM builder-base AS flashinfer-builder +RUN --mount=type=bind,from=flashinfer-downloader,source=/git/flashinfer,target=/workspace,rw \ + --mount=type=bind,from=freezer,target=/tmp/frozen,rw \ + /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \ + export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST/7.0 /}" && \ + python3 -m flashinfer.aot && \ + python3 -m pip wheel -w /wheels \ + -v --no-cache-dir --no-build-isolation --no-deps \ + -c /tmp/frozen/constraints.txt \ + ./ + +WORKDIR /wheels + + FROM ${BASE_IMAGE} AS base WORKDIR /workspace @@ -63,6 +93,9 @@ RUN --mount=type=bind,from=freezer,target=/tmp/frozen \ RUN --mount=type=bind,from=vllm-builder,source=/wheels,target=/tmp/wheels \ python3 -m pip install --no-cache-dir "$(printf '%s[tensorizer]' /tmp/wheels/*.whl)" -c /tmp/constraints.txt +RUN --mount=type=bind,from=flashinfer-builder,source=/wheels,target=/tmp/wheels \ + python3 -m pip install --no-cache-dir /tmp/wheels/*.whl -c /tmp/constraints.txt + # Copied from vLLM's Dockerfile ARG TARGETPLATFORM From 219356795946da38dd40b3af9b3d334542364741 Mon Sep 17 00:00:00 2001 From: Eta Date: Tue, 17 Jun 2025 16:25:38 -0500 Subject: [PATCH 42/42] fix(vllm-tensorizer): Use POSIX `sh`-safe string substitution --- vllm-tensorizer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile index 05bc5eb8..8ce6ad15 100644 --- a/vllm-tensorizer/Dockerfile +++ b/vllm-tensorizer/Dockerfile @@ -71,7 +71,7 @@ FROM builder-base AS flashinfer-builder RUN --mount=type=bind,from=flashinfer-downloader,source=/git/flashinfer,target=/workspace,rw \ --mount=type=bind,from=freezer,target=/tmp/frozen,rw \ /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \ - export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST/7.0 /}" && \ + export TORCH_CUDA_ARCH_LIST="$(echo "${TORCH_CUDA_ARCH_LIST}" | sed 's@[67]\.0 \+@@g')" && \ python3 -m flashinfer.aot && \ python3 -m pip wheel -w /wheels \ -v --no-cache-dir --no-build-isolation --no-deps \