feat(vllm-tensorizer): Upgrade vLLM version and Resolve Related Build Compatibility Issues #98

Merged · Jun 18, 2025 · 42 commits
Changes from 36 commits

Commits
56eed9d
ci(vllm-tensorizer): Update vLLM source commit in build pipeline
JustinPerlman Jun 6, 2025
1b8b7bb
build(vllm-tensorizer): Update `torch-extras` base image
JustinPerlman Jun 6, 2025
face617
chore: Add .idea/ to .gitignore
JustinPerlman Jun 6, 2025
0ca8228
fix(vllm-tensorizer): Remove redundant CUDA dev package installation
JustinPerlman Jun 9, 2025
1512fdf
fix(vllm-tensorizer): install setuptools_scm and cmake for vLLM build
JustinPerlman Jun 9, 2025
1ccf357
fix(vllm-tensorizer): update triton version to 2.58.0
JustinPerlman Jun 9, 2025
b32bae5
fix(vllm-tensorizer): update triton version to 3.3.1
JustinPerlman Jun 9, 2025
86181c3
fix(vllm-tensorizer): update triton version to 2.3.1
JustinPerlman Jun 9, 2025
3228eb7
fix(vllm-tensorizer): remove explicit triton versioning
JustinPerlman Jun 10, 2025
5c52d8e
feat(vllm-tensorizer): implement custom triton build and install for …
JustinPerlman Jun 10, 2025
3a78a56
fix(vllm-tensorizer): reorder build stages to resolve circular depend…
JustinPerlman Jun 10, 2025
a7e3e19
fix(vllm-tensorizer): Remove accidental backslashes
JustinPerlman Jun 10, 2025
6dcafb5
feat(vllm-tensorizer): Add MAX_JOBS unset logic; remove custom triton…
JustinPerlman Jun 12, 2025
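Commit 6dcafb5's "MAX_JOBS unset logic" deals with a subtlety of build ARGs: an ARG that defaults to the empty string is still *set* (but empty) in the shell, and some build tools reject an empty job count instead of falling back to their own default. A truly unset variable avoids that. A minimal sketch of the distinction (plain POSIX sh, independent of the Dockerfile):

```shell
# An empty MAX_JOBS is still *set*; `unset` removes it entirely, so the
# set-ness test ${MAX_JOBS+set} stops expanding to anything.
MAX_JOBS=""
echo "empty but set: ${MAX_JOBS+set}"
if [ -z "$MAX_JOBS" ]; then unset MAX_JOBS; fi
echo "after unset: ${MAX_JOBS+set}"
```

This prints `empty but set: set` followed by `after unset: `, showing why the Dockerfile unsets the variable rather than passing it through empty.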
9a1a9c1
fix(vllm-builder): Configure CUDA environment variables for vLLM comp…
JustinPerlman Jun 13, 2025
af19873
fix(vllm-tensorizer): Update vLLM commit to a newer version for PyTor…
JustinPerlman Jun 13, 2025
93db31b
fix(vllm-tensorizer): Install missing `regex` module for vLLM build m…
JustinPerlman Jun 13, 2025
f865d51
feat(vllm-tensorizer): Switch to upstream vLLM for PyTorch 2.7.0 comp…
JustinPerlman Jun 13, 2025
ea0074b
feat(vllm-tensorizer): Downgrade vLLM to v0.9.0 for PyTorch 2.7.0 com…
JustinPerlman Jun 13, 2025
7631031
fix(vllm-tensorizer): Apply CMake patch for nvToolsExt linking issue
JustinPerlman Jun 13, 2025
c0b2d0c
fix(vllm-builder): Simplify find_library call in nvToolsExt CMake patch
JustinPerlman Jun 13, 2025
17d917b
fix(vllm-tensorizer): Add missing `)`
JustinPerlman Jun 13, 2025
3bf996b
fix(vllm-tensorizer): Remove Cmake patch
JustinPerlman Jun 13, 2025
ac48891
fix(vllm-tensorizer): Update base image to CUDA 12.8.1 to resolve bui…
JustinPerlman Jun 13, 2025
ef1ebfc
fix(vllm-tensorizer): Set `MAX_JOBS` to 2 to prevent OOM during Flash…
JustinPerlman Jun 13, 2025
5bf13cc
fix(vllm-tensorizer): Increase MAX_JOBS to 8 for faster compilation, …
JustinPerlman Jun 16, 2025
c0f6a04
fix(vllm-tensorizer): Remove xformers constraint to resolve vLLM depe…
JustinPerlman Jun 16, 2025
a932752
fix(vllm-tensorizer): Remove `fschat` installation to resolve `pydant…
JustinPerlman Jun 16, 2025
b872b3e
feat(vllm-tensorizer): Upgrade to PyTorch 2.7.1; Remove commented CUD…
JustinPerlman Jun 16, 2025
c645bd5
fix(vllm-tensorizer): Correct base image tag to align PyTorch, torchv…
JustinPerlman Jun 16, 2025
9f0eaf6
fix(vllm-tensorizer): Correct base image tag to align torchaudio vers…
JustinPerlman Jun 16, 2025
ae288f5
fix(vllm-tensorizer): Use correct and existing base image tag for PyT…
JustinPerlman Jun 16, 2025
a1a8a22
fix(vllm-tensorizer): Use correct and existing base image tag for PyT…
JustinPerlman Jun 16, 2025
aea4d46
fix(vllm-tensorizer): Use 'nccl' compute base image to provide nvcc a…
JustinPerlman Jun 16, 2025
ced54a1
feat(vllm-tensorizer): Add use_existing_torch.py helper and related b…
JustinPerlman Jun 16, 2025
9effc1b
feat(vllm-tensorizer): Update base image to CUDA 12.9.0, PyTorch 2.7.…
JustinPerlman Jun 17, 2025
1057644
fix(vllm-tensorizer): Undelete `FROM scratch AS freezer`
JustinPerlman Jun 17, 2025
092946c
fix(vllm-tensorizer): Remove leftover `TRITON_COMMIT`
JustinPerlman Jun 17, 2025
1b60e61
fix(vllm-tensorizer): Improve Dockerfile ARG passing
JustinPerlman Jun 17, 2025
26479bb
style(vllm-tensorizer): Rename build stage
JustinPerlman Jun 17, 2025
ee71e13
feat(vllm-tensorizer): Install OpenAI-compatible server dependencies
JustinPerlman Jun 17, 2025
6ec3cc6
feat(vllm-tensorizer): Add `flashinfer` build, plus misc. minor changes
Eta0 Jun 17, 2025
2193567
fix(vllm-tensorizer): Use POSIX `sh`-safe string substitution
Eta0 Jun 17, 2025
5 changes: 3 additions & 2 deletions — .github/workflows/vllm-tensorizer.yml

```diff
@@ -18,6 +18,7 @@ jobs:
     with:
       image-name: vllm-tensorizer
      folder: vllm-tensorizer
-      tag-suffix: ${{ inputs.commit || '19307ba71ddeb7e1cc6aec3c1baa8b50d59c1beb'}}
+      tag-suffix: ${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}}
       build-args: |
-        COMMIT_HASH=${{ inputs.commit || '19307ba71ddeb7e1cc6aec3c1baa8b50d59c1beb'}}
+        COMMIT_HASH=${{ inputs.commit || 'b6553be1bc75f046b00046a4ad7576364d03c835'}}
+        TRITON_COMMIT=96316ce5
```
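The workflow pins the vLLM source to a default commit whenever no `commit` input is supplied, via the GitHub Actions `||` expression. A rough POSIX-sh analog of that fallback (variable names here are illustrative, not from the workflow):

```shell
# ${VAR:-default} substitutes the default when VAR is unset or empty,
# mirroring the workflow's `inputs.commit || '<hash>'` expression.
DEFAULT_COMMIT='b6553be1bc75f046b00046a4ad7576364d03c835'
COMMIT_HASH="${COMMIT_INPUT:-$DEFAULT_COMMIT}"
echo "COMMIT_HASH=$COMMIT_HASH"
```

With `COMMIT_INPUT` unset, this echoes the pinned default; with it exported, the override wins.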
3 changes: 3 additions & 0 deletions — .gitignore

```diff
@@ -162,3 +162,6 @@ flycheck_*.el
 .env*
 .environment
 .environment*
+
+# JetBrains Idea files
+.idea/
```
63 changes: 25 additions & 38 deletions — vllm-tensorizer/Dockerfile

```diff
@@ -1,66 +1,57 @@
-ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-22.04-58a49a2-base-cuda12.1.1-torch2.1.2-vision0.16.2-audio2.1.2-flash_attn2.4.2"
-
-FROM scratch as freezer
+ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-67208ca-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1"
+FROM scratch AS freezer
 WORKDIR /
 COPY --chmod=755 freeze.sh /
 
-FROM ${BASE_IMAGE} as builder-base
-
-ARG MAX_JOBS=""
-
-# Dependencies requiring NVCC are built ahead of time in a separate stage
-# so that the ~2 GiB dev library installations don't have to be included
-# in the final image.
-RUN export \
-        CUDA_MAJOR_VERSION=$(echo $CUDA_VERSION | cut -d. -f1) \
-        CUDA_MINOR_VERSION=$(echo $CUDA_VERSION | cut -d. -f2) && \
-    export \
-        CUDA_PACKAGE_VERSION="${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" && \
-    apt-get -qq update && apt-get install -y --no-install-recommends \
-        cuda-nvcc-${CUDA_PACKAGE_VERSION} \
-        cuda-nvml-dev-${CUDA_PACKAGE_VERSION} \
-        libcurand-dev-${CUDA_PACKAGE_VERSION} \
-        libcublas-dev-${CUDA_PACKAGE_VERSION} \
-        libcusparse-dev-${CUDA_PACKAGE_VERSION} \
-        libcusolver-dev-${CUDA_PACKAGE_VERSION} \
-        cuda-nvprof-${CUDA_PACKAGE_VERSION} \
-        cuda-profiler-api-${CUDA_PACKAGE_VERSION} \
-        libaio-dev \
-        ninja-build && \
-    apt-get clean
+FROM ${BASE_IMAGE} AS builder-base
+
+ARG MAX_JOBS="16"
 
 RUN ldconfig
 
 RUN apt-get -qq update && \
     apt-get -qq install -y --no-install-recommends \
-        python3-pip git ninja-build && \
+        python3-pip git ninja-build cmake && \
     apt-get clean && \
-    pip3 install -U --no-cache-dir pip packaging setuptools wheel
+    pip3 install -U --no-cache-dir pip packaging setuptools wheel setuptools_scm regex
 
-FROM alpine/git:2.36.3 as vllm-downloader
+FROM alpine/git:2.36.3 AS vllm-downloader
 WORKDIR /git
 ARG COMMIT_HASH
 RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \
-        https://github.com/coreweave/vllm.git && \
+        https://github.com/vllm-project/vllm.git && \
     cd vllm && \
     git checkout "${COMMIT_HASH}" && \
     git submodule update --init --recursive --jobs 8 \
         --depth 1 --filter=blob:none
 
-FROM builder-base as vllm-builder
-
+FROM builder-base AS vllm-builder
 WORKDIR /workspace
 
 RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \
     --mount=type=bind,from=freezer,target=/tmp/frozen,rw \
     /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \
-    LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}" \
+    if [ -z "$MAX_JOBS" ]; then unset MAX_JOBS; fi && \
+    python3 -m pip install --no-cache-dir py-cpuinfo && \
+    if [ -f 'use_existing_torch.py' ]; then \
+        python3 use_existing_torch.py; \
+    else \
+        git cat-file blob \
+            e489ad7a210f4234db696d1f2749d5f3662fa65b:use_existing_torch.py \
+            | python3 -; \
+    fi && \
+    USE_CUDNN=1 USE_CUSPARSELT=1 \
+    LIBRARY_PATH="/usr/local/cuda/lib64:${LIBRARY_PATH:+:$LIBRARY_PATH}" \
+    CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
     python3 -m pip wheel -w /wheels \
         -v --no-cache-dir --no-build-isolation --no-deps \
         -c /tmp/frozen/constraints.txt \
         ./
 
 WORKDIR /wheels
 
-FROM ${BASE_IMAGE} as base
+FROM ${BASE_IMAGE} AS base
 
 WORKDIR /workspace
```
```diff
@@ -69,10 +60,6 @@ RUN apt-get -qq update && apt-get install -y --no-install-recommends curl && apt
 RUN --mount=type=bind,from=freezer,target=/tmp/frozen \
     /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/constraints.txt
 
-RUN python3 -m pip install --no-cache-dir \
-    "fschat[model_worker] == 0.2.30" "triton == 2.1.0" \
-    -c /tmp/constraints.txt
-
 RUN --mount=type=bind,from=vllm-builder,source=/wheels,target=/tmp/wheels \
     python3 -m pip install --no-cache-dir /tmp/wheels/*.whl -c /tmp/constraints.txt && \
     rm /tmp/constraints.txt
```
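The `freeze.sh` script (mounted from the `freezer` stage) pins the already-installed torch-family packages as pip constraints, so the vLLM wheel build cannot silently swap in different versions. Its contents are not shown in this diff; a hypothetical stand-in that produces the same kind of `pkg==version` constraint lines:

```shell
# Hypothetical equivalent of freeze.sh (the real script is not in this
# diff): emit a `pkg==version` constraint line for each named package
# that is installed, looked up via Python's importlib.metadata.
freeze() {
    for pkg in "$@"; do
        v="$(python3 -c "import importlib.metadata as m; print(m.version('$pkg'))" 2>/dev/null)" \
            && printf '%s==%s\n' "$pkg" "$v"
    done
}
# Usage mirroring the Dockerfile:
#   freeze torch torchaudio torchvision xformers > constraints.txt
```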