diff --git a/.github/configurations/vllm-tensorizer.yml b/.github/configurations/vllm-tensorizer.yml
new file mode 100644
index 00000000..39f2ad1e
--- /dev/null
+++ b/.github/configurations/vllm-tensorizer.yml
@@ -0,0 +1,6 @@
+vllm-commit:
+  - 'b6553be1bc75f046b00046a4ad7576364d03c835'
+flashinfer-commit:
+  - 'v0.2.6.post1'
+base-image:
+  - 'ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-67208ca-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1'
diff --git a/.github/workflows/vllm-tensorizer.yml b/.github/workflows/vllm-tensorizer.yml
index 14d2c7db..b73a3fe6 100644
--- a/.github/workflows/vllm-tensorizer.yml
+++ b/.github/workflows/vllm-tensorizer.yml
@@ -1,9 +1,4 @@
 on:
-  workflow_dispatch:
-    inputs:
-      commit:
-        description: 'Commit to build'
-        required: true
   push:
     paths:
       - "vllm-tensorizer/**"
@@ -12,12 +7,22 @@ on:
 
 jobs:
+  get-config:
+    name: Get vllm-tensorizer config
+    uses: ./.github/workflows/read-configuration.yml
+    with:
+      path: ./.github/configurations/vllm-tensorizer.yml
 
   build:
     uses: ./.github/workflows/build.yml
+    needs: get-config
+    strategy:
+      matrix: ${{ fromJSON(needs.get-config.outputs.config) }}
     secrets: inherit
     with:
       image-name: vllm-tensorizer
       folder: vllm-tensorizer
-      tag-suffix: ${{ inputs.commit || '19307ba71ddeb7e1cc6aec3c1baa8b50d59c1beb'}}
+      tag-suffix: ${{ matrix.vllm-commit }}
       build-args: |
-        COMMIT_HASH=${{ inputs.commit || '19307ba71ddeb7e1cc6aec3c1baa8b50d59c1beb'}}
\ No newline at end of file
+        VLLM_COMMIT=${{ matrix.vllm-commit }}
+        FLASHINFER_COMMIT=${{ matrix.flashinfer-commit }}
+        BASE_IMAGE=${{ matrix.base-image }}
diff --git a/.gitignore b/.gitignore
index 9d90afc5..fee2d8d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -162,3 +162,6 @@ flycheck_*.el
 .env*
 .environment
 .environment*
+
+# JetBrains Idea files
+.idea/
diff --git a/vllm-tensorizer/Dockerfile b/vllm-tensorizer/Dockerfile
index 9c4df618..8ce6ad15 100644
--- a/vllm-tensorizer/Dockerfile
+++ b/vllm-tensorizer/Dockerfile
@@ -1,58 +1,64 @@
-ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-22.04-58a49a2-base-cuda12.1.1-torch2.1.2-vision0.16.2-audio2.1.2-flash_attn2.4.2"
-
-FROM scratch as freezer
+ARG BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-67208ca-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1"
+FROM scratch AS freezer
 WORKDIR /
 COPY --chmod=755 freeze.sh /
 
-FROM ${BASE_IMAGE} as builder-base
-
-ARG MAX_JOBS=""
-
-# Dependencies requiring NVCC are built ahead of time in a separate stage
-# so that the ~2 GiB dev library installations don't have to be included
-# in the final image.
-RUN export \
-      CUDA_MAJOR_VERSION=$(echo $CUDA_VERSION | cut -d. -f1) \
-      CUDA_MINOR_VERSION=$(echo $CUDA_VERSION | cut -d. -f2) && \
-    export \
-      CUDA_PACKAGE_VERSION="${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}" && \
-    apt-get -qq update && apt-get install -y --no-install-recommends \
-      cuda-nvcc-${CUDA_PACKAGE_VERSION} \
-      cuda-nvml-dev-${CUDA_PACKAGE_VERSION} \
-      libcurand-dev-${CUDA_PACKAGE_VERSION} \
-      libcublas-dev-${CUDA_PACKAGE_VERSION} \
-      libcusparse-dev-${CUDA_PACKAGE_VERSION} \
-      libcusolver-dev-${CUDA_PACKAGE_VERSION} \
-      cuda-nvprof-${CUDA_PACKAGE_VERSION} \
-      cuda-profiler-api-${CUDA_PACKAGE_VERSION} \
-      libaio-dev \
-      ninja-build && \
-    apt-get clean
+FROM ${BASE_IMAGE} AS builder-base
+
+ARG MAX_JOBS="16"
 
 RUN ldconfig
 
 RUN apt-get -qq update && \
     apt-get -qq install -y --no-install-recommends \
-      python3-pip git ninja-build && \
+      python3-pip git ninja-build cmake && \
     apt-get clean && \
-    pip3 install -U --no-cache-dir pip packaging setuptools wheel
+    pip3 install -U --no-cache-dir pip packaging setuptools wheel setuptools_scm regex
+
+# Create the /wheels directory
+WORKDIR /wheels
+
+WORKDIR /workspace
+
 
-FROM alpine/git:2.36.3 as vllm-downloader
+FROM alpine/git:2.36.3 AS vllm-downloader
 WORKDIR /git
-ARG COMMIT_HASH
-RUN git clone --filter=blob:none --depth 1 --no-single-branch --no-checkout \
-      https://github.com/coreweave/vllm.git && \
+ARG VLLM_COMMIT
+RUN git clone --filter=tree:0 --no-single-branch --no-checkout \
+      https://github.com/vllm-project/vllm && \
     cd vllm && \
-    git checkout "${COMMIT_HASH}" && \
+    git checkout "${VLLM_COMMIT}" && \
     git submodule update --init --recursive --jobs 8 \
-      --depth 1 --filter=blob:none
+      --depth 1 --filter=tree:0
 
-FROM builder-base as vllm-builder
-WORKDIR /workspace
+
+FROM alpine/git:2.36.3 AS flashinfer-downloader
+WORKDIR /git
+ARG FLASHINFER_COMMIT
+RUN git clone --filter=tree:0 --no-single-branch --no-checkout \
+      https://github.com/flashinfer-ai/flashinfer && \
+    cd flashinfer && \
+    git checkout "${FLASHINFER_COMMIT}" && \
+    git submodule update --init --recursive --jobs 8 \
+      --depth 1 --filter=tree:0
+
+
+FROM builder-base AS vllm-builder
 RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \
     --mount=type=bind,from=freezer,target=/tmp/frozen,rw \
     /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \
-    LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}" \
+    if [ -z "$MAX_JOBS" ]; then unset MAX_JOBS; fi && \
+    python3 -m pip install --no-cache-dir py-cpuinfo && \
+    if [ -f 'use_existing_torch.py' ]; then \
+      python3 use_existing_torch.py; \
+    else \
+      git cat-file blob \
+        e489ad7a210f4234db696d1f2749d5f3662fa65b:use_existing_torch.py \
+        | python3 -; \
+    fi && \
+    USE_CUDNN=1 USE_CUSPARSELT=1 \
+    LIBRARY_PATH="/usr/local/cuda/lib64:${LIBRARY_PATH:+:$LIBRARY_PATH}" \
+    CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
     python3 -m pip wheel -w /wheels \
     -v --no-cache-dir --no-build-isolation --no-deps \
     -c /tmp/frozen/constraints.txt \
@@ -60,22 +66,48 @@ RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw
 WORKDIR /wheels
 
 
-FROM ${BASE_IMAGE} as base
+
+FROM builder-base AS flashinfer-builder
+RUN --mount=type=bind,from=flashinfer-downloader,source=/git/flashinfer,target=/workspace,rw \
+    --mount=type=bind,from=freezer,target=/tmp/frozen,rw \
+    /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/frozen/constraints.txt && \
+    export TORCH_CUDA_ARCH_LIST="$(echo "${TORCH_CUDA_ARCH_LIST}" | sed 's@[67]\.0 \+@@g')" && \
+    python3 -m flashinfer.aot && \
+    python3 -m pip wheel -w /wheels \
+    -v --no-cache-dir --no-build-isolation --no-deps \
+    -c /tmp/frozen/constraints.txt \
+    ./
+
+WORKDIR /wheels
+
+
+FROM ${BASE_IMAGE} AS base
 WORKDIR /workspace
 
 
-RUN apt-get -qq update && apt-get install -y --no-install-recommends curl && apt-get clean
+RUN apt-get -qq update && apt-get install -y --no-install-recommends curl libsodium23 && apt-get clean
 
 RUN --mount=type=bind,from=freezer,target=/tmp/frozen \
     /tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /tmp/constraints.txt
 
 
-RUN python3 -m pip install --no-cache-dir \
-    "fschat[model_worker] == 0.2.30" "triton == 2.1.0" \
-    -c /tmp/constraints.txt
-
 RUN --mount=type=bind,from=vllm-builder,source=/wheels,target=/tmp/wheels \
-    python3 -m pip install --no-cache-dir /tmp/wheels/*.whl -c /tmp/constraints.txt && \
+    python3 -m pip install --no-cache-dir "$(printf '%s[tensorizer]' /tmp/wheels/*.whl)" -c /tmp/constraints.txt
+
+RUN --mount=type=bind,from=flashinfer-builder,source=/wheels,target=/tmp/wheels \
+    python3 -m pip install --no-cache-dir /tmp/wheels/*.whl -c /tmp/constraints.txt
+
+# Copied from vLLM's Dockerfile
+ARG TARGETPLATFORM
+
+RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+      python3 -m pip install --no-cache-dir \
+        accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' \
+        boto3 runai-model-streamer runai-model-streamer[s3] -c /tmp/constraints.txt; \
+    else \
+      python3 -m pip install --no-cache-dir \
+        accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.3' 'timm==0.9.10' \
+        boto3 runai-model-streamer runai-model-streamer[s3] -c /tmp/constraints.txt; \
+    fi && \
    rm /tmp/constraints.txt
-
-EXPOSE 8080
\ No newline at end of file
+EXPOSE 8080
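
For reference, a single entry of the build matrix defined in .github/configurations/vllm-tensorizer.yml expands to a plain docker build roughly like the sketch below. This is for local reproduction only, assuming the repository root as the working directory; the image tag shown is an illustrative assumption, since actual tagging and publishing are handled by the shared build.yml workflow.

    # Hypothetical local equivalent of one matrix entry
    # (build-arg values copied verbatim from the configuration file above;
    # the tag mirrors the workflow's tag-suffix, but is only an assumption here)
    docker build \
        --build-arg VLLM_COMMIT='b6553be1bc75f046b00046a4ad7576364d03c835' \
        --build-arg FLASHINFER_COMMIT='v0.2.6.post1' \
        --build-arg BASE_IMAGE='ghcr.io/coreweave/ml-containers/torch-extras:es-compute-12.0-67208ca-nccl-cuda12.9.0-ubuntu22.04-nccl2.27.3-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1' \
        -t vllm-tensorizer:b6553be1bc75f046b00046a4ad7576364d03c835 \
        vllm-tensorizer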