Commit 497a91e (parent: 943ffa5)

[CI] Update FlashInfer to 0.2.6.post1 (#19297)

Signed-off-by: mgoin <mgoin64@gmail.com>


docker/Dockerfile: 17 additions & 15 deletions
@@ -243,30 +243,32 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
     --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
 # If we need to build FlashInfer wheel before its release:
-# $ export FLASHINFER_ENABLE_AOT=1
 # $ # Note we remove 7.0 from the arch list compared to the list below, since FlashInfer only supports sm75+
-# $ export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0+PTX'
+# $ export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a'
 # $ git clone https://github.com/flashinfer-ai/flashinfer.git --recursive
 # $ cd flashinfer
-# $ git checkout 524304395bd1d8cd7d07db083859523fcaa246a4
-# $ rm -rf build
-# $ python3 setup.py bdist_wheel --dist-dir=dist --verbose
-# $ ls dist
-# $ # upload the wheel to a public location, e.g. https://wheels.vllm.ai/flashinfer/524304395bd1d8cd7d07db083859523fcaa246a4/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl
+# $ git checkout v0.2.6.post1
+# $ python -m flashinfer.aot
+# $ python -m build --no-isolation --wheel
+# $ ls -la dist
+# -rw-rw-r-- 1 mgoin mgoin 205M Jun 9 18:03 flashinfer_python-0.2.6.post1-cp39-abi3-linux_x86_64.whl
+# $ # upload the wheel to a public location, e.g. https://wheels.vllm.ai/flashinfer/v0.2.6.post1/flashinfer_python-0.2.6.post1-cp39-abi3-linux_x86_64.whl
 
 RUN --mount=type=cache,target=/root/.cache/uv \
     . /etc/environment && \
     if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-        # FlashInfer alreary has a wheel for PyTorch 2.7.0 and CUDA 12.8. This is enough for CI use
+        # FlashInfer already has a wheel for PyTorch 2.7.0 and CUDA 12.8. This is enough for CI use
         if [[ "$CUDA_VERSION" == 12.8* ]]; then \
-            uv pip install --system https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.5%2Bcu128torch2.7-cp38-abi3-linux_x86_64.whl; \
+            uv pip install --system https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl; \
         else \
-            export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX'; \
-            CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
-            if [ "$CUDA_MAJOR" -lt 12 ]; then \
-                export FLASHINFER_ENABLE_SM90=0; \
-            fi; \
-            uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@21ea1d2545f74782b91eb8c08fd503ac4c0743fc" ; \
+            export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0a 10.0a' && \
+            git clone https://github.com/flashinfer-ai/flashinfer.git --single-branch --branch v0.2.6.post1 --recursive && \
+            # Needed to build AOT kernels
+            (cd flashinfer && \
+                python3 -m flashinfer.aot && \
+                uv pip install --system --no-build-isolation . \
+            ) && \
+            rm -rf flashinfer; \
         fi \
     fi
 COPY examples examples
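
For reference, a minimal smoke test of the resulting image, assuming it was tagged vllm-ci (a hypothetical tag, not part of this commit) and that flashinfer exposes the usual __version__ attribute:

# Hypothetical smoke test; "vllm-ci" is an illustrative image tag,
# not something defined by this commit.
docker run --rm vllm-ci \
    python3 -c "import flashinfer; print(flashinfer.__version__)"
# Expect 0.2.6.post1, whether installed from the pinned cu128 wheel
# or from the AOT source build in the else branch.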
