menloresearch
diff --git a/‎.github/runners/linux/Dockerfile.multi
Lines changed: 20 additions & 7 deletions b/‎.github/runners/linux/Dockerfile.multi
Lines changed: 20 additions & 7 deletions
diff --git a/‎.github/runners/windows/Dockerfile
Lines changed: 115 additions & 0 deletions b/‎.github/runners/windows/Dockerfile
Lines changed: 115 additions & 0 deletions
@@ -1,6 +1,6 @@
 # Multi-stage Dockerfile
 ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
-ARG BASE_TAG=24.03-py3
+ARG BASE_TAG=24.05-py3
 ARG DEVEL_IMAGE=devel
 
 FROM ${BASE_IMAGE}:${BASE_TAG} as base
@@ -22,6 +22,10 @@ RUN bash ./install_cmake.sh && rm install_cmake.sh
 COPY docker/common/install_ccache.sh install_ccache.sh
 RUN bash ./install_ccache.sh && rm install_ccache.sh
 
+# Only takes effect when the base image is 12.4.0-devel-centos7.
+COPY docker/common/install_cuda_toolkit.sh install_cuda_toolkit.sh
+RUN bash ./install_cuda_toolkit.sh && rm install_cuda_toolkit.sh
+
 # Download & install internal TRT release
 ARG TRT_VER
 ARG CUDA_VER
@@ -48,9 +52,7 @@ RUN bash ./install_mpi4py.sh && rm install_mpi4py.sh
 # Install PyTorch
 ARG TORCH_INSTALL_TYPE="skip"
 COPY docker/common/install_pytorch.sh install_pytorch.sh
-# Apply PyTorch patch for supporting compiling with CUDA 12.4 from source codes
-COPY docker/common/pytorch_pr_116072.patch /tmp/pytorch_pr_116072.patch
-RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh /tmp/pytorch_pr_116072.patch
+RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
 
 COPY setup.py requirements.txt requirements-dev.txt ./
 
@@ -108,19 +110,30 @@ COPY tensorrt_llm tensorrt_llm
 COPY 3rdparty 3rdparty
 COPY setup.py requirements.txt requirements-dev.txt ./
 
+# Create cache directories for pip and ccache
+RUN mkdir -p /root/.cache/pip /root/.cache/ccache
+ENV CCACHE_DIR=/root/.cache/ccache
+# Build the TRT-LLM wheel
 ARG BUILD_WHEEL_ARGS="--clean --trt_root /usr/local/tensorrt --python_bindings --benchmarks"
-RUN python3 scripts/build_wheel.py ${BUILD_WHEEL_ARGS}
+RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=/root/.cache/ccache \
+    python3 scripts/build_wheel.py ${BUILD_WHEEL_ARGS}
 
 FROM ${DEVEL_IMAGE} as release
 
+# Create a cache directory for pip
+RUN mkdir -p /root/.cache/pip
+
 WORKDIR /app/tensorrt_llm
 COPY --from=wheel /src/tensorrt_llm/build/tensorrt_llm*.whl .
-RUN pip install tensorrt_llm*.whl --extra-index-url https://pypi.nvidia.com && \
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install tensorrt_llm*.whl --extra-index-url https://pypi.nvidia.com && \
     rm tensorrt_llm*.whl
 COPY README.md ./
 COPY docs docs
 COPY cpp/include include
-RUN ln -sv $(python3 -c 'import site; print(f"{site.getsitepackages()[0]}/tensorrt_llm/libs")') lib && \
+RUN ln -sv $(python3 -c 'import site; print(f"{site.getsitepackages()[0]}/tensorrt_llm/bin")') bin && \
+    test -f bin/executorWorker && \
+    ln -sv $(python3 -c 'import site; print(f"{site.getsitepackages()[0]}/tensorrt_llm/libs")') lib && \
     test -f lib/libnvinfer_plugin_tensorrt_llm.so && \
     ln -sv lib/libnvinfer_plugin_tensorrt_llm.so lib/libnvinfer_plugin_tensorrt_llm.so.9 && \
     echo "/app/tensorrt_llm/lib" > /etc/ld.so.conf.d/tensorrt_llm.conf && \
 
@@ -0,0 +1,115 @@
+# https://learn.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2022
+
+# Use the Windows Server Core LTSC 2022 image (tag ltsc2022); this stage is named "devel".
+FROM mcr.microsoft.com/windows/servercore:ltsc2022 AS devel
+
+# Run every subsequent shell-form RUN through PowerShell. The command prefix makes
+# PowerShell errors terminating ('Stop') and silences progress output.
+SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"]
+
+# -----------------------------------------------------------------------------
+# Create a working directory
+# WORKDIR creates the directory if it is missing and makes it the base for the
+# relative paths used by later instructions.
+# NOTE(review): the path is written with four backslashes; confirm it resolves to
+# the same workspace directory that the COPY destinations in this file use.
+
+WORKDIR "C:\\\\workspace"
+
+# -----------------------------------------------------------------------------
+# Install runtime dependencies
+
+# Copy the runtime setup script into the workspace so the next step can execute it.
+COPY setup_env.ps1 C:\\workspace\\setup_env.ps1
+# TRT is installed along with build-time dependencies, so it is skipped here
+# (-skipTRT); CUDNN is skipped as well (-skipCUDNN).
+RUN C:\workspace\setup_env.ps1 -skipTRT -skipCUDNN
+# The script is only needed during the build; delete it from the workspace.
+RUN Remove-Item "C:\workspace\setup_env.ps1" -Force
+# If enabling CUDNN, CUDNN paths are populated in the env variable CUDNN, add it to PATH
+# RUN [Environment]::SetEnvironmentVariable('Path', $Env:Path + ';' + $Env:CUDNN, [EnvironmentVariableTarget]::Machine)
+
+# -----------------------------------------------------------------------------
+# Install build-time dependencies
+
+# Copy the build-time setup script into the workspace so the next step can execute it.
+COPY setup_build_env.ps1 C:\\workspace\\setup_build_env.ps1
+# TRT is installed in workspace (the location is passed to the script via -TRTPath)
+RUN C:\workspace\setup_build_env.ps1 -TRTPath 'C:\\workspace'
+# The script is only needed during the build; delete it from the workspace.
+RUN Remove-Item "C:\workspace\setup_build_env.ps1" -Force
+
+# Add binaries to Path
+# Appends the CMake bin directory to the machine-level Path so it persists for
+# later build steps and for containers started from this image.
+RUN [Environment]::SetEnvironmentVariable('Path', $Env:Path + ';C:\Program Files\CMake\bin', [EnvironmentVariableTarget]::Machine)
+
+# -----------------------------------------------------------------------------
+
+# Install Vim (optional convenience; this step can be removed)
+# and add binaries to Path
+# Download, install and cleanup happen in a single RUN, so the installer is not
+# left behind in the image layer.
+# NOTE(review): the installer is downloaded without any checksum verification.
+
+# Start-Process -Wait blocks until the installer exits; '/S' is presumably the
+# installer's silent-mode switch - confirm against the gvim installer.
+RUN Invoke-WebRequest -Uri https://ftp.nluug.nl/pub/vim/pc/gvim90.exe \
+    -OutFile "install_vim.exe"; \
+    Start-Process install_vim.exe -Wait -ArgumentList '/S'; \
+    Remove-Item install_vim.exe -Force ; \
+    [Environment]::SetEnvironmentVariable('Path', $Env:Path + ';C:\Program Files (x86)\Vim\vim90', [EnvironmentVariableTarget]::Machine)
+# -----------------------------------------------------------------------------
+
+# Install Chocolatey
+# Chocolatey is a package manager for Windows
+
+# If you try to install Chocolatey 2.0.0, it fails on .NET Framework 4.8 installation
+# https://stackoverflow.com/a/76470753
+# The version is therefore pinned through the chocolateyVersion environment variable.
+ENV chocolateyVersion=1.4.0
+
+# https://docs.chocolatey.org/en-us/choco/setup#install-with-powershell.exe
+# Steps: relax the execution policy for this process only, add TLS 1.2 (value 3072)
+# to the allowed security protocols, then download and execute the install script.
+# NOTE(review): the install script is fetched and executed without integrity verification.
+RUN Set-ExecutionPolicy Bypass -Scope Process -Force; \
+ [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; \
+ iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+
+# -----------------------------------------------------------------------------
+
+# Install Git via Chocolatey
+# -y confirms all prompts so the install runs unattended.
+# NOTE(review): no package version is pinned, so the installed Git version can
+# differ between builds.
+RUN choco install git -y
+
+# -----------------------------------------------------------------------------
+# Install CUDA 11.8 NVTX
+# Everything happens in a single RUN so that the CUDA network installer, 7zr.exe
+# and the extracted MSI are deleted in the same layer that created them; removing
+# them in a later RUN would leave them stored in the earlier image layers.
+# Steps:
+#   1. Download the CUDA 11.8 network installer and the standalone 7-Zip extractor.
+#   2. Extract only the NVTX MSI from the network installer.
+#   3. Install the MSI quietly with msiexec (no restart).
+#   4. Delete the three temporary files.
+# PowerShell does not treat a non-zero exit code from a native program as an
+# error, so $LASTEXITCODE is checked explicitly after 7zr.exe and msiexec to keep
+# the build failing when either of them fails.
+RUN Invoke-WebRequest -Uri https://developer.download.nvidia.com/compute/cuda/11.8.0/network_installers/cuda_11.8.0_windows_network.exe \
+    -OutFile cuda_11.8.0_windows_network.exe; \
+    Invoke-WebRequest -Uri https://7-zip.org/a/7zr.exe \
+    -OutFile 7zr.exe; \
+    .\7zr.exe e -i!'nsight_nvtx\nsight_nvtx\NVIDIA NVTX Installer.x86_64.Release.v1.21018621.Win64.msi' cuda_11.8.0_windows_network.exe; \
+    if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }; \
+    cmd.exe /S /C "msiexec.exe /i 'NVIDIA NVTX Installer.x86_64.Release.v1.21018621.Win64.msi' /norestart /quiet"; \
+    if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }; \
+    Remove-Item 'NVIDIA NVTX Installer.x86_64.Release.v1.21018621.Win64.msi' -Force; \
+    Remove-Item 7zr.exe -Force; \
+    Remove-Item cuda_11.8.0_windows_network.exe -Force
+
+# -----------------------------------------------------------------------------
+
+# Define the entry point for the docker container.
+# This entry point launches the 64-bit PowerShell developer shell: VsDevCmd.bat is
+# invoked with -arch=amd64 and powershell.exe is started afterwards.
+# We need to launch with amd64 arch, otherwise PowerShell defaults to x86 32-bit build commands, which are incompatible with CUDA.
+# NOTE(review): a CMD is defined at the end of this file; Docker appends an
+# exec-form CMD to an exec-form ENTRYPOINT as extra arguments - confirm the
+# combined command line behaves as intended.
+ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\\Common7\\Tools\\VsDevCmd.bat", "-arch=amd64", "&&", "powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass"]
+
+# -----------------------------------------------------------------------------
+# COPY requirements-windows.txt C:\\workspace\\requirements-windows.txt
+# COPY requirements-dev-windows.txt C:\\workspace\\requirements-dev-windows.txt
+# RUN python3 -m pip --no-cache-dir install -r C:\workspace\requirements-dev-windows.txt
+# RUN Remove-Item "C:\workspace\requirements-windows.txt" -Force
+# RUN Remove-Item "C:\workspace\requirements-dev-windows.txt" -Force
+
+# Copy the Windows requirement lists into the working directory. COPY is used
+# instead of ADD because these are plain local files and none of ADD's extra
+# behavior (archive extraction, URL fetching) is wanted.
+COPY ./requirements-dev-windows.txt ./requirements-dev-windows.txt
+# presumably referenced from requirements-dev-windows.txt - confirm
+COPY ./requirements-windows.txt ./requirements-windows.txt
+
+# Install the development requirements; --no-cache-dir keeps pip's download cache
+# out of the image layer.
+RUN python3 -m pip install --no-cache-dir -r .\requirements-dev-windows.txt
+
+
+# Version of the GitHub Actions runner release to download; override it with
+# --build-arg RUNNER_VERSION=<version>. The RUN step below reads it as $env:RUNNER_VERSION.
+ARG RUNNER_VERSION=2.317.0
+
+# Disabled copy of the ENTRYPOINT that is already defined earlier in this file:
+# Define the entry point for the docker container.
+# This entry point launches the 64-bit PowerShell developer shell.
+# We need to launch with amd64 arch, otherwise PowerShell defaults to x86 32-bit build commands, which are incompatible with CUDA.
+# ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\\Common7\\Tools\\VsDevCmd.bat", "-arch=amd64", "&&", "powershell.exe", "-NoLogo", "-ExecutionPolicy", "Bypass"]
+
+# Download the runner zip for RUNNER_VERSION, unpack it into ./actions-runner and
+# delete the archive in the same RUN so it is not kept in the image layer.
+RUN Invoke-WebRequest \
+      -Uri https://github.com/actions/runner/releases/download/v$env:RUNNER_VERSION/actions-runner-win-x64-$env:RUNNER_VERSION.zip \
+      -OutFile runner.zip; \
+    Expand-Archive -Path ./runner.zip -DestinationPath ./actions-runner; \
+    Remove-Item -Path .\runner.zip;
+
+# Copy the runner launch script into the working directory. COPY is used instead
+# of ADD because this is a plain local file and none of ADD's extra behavior
+# (archive extraction, URL fetching) is wanted.
+COPY runner.ps1 ./runner.ps1
+
+# Set the LongPathsEnabled registry value to 1 (DWORD) under the FileSystem
+# control key; -Force overwrites the value if it already exists.
+RUN powershell -Command New-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 -PropertyType DWORD -Force
+
+# Default command: run runner.ps1 from the working directory with PowerShell.
+CMD ["powershell.exe", "-ExecutionPolicy", "Unrestricted", "-File", ".\\runner.ps1"]