invoke-ai · heathen711 · Jun 29, 2025 · Jun 29, 2025 · Jul 3, 2025 · Jul 3, 2025
@@ -39,6 +39,18 @@ jobs:
       - name: checkout
         uses: actions/checkout@v4
 
+      - name: Free up more disk space on the runner
+        # https://github.com/actions/runner-images/issues/2840#issuecomment-1284059930
+        # Removes preinstalled toolchains and the swapfile to make room for the image build.
+        run: |
+          echo "----- Free space before cleanup"
+          df -h
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+          # Guard the swapfile removal: swapoff exits non-zero when the file is absent,
+          # which would abort the whole step if the shell runs with errexit.
+          if [ -f /mnt/swapfile ]; then
+            sudo swapoff /mnt/swapfile
+            sudo rm -rf /mnt/swapfile
+          fi
+          echo "----- Free space after cleanup"
+          df -h
+
       - name: check for changed files
         if: ${{ inputs.always_run != true }}
         id: changed-files

@@ -44,7 +44,6 @@ ENV \
     UV_MANAGED_PYTHON=1 \
     UV_LINK_MODE=copy \
     UV_PROJECT_ENVIRONMENT=/opt/venv \
-    UV_INDEX="https://download.pytorch.org/whl/cu124" \
     INVOKEAI_ROOT=/invokeai \
     INVOKEAI_HOST=0.0.0.0 \
     INVOKEAI_PORT=9090 \
@@ -75,19 +74,17 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,source=uv.lock,target=uv.lock \
     # this is just to get the package manager to recognize that the project exists, without making changes to the docker layer
     --mount=type=bind,source=invokeai/version,target=invokeai/version \
-    if [ "$TARGETPLATFORM" = "linux/arm64" ] || [ "$GPU_DRIVER" = "cpu" ]; then UV_INDEX="https://download.pytorch.org/whl/cpu"; \
-    elif [ "$GPU_DRIVER" = "rocm" ]; then UV_INDEX="https://download.pytorch.org/whl/rocm6.2"; \
-    fi && \
-    uv sync --frozen
-
-# build patchmatch
-RUN cd /usr/lib/$(uname -p)-linux-gnu/pkgconfig/ && ln -sf opencv4.pc opencv.pc
-RUN python -c "from patchmatch import patch_match"
+    ulimit -n 30000 && \
+    uv sync --group $GPU_DRIVER --frozen
 
 # Link amdgpu.ids for ROCm builds
 # contributed by https://github.com/Rubonnek
 RUN mkdir -p "/opt/amdgpu/share/libdrm" &&\
-    ln -s "/usr/share/libdrm/amdgpu.ids" "/opt/amdgpu/share/libdrm/amdgpu.ids"
+    ln -s "/usr/share/libdrm/amdgpu.ids" "/opt/amdgpu/share/libdrm/amdgpu.ids" && groupadd render
+
+# build patchmatch
+RUN cd /usr/lib/$(uname -p)-linux-gnu/pkgconfig/ && ln -sf opencv4.pc opencv.pc
+RUN python -c "from patchmatch import patch_match"
 
 RUN mkdir -p ${INVOKEAI_ROOT} && chown -R ${CONTAINER_UID}:${CONTAINER_GID} ${INVOKEAI_ROOT}
 
@@ -106,8 +103,6 @@ COPY invokeai ${INVOKEAI_SRC}/invokeai
 RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
     --mount=type=bind,source=uv.lock,target=uv.lock \
-    if [ "$TARGETPLATFORM" = "linux/arm64" ] || [ "$GPU_DRIVER" = "cpu" ]; then UV_INDEX="https://download.pytorch.org/whl/cpu"; \
-    elif [ "$GPU_DRIVER" = "rocm" ]; then UV_INDEX="https://download.pytorch.org/whl/rocm6.2"; \
-    fi && \
+    ulimit -n 30000 && \
     uv pip install -e .
 
@@ -0,0 +1,136 @@
+# syntax=docker/dockerfile:1.4
+
+#### Web UI ------------------------------------
+
+# Build stage for the web frontend; the final image copies /build/dist out of this stage.
+FROM docker.io/node:22-slim AS web-builder
+# Expose pnpm's home directory on PATH.
+ENV PNPM_HOME="/pnpm"
+ENV PATH="$PNPM_HOME:$PATH"
+# Select pnpm 8.x through corepack, then enable corepack.
+RUN corepack use pnpm@8.x
+RUN corepack enable
+
+WORKDIR /build
+COPY invokeai/frontend/web/ ./
+# The pnpm store is a BuildKit cache mount, so downloaded packages are reused across builds
+# without being written into an image layer.
+RUN --mount=type=cache,target=/pnpm/store \
+    pnpm install --frozen-lockfile
+RUN npx vite build
+
+## Backend ---------------------------------------
+
+# Base image for the backend/runtime stage.
+FROM library/ubuntu:24.04
+
+# Build-time only (ARG, not ENV) so the setting does not leak into the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
+# Drop the base image's docker-clean hook and keep downloaded packages, so the apt cache mounts below are useful.
+RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
+# apt-get rather than apt: the `apt` front end does not offer a stable CLI for scripts.
+RUN --mount=type=cache,target=/var/cache/apt \
+    --mount=type=cache,target=/var/lib/apt \
+    apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    git \
+    gosu \
+    libglib2.0-0 \
+    libgl1 \
+    libglx-mesa0 \
+    build-essential \
+    libopencv-dev \
+    libstdc++-10-dev \
+    wget
+
+# Environment shared by the build steps below and by the running container.
+# NOTE(review): CONTAINER_UID / CONTAINER_GID are written as ${VAR:-1000}, but no ARG or earlier ENV
+# with those names is declared in this file, so they appear to always resolve to 1000 — confirm.
+ENV \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    VIRTUAL_ENV=/opt/venv \
+    INVOKEAI_SRC=/opt/invokeai \
+    PYTHON_VERSION=3.12 \
+    UV_PYTHON=3.12 \
+    UV_COMPILE_BYTECODE=1 \
+    UV_MANAGED_PYTHON=1 \
+    UV_LINK_MODE=copy \
+    UV_PROJECT_ENVIRONMENT=/opt/venv \
+    INVOKEAI_ROOT=/invokeai \
+    INVOKEAI_HOST=0.0.0.0 \
+    INVOKEAI_PORT=9090 \
+    PATH="/opt/venv/bin:$PATH" \
+    CONTAINER_UID=${CONTAINER_UID:-1000} \
+    CONTAINER_GID=${CONTAINER_GID:-1000}
+
+# Build argument naming the GPU backend; it is passed to `uv sync --group` and gates the ROCm install step.
+# Defaults to cuda.
+ARG GPU_DRIVER=cuda
+
+# Install `uv` for package management
+# The binaries are copied from the pinned upstream uv image (0.6.9) rather than installed via a script.
+COPY --from=ghcr.io/astral-sh/uv:0.6.9 /uv /uvx /bin/
+
+# Install python & allow non-root user to use it by traversing the /root dir without read permissions
+# Mode 711 on /root grants other users execute (traverse) permission only, not read/list.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv python install ${PYTHON_VERSION} && \
+    # chmod --recursive a+rX /root/.local/share/uv/python
+    chmod 711 /root
+
+WORKDIR ${INVOKEAI_SRC}
+
+# Install project's dependencies as a separate layer so they aren't rebuilt every commit.
+# bind-mount instead of copy to defer adding sources to the image until next layer.
+#
+# NOTE: there are no pytorch builds for arm64 + cuda, only cpu
+# x86_64/CUDA is the default
+#
+# `--group $GPU_DRIVER` chooses which dependency group is installed; `--frozen` uses uv.lock as-is.
+# NOTE(review): `ulimit -n 30000` raises the open-file limit for this RUN only, presumably so that
+# uv does not run into "too many open files" — confirm.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    # this is just to get the package manager to recognize that the project exists, without making changes to the docker layer
+    --mount=type=bind,source=invokeai/version,target=invokeai/version \
+    ulimit -n 30000 && \
+    uv sync --group $GPU_DRIVER --frozen
+
+# ROCm builds only: install AMD's amdgpu-install helper package, use it to install the ROCm
+# use case, add the `ubuntu` user to the GPU access groups, and register the ROCm library
+# directories with the dynamic linker. For any other GPU_DRIVER this step is a no-op.
+RUN --mount=type=cache,target=/var/cache/apt \
+    --mount=type=cache,target=/var/lib/apt \
+    if [ "$GPU_DRIVER" = "rocm" ]; then \
+    wget -O /tmp/amdgpu-install.deb \
+    https://repo.radeon.com/amdgpu-install/6.3.4/ubuntu/noble/amdgpu-install_6.3.60304-1_all.deb && \
+    apt-get install -y /tmp/amdgpu-install.deb && \
+    apt-get update && \
+    amdgpu-install --usecase=rocm -y && \
+    apt-get autoclean && \
+    apt-get clean && \
+    rm -rf /tmp/* /var/tmp/* && \
+    usermod -a -G render ubuntu && \
+    usermod -a -G video ubuntu && \
+    # printf instead of echo: whether echo expands backslash escapes depends on the shell in use
+    printf '\n/opt/rocm/lib\n/opt/rocm/lib64\n' >> /etc/ld.so.conf.d/rocm.conf && \
+    ldconfig && \
+    update-alternatives --auto rocm; \
+    fi
+
+## Heathen711: Leaving this for review input, will remove before merge
+# RUN --mount=type=cache,target=/var/cache/apt \
+#     --mount=type=cache,target=/var/lib/apt \
+#     if [ "$GPU_DRIVER" = "rocm" ]; then \
+#     groupadd render && \
+#     usermod -a -G render ubuntu && \
+#     usermod -a -G video ubuntu; \
+#     fi
+
+## Link amdgpu.ids for ROCm builds
+## contributed by https://github.com/Rubonnek
+# RUN mkdir -p "/opt/amdgpu/share/libdrm" &&\
+#     ln -s "/usr/share/libdrm/amdgpu.ids" "/opt/amdgpu/share/libdrm/amdgpu.ids"
+
+# build patchmatch
+# Expose opencv4.pc under the name opencv.pc in the architecture-specific pkgconfig directory.
+RUN cd /usr/lib/$(uname -p)-linux-gnu/pkgconfig/ && ln -sf opencv4.pc opencv.pc
+# Import patch_match once at build time.
+# NOTE(review): presumably the import triggers compilation of the native extension so it is baked into the image — confirm.
+RUN python -c "from patchmatch import patch_match"
+
+# Create the runtime root directory and give it to the container user.
+RUN mkdir -p ${INVOKEAI_ROOT} && chown -R ${CONTAINER_UID}:${CONTAINER_GID} ${INVOKEAI_ROOT}
+
+# WORKDIR is ${INVOKEAI_SRC} (/opt/invokeai), so the script lands at the ENTRYPOINT path below.
+COPY docker/docker-entrypoint.sh ./
+ENTRYPOINT ["/opt/invokeai/docker-entrypoint.sh"]
+CMD ["invokeai-web"]
+
+# --link requires buildkit w/ dockerfile syntax 1.4, does not work with podman
+COPY --link --from=web-builder /build/dist ${INVOKEAI_SRC}/invokeai/frontend/web/dist
+
+# add sources last to minimize image changes on code changes
+COPY invokeai ${INVOKEAI_SRC}/invokeai
+
+# this should not increase image size because we've already installed dependencies
+# in a previous layer
+# `-e` installs the project in editable mode, pointing at the sources copied above.
+# pyproject.toml and uv.lock are bind-mounted for this step only and are not added to the layer.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    ulimit -n 30000 && \
+    uv pip install -e .
+
@@ -47,8 +47,12 @@ services:
 
   invokeai-rocm:
     <<: *invokeai
-    devices:
-      - /dev/kfd:/dev/kfd
-      - /dev/dri:/dev/dri
+    environment:
+      # if set, CONTAINER_INVOKEAI_ROOT will override the Invoke runtime directory location *inside* the container
+      - INVOKEAI_ROOT=${CONTAINER_INVOKEAI_ROOT:-/invokeai}
+      # no value given: HF_HOME is passed through from the environment compose runs in
+      - HF_HOME
+      - AMD_VISIBLE_DEVICES=all
+      # GID the entrypoint assigns to the container's `render` group; it has to match the host's render group.
+      # NOTE(review): 993 is host-specific — check the host value (e.g. `getent group render`) and adjust.
+      - RENDER_GROUP_ID=993
+    # NOTE(review): presumably requires an `amd` container runtime to be registered with the Docker daemon — confirm.
+    runtime: amd
     profiles:
       - rocm
@@ -21,6 +21,13 @@ _=$(id ${USER} 2>&1) || useradd -u ${USER_ID} ${USER}
 # ensure the UID is correct
 usermod -u ${USER_ID} ${USER} 1>/dev/null
 
+## ROCM specific configuration
+# render group within the container must match the host render group
+# otherwise the container will not be able to access the host GPU.
+groupmod -g ${RENDER_GROUP_ID:-993} render
+usermod -a -G render ${USER}
+usermod -a -G video ${USER}
+
 ### Set the $PUBLIC_KEY env var to enable SSH access.
 # We do not install openssh-server in the image by default to avoid bloat.
 # but it is useful to have the full SSH server e.g. on Runpod.

@@ -13,7 +13,7 @@ run() {
 
   # parse .env file for build args
   build_args=$(awk '$1 ~ /=[^$]/ && $0 !~ /^#/ {print "--build-arg " $0 " "}' .env) &&
-  profile="$(awk -F '=' '/GPU_DRIVER/ {print $2}' .env)"
+  profile="$(awk -F '=' '/GPU_DRIVER=/ {print $2}' .env)"
 
   # default to 'cuda' profile
   [[ -z "$profile" ]] && profile="cuda"
@@ -30,7 +30,7 @@ run() {
 
   printf "%s\n" "starting service $service_name"
   docker compose --profile "$profile" up -d "$service_name"
-  docker compose logs -f
+  docker compose --profile "$profile" logs -f
 }
 
 run
@@ -73,16 +73,32 @@ dependencies = [
   "pypatchmatch",
   "python-multipart",
   "requests",
-  "semver~=3.0.1"
+  "semver~=3.0.1",
 ]
 
+[dependency-groups]
+cpu = ["torch==2.7.1+cpu", "torchvision==0.22.1+cpu"]
+cuda = ["torch==2.7.1+cu128", "torchvision==0.22.1+cu128"]
+rocm = ["torch==2.7.1+rocm6.3", "torchvision==0.22.1+rocm6.3"]
+
 [project.optional-dependencies]
 "xformers" = [
   # Core generation dependencies, pinned for reproducible builds.
   "xformers>=0.0.28.post1; sys_platform!='darwin'",
   # torch 2.4+cu carries its own triton dependency
 ]
 
+# The extras commented out below would allow installing the package with a specific torch backend, e.g.:
+# uv pip install .[rocm] --python 3.12 --python-preference only-managed --force-reinstall --index-strategy unsafe-best-match
+# The problem is that they break `uv lock --index-strategy unsafe-best-match`.
+# The following does work, though, because pyproject.toml defines the indexes:
+# uv pip install . torch==2.7.1+rocm6.3 --force-reinstall --index-strategy unsafe-best-match
+# Maybe we should update the docs to show this form instead of the --index way?
+
+# cpu = ["torch==2.7.1+cpu"]
+# cuda = ["torch==2.7.1+cu128"]
+# rocm = ["torch==2.7.1+rocm6.3"]
+
 "onnx" = ["onnxruntime"]
 "onnx-cuda" = ["onnxruntime-gpu"]
 "onnx-directml" = ["onnxruntime-directml"]
@@ -113,6 +129,24 @@ dependencies = [
 # Prevent opencv-python from ever being chosen during dependency resolution.
 # This prevents conflicts with opencv-contrib-python, which Invoke requires.
 override-dependencies = ["opencv-python; sys_platform=='never'"]
+conflicts = [[{ group = "cpu" }, { group = "cuda" }, { group = "rocm" }]]
+
+# Because of these indexes, future locks must be generated with `uv lock --index-strategy unsafe-best-match`.
+# If you update these, make sure to update docker/Dockerfile as well.
+[[tool.uv.index]]
+name = "torch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+group = "cpu"
+
+[[tool.uv.index]]
+name = "torch-cuda"
+url = "https://download.pytorch.org/whl/cu128"
+group = "cuda"
+
+[[tool.uv.index]]
+name = "torch-rocm"
+url = "https://download.pytorch.org/whl/rocm6.3"
+group = "rocm"
 
 
 [project.scripts]