From 468c4d34c74f9675d4820a120e81637333c244d5 Mon Sep 17 00:00:00 2001 From: Rohin Garg Date: Thu, 3 Jul 2025 18:34:55 +0000 Subject: [PATCH 01/19] init draft of ga Signed-off-by: Rohin Garg --- .github/workflows/docker-publish.yml | 69 ++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 .github/workflows/docker-publish.yml diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 00000000000..1fa444e4698 --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,69 @@ +name: Build & Publish vLLM Docker + +on: + push: + branches: + - main + +permissions: + id-token: write + contents: read + +concurrency: + group: deployment + cancel-in-progress: false + +jobs: + build-and-push: + runs-on: ubuntu-22.04 + timeout-minutes: 360 + + env: + REGION: us-central1 + REPO: vllm + IMAGE: vllm-forked + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Authenticate to GCP + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} + service_account: ${{ secrets.GCP_SA_EMAIL }} + + - name: "Set up Cloud SDK" + uses: "google-github-actions/setup-gcloud@v2" + with: + install_components: "beta" + + - name: Configure Docker credential helper + run: | + gcloud --quiet auth configure-docker ${REGION}-docker.pkg.dev + gcloud --quiet auth configure-docker gcr.io + + - name: Determine tags + id: tags + run: | + SHA=$(git rev-parse --short=7 HEAD) + echo "sha=$SHA" >> $GITHUB_OUTPUT + echo "tags=${SHA},latest" >> $GITHUB_OUTPUT + + - name: Build & Push to Staging, Prod, and GCR + uses: docker/build-push-action@v5 + with: + context: . + file: ./Dockerfile + platforms: linux/amd64 + push: true + build-args: | + HF_HOME=/huggingface/cache + tags: | + ${REGION}-docker.pkg.dev/${{ secrets.PROJECT_ID_STAGING }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + ${REGION}-docker.pkg.dev/${{ secrets.PROJECT_ID_STAGING }}/${{ env.REPO }}/${{ env.IMAGE }}:latest + ${REGION}-docker.pkg.dev/${{ secrets.PROJECT_ID_PROD }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + ${REGION}-docker.pkg.dev/${{ secrets.PROJECT_ID_PROD }}/${{ env.REPO }}/${{ env.IMAGE }}:latest + gcr.io/${{ secrets.PROJECT_ID_PROD }}/vllm/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + gcr.io/${{ secrets.PROJECT_ID_PROD }}/vllm/${{ env.IMAGE }}:latest + From c4bb71533966644faf878b8c4467999598206177 Mon Sep 17 00:00:00 2001 From: Rohin Garg Date: Fri, 4 Jul 2025 00:38:40 +0000 Subject: [PATCH 02/19] finished ga/dockerfile setup Signed-off-by: Rohin Garg --- .github/workflows/docker-publish.yml | 16 +++++++++------- docker/Dockerfile_internal | 10 ++++++++++ 2 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 docker/Dockerfile_internal diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 1fa444e4698..1f000664a2e 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -22,6 +22,8 @@ jobs: REGION: us-central1 REPO: vllm IMAGE: vllm-forked + STAGING_PROJECT: character-ai-staging + PROD_PROJECT: character-ai steps: - name: Checkout code @@ -54,16 +56,16 @@ jobs: uses: docker/build-push-action@v5 with: context: . - file: ./Dockerfile + file: ./docker/Dockerfile_internal platforms: linux/amd64 push: true build-args: | HF_HOME=/huggingface/cache tags: | - ${REGION}-docker.pkg.dev/${{ secrets.PROJECT_ID_STAGING }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} - ${REGION}-docker.pkg.dev/${{ secrets.PROJECT_ID_STAGING }}/${{ env.REPO }}/${{ env.IMAGE }}:latest - ${REGION}-docker.pkg.dev/${{ secrets.PROJECT_ID_PROD }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} - ${REGION}-docker.pkg.dev/${{ secrets.PROJECT_ID_PROD }}/${{ env.REPO }}/${{ env.IMAGE }}:latest - gcr.io/${{ secrets.PROJECT_ID_PROD }}/vllm/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} - gcr.io/${{ secrets.PROJECT_ID_PROD }}/vllm/${{ env.IMAGE }}:latest + ${REGION}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + ${REGION}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest + ${REGION}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + ${REGION}-docker.pkg.dev/${{ env.PROD_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest + gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:latest diff --git a/docker/Dockerfile_internal b/docker/Dockerfile_internal new file mode 100644 index 00000000000..73f82ba796d --- /dev/null +++ b/docker/Dockerfile_internal @@ -0,0 +1,10 @@ +FROM us-central1-docker.pkg.dev/character-ai/vllm/vllm-forked:latest as builder + +COPY . /tmp/vllm +RUN pip uninstall -y vllm +RUN pip install /tmp/vllm +RUN rm -rf /tmp/vllm +RUN python3 -c "import vllm; print('Custom vLLM loaded successfully')" + + +ENV HF_HOME=/huggingface/cache From 35329a324fa009aadc2a3e5cec561dc7b41d253c Mon Sep 17 00:00:00 2001 From: Rohin Garg Date: Mon, 7 Jul 2025 03:22:27 +0000 Subject: [PATCH 03/19] add manual trigger for workflow Signed-off-by: Rohin Garg --- .github/workflows/docker-publish.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 1f000664a2e..b46394a0ec1 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -4,6 +4,7 @@ on: push: branches: - main + workflow_dispatch: permissions: id-token: write From 97c3441db8f59007a901a34b17e4b51ce80cf8e1 Mon Sep 17 00:00:00 2001 From: Rohin Garg Date: Mon, 7 Jul 2025 16:50:14 +0000 Subject: [PATCH 04/19] temp add pull req as trigger Signed-off-by: Rohin Garg --- .github/workflows/docker-publish.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index b46394a0ec1..5ec66b4b621 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -4,6 +4,7 @@ on: push: branches: - main + pull_request: workflow_dispatch: permissions: From 99adfb3b7a82d2b0f5e1367f06405d638182e99d Mon Sep 17 00:00:00 2001 From: Rohin Garg Date: Mon, 7 Jul 2025 16:56:19 +0000 Subject: [PATCH 05/19] fixed REGION env variable Signed-off-by: Rohin Garg --- .github/workflows/docker-publish.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 5ec66b4b621..60272712ebb 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -21,7 +21,7 @@ jobs: timeout-minutes: 360 env: - REGION: us-central1 + REGION : us-central1 REPO: vllm IMAGE: vllm-forked STAGING_PROJECT: character-ai-staging @@ -44,7 +44,7 @@ jobs: - name: Configure Docker credential helper run: | - gcloud --quiet auth configure-docker ${REGION}-docker.pkg.dev + gcloud --quiet auth configure-docker ${{ env.REGION }}-docker.pkg.dev gcloud --quiet auth configure-docker gcr.io - name: Determine tags @@ -64,10 +64,10 @@ jobs: build-args: | HF_HOME=/huggingface/cache tags: | - ${REGION}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} - ${REGION}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest - ${REGION}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} - ${REGION}-docker.pkg.dev/${{ env.PROD_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest + ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest + ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + ${{ env.REGION }}-docker.pkg.dev/${{ env.PROD_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:latest From f97a9390c9dd615ee071e84f804c6add5672223f Mon Sep 17 00:00:00 2001 From: Rohin Garg Date: Mon, 7 Jul 2025 17:10:35 +0000 Subject: [PATCH 06/19] add cleanup steps to free up disk space Signed-off-by: Rohin Garg --- .github/workflows/docker-publish.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 60272712ebb..0216642a10c 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -31,6 +31,21 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Free up disk space + run: | + echo "Disk usage before cleanup:" + df -h + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo apt-get clean + sudo docker system prune -af + sudo docker volume prune -f + echo "Disk usage after cleanup:" + df -h + - name: Authenticate to GCP uses: google-github-actions/auth@v2 with: From 7da60f098aa219c98c8af561dcced3ddb45c51f3 Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Mon, 7 Jul 2025 10:39:01 -0700 Subject: [PATCH 07/19] turned off cache to optimize disk usage --- .github/workflows/docker-publish.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 0216642a10c..b10bbec1ad1 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -76,6 +76,7 @@ jobs: file: ./docker/Dockerfile_internal platforms: linux/amd64 push: true + no-cache: true build-args: | HF_HOME=/huggingface/cache tags: | From 8fe521cdc9549fa7e1067ce40353f49e5d684563 Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Mon, 7 Jul 2025 10:46:48 -0700 Subject: [PATCH 08/19] Update Dockerfile_internal to copy to /mnt/ --- docker/Dockerfile_internal | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile_internal b/docker/Dockerfile_internal index 73f82ba796d..ea6aff8c434 100644 --- a/docker/Dockerfile_internal +++ b/docker/Dockerfile_internal @@ -1,9 +1,9 @@ FROM us-central1-docker.pkg.dev/character-ai/vllm/vllm-forked:latest as builder -COPY . /tmp/vllm +COPY . /mnt/vllm RUN pip uninstall -y vllm -RUN pip install /tmp/vllm -RUN rm -rf /tmp/vllm +RUN pip install /mnt/vllm +RUN rm -rf /mnt/vllm RUN python3 -c "import vllm; print('Custom vLLM loaded successfully')" From 63428b507d223dd9e5024a9b7c1e6e2bccca5516 Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Mon, 7 Jul 2025 11:23:40 -0700 Subject: [PATCH 09/19] use larger runner --- .github/workflows/docker-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index b10bbec1ad1..57260967030 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -17,7 +17,7 @@ concurrency: jobs: build-and-push: - runs-on: ubuntu-22.04 + runs-on: ubuntu-22.04-16core timeout-minutes: 360 env: From eae87fd9236555b4f495758e0e14931ec45b8c7c Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Mon, 7 Jul 2025 11:35:53 -0700 Subject: [PATCH 10/19] rollback runner change and make cleaning more aggressive --- .github/workflows/docker-publish.yml | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 57260967030..f9f9ca9bd6c 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -17,7 +17,7 @@ concurrency: jobs: build-and-push: - runs-on: ubuntu-22.04-16core + runs-on: ubuntu-22.04 timeout-minutes: 360 env: @@ -35,14 +35,35 @@ jobs: run: | echo "Disk usage before cleanup:" df -h + + # Remove unnecessary packages and files + sudo apt-get remove -y '^aspnetcore-.*' '^dotnet-.*' '^llvm-.*' '^php.*' '^mysql-.*' '^postgresql-.*' + sudo apt-get autoremove -y + sudo apt-get autoclean + + # Remove large directories sudo rm -rf /usr/local/lib/android sudo rm -rf /usr/share/dotnet sudo rm -rf /opt/ghc sudo rm -rf /usr/local/share/boost sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/local/graalvm/ + sudo rm -rf /usr/local/.ghcup/ + sudo rm -rf /usr/local/share/powershell + sudo rm -rf /usr/local/share/chromium + sudo rm -rf /usr/local/lib/node_modules + + # Clean package caches sudo apt-get clean - sudo docker system prune -af - sudo docker volume prune -f + + # Clean Docker + sudo docker system prune -af --volumes + sudo docker builder prune -af + + # Clean temp files + sudo rm -rf /tmp/* + sudo rm -rf /var/tmp/* + echo "Disk usage after cleanup:" df -h From a3616b6098f667445668fb17bc8fc3b701b45adf Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Mon, 7 Jul 2025 12:26:54 -0700 Subject: [PATCH 11/19] add more disk cleanup steps --- .github/workflows/docker-publish.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index f9f9ca9bd6c..af7b70bf215 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -37,11 +37,26 @@ jobs: df -h # Remove unnecessary packages and files + sudo apt install aptitude -y >/dev/null 2>&1 + sudo aptitude purge aria2 ansible azure-cli shellcheck rpm xorriso zsync \ + esl-erlang firefox gfortran-8 gfortran-9 google-chrome-stable \ + imagemagick \ + libmagickcore-dev libmagickwand-dev libmagic-dev ant ant-optional kubectl \ + mercurial apt-transport-https mono-complete libmysqlclient \ + unixodbc-dev yarn chrpath libssl-dev libxft-dev \ + libfreetype6 libfreetype6-dev libfontconfig1 libfontconfig1-dev \ + snmp pollinate libpq-dev postgresql-client powershell ruby-full \ + sphinxsearch subversion mongodb-org azure-cli microsoft-edge-stable \ + -y -f >/dev/null 2>&1 + sudo aptitude purge microsoft-edge-stable -f -y >/dev/null 2>&1 || true + sudo apt purge microsoft-edge-stable -f -y >/dev/null 2>&1 || true + sudo apt-get remove -y '^aspnetcore-.*' '^dotnet-.*' '^llvm-.*' '^php.*' '^mysql-.*' '^postgresql-.*' sudo apt-get autoremove -y sudo apt-get autoclean # Remove large directories + sudo rm -rf /opt/hostedtoolcache sudo rm -rf /usr/local/lib/android sudo rm -rf /usr/share/dotnet sudo rm -rf /opt/ghc @@ -66,6 +81,12 @@ jobs: echo "Disk usage after cleanup:" df -h + + - name: Check disk space + run: | + sudo dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -nr | head + df . -h + sudo du /usr/ -hx -d 4 --threshold=1G | sort -hr | head - name: Authenticate to GCP uses: google-github-actions/auth@v2 From 42b88b1e0fe884b6e97ff38196c4afc27168e271 Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Mon, 7 Jul 2025 13:25:42 -0700 Subject: [PATCH 12/19] update to use correct larger runner --- .github/workflows/docker-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index af7b70bf215..e28782d5d37 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -17,7 +17,7 @@ concurrency: jobs: build-and-push: - runs-on: ubuntu-22.04 + runs-on: large_ubuntu_4cpu timeout-minutes: 360 env: From f86b244bd00fbb155f4dc639243f2daf7daf9f97 Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Mon, 7 Jul 2025 14:54:29 -0700 Subject: [PATCH 13/19] add caching back, clean up the cleaning code --- .github/workflows/docker-publish.yml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index e28782d5d37..c45ec7b0241 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -35,21 +35,6 @@ jobs: run: | echo "Disk usage before cleanup:" df -h - - # Remove unnecessary packages and files - sudo apt install aptitude -y >/dev/null 2>&1 - sudo aptitude purge aria2 ansible azure-cli shellcheck rpm xorriso zsync \ - esl-erlang firefox gfortran-8 gfortran-9 google-chrome-stable \ - imagemagick \ - libmagickcore-dev libmagickwand-dev libmagic-dev ant ant-optional kubectl \ - mercurial apt-transport-https mono-complete libmysqlclient \ - unixodbc-dev yarn chrpath libssl-dev libxft-dev \ - libfreetype6 libfreetype6-dev libfontconfig1 libfontconfig1-dev \ - snmp pollinate libpq-dev postgresql-client powershell ruby-full \ - sphinxsearch subversion mongodb-org azure-cli microsoft-edge-stable \ - -y -f >/dev/null 2>&1 - sudo aptitude purge microsoft-edge-stable -f -y >/dev/null 2>&1 || true - sudo apt purge microsoft-edge-stable -f -y >/dev/null 2>&1 || true sudo apt-get remove -y '^aspnetcore-.*' '^dotnet-.*' '^llvm-.*' '^php.*' '^mysql-.*' '^postgresql-.*' sudo apt-get autoremove -y @@ -118,7 +103,6 @@ jobs: file: ./docker/Dockerfile_internal platforms: linux/amd64 push: true - no-cache: true build-args: | HF_HOME=/huggingface/cache tags: | From 954ec6058e5eddc971427af5c1bcbcee20100d62 Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Tue, 8 Jul 2025 12:55:28 -0700 Subject: [PATCH 14/19] add fail-fast false --- .github/workflows/docker-publish.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index c45ec7b0241..2c97d26bcf2 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -17,6 +17,8 @@ concurrency: jobs: build-and-push: + strategy: + fail-fast: false runs-on: large_ubuntu_4cpu timeout-minutes: 360 From 7477482c2c10732a1aa0d20070d31d651df5b76b Mon Sep 17 00:00:00 2001 From: Rohin Garg Date: Wed, 9 Jul 2025 10:22:58 -0700 Subject: [PATCH 15/19] added flag to prevent recompilation of CUDA kernels Signed-off-by: Rohin Garg --- MANIFEST.in | 2 +- docker/Dockerfile_internal | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 82fd22b845f..33794647a2e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,4 +7,4 @@ include requirements/cpu.txt include CMakeLists.txt recursive-include cmake * -recursive-include csrc * +recursive-include csrc * \ No newline at end of file diff --git a/docker/Dockerfile_internal b/docker/Dockerfile_internal index ea6aff8c434..ecb9b7aacfe 100644 --- a/docker/Dockerfile_internal +++ b/docker/Dockerfile_internal @@ -2,7 +2,7 @@ FROM us-central1-docker.pkg.dev/character-ai/vllm/vllm-forked:latest as builder COPY . /mnt/vllm RUN pip uninstall -y vllm -RUN pip install /mnt/vllm +RUN VLLM_USE_PRECOMPILED=1 pip install /mnt/vllm RUN rm -rf /mnt/vllm RUN python3 -c "import vllm; print('Custom vLLM loaded successfully')" From 66ee42f41bcb8081e7fcdf1f09c7c6c267b31427 Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Wed, 16 Jul 2025 10:06:43 -0700 Subject: [PATCH 16/19] add separate auth steps for staging & prod, remove disk space cleanup step --- .github/workflows/docker-publish.yml | 107 +++++++++++---------------- 1 file changed, 43 insertions(+), 64 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 2c97d26bcf2..8e33678475e 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -1,104 +1,86 @@ name: Build & Publish vLLM Docker - on: push: branches: - main pull_request: workflow_dispatch: - permissions: id-token: write contents: read - concurrency: group: deployment cancel-in-progress: false - jobs: build-and-push: strategy: fail-fast: false runs-on: large_ubuntu_4cpu timeout-minutes: 360 - env: - REGION : us-central1 + REGION: us-central1 REPO: vllm IMAGE: vllm-forked STAGING_PROJECT: character-ai-staging PROD_PROJECT: character-ai - steps: - name: Checkout code uses: actions/checkout@v4 - - - name: Free up disk space + + - name: Determine tags + id: tags run: | - echo "Disk usage before cleanup:" - df -h + SHA=$(git rev-parse --short=7 HEAD) + echo "sha=$SHA" >> $GITHUB_OUTPUT + echo "tags=${SHA},latest" >> $GITHUB_OUTPUT - sudo apt-get remove -y '^aspnetcore-.*' '^dotnet-.*' '^llvm-.*' '^php.*' '^mysql-.*' '^postgresql-.*' - sudo apt-get autoremove -y - sudo apt-get autoclean - - # Remove large directories - sudo rm -rf /opt/hostedtoolcache - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf /usr/local/share/boost - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - sudo rm -rf /usr/local/graalvm/ - sudo rm -rf /usr/local/.ghcup/ - sudo rm -rf /usr/local/share/powershell - sudo rm -rf /usr/local/share/chromium - sudo rm -rf /usr/local/lib/node_modules - - # Clean package caches - sudo apt-get clean - - # Clean Docker - sudo docker system prune -af --volumes - sudo docker builder prune -af - - # Clean temp files - sudo rm -rf /tmp/* - sudo rm -rf /var/tmp/* + # Authenticate to staging and push + - name: Authenticate to GCP Staging + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER_STAGING }} + service_account: ${{ secrets.GCP_SA_EMAIL_STAGING }} - echo "Disk usage after cleanup:" - df -h + - name: "Set up Cloud SDK for Staging" + uses: "google-github-actions/setup-gcloud@v2" + with: + install_components: "beta" - - name: Check disk space + - name: Configure Docker credential helper for Staging run: | - sudo dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -nr | head - df . -h - sudo du /usr/ -hx -d 4 --threshold=1G | sort -hr | head + gcloud --quiet auth configure-docker ${{ env.REGION }}-docker.pkg.dev + + - name: Build & Push to Staging + uses: docker/build-push-action@v5 + with: + context: . + file: ./docker/Dockerfile_internal + platforms: linux/amd64 + push: true + build-args: | + HF_HOME=/huggingface/cache + tags: | + ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest - - name: Authenticate to GCP + # Authenticate to prod and push + - name: Authenticate to GCP Prod uses: google-github-actions/auth@v2 with: - workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} - service_account: ${{ secrets.GCP_SA_EMAIL }} - - - name: "Set up Cloud SDK" + workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER_PROD }} + service_account: ${{ secrets.GCP_SA_EMAIL_PROD }} + + - name: "Set up Cloud SDK for Prod" uses: "google-github-actions/setup-gcloud@v2" with: install_components: "beta" - - - name: Configure Docker credential helper + + - name: Configure Docker credential helper for Prod run: | gcloud --quiet auth configure-docker ${{ env.REGION }}-docker.pkg.dev gcloud --quiet auth configure-docker gcr.io - - - name: Determine tags - id: tags - run: | - SHA=$(git rev-parse --short=7 HEAD) - echo "sha=$SHA" >> $GITHUB_OUTPUT - echo "tags=${SHA},latest" >> $GITHUB_OUTPUT - - - name: Build & Push to Staging, Prod, and GCR + + - name: Build & Push to Prod and GCR uses: docker/build-push-action@v5 with: context: . @@ -108,10 +90,7 @@ jobs: build-args: | HF_HOME=/huggingface/cache tags: | - ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} - ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest - ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + ${{ env.REGION }}-docker.pkg.dev/${{ env.PROD_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} ${{ env.REGION }}-docker.pkg.dev/${{ env.PROD_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:latest - From 9853776fa8a0022e314ba68bc48b49c39a39c76c Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Wed, 16 Jul 2025 10:48:13 -0700 Subject: [PATCH 17/19] share image between staging and prod --- .github/workflows/docker-publish.yml | 41 +++++++++++++++------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 8e33678475e..267e2a0bd3e 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -26,7 +26,13 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 - + + - name: Check disk space + run: | + sudo dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -nr | head + df . -h + sudo du /usr/ -hx -d 4 --threshold=1G | sort -hr | head + - name: Determine tags id: tags run: | @@ -34,19 +40,18 @@ jobs: echo "sha=$SHA" >> $GITHUB_OUTPUT echo "tags=${SHA},latest" >> $GITHUB_OUTPUT - # Authenticate to staging and push - name: Authenticate to GCP Staging uses: google-github-actions/auth@v2 with: workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER_STAGING }} service_account: ${{ secrets.GCP_SA_EMAIL_STAGING }} - - name: "Set up Cloud SDK for Staging" + - name: "Set up Cloud SDK" uses: "google-github-actions/setup-gcloud@v2" with: install_components: "beta" - - name: Configure Docker credential helper for Staging + - name: Configure Docker credential helper for build run: | gcloud --quiet auth configure-docker ${{ env.REGION }}-docker.pkg.dev @@ -63,7 +68,7 @@ jobs: ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest - # Authenticate to prod and push + # Re-authenticate to prod for prod pushes - name: Authenticate to GCP Prod uses: google-github-actions/auth@v2 with: @@ -80,17 +85,15 @@ jobs: gcloud --quiet auth configure-docker ${{ env.REGION }}-docker.pkg.dev gcloud --quiet auth configure-docker gcr.io - - name: Build & Push to Prod and GCR - uses: docker/build-push-action@v5 - with: - context: . - file: ./docker/Dockerfile_internal - platforms: linux/amd64 - push: true - build-args: | - HF_HOME=/huggingface/cache - tags: | - ${{ env.REGION }}-docker.pkg.dev/${{ env.PROD_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} - ${{ env.REGION }}-docker.pkg.dev/${{ env.PROD_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest - gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} - gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:latest + # Push same image to prod + - name: Push to Prod and GCR + run: | + docker tag local-image:${{ steps.tags.outputs.sha }} ${{ env.REGION }}-docker.pkg.dev/${{ env.PROD_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + docker tag local-image:${{ steps.tags.outputs.sha }} ${{ env.REGION }}-docker.pkg.dev/${{ env.PROD_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest + docker tag local-image:${{ steps.tags.outputs.sha }} gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + docker tag local-image:${{ steps.tags.outputs.sha }} gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:latest + + docker push ${{ env.REGION }}-docker.pkg.dev/${{ env.PROD_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + docker push ${{ env.REGION }}-docker.pkg.dev/${{ env.PROD_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest + docker push gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + docker push gcr.io/${{ env.PROD_PROJECT }}/vllm/${{ env.IMAGE }}:latest From 3f58af3d18f6a1b14b1402304bd36ec4d83301e3 Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Wed, 16 Jul 2025 11:14:14 -0700 Subject: [PATCH 18/19] fix build --- .github/workflows/docker-publish.yml | 34 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 267e2a0bd3e..15a40a2bfa7 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -26,7 +26,6 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 - - name: Check disk space run: | sudo dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -nr | head @@ -39,7 +38,7 @@ jobs: SHA=$(git rev-parse --short=7 HEAD) echo "sha=$SHA" >> $GITHUB_OUTPUT echo "tags=${SHA},latest" >> $GITHUB_OUTPUT - + - name: Authenticate to GCP Staging uses: google-github-actions/auth@v2 with: @@ -55,19 +54,24 @@ jobs: run: | gcloud --quiet auth configure-docker ${{ env.REGION }}-docker.pkg.dev - - name: Build & Push to Staging - uses: docker/build-push-action@v5 - with: - context: . - file: ./docker/Dockerfile_internal - platforms: linux/amd64 - push: true - build-args: | - HF_HOME=/huggingface/cache - tags: | - ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} - ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest - + # Build the image locally first + - name: Build Docker image + run: | + docker build \ + -f ./docker/Dockerfile_internal \ + --build-arg HF_HOME=/huggingface/cache \ + -t local-image:${{ steps.tags.outputs.sha }} \ + . + + # Tag and push to staging + - name: Tag and Push to Staging + run: | + docker tag local-image:${{ steps.tags.outputs.sha }} ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + docker tag local-image:${{ steps.tags.outputs.sha }} ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest + + docker push ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:${{ steps.tags.outputs.sha }} + docker push ${{ env.REGION }}-docker.pkg.dev/${{ env.STAGING_PROJECT }}/${{ env.REPO }}/${{ env.IMAGE }}:latest + # Re-authenticate to prod for prod pushes - name: Authenticate to GCP Prod uses: google-github-actions/auth@v2 From 50c776c76f290e0acf801b84813bf7d29d740ca1 Mon Sep 17 00:00:00 2001 From: rohingarg-c Date: Wed, 16 Jul 2025 15:24:35 -0700 Subject: [PATCH 19/19] remove pull request trigger --- .github/workflows/docker-publish.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 15a40a2bfa7..5e273c8392f 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -3,7 +3,6 @@ on: push: branches: - main - pull_request: workflow_dispatch: permissions: id-token: write