feat: Amazon SageMaker compatible images (#103)

JGalego · web-flow · commit 432448ca18c0 · 2024-04-11T19:02:10.000+02:00
diff --git a/.github/workflows/build_all.yaml b/.github/workflows/build_all.yaml
@@ -83,3 +83,79 @@
            labels: ${{ steps.meta.outputs.labels }}
            cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-all,mode=max
            cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-all,mode=max
+
+   # Builds the `sagemaker` target of Dockerfile-cuda-all and pushes it to the
+   # internal registry; runs only after the `build-and-push-image` job.
+   build-and-push-sagemaker-image:
+     needs:
+      - build-and-push-image
+     concurrency:
+       group: ${{ github.workflow }}-${{ github.job }}-all-${{ github.head_ref || github.run_id }}
+       cancel-in-progress: true
+     runs-on: [self-hosted, intel-cpu, 32-cpu, tgi-ci]
+     permissions:
+       contents: write
+       packages: write
+       # This is used to complete the identity challenge
+       # with sigstore/fulcio when running outside of PRs.
+       id-token: write
+       security-events: write
+     steps:
+       - name: Checkout repository
+         uses: actions/checkout@v3
+       - name: Initialize Docker Buildx
+         uses: docker/setup-buildx-action@v2.0.0
+         with:
+           install: true
+       - name: Configure sccache
+         # Re-export the cache URL and runtime token so later steps can read
+         # them through the `env` context and forward them as build args.
+         uses: actions/github-script@v6
+         with:
+           script: |
+             core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
+             core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
+       - name: Inject slug/short variables
+         # Expected to provide GITHUB_SHA_SHORT, used for the sha tag and label.
+         uses: rlespinasse/github-slug-action@v4.4.1
+       - name: Login to internal Container Registry
+         uses: docker/login-action@v2.1.0
+         with:
+           username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
+           password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
+           registry: registry.internal.huggingface.tech
+       - name: Extract metadata (tags, labels) for Docker
+         id: meta
+         uses: docker/metadata-action@v4.3.0
+         with:
+           images: |
+             registry.internal.huggingface.tech/api-inference/text-embeddings-inference/sagemaker
+           flavor: |
+             latest=false
+           tags: |
+             type=semver,pattern=cuda-{{version}}
+             type=semver,pattern=cuda-{{major}}.{{minor}}
+             type=raw,value=cuda-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
+             type=raw,value=cuda-sha-${{ env.GITHUB_SHA_SHORT }}
+       - name: Build and push Docker image
+         id: build-and-push-sagemaker
+         uses: docker/build-push-action@v4
+         with:
+           context: .
+           file: Dockerfile-cuda-all
+           # Pull-request runs build the image but do not push it.
+           push: ${{ github.event_name != 'pull_request' }}
+           platforms: 'linux/amd64'
+           target: sagemaker
+           # GIT_SHA comes from the `github` context: default variables such as
+           # GITHUB_SHA are not exposed through the `env` context.
+           build-args: |
+             SCCACHE_GHA_ENABLED=on
+             ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
+             ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
+             GIT_SHA=${{ github.sha }}
+             DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
+           tags: ${{ steps.meta.outputs.tags }}
+           labels: ${{ steps.meta.outputs.labels }}
+           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-all,mode=max
+           cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-all,mode=max
diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all
@@ -127,3 +127,11 @@ RUN chmod +x entrypoint.sh
 
 ENTRYPOINT ["./entrypoint.sh"]
 CMD ["--json-output"]
+
+# Amazon SageMaker compatible image, built on top of the `base` stage.
+FROM base AS sagemaker
+# Install the SageMaker launcher script under the name entrypoint.sh.
+COPY sagemaker-entrypoint.sh entrypoint.sh
+RUN chmod +x entrypoint.sh
+# No CMD is declared in this stage; the script is the container entrypoint.
+ENTRYPOINT ["./entrypoint.sh"]
diff --git a/sagemaker-entrypoint.sh b/sagemaker-entrypoint.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Amazon SageMaker entrypoint: maps HF_MODEL_ID / HF_MODEL_REVISION onto
+# MODEL_ID / REVISION and starts the router build matching the GPU on port 8080.
+
+if [[ -z "${HF_MODEL_ID}" ]]; then
+  echo "HF_MODEL_ID must be set" >&2
+  exit 1
+fi
+export MODEL_ID="${HF_MODEL_ID}"
+
+if [[ -n "${HF_MODEL_REVISION}" ]]; then
+  export REVISION="${HF_MODEL_REVISION}"
+fi
+
+if [[ -n "${CUDA_COMPUTE_CAP}" ]]; then
+    # Explicit override: nvidia-smi is not needed; "8.6" is accepted like "86".
+    compute_cap="${CUDA_COMPUTE_CAP//./}"
+elif command -v nvidia-smi &> /dev/null; then
+    # Row 1 of the CSV output is the header; row 2 is the first GPU ("8.6" -> "86").
+    compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')
+else
+    echo "Error: 'nvidia-smi' command not found." >&2
+    exit 1
+fi
+
+# exec replaces this shell with the router, so the router receives the
+# container's signals (e.g. SIGTERM on shutdown) directly instead of bash.
+if [[ ${compute_cap} -eq 75 ]]; then
+    exec text-embeddings-router-75 --port 8080
+elif [[ ${compute_cap} -ge 80 && ${compute_cap} -lt 90 ]]; then
+    exec text-embeddings-router-80 --port 8080
+elif [[ ${compute_cap} -eq 90 ]]; then
+    exec text-embeddings-router-90 --port 8080
+else
+    echo "cuda compute cap ${compute_cap} is not supported" >&2; exit 1
+fi