Skip to content

Commit 432448c

Browse files
authored
feat: Amazon SageMaker compatible images (#103)
1 parent 3c385a4 commit 432448c

File tree

3 files changed

+112
-0
lines changed

3 files changed

+112
-0
lines changed

.github/workflows/build_all.yaml

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,71 @@
8383
labels: ${{ steps.meta.outputs.labels }}
8484
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-all,mode=max
8585
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-all,mode=max
86+
87+
# Builds the Amazon SageMaker flavour of the image (the `sagemaker` target of
# Dockerfile-cuda-all) after the `build-and-push-image` job has succeeded, and
# pushes it to the internal registry for every event except pull requests.
build-and-push-sagemaker-image:
  needs:
    - build-and-push-image
  concurrency:
    group: ${{ github.workflow }}-${{ github.job }}-all-${{ github.head_ref || github.run_id }}
    cancel-in-progress: true
  runs-on: [self-hosted, intel-cpu, 32-cpu, tgi-ci]
  permissions:
    contents: write
    packages: write
    # This is used to complete the identity challenge
    # with sigstore/fulcio when running outside of PRs.
    id-token: write
    security-events: write
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
    - name: Initialize Docker Buildx
      uses: docker/setup-buildx-action@v2.0.0
      with:
        install: true
    # Re-export the runner's cache URL and runtime token as workflow
    # environment variables so the build step below can read them through the
    # `env` context and hand them to the Docker build as build args.
    - name: Configure sccache
      uses: actions/github-script@v6
      with:
        script: |
          core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
          core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
    # Later steps read `env.GITHUB_SHA_SHORT`, which is expected to be
    # exported by this action.
    - name: Inject slug/short variables
      uses: rlespinasse/github-slug-action@v4.4.1
    - name: Login to internal Container Registry
      uses: docker/login-action@v2.1.0
      with:
        username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
        password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
        registry: registry.internal.huggingface.tech
    - name: Extract metadata (tags, labels) for Docker
      id: meta
      uses: docker/metadata-action@v4.3.0
      with:
        images: |
          registry.internal.huggingface.tech/api-inference/text-embeddings-inference/sagemaker
        flavor: |
          latest=false
        tags: |
          type=semver,pattern=cuda-{{version}}
          type=semver,pattern=cuda-{{major}}.{{minor}}
          type=raw,value=cuda-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
          type=raw,value=cuda-sha-${{ env.GITHUB_SHA_SHORT }}
    - name: Build and push Docker image
      id: build-and-push-sagemaker
      uses: docker/build-push-action@v4
      with:
        context: .
        file: Dockerfile-cuda-all
        push: ${{ github.event_name != 'pull_request' }}
        platforms: 'linux/amd64'
        target: sagemaker
        # GIT_SHA comes from the `github` context: default runner variables
        # such as GITHUB_SHA are not exposed through the `env` context, so
        # `env.GITHUB_SHA` would expand to an empty string here.
        build-args: |
          SCCACHE_GHA_ENABLED=on
          ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
          ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
          GIT_SHA=${{ github.sha }}
          DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-all,mode=max
        cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-all,mode=max

Dockerfile-cuda-all

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,11 @@ RUN chmod +x entrypoint.sh
127127

128128
ENTRYPOINT ["./entrypoint.sh"]
129129
CMD ["--json-output"]
130+
131+
# Amazon SageMaker compatible image.
# Starts from the `base` stage and installs sagemaker-entrypoint.sh as the
# container's entrypoint script.
FROM base AS sagemaker

# The script is copied under the name entrypoint.sh and made executable so it
# can be invoked directly by the exec-form ENTRYPOINT below.
COPY sagemaker-entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]

sagemaker-entrypoint.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/bin/bash
# Entrypoint for the Amazon SageMaker image.
#
# Translates the HF_MODEL_ID / HF_MODEL_REVISION environment variables into
# MODEL_ID / REVISION, determines the GPU's CUDA compute capability, and
# launches the matching text-embeddings-router-<cap> binary on port 8080.
# Arguments passed to this script are not forwarded to the router.
#
# Environment:
#   HF_MODEL_ID        required; exported as MODEL_ID
#   HF_MODEL_REVISION  optional; exported as REVISION when non-empty
#   CUDA_COMPUTE_CAP   optional; overrides auto-detection (e.g. "86" or "8.6")
#
# Exits with status 1 when HF_MODEL_ID is missing, nvidia-smi is unavailable,
# or the compute capability is not one of the supported values.

if [[ -z "${HF_MODEL_ID}" ]]; then
  echo "HF_MODEL_ID must be set" >&2
  exit 1
fi
export MODEL_ID="${HF_MODEL_ID}"

if [[ -n "${HF_MODEL_REVISION}" ]]; then
  export REVISION="${HF_MODEL_REVISION}"
fi

if ! command -v nvidia-smi &> /dev/null; then
  echo "Error: 'nvidia-smi' command not found." >&2
  exit 1
fi

if [[ -z "${CUDA_COMPUTE_CAP}" ]]; then
  # Line 1 of the CSV output is the header; line 2 is the first GPU listed.
  compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p')
else
  compute_cap=${CUDA_COMPUTE_CAP}
fi

# Normalise "8.6" -> "86" for both the detected and the user-supplied value,
# and drop stray whitespace so the integer check below is reliable.
compute_cap=${compute_cap//./}
compute_cap=${compute_cap//[[:space:]]/}

# Reject empty or non-numeric values up front: feeding them to the arithmetic
# comparisons below would either raise bash syntax errors or silently compare
# as 0.
if ! [[ "${compute_cap}" =~ ^[0-9]+$ ]]; then
  echo "cuda compute cap ${compute_cap} is not supported" >&2
  exit 1
fi

# `exec` replaces this shell with the router so the router becomes the
# container's main process and receives stop signals (e.g. SIGTERM) directly
# instead of having them absorbed by bash.
if [[ ${compute_cap} -eq 75 ]]; then
  exec text-embeddings-router-75 --port 8080
elif [[ ${compute_cap} -ge 80 && ${compute_cap} -lt 90 ]]; then
  exec text-embeddings-router-80 --port 8080
elif [[ ${compute_cap} -eq 90 ]]; then
  exec text-embeddings-router-90 --port 8080
else
  echo "cuda compute cap ${compute_cap} is not supported" >&2
  exit 1
fi

0 commit comments

Comments
 (0)