Skip to content

Commit 1477844

Browse files
authored
Add SageMaker CPU images and validate (#240)
1 parent 0b07f9b commit 1477844

File tree

4 files changed

+47
-27
lines changed

4 files changed

+47
-27
lines changed

Dockerfile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,11 @@ COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/loc
108108

109109
ENTRYPOINT ["text-embeddings-router"]
110110
CMD ["--json-output"]
111+
112+
# Amazon SageMaker compatible image
# Build stage named `sagemaker`, layered on top of the `base` stage.
113+
FROM base AS sagemaker
114+
115+
# Take the compiled router binary from the `http-builder` stage.
COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router
116+
# Install sagemaker-entrypoint.sh under the name entrypoint.sh with mode 775,
# so no separate `RUN chmod` layer is needed.
COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh
117+
118+
# Relative path: presumably resolved against the working directory inherited
# from `base` (not visible here) -- confirm.
ENTRYPOINT ["./entrypoint.sh"]

Dockerfile-cuda-all

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,6 @@ CMD ["--json-output"]
131131
# Amazon SageMaker compatible image
132132
FROM base AS sagemaker
133133

134-
COPY sagemaker-entrypoint.sh entrypoint.sh
135-
RUN chmod +x entrypoint.sh
134+
COPY --chmod=775 sagemaker-entrypoint-cuda-all.sh entrypoint.sh
136135

137-
ENTRYPOINT ["./entrypoint.sh"]
136+
ENTRYPOINT ["./entrypoint.sh"]

sagemaker-entrypoint-cuda-all.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
#!/bin/bash
#
# Amazon SageMaker entrypoint for the CUDA "all" image.
#
# Translates the HF_* environment variables into MODEL_ID / REVISION, works
# out the GPU compute capability and starts the matching
# text-embeddings-router-<cap> binary on port 8080 with JSON output.
#
# Environment:
#   HF_MODEL_ID        required; exported as MODEL_ID
#   HF_MODEL_REVISION  optional; exported as REVISION when non-empty
#   CUDA_COMPUTE_CAP   optional; used instead of querying nvidia-smi.
#                      Both "80" and "8.0" spellings are accepted.
#
# Exit status: 1 on a configuration error, otherwise that of the router.

if [[ -z "${HF_MODEL_ID}" ]]; then
  echo "HF_MODEL_ID must be set" >&2
  exit 1
fi
export MODEL_ID="${HF_MODEL_ID}"

if [[ -n "${HF_MODEL_REVISION}" ]]; then
  export REVISION="${HF_MODEL_REVISION}"
fi

if ! command -v nvidia-smi &> /dev/null; then
  echo "Error: 'nvidia-smi' command not found." >&2
  exit 1
fi

if [[ -z "${CUDA_COMPUTE_CAP}" ]]; then
  # With --format=csv the first line is the header; the second line is the
  # value reported for the first GPU.
  compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p')
else
  compute_cap=${CUDA_COMPUTE_CAP}
fi

# Normalise "8.0" -> "80" for both sources and drop stray whitespace, so the
# override and the detected value are handled the same way.
compute_cap=${compute_cap//./}
compute_cap=${compute_cap//[[:space:]]/}

# Match as a string rather than with [[ -eq ]]: the value may come straight
# from the environment and must not be evaluated as an arithmetic expression.
case "${compute_cap}" in
  75)
    router=text-embeddings-router-75
    ;;
  8[0-9])
    router=text-embeddings-router-80
    ;;
  90)
    router=text-embeddings-router-90
    ;;
  *)
    echo "cuda compute cap ${compute_cap} is not supported" >&2
    exit 1
    ;;
esac

# exec replaces this shell with the router, so the router receives the
# container's stop signals directly and its exit status becomes ours.
exec "${router}" --port 8080 --json-output

sagemaker-entrypoint.sh

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,4 @@ if [[ -n "${HF_MODEL_REVISION}" ]]; then
1010
export REVISION="${HF_MODEL_REVISION}"
1111
fi
1212

13-
if ! command -v nvidia-smi &> /dev/null; then
14-
echo "Error: 'nvidia-smi' command not found."
15-
exit 1
16-
fi
17-
18-
if [[ -z "${CUDA_COMPUTE_CAP}" ]]
19-
then
20-
compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')
21-
else
22-
compute_cap=$CUDA_COMPUTE_CAP
23-
fi
24-
25-
if [[ ${compute_cap} -eq 75 ]]
26-
then
27-
text-embeddings-router-75 --port 8080
28-
elif [[ ${compute_cap} -ge 80 && ${compute_cap} -lt 90 ]]
29-
then
30-
text-embeddings-router-80 --port 8080
31-
elif [[ ${compute_cap} -eq 90 ]]
32-
then
33-
text-embeddings-router-90 --port 8080
34-
else
35-
echo "cuda compute cap ${compute_cap} is not supported"; exit 1
36-
fi
13+
text-embeddings-router --port 8080 --json-output

0 commit comments

Comments
 (0)