Add a dedicated CUDA compute-capability 89 build of text-embeddings-router.

Dockerfile-cuda-all:
  * add a `cargo chef cook` step and a `cargo build` step that run with
    CUDA_COMPUTE_CAP=89 (the `google` feature is enabled when $VERTEX is
    "true"), placed after the existing CUDA_COMPUTE_CAP=80 steps;
  * rename the resulting binary to text-embeddings-router-89 and copy it
    from the builder stage to /usr/local/bin.

cuda-all-entrypoint.sh:
  * narrow the text-embeddings-router-80 range from [80, 90) to [80, 89);
  * send a detected compute capability in [89, 90) to
    text-embeddings-router-89.

NOTE(review): the line structure of this patch was restored from a copy whose
newlines and leading indentation had been collapsed. The hunk line counts
match the hunk headers, but the 4-space indentation of the context and added
lines is an assumption; confirm against the target files, or apply with
`git apply --ignore-whitespace`.

diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all
index 5dca432a..7df54cad 100644
--- a/Dockerfile-cuda-all
+++ b/Dockerfile-cuda-all
@@ -76,6 +76,15 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
     CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \
     fi;
 
+RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
+    --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
+    if [ $VERTEX = "true" ]; \
+    then \
+    CUDA_COMPUTE_CAP=89 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \
+    else \
+    CUDA_COMPUTE_CAP=89 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \
+    fi;
+
 RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
     --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
     if [ $VERTEX = "true" ]; \
@@ -113,6 +122,17 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
 
 RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80
 
+RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
+    --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
+    if [ $VERTEX = "true" ]; \
+    then \
+    CUDA_COMPUTE_CAP=89 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \
+    else \
+    CUDA_COMPUTE_CAP=89 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \
+    fi;
+
+RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-89
+
 RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
     --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
     if [ $VERTEX = "true" ]; \
@@ -140,6 +160,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
 
 COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75
 COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80
+COPY --from=builder /usr/src/target/release/text-embeddings-router-89 /usr/local/bin/text-embeddings-router-89
 COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90
 
 # Amazon SageMaker compatible image
diff --git a/cuda-all-entrypoint.sh b/cuda-all-entrypoint.sh
index d9be21ea..8b3f49fc 100644
--- a/cuda-all-entrypoint.sh
+++ b/cuda-all-entrypoint.sh
@@ -10,9 +10,12 @@ compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | se
 if [ ${compute_cap} -eq 75 ]
 then
     exec text-embeddings-router-75 "$@"
-elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]
+elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 89 ]
 then
     exec text-embeddings-router-80 "$@"
+elif [ ${compute_cap} -ge 89 -a ${compute_cap} -lt 90 ]
+then
+    exec text-embeddings-router-89 "$@"
 elif [ ${compute_cap} -eq 90 ]
 then
     exec text-embeddings-router-90 "$@"