Skip to content

Commit 0b07f9b

Browse files
fix: fix cuda-all image (#239)
1 parent cb802a2 commit 0b07f9b

File tree

2 files changed: 11 additions and 11 deletions.

Dockerfile-cuda-all

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ FROM builder as builder-75
4848

4949
RUN if [ $VERTEX = "true" ]; \
5050
then \
51-
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
51+
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --features http --no-default-features --recipe-path recipe.json && sccache -s; \
5252
else \
53-
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
53+
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --no-default-features --features http --recipe-path recipe.json && sccache -s; \
5454
fi;
5555

5656
COPY backends backends
@@ -70,9 +70,9 @@ FROM builder as builder-80
7070

7171
RUN if [ $VERTEX = "true" ]; \
7272
then \
73-
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
73+
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --features http --no-default-features --recipe-path recipe.json && sccache -s; \
7474
else \
75-
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
75+
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --no-default-features --features http --recipe-path recipe.json && sccache -s; \
7676
fi;
7777

7878
COPY backends backends
@@ -83,18 +83,18 @@ COPY Cargo.lock ./
8383

8484
RUN if [ $VERTEX = "true" ]; \
8585
then \
86-
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F http -F google --no-default-features && sccache -s; \
86+
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F http -F google --no-default-features && sccache -s; \
8787
else \
88-
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F http --no-default-features && sccache -s; \
88+
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F http --no-default-features && sccache -s; \
8989
fi;
9090

9191
FROM builder as builder-90
9292

9393
RUN if [ $VERTEX = "true" ]; \
9494
then \
95-
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
95+
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --features http --no-default-features --recipe-path recipe.json && sccache -s; \
9696
else \
97-
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
97+
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --features http --no-default-features --recipe-path recipe.json && sccache -s; \
9898
fi;
9999

100100
COPY backends backends
@@ -105,9 +105,9 @@ COPY Cargo.lock ./
105105

106106
RUN if [ $VERTEX = "true" ]; \
107107
then \
108-
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F http -F google --no-default-features && sccache -s; \
108+
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F http -F google --no-default-features && sccache -s; \
109109
else \
110-
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F http --no-default-features && sccache -s; \
110+
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F http --no-default-features && sccache -s; \
111111
fi;
112112

113113
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 as base

backends/candle/src/flash_attn.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ pub(crate) fn flash_attn_varlen(
7373
return attention;
7474
}
7575
#[cfg(not(feature = "flash-attn"))]
76-
candle::bail!("Flash attention is not installed. Use `flash-attn-v1` feature.")
76+
candle::bail!("Flash attention is not installed. Use `flash-attn` feature.")
7777
}
7878
candle::bail!(
7979
"GPU with CUDA capability {} is not supported",

0 commit comments

Comments
 (0)