Skip to content

Commit d123a5a

Browse files
feat: faster CPU image on AMD (#35)
1 parent c54eb2e commit d123a5a

File tree

15 files changed

+297
-143
lines changed

15 files changed

+297
-143
lines changed

Cargo.lock

Lines changed: 5 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ homepage = "https://github.com/huggingface/text-embeddings-inference"
1818

1919
[patch.crates-io]
2020
cudarc = { git = "https://github.com/OlivierDehaene/cudarc", rev = "4c8e6d36a4a4c31e2e4649ae5246226452a01fc1" }
21-
candle = { git = "https://github.com/huggingface/candle", rev = "122da875806f274a7aa9048f76d7a676b473e56f", package = "candle-core" }
22-
candle-nn = { git = "https://github.com/huggingface/candle", rev = "122da875806f274a7aa9048f76d7a676b473e56f", package = "candle-nn" }
23-
candle-transformers = { git = "https://github.com/huggingface/candle", rev = "122da875806f274a7aa9048f76d7a676b473e56f", package = "candle-transformers" }
24-
candle-flash-attn = { git = "https://github.com/huggingface/candle", rev = "122da875806f274a7aa9048f76d7a676b473e56f", package = "candle-flash-attn" }
21+
candle = { git = "https://github.com/OlivierDehaene/candle", rev = "9f2b4081b83a0e47ec1b12caa71d3cac7cc2161e", package = "candle-core" }
22+
candle-nn = { git = "https://github.com/OlivierDehaene/candle", rev = "9f2b4081b83a0e47ec1b12caa71d3cac7cc2161e", package = "candle-nn" }
23+
candle-transformers = { git = "https://github.com/OlivierDehaene/candle", rev = "9f2b4081b83a0e47ec1b12caa71d3cac7cc2161e", package = "candle-transformers" }
24+
candle-flash-attn = { git = "https://github.com/OlivierDehaene/candle", rev = "9f2b4081b83a0e47ec1b12caa71d3cac7cc2161e", package = "candle-flash-attn" }
2525

2626
[profile.release]
2727
debug = 1

Dockerfile

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,31 +35,53 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
3535

3636
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
3737
intel-oneapi-mkl-devel \
38+
build-essential \
3839
&& rm -rf /var/lib/apt/lists/*
3940

41+
RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \
42+
gcc -shared -fPIC -o libfakeintel.so fakeintel.c
43+
4044
COPY --from=planner /usr/src/recipe.json recipe.json
4145

42-
RUN cargo chef cook --release --features candle --features mkl --no-default-features --recipe-path recipe.json && sccache -s
46+
RUN cargo chef cook --release --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s
4347

4448
COPY backends backends
4549
COPY core core
4650
COPY router router
4751
COPY Cargo.toml ./
4852
COPY Cargo.lock ./
4953

50-
RUN cargo build --release --bin text-embeddings-router -F candle -F mkl --no-default-features && sccache -s
54+
RUN cargo build --release --bin text-embeddings-router -F candle -F mkl-dynamic --no-default-features && sccache -s
5155

5256
FROM debian:bookworm-slim
5357

5458
ENV HUGGINGFACE_HUB_CACHE=/data \
55-
PORT=80
59+
PORT=80 \
60+
MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \
61+
RAYON_NUM_THREADS=8 \
62+
LD_PRELOAD=/usr/local/libfakeintel.so \
63+
LD_LIBRARY_PATH=/usr/local/lib
5664

5765
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
5866
libomp-dev \
5967
ca-certificates \
6068
libssl-dev \
6169
&& rm -rf /var/lib/apt/lists/*
6270

71+
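# The binary is built with mkl-dynamic and preloads libfakeintel.so (the mkl_serv_intel_cpu_true patch), so the Intel MKL shared objects it loads at runtime must be copied into this image.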
72+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2
73+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2
74+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2
75+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2
76+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2
77+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx.so.2 /usr/local/lib/libmkl_vml_avx.so.2
78+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2
79+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2
80+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx.so.2 /usr/local/lib/libmkl_avx.so.2
81+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2
82+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2
83+
COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so
84+
6385
COPY --from=builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router
6486

6587
ENTRYPOINT ["text-embeddings-router"]

backends/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ clap = ["dep:clap", "text-embeddings-backend-core/clap"]
1919
python = ["dep:text-embeddings-backend-python"]
2020
candle = ["dep:text-embeddings-backend-candle"]
2121
mkl = ["text-embeddings-backend-candle?/mkl"]
22+
mkl-dynamic = ["text-embeddings-backend-candle?/mkl-dynamic"]
2223
accelerate = ["text-embeddings-backend-candle?/accelerate"]
2324
static-linking = ["text-embeddings-backend-candle?/static-linking"]
2425
flash-attn = ["text-embeddings-backend-candle?/flash-attn"]

backends/candle/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ homepage.workspace = true
77

88
[dependencies]
99
accelerate-src = { version = "0.3.2", optional = true }
10-
intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"], optional = true }
10+
intel-mkl-src = { version = "0.8.1", optional = true }
1111
candle = { version = "0.3.0", package = "candle-core", default-features = false }
1212
candle-nn = { version = "0.3.0" }
1313
candle-transformers = { version = "0.3.0" }
@@ -29,7 +29,8 @@ anyhow = { version = "1", features = ["backtrace"] }
2929

3030
[features]
3131
accelerate = ["dep:accelerate-src", "candle/accelerate", "candle-nn/accelerate"]
32-
mkl = ["dep:intel-mkl-src", "candle/mkl", "candle-nn/mkl"]
32+
mkl = ["dep:intel-mkl-src", "intel-mkl-src/mkl-static-lp64-iomp", "candle/mkl", "candle-nn/mkl"]
33+
mkl-dynamic = ["dep:intel-mkl-src", "intel-mkl-src/mkl-dynamic-lp64-iomp", "candle/mkl-dynamic", "candle-nn/mkl-dynamic"]
3334
cuda = ["candle/cuda", "candle-nn/cuda", "dep:candle-cublaslt", "dep:candle-layer-norm"]
3435
flash-attn-v1 = ["dep:candle-flash-attn-v1", "cuda"]
3536
flash-attn = ["dep:candle-flash-attn", "cuda"]

backends/candle/src/lib.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ use text_embeddings_backend_core::{BackendError, Batch, Embedding, EmbeddingBack
1515

1616
pub struct CandleBackend {
1717
model: Box<dyn EmbeddingModel + Send>,
18-
device: Device,
1918
}
2019

2120
impl CandleBackend {
@@ -126,7 +125,7 @@ impl CandleBackend {
126125
}
127126
};
128127

129-
Ok(Self { model, device })
128+
Ok(Self { model })
130129
}
131130
}
132131

@@ -142,10 +141,7 @@ impl EmbeddingBackend for CandleBackend {
142141
}
143142

144143
fn max_batch_size(&self) -> Option<usize> {
145-
match self.device {
146-
Device::Cpu => Some(1),
147-
Device::Cuda(_) => None,
148-
}
144+
None
149145
}
150146
}
151147

backends/candle/src/models.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#[cfg(feature = "mkl")]
1+
#[cfg(any(feature = "mkl", feature = "mkl-dynamic"))]
22
extern crate intel_mkl_src;
33

44
#[cfg(feature = "accelerate")]

0 commit comments

Comments
 (0)