Skip to content

Commit 5c6151c

Browse files
fix: use malloc_trim to cleanup pages (#307)
1 parent 46de6a4 commit 5c6151c

File tree

19 files changed

+1317
-738
lines changed

19 files changed

+1317
-738
lines changed

Cargo.lock

Lines changed: 985 additions & 470 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,13 +16,26 @@ edition = "2021"
1616
authors = ["Olivier Dehaene"]
1717
homepage = "https://github.com/huggingface/text-embeddings-inference"
1818

19+
[workspace.dependencies]
20+
anyhow = "1.0.75"
21+
clap = { version = "4.1", features = ["derive", "env"] }
22+
hf-hub = { version = "0.3.2", features = ["tokio", "online"], default-features = false }
23+
metrics = "0.23"
24+
nohash-hasher = "0.2"
25+
tokenizers = { version = "0.19.1", default-features = false, features = ["onig", "esaxx_fast"] }
26+
tokio = { version = "1.25", features = ["rt", "rt-multi-thread", "parking_lot", "sync", "signal"] }
27+
tracing = "0.1"
28+
serde = { version = "1.0", features = ["serde_derive"] }
29+
serde_json = "1.0"
30+
thiserror = "1.0"
31+
32+
1933
[patch.crates-io]
2034
cudarc = { git = "https://github.com/coreylowman/cudarc", rev = "c388e724af93a3e8fbe484f5ded2d8b3c1badd8e" }
2135
candle = { git = "https://github.com/OlivierDehaene/candle", rev = "33b7ecf9ed82bb7c20f1a94555218fabfbaa2fe3", package = "candle-core" }
2236
candle-nn = { git = "https://github.com/OlivierDehaene/candle", rev = "33b7ecf9ed82bb7c20f1a94555218fabfbaa2fe3", package = "candle-nn" }
2337
candle-transformers = { git = "https://github.com/OlivierDehaene/candle", rev = "33b7ecf9ed82bb7c20f1a94555218fabfbaa2fe3", package = "candle-transformers" }
2438
candle-flash-attn = { git = "https://github.com/OlivierDehaene/candle", rev = "33b7ecf9ed82bb7c20f1a94555218fabfbaa2fe3", package = "candle-flash-attn" }
25-
hf-hub = { git = "https://github.com/huggingface/hf-hub", rev = "b167f69692be5f49eb8003788f7f8a499a98b096" }
2639

2740
[profile.release]
2841
debug = 0

backends/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,12 @@ authors.workspace = true
66
homepage.workspace = true
77

88
[dependencies]
9-
clap = { version = "4.1.4", features = ["derive"], optional = true }
9+
clap = { workspace = true, optional = true }
1010
text-embeddings-backend-core = { path = "core" }
1111
text-embeddings-backend-python = { path = "python", optional = true }
1212
text-embeddings-backend-candle = { path = "candle", optional = true }
13-
tokio = { version = "^1.25", features = ["sync"] }
14-
tracing = "^0.1"
13+
tokio = { workspace = true }
14+
tracing = { workspace = true }
1515

1616
[features]
1717
clap = ["dep:clap", "text-embeddings-backend-core/clap"]

backends/candle/Cargo.toml

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ authors.workspace = true
66
homepage.workspace = true
77

88
[dependencies]
9-
anyhow = "^1.0"
9+
anyhow = { workspace = true }
1010
accelerate-src = { version = "0.3.2", optional = true }
1111
intel-mkl-src = { version = "0.8.1", optional = true }
1212
candle = { version = "*", package = "candle-core", default-features = false }
@@ -17,21 +17,21 @@ candle-flash-attn-v1 = { git = "https://github.com/huggingface/candle-flash-attn
1717
candle-cublaslt = { git = "https://github.com/huggingface/candle-cublaslt", rev = "cf789b7dd6d4abb19b03b9556442f94f0588b4a0", optional = true }
1818
candle-layer-norm = { git = "https://github.com/huggingface/candle-layer-norm", rev = "94c2add7d94c2d63aebde77f7534614e04dbaea1", optional = true }
1919
candle-rotary = { git = "https://github.com/huggingface/candle-rotary", rev = "0a718a0856569a92f3112e64f10d07e4447822e8", optional = true }
20-
nohash-hasher = "^0.2"
20+
nohash-hasher = { workspace = true }
2121
text-embeddings-backend-core = { path = "../core" }
22-
tracing = "^0.1"
22+
tracing = { workspace = true }
2323
safetensors = "^0.4"
24-
thiserror = "^1.0"
25-
serde = { version = "^1.0", features = ["serde_derive"] }
26-
serde_json = "^1.0"
24+
thiserror = { workspace = true }
25+
serde = { workspace = true }
26+
serde_json = { workspace = true }
2727
memmap2 = "^0.9"
2828

2929
[dev-dependencies]
3030
insta = { git = "https://github.com/OlivierDehaene/insta", rev = "f4f98c0410b91fb5a28b10df98e4422955be9c2c", features = ["yaml"] }
3131
is_close = "0.1.3"
3232
hf-hub = "0.3.2"
33-
anyhow = "1.0.75"
34-
tokenizers = { version = "^0.19.1", default-features = false, features = ["onig", "esaxx_fast"] }
33+
anyhow = { workspace = true }
34+
tokenizers = { workspace = true }
3535
serial_test = "2.0.0"
3636

3737
[build-dependencies]

backends/core/Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,9 @@ authors.workspace = true
66
homepage.workspace = true
77

88
[dependencies]
9-
thiserror = "^1.0"
10-
clap = { version = "^4.1", features = ["derive"], optional = true }
11-
nohash-hasher = "^0.2"
9+
thiserror = { workspace = true }
10+
clap = { workspace = true, optional = true }
11+
nohash-hasher = { workspace = true }
1212

1313
[features]
1414
clap = ["dep:clap"]

backends/src/lib.rs

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ use text_embeddings_backend_python::PythonBackend;
2222
#[derive(Debug, Clone)]
2323
pub struct Backend {
2424
/// Channel to communicate with the background thread
25-
backend_sender: mpsc::UnboundedSender<BackendCommand>,
25+
backend_sender: mpsc::Sender<BackendCommand>,
2626
/// Health status
2727
health_receiver: watch::Receiver<bool>,
2828
_backend_thread: Arc<BackendThread>,
@@ -40,7 +40,7 @@ impl Backend {
4040
otlp_endpoint: Option<String>,
4141
otlp_service_name: String,
4242
) -> Result<Self, BackendError> {
43-
let (backend_sender, backend_receiver) = mpsc::unbounded_channel();
43+
let (backend_sender, backend_receiver) = mpsc::channel(8);
4444

4545
let backend = init_backend(
4646
model_path,
@@ -76,6 +76,7 @@ impl Backend {
7676
let (sender, receiver) = oneshot::channel();
7777
self.backend_sender
7878
.send(BackendCommand::Health(Span::current(), sender))
79+
.await
7980
.expect("No backend receiver. This is a bug.");
8081
receiver.await.expect(
8182
"Backend blocking task dropped the sender without sending a response. This is a bug.",
@@ -110,7 +111,7 @@ impl Backend {
110111
let (sender, receiver) = oneshot::channel();
111112

112113
self.backend_sender
113-
.send(BackendCommand::Embed(batch, Span::current(), sender))
114+
.try_send(BackendCommand::Embed(batch, Span::current(), sender))
114115
.expect("No backend receiver. This is a bug.");
115116
receiver.await.expect(
116117
"Backend blocking task dropped the sender without send a response. This is a bug.",
@@ -122,7 +123,7 @@ impl Backend {
122123
let (sender, receiver) = oneshot::channel();
123124

124125
self.backend_sender
125-
.send(BackendCommand::Predict(batch, Span::current(), sender))
126+
.try_send(BackendCommand::Predict(batch, Span::current(), sender))
126127
.expect("No backend receiver. This is a bug.");
127128
receiver.await.expect(
128129
"Backend blocking task dropped the sender without send a response. This is a bug.",
@@ -174,7 +175,7 @@ struct BackendThread(Option<JoinHandle<()>>);
174175
impl BackendThread {
175176
fn new(
176177
backend: Box<dyn CoreBackend + Send>,
177-
mut backend_receiver: mpsc::UnboundedReceiver<BackendCommand>,
178+
mut backend_receiver: mpsc::Receiver<BackendCommand>,
178179
health_sender: watch::Sender<bool>,
179180
) -> Self {
180181
let handle = std::thread::spawn(move || {

core/Cargo.toml

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,11 @@ authors.workspace = true
66
homepage.workspace = true
77

88
[dependencies]
9-
hf-hub = { version = "^0.3.0", features = ["tokio"], default-features = false }
10-
metrics = "^0.21"
9+
async-channel = "^2.3"
10+
hf-hub = { workspace = true }
11+
metrics = { workspace = true }
1112
text-embeddings-backend = { path = "../backends" }
12-
thiserror = "^1.0"
13-
tokenizers = { version = "^0.19.1", default-features = false, features = ["onig", "esaxx_fast"] }
14-
tracing = "^0.1"
15-
tokio = { version = "^1.25", features = ["rt", "rt-multi-thread", "parking_lot", "sync"] }
13+
thiserror = { workspace = true }
14+
tokenizers = { workspace = true }
15+
tracing = { workspace = true }
16+
tokio = { workspace = true }

0 commit comments

Comments
 (0)