Skip to content

Commit 31a4e21

Browse files
feat: get n tokenization workers from the number of CPUs
1 parent 5c06e62 commit 31a4e21

File tree

5 files changed

+17
-8
lines changed

5 files changed

+17
-8
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/src/tokenization.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ impl Tokenization {
1919
max_input_length: usize,
2020
position_offset: usize,
2121
) -> Self {
22+
tracing::info!("Starting {workers} tokenization workers");
23+
2224
// Create channel
2325
let (sender, receiver) = flume::unbounded();
2426

load_tests/load.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ export const options = {
2626
load_test: {
2727
executor: 'constant-arrival-rate',
2828
duration: '30s',
29-
preAllocatedVUs: 2000,
30-
rate: 500,
29+
preAllocatedVUs: 10000,
30+
rate: 9000,
3131
timeUnit: '1s',
3232
gracefulStop: '1s',
3333
},

router/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ futures = "^0.3"
2626
flume = "0.10.14"
2727
init-tracing-opentelemetry = { version = "0.14.1", features = ["opentelemetry-otlp"] }
2828
hf-hub = { version = "0.3.0", features = ["tokio"] }
29+
num_cpus = "1.16.0"
2930
metrics = "0.21.0"
3031
metrics-exporter-prometheus = { version = "0.12.1", features = [] }
3132
opentelemetry = { version = "0.19.0", features = ["rt-tokio"] }

router/src/main.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,11 @@ struct Args {
4141
#[clap(long, env)]
4242
revision: Option<String>,
4343

44-
/// The number of tokenizer workers used for payload validation and truncation inside the
45-
/// router.
46-
#[clap(default_value = "8", long, env)]
47-
tokenization_workers: usize,
44+
/// Optionally control the number of tokenizer workers used for payload tokenization, validation
45+
/// and truncation.
46+
/// Default to the number of CPU cores on the machine.
47+
#[clap(long, env)]
48+
tokenization_workers: Option<usize>,
4849

4950
/// The dtype to be forced upon the model.
5051
#[clap(default_value = "float16", long, env, value_enum)]
@@ -175,9 +176,13 @@ async fn main() -> Result<()> {
175176
config.pad_token_id + 1
176177
};
177178

179+
let tokenization_workers = args
180+
.tokenization_workers
181+
.unwrap_or_else(num_cpus::get_physical);
182+
178183
// Tokenization logic
179184
let tokenization = Tokenization::new(
180-
args.tokenization_workers,
185+
tokenization_workers,
181186
tokenizer,
182187
config.max_position_embeddings,
183188
position_offset,
@@ -217,7 +222,7 @@ async fn main() -> Result<()> {
217222
max_concurrent_requests: args.max_concurrent_requests,
218223
max_input_length: config.max_position_embeddings,
219224
max_batch_tokens: args.max_batch_tokens,
220-
tokenization_workers: args.tokenization_workers,
225+
tokenization_workers,
221226
max_batch_requests,
222227
max_client_batch_size: args.max_client_batch_size,
223228
version: env!("CARGO_PKG_VERSION"),

0 commit comments

Comments (0)