Commit e91834d

Reduce default model load thread count to 4 (#193)
1 parent 6a77ef5 · commit e91834d

2 files changed: +2, -5 lines

src/server.cc

Lines changed: 1 addition & 2 deletions
@@ -104,8 +104,7 @@ InferenceServer::InferenceServer()
   exit_timeout_secs_ = 30;
   pinned_memory_pool_size_ = 1 << 28;
   buffer_manager_thread_count_ = 0;
-  model_load_thread_count_ =
-      std::max(2u, 2 * std::thread::hardware_concurrency());
+  model_load_thread_count_ = 4;
   enable_model_namespacing_ = false;
 
 #ifdef TRITON_ENABLE_GPU

src/tritonserver.cc

Lines changed: 1 addition & 3 deletions
@@ -367,9 +367,7 @@ TritonServerOptions::TritonServerOptions()
       rate_limit_mode_(tc::RateLimitMode::RL_OFF), metrics_(true),
       gpu_metrics_(true), cpu_metrics_(true), metrics_interval_(2000),
       exit_timeout_(30), pinned_memory_pool_size_(1 << 28),
-      buffer_manager_thread_count_(0),
-      model_load_thread_count_(
-          std::max(2u, 2 * std::thread::hardware_concurrency())),
+      buffer_manager_thread_count_(0), model_load_thread_count_(4),
       enable_model_namespacing_(false),
 #ifdef TRITON_ENABLE_GPU
       min_compute_capability_(TRITON_MIN_COMPUTE_CAPABILITY),
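
Note that the change only lowers the built-in default; a deployment that relied on the old concurrency-based scaling can still set the count explicitly when building server options. The following is a minimal sketch using the in-process C API, assuming the TRITONSERVER_ServerOptionsSetModelLoadThreadCount setter in triton/core/tritonserver.h is the public counterpart of the model_load_thread_count_ member touched above; "/models" is a placeholder repository path and error handling is omitted for brevity.

#include <algorithm>
#include <thread>

#include "triton/core/tritonserver.h"

int main()
{
  TRITONSERVER_ServerOptions* options = nullptr;
  TRITONSERVER_ServerOptionsNew(&options);

  // Placeholder model repository path (assumption for this sketch).
  TRITONSERVER_ServerOptionsSetModelRepositoryPath(options, "/models");

  // Recreate the pre-change default: 2x hardware concurrency, at least 2.
  const unsigned int threads =
      std::max(2u, 2 * std::thread::hardware_concurrency());
  TRITONSERVER_ServerOptionsSetModelLoadThreadCount(options, threads);

  TRITONSERVER_Server* server = nullptr;
  TRITONSERVER_ServerNew(&server, options);
  TRITONSERVER_ServerOptionsDelete(options);

  // ... run inference requests against `server` ...

  TRITONSERVER_ServerStop(server);
  TRITONSERVER_ServerDelete(server);
  return 0;
}

Frontends built on this API typically surface the same setting as a command-line option, so the lower default only applies when nothing is specified explicitly.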
