// Chooses how many worker threads to use for a tensor with `ne` elements.
//
// The count is the smaller of two limits:
//   - half of std::thread::hardware_concurrency(), but at least 1
//     (hardware_concurrency() may return 0 when the value is not computable);
//   - one thread per started chunk of 1024 elements, so that small tensors
//     do not get more threads than they have work for.
//
// The result is always >= 1. For ne <= 0 there is no work to split, so a
// single thread is reported instead of 0 (or a wrapped-around huge value).
//
// NOTE: the result depends on `ne`. Storing it in a function-local `static`
// freezes the value computed for the first tensor that reaches that code.
static size_t get_n_threads(const int64_t ne) {
    constexpr uint64_t elements_per_thread = 1024;

    const size_t max_threads_hw = std::max<size_t>(std::thread::hardware_concurrency() / 2, 1);

    if (ne <= 0) {
        return 1;
    }

    // ceil(ne / elements_per_thread), written so that it cannot overflow even
    // when ne is close to INT64_MAX (ne + 1023 would be signed overflow).
    const uint64_t max_threads_ne = static_cast<uint64_t>(ne - 1) / elements_per_thread + 1;

    return static_cast<size_t>(std::min<uint64_t>(max_threads_hw, max_threads_ne));
}