Skip to content

Commit f43958b

Browse files
shiltiantstellar
authored andcommitted
[OpenMP] Fixed a crash in hidden helper thread
It is reported that after enabling hidden helper thread, the program can hit the assertion `new_gtid < __kmp_threads_capacity` sometimes. The root cause is explained as follows. Let's say the default `__kmp_threads_capacity` is `N`. If hidden helper thread is enabled, `__kmp_threads_capacity` will be offset to `N+8` by default. If the number of threads we need exceeds `N+8`, e.g. via `num_threads` clause, we need to expand `__kmp_threads`. In `__kmp_expand_threads`, the expansion starts from `__kmp_threads_capacity`, and repeatedly doubling it until the new capacity meets the requirement. Let's assume the new requirement is `Y`. If `Y` happens to meet the constraint `(N+8)*2^X=Y` where `X` is the number of iterations, the new capacity is not enough because we have 8 slots for hidden helper threads. Here is an example. ``` #include <vector> int main(int argc, char *argv[]) { constexpr const size_t N = 1344; std::vector<int> data(N); #pragma omp parallel for for (unsigned i = 0; i < N; ++i) { data[i] = i; } #pragma omp parallel for num_threads(N) for (unsigned i = 0; i < N; ++i) { data[i] += i; } return 0; } ``` My CPU is 20C40T, then `__kmp_threads_capacity` is 160. After offset, `__kmp_threads_capacity` becomes 168. `1344 = (160+8)*2^3`, then the assertions hit. Reviewed By: protze.joachim Differential Revision: https://reviews.llvm.org/D98838 (cherry picked from commit 2df65f8)
1 parent e94372d commit f43958b

File tree

4 files changed

+94
-4
lines changed

4 files changed

+94
-4
lines changed

openmp/runtime/src/kmp_runtime.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
920920
if (TCR_PTR(__kmp_threads[0]) == NULL) {
921921
--capacity;
922922
}
923+
// If it is not for initializing the hidden helper team, we need to take
924+
// __kmp_hidden_helper_threads_num out of the capacity because it is included
925+
// in __kmp_threads_capacity.
926+
if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
927+
capacity -= __kmp_hidden_helper_threads_num;
928+
}
923929
if (__kmp_nth + new_nthreads -
924930
(root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
925931
capacity) {
@@ -3632,6 +3638,13 @@ int __kmp_register_root(int initial_thread) {
36323638
--capacity;
36333639
}
36343640

3641+
// If it is not for initializing the hidden helper team, we need to take
3642+
// __kmp_hidden_helper_threads_num out of the capacity because it is included
3643+
// in __kmp_threads_capacity.
3644+
if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3645+
capacity -= __kmp_hidden_helper_threads_num;
3646+
}
3647+
36353648
/* see if there are too many threads */
36363649
if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
36373650
if (__kmp_tp_cached) {
@@ -3664,7 +3677,7 @@ int __kmp_register_root(int initial_thread) {
36643677
/* find an available thread slot */
36653678
// Don't reassign the zero slot since we need that to only be used by
36663679
// initial thread. Slots for hidden helper threads should also be skipped.
3667-
if (initial_thread && __kmp_threads[0] == NULL) {
3680+
if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
36683681
gtid = 0;
36693682
} else {
36703683
for (gtid = __kmp_hidden_helper_threads_num + 1;

openmp/runtime/src/kmp_settings.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -504,9 +504,10 @@ int __kmp_initial_threads_capacity(int req_nproc) {
504504
nth = (4 * __kmp_xproc);
505505

506506
// If hidden helper task is enabled, we initialize the thread capacity with
507-
// extra
508-
// __kmp_hidden_helper_threads_num.
509-
nth += __kmp_hidden_helper_threads_num;
507+
// extra __kmp_hidden_helper_threads_num.
508+
if (__kmp_enable_hidden_helper) {
509+
nth += __kmp_hidden_helper_threads_num;
510+
}
510511

511512
if (nth > __kmp_max_nth)
512513
nth = __kmp_max_nth;
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// RUN: %libomp-cxx-compile-and-run
2+
3+
#include <omp.h>
4+
5+
#include <algorithm>
6+
#include <cassert>
7+
#include <chrono>
8+
#include <thread>
9+
#include <vector>
10+
11+
void dummy_root() {
12+
// omp_get_max_threads() will do middle initialization
13+
int nthreads = omp_get_max_threads();
14+
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
15+
}
16+
17+
int main(int argc, char *argv[]) {
18+
const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
19+
4 * omp_get_num_procs()),
20+
std::numeric_limits<int>::max());
21+
22+
std::vector<int> data(N);
23+
24+
// Create a new thread to initialize the OpenMP RTL. The new thread will not
25+
// be taken as the "initial thread".
26+
std::thread root(dummy_root);
27+
28+
#pragma omp parallel for num_threads(N)
29+
for (unsigned i = 0; i < N; ++i) {
30+
data[i] = i;
31+
}
32+
33+
#pragma omp parallel for num_threads(N + 1)
34+
for (unsigned i = 0; i < N; ++i) {
35+
data[i] += i;
36+
}
37+
38+
for (unsigned i = 0; i < N; ++i) {
39+
assert(data[i] == 2 * i);
40+
}
41+
42+
root.join();
43+
44+
return 0;
45+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: %libomp-cxx-compile-and-run
2+
3+
#include <omp.h>
4+
5+
#include <algorithm>
6+
#include <cassert>
7+
#include <vector>
8+
9+
int main(int argc, char *argv[]) {
10+
const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
11+
4 * omp_get_num_procs()),
12+
std::numeric_limits<int>::max());
13+
14+
std::vector<int> data(N);
15+
16+
#pragma omp parallel for num_threads(N)
17+
for (unsigned i = 0; i < N; ++i) {
18+
data[i] = i;
19+
}
20+
21+
#pragma omp parallel for num_threads(N + 1)
22+
for (unsigned i = 0; i < N; ++i) {
23+
data[i] += i;
24+
}
25+
26+
for (unsigned i = 0; i < N; ++i) {
27+
assert(data[i] == 2 * i);
28+
}
29+
30+
return 0;
31+
}

0 commit comments

Comments
 (0)