Skip to content

Commit f2e6d78

Browse files
authored
Merge pull request #5975 from ICLDisco/export/pmix-fini-threadinterlock
Avoid a double lock interlock when calling pmix_finalize
2 parents 6b6b153 + 50cf707 commit f2e6d78

File tree

6 files changed: 84 additions (+84) and 13 deletions (-13)

opal/mca/pmix/ext2x/ext2x_client.c

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,9 @@
88
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
99
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
1010
* reserved.
11+
* Copyright (c) 2018 The University of Tennessee and The University
12+
* of Tennessee Research Foundation. All rights
13+
* reserved.
1114
* $COPYRIGHT$
1215
*
1316
* Additional copyrights may follow
@@ -166,6 +169,8 @@ int ext2x_client_finalize(void)
166169
{
167170
pmix_status_t rc;
168171
opal_ext2x_event_t *event, *ev2;
172+
opal_list_t evlist;
173+
OBJ_CONSTRUCT(&evlist, opal_list_t);
169174

170175
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
171176
"PMIx_client finalize");
@@ -179,12 +184,19 @@ int ext2x_client_finalize(void)
179184
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
180185
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
181186
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
182-
OPAL_PMIX_WAIT_THREAD(&event->lock);
183187
opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super);
184-
OBJ_RELEASE(event);
188+
/* wait and release outside the loop to avoid double mutex
189+
* interlock */
190+
opal_list_append(&evlist, &event->super);
185191
}
186192
}
187193
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
194+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_ext2x_event_t) {
195+
OPAL_PMIX_WAIT_THREAD(&event->lock);
196+
opal_list_remove_item(&evlist, &event->super);
197+
OBJ_RELEASE(event);
198+
}
199+
OBJ_DESTRUCT(&evlist);
188200
rc = PMIx_Finalize(NULL, 0);
189201

190202
return ext2x_convert_rc(rc);

opal/mca/pmix/ext2x/ext2x_server_south.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
1010
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
1111
* reserved.
12+
* Copyright (c) 2018 The University of Tennessee and The University
13+
* of Tennessee Research Foundation. All rights
14+
* reserved.
1215
* $COPYRIGHT$
1316
*
1417
* Additional copyrights may follow
@@ -181,6 +184,8 @@ int ext2x_server_finalize(void)
181184
{
182185
pmix_status_t rc;
183186
opal_ext2x_event_t *event, *ev2;
187+
opal_list_t evlist;
188+
OBJ_CONSTRUCT(&evlist, opal_list_t);
184189

185190
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
186191
--opal_pmix_base.initialized;
@@ -191,13 +196,19 @@ int ext2x_server_finalize(void)
191196
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
192197
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
193198
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
194-
OPAL_PMIX_WAIT_THREAD(&event->lock);
195199
opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super);
196-
OBJ_RELEASE(event);
200+
/* wait and release outside the loop to avoid double mutex
201+
* interlock */
202+
opal_list_append(&evlist, &event->super);
197203
}
198204
}
199205
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
200-
206+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_ext2x_event_t) {
207+
OPAL_PMIX_WAIT_THREAD(&event->lock);
208+
opal_list_remove_item(&evlist, &event->super);
209+
OBJ_RELEASE(event);
210+
}
211+
OBJ_DESTRUCT(&evlist);
201212
rc = PMIx_server_finalize();
202213
return ext2x_convert_rc(rc);
203214
}

opal/mca/pmix/ext3x/ext3x_client.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
99
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
1010
* reserved.
11+
* Copyright (c) 2018 The University of Tennessee and The University
12+
* of Tennessee Research Foundation. All rights
13+
* reserved.
1114
* $COPYRIGHT$
1215
*
1316
* Additional copyrights may follow
@@ -170,6 +173,8 @@ int ext3x_client_finalize(void)
170173
{
171174
pmix_status_t rc;
172175
opal_ext3x_event_t *event, *ev2;
176+
opal_list_t evlist;
177+
OBJ_CONSTRUCT(&evlist, opal_list_t);
173178

174179
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
175180
"PMIx_client finalize");
@@ -183,12 +188,19 @@ int ext3x_client_finalize(void)
183188
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
184189
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
185190
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
186-
OPAL_PMIX_WAIT_THREAD(&event->lock);
187191
opal_list_remove_item(&mca_pmix_ext3x_component.events, &event->super);
188-
OBJ_RELEASE(event);
192+
/* wait and release outside the loop to avoid double mutex
193+
* interlock */
194+
opal_list_append(&evlist, &event->super);
189195
}
190196
}
191197
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
198+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_ext3x_event_t) {
199+
OPAL_PMIX_WAIT_THREAD(&event->lock);
200+
opal_list_remove_item(&evlist, &event->super);
201+
OBJ_RELEASE(event);
202+
}
203+
OBJ_DESTRUCT(&evlist);
192204
rc = PMIx_Finalize(NULL, 0);
193205

194206
return ext3x_convert_rc(rc);

opal/mca/pmix/ext3x/ext3x_server_south.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
1010
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
1111
* reserved.
12+
* Copyright (c) 2018 The University of Tennessee and The University
13+
* of Tennessee Research Foundation. All rights
14+
* reserved.
1215
* $COPYRIGHT$
1316
*
1417
* Additional copyrights may follow
@@ -187,6 +190,8 @@ int ext3x_server_finalize(void)
187190
{
188191
pmix_status_t rc;
189192
opal_ext3x_event_t *event, *ev2;
193+
opal_list_t evlist;
194+
OBJ_CONSTRUCT(&evlist, opal_list_t);
190195

191196
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
192197
--opal_pmix_base.initialized;
@@ -197,12 +202,19 @@ int ext3x_server_finalize(void)
197202
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
198203
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
199204
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
200-
OPAL_PMIX_WAIT_THREAD(&event->lock);
201205
opal_list_remove_item(&mca_pmix_ext3x_component.events, &event->super);
202-
OBJ_RELEASE(event);
206+
/* wait and release outside the loop to avoid double mutex
207+
* interlock */
208+
opal_list_append(&evlist, &event->super);
203209
}
204210
}
205211
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
212+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_ext3x_event_t) {
213+
OPAL_PMIX_WAIT_THREAD(&event->lock);
214+
opal_list_remove_item(&evlist, &event->super);
215+
OBJ_RELEASE(event);
216+
}
217+
OBJ_DESTRUCT(&evlist);
206218
rc = PMIx_server_finalize();
207219
return ext3x_convert_rc(rc);
208220
}

opal/mca/pmix/pmix4x/pmix4x_client.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
99
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
1010
* reserved.
11+
* Copyright (c) 2017-2018 The University of Tennessee and The University
12+
* of Tennessee Research Foundation. All rights
13+
* reserved.
1114
* $COPYRIGHT$
1215
*
1316
* Additional copyrights may follow
@@ -169,6 +172,8 @@ int pmix4x_client_finalize(void)
169172
{
170173
pmix_status_t rc;
171174
opal_pmix4x_event_t *event, *ev2;
175+
opal_list_t evlist;
176+
OBJ_CONSTRUCT(&evlist, opal_list_t);
172177

173178
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
174179
"PMIx_client finalize");
@@ -182,12 +187,19 @@ int pmix4x_client_finalize(void)
182187
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
183188
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
184189
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
185-
OPAL_PMIX_WAIT_THREAD(&event->lock);
186190
opal_list_remove_item(&mca_pmix_pmix4x_component.events, &event->super);
187-
OBJ_RELEASE(event);
191+
/* wait and release outside the loop to avoid double mutex
192+
* interlock */
193+
opal_list_append(&evlist, &event->super);
188194
}
189195
}
190196
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
197+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_pmix4x_event_t) {
198+
OPAL_PMIX_WAIT_THREAD(&event->lock);
199+
opal_list_remove_item(&evlist, &event->super);
200+
OBJ_RELEASE(event);
201+
}
202+
OBJ_DESTRUCT(&evlist);
191203
rc = PMIx_Finalize(NULL, 0);
192204

193205
return pmix4x_convert_rc(rc);

opal/mca/pmix/pmix4x/pmix4x_server_south.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
1010
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
1111
* reserved.
12+
* Copyright (c) 2017-2018 The University of Tennessee and The University
13+
* of Tennessee Research Foundation. All rights
14+
* reserved.
1215
* $COPYRIGHT$
1316
*
1417
* Additional copyrights may follow
@@ -186,6 +189,8 @@ int pmix4x_server_finalize(void)
186189
{
187190
pmix_status_t rc;
188191
opal_pmix4x_event_t *event, *ev2;
192+
opal_list_t evlist;
193+
OBJ_CONSTRUCT(&evlist, opal_list_t);
189194

190195
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
191196
--opal_pmix_base.initialized;
@@ -196,12 +201,19 @@ int pmix4x_server_finalize(void)
196201
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
197202
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
198203
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
199-
OPAL_PMIX_WAIT_THREAD(&event->lock);
200204
opal_list_remove_item(&mca_pmix_pmix4x_component.events, &event->super);
201-
OBJ_RELEASE(event);
205+
/* wait and release outside the loop to avoid double mutex
206+
* interlock */
207+
opal_list_append(&evlist, &event->super);
202208
}
203209
}
204210
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
211+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_pmix4x_event_t) {
212+
OPAL_PMIX_WAIT_THREAD(&event->lock);
213+
opal_list_remove_item(&evlist, &event->super);
214+
OBJ_RELEASE(event);
215+
}
216+
OBJ_DESTRUCT(&evlist);
205217
rc = PMIx_server_finalize();
206218
return pmix4x_convert_rc(rc);
207219
}

0 commit comments

Comments
 (0)