Skip to content

Commit 0455d39

Browse files
committed
OFI/common: fixes for issue 8860
This patch fixes ofi common so that it works correctly in cases where the mca var base for ofi components (btl, mtl) is loaded/unloaded multiple times. Related to issue #8860. Signed-off-by: Howard Pritchard <hppritcha@gmail.com>
1 parent 0bd8ccc commit 0455d39

File tree

1 file changed

+27
-7
lines changed

1 file changed

+27
-7
lines changed

opal/mca/common/ofi/common_ofi.c

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2015-2020 Intel, Inc. All rights reserved.
33
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
44
* reserved.
5-
* Copyright (c) 2020 Triad National Security, LLC. All rights
5+
* Copyright (c) 2020-2021 Triad National Security, LLC. All rights
66
* reserved.
77
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
88
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
@@ -32,6 +32,7 @@ OPAL_DECLSPEC opal_common_ofi_module_t opal_common_ofi = {.prov_include = NULL,
3232
.verbose = 0};
3333

3434
static const char default_prov_exclude_list[] = "shm,sockets,tcp,udp,rstream";
35+
static opal_mutex_t opal_common_ofi_mutex = OPAL_MUTEX_STATIC_INIT;
3536

3637
OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item)
3738
{
@@ -54,23 +55,29 @@ OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item)
5455

5556
OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_component_t *component)
5657
{
57-
static int registered = 0;
5858
static int include_index;
5959
static int exclude_index;
6060
static int verbose_index;
61+
int param;
6162

6263
if (fi_version() < FI_VERSION(1, 0)) {
6364
return OPAL_ERROR;
6465
}
6566

66-
if (!registered) {
67+
OPAL_THREAD_LOCK(&opal_common_ofi_mutex);
68+
69+
param = mca_base_var_find("opal", "opal_common", "ofi", "provider_incude");
70+
if (0 > param) {
6771
/*
6872
* this monkey business is needed because of the way the MCA VARs stuff tries to handle
6973
* pointers to strings when destructing the MCA var database. If you don't do
7074
* something like this, the MCA var framework will try to dereference a pointer which itself
7175
* is no longer a valid address owing to having been previously dlclosed.
7276
*/
73-
opal_common_ofi.prov_include = (char **) malloc(sizeof(char *));
77+
if (NULL == opal_common_ofi.prov_include) {
78+
opal_common_ofi.prov_include = (char **) malloc(sizeof(char *));
79+
assert(NULL != opal_common_ofi.prov_include);
80+
}
7481
*opal_common_ofi.prov_include = NULL;
7582
include_index = mca_base_var_register(
7683
"opal", "opal_common", "ofi", "provider_include",
@@ -79,7 +86,14 @@ OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_componen
7986
"exclusive with mtl_ofi_provider_exclude.",
8087
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_1, MCA_BASE_VAR_SCOPE_READONLY,
8188
opal_common_ofi.prov_include);
82-
opal_common_ofi.prov_exclude = (char **) malloc(sizeof(char *));
89+
}
90+
91+
param = mca_base_var_find("opal", "opal_common", "ofi", "provider_exclude");
92+
if (0 > param) {
93+
if (NULL == opal_common_ofi.prov_exclude) {
94+
opal_common_ofi.prov_exclude = (char **) malloc(sizeof(char *));
95+
assert(NULL != opal_common_ofi.prov_exclude);
96+
}
8397
*opal_common_ofi.prov_exclude = strdup(default_prov_exclude_list);
8498
exclude_index = mca_base_var_register(
8599
"opal", "opal_common", "ofi", "provider_exclude",
@@ -88,12 +102,16 @@ OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_componen
88102
"exclusive with mtl_ofi_provider_include.",
89103
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_1, MCA_BASE_VAR_SCOPE_READONLY,
90104
opal_common_ofi.prov_exclude);
105+
}
106+
107+
param = mca_base_var_find("opal", "opal_common", "ofi", "verbose");
108+
if (0 > param) {
91109
verbose_index = mca_base_var_register("opal", "opal_common", "ofi", "verbose",
92110
"Verbose level of the OFI components",
93111
MCA_BASE_VAR_TYPE_INT, NULL, 0,
94112
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
95-
MCA_BASE_VAR_SCOPE_LOCAL, &opal_common_ofi.verbose);
96-
registered = 1;
113+
MCA_BASE_VAR_SCOPE_LOCAL,
114+
&opal_common_ofi.verbose);
97115
}
98116

99117
if (component) {
@@ -108,6 +126,8 @@ OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_componen
108126
"verbose", 0);
109127
}
110128

129+
OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex);
130+
111131
return OPAL_SUCCESS;
112132
}
113133

0 commit comments

Comments
 (0)