1
+ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
1
2
/*
2
3
* Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED.
3
4
* Copyright (c) 2019 Intel, Inc. All rights reserved.
4
5
* Copyright (c) 2019 Research Organization for Information Science
5
6
* and Technology (RIST). All rights reserved.
6
7
* Copyright (c) 2021 Triad National Security, LLC. All rights
7
8
* reserved.
9
+ * Copyright (c) 2022 Google, LLC. All rights reserved.
8
10
*
9
11
* $COPYRIGHT$
10
12
*
@@ -33,11 +35,11 @@ static int use_safety_valve = 0;
33
35
34
36
extern mca_base_framework_t opal_memory_base_framework ;
35
37
36
- opal_common_ucx_module_t opal_common_ucx = {. verbose = 0 ,
37
- . progress_iterations = 100 ,
38
- . registered = 0 ,
39
- .opal_mem_hooks = 1 ,
40
- . tls = NULL };
38
+ opal_common_ucx_module_t opal_common_ucx =
39
+ {
40
+ . progress_iterations = 100 ,
41
+ .opal_mem_hooks = 1 ,
42
+ };
41
43
42
44
static opal_mutex_t opal_common_ucx_mutex = OPAL_MUTEX_STATIC_INIT ;
43
45
@@ -48,87 +50,59 @@ static void opal_common_ucx_mem_release_cb(void *buf, size_t length, void *cbdat
48
50
49
51
OPAL_DECLSPEC void opal_common_ucx_mca_var_register (const mca_base_component_t * component )
50
52
{
51
- static const char * default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,ud_mlx5,cuda_ipc,rocm_ipc" ;
52
- static const char * default_devices = "mlx*" ;
53
- static int hook_index ;
54
- static int verbose_index ;
55
- static int progress_index ;
56
- static int tls_index ;
57
- static int devices_index ;
58
- int param ;
53
+ char * default_tls = "rc_verbs,ud_verbs,rc_mlx5,dc_mlx5,ud_mlx5,cuda_ipc,rocm_ipc" ;
54
+ char * default_devices = "mlx*" ;
55
+ int hook_index ;
56
+ int verbose_index ;
57
+ int progress_index ;
58
+ int tls_index ;
59
+ int devices_index ;
59
60
60
61
OPAL_THREAD_LOCK (& opal_common_ucx_mutex );
61
62
62
- param = mca_base_var_find ("opal" , "opal_common" , "ucx" , "verbose" );
63
- if (0 > param ) {
64
- verbose_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "verbose" ,
65
- "Verbose level of the UCX components" ,
66
- MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
67
- MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
68
- MCA_BASE_VAR_SCOPE_LOCAL , & opal_common_ucx .verbose );
69
- }
70
-
71
- param = mca_base_var_find ("opal" , "opal_common" , "ucx" , "progress_iterations" );
72
- if (0 > param ) {
73
- progress_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "progress_iterations" ,
74
- "Set number of calls of internal UCX progress "
75
- "calls per opal_progress call" ,
76
- MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
77
- MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
78
- MCA_BASE_VAR_SCOPE_LOCAL ,
79
- & opal_common_ucx .progress_iterations );
80
- }
81
-
82
- param = mca_base_var_find ("opal" , "opal_common" , "ucx" , "opal_mem_hooks" );
83
- if (0 > param ) {
84
- hook_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "opal_mem_hooks" ,
85
- "Use OPAL memory hooks, instead of UCX internal "
86
- "memory hooks" ,
87
- MCA_BASE_VAR_TYPE_BOOL , NULL , 0 , 0 , OPAL_INFO_LVL_3 ,
63
+ /* It is harmless to re-register variables so go ahead an re-register. */
64
+ verbose_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "verbose" ,
65
+ "Verbose level of the UCX components" ,
66
+ MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
67
+ MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
68
+ MCA_BASE_VAR_SCOPE_LOCAL , & opal_common_ucx .verbose );
69
+ progress_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "progress_iterations" ,
70
+ "Set number of calls of internal UCX progress "
71
+ "calls per opal_progress call" ,
72
+ MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
73
+ MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
88
74
MCA_BASE_VAR_SCOPE_LOCAL ,
89
- & opal_common_ucx .opal_mem_hooks );
75
+ & opal_common_ucx .progress_iterations );
76
+ hook_index = mca_base_var_register ("opal" , "opal_common" , "ucx" , "opal_mem_hooks" ,
77
+ "Use OPAL memory hooks, instead of UCX internal "
78
+ "memory hooks" ,
79
+ MCA_BASE_VAR_TYPE_BOOL , NULL , 0 , 0 , OPAL_INFO_LVL_3 ,
80
+ MCA_BASE_VAR_SCOPE_LOCAL ,
81
+ & opal_common_ucx .opal_mem_hooks );
82
+
83
+ if (NULL == opal_common_ucx .tls ) {
84
+ opal_common_ucx .tls = default_tls ;
90
85
}
91
86
92
- param = mca_base_var_find ("opal" , "opal_common" , "ucx" , "tls" );
93
- if (0 > param ) {
94
-
95
- /*
96
- * this monkey business is needed because of the way the MCA VARs framework tries to handle pointers to strings
97
- * when destructing the MCA var database. If you don't do something like this,the MCA var framework will try
98
- * to dereference a pointer which itself is no longer a valid address owing to having been previously dlclosed.
99
- * Same for the devices pointer below.
100
- */
101
- if (NULL == opal_common_ucx .tls ) {
102
- opal_common_ucx .tls = malloc (sizeof (* opal_common_ucx .tls ));
103
- assert (NULL != opal_common_ucx .tls );
104
- }
105
- * opal_common_ucx .tls = strdup (default_tls );
106
- tls_index = mca_base_var_register (
107
- "opal" , "opal_common" , "ucx" , "tls" ,
108
- "List of UCX transports which should be supported on the system, to enable "
109
- "selecting the UCX component. Special values: any (any available). "
110
- "A '^' prefix negates the list. "
111
- "For example, in order to exclude on shared memory and TCP transports, "
112
- "please set to '^posix,sysv,self,tcp,cma,knem,xpmem'." ,
113
- MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 , OPAL_INFO_LVL_3 , MCA_BASE_VAR_SCOPE_LOCAL ,
114
- opal_common_ucx .tls );
115
- }
116
-
117
- param = mca_base_var_find ("opal" , "opal_common" , "ucx" , "devices" );
118
- if (0 > param ) {
119
-
120
- if (NULL == opal_common_ucx .devices ) {
121
- opal_common_ucx .devices = malloc (sizeof (* opal_common_ucx .devices ));
122
- assert (NULL != opal_common_ucx .devices );
123
- }
124
- * opal_common_ucx .devices = strdup (default_devices );
125
- devices_index = mca_base_var_register (
126
- "opal" , "opal_common" , "ucx" , "devices" ,
127
- "List of device driver pattern names, which, if supported by UCX, will "
128
- "bump its priority above ob1. Special values: any (any available)" ,
129
- MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 , OPAL_INFO_LVL_3 , MCA_BASE_VAR_SCOPE_LOCAL ,
130
- opal_common_ucx .devices );
87
+ tls_index = mca_base_var_register (
88
+ "opal" , "opal_common" , "ucx" , "tls" ,
89
+ "List of UCX transports which should be supported on the system, to enable "
90
+ "selecting the UCX component. Special values: any (any available). "
91
+ "A '^' prefix negates the list. "
92
+ "For example, in order to exclude on shared memory and TCP transports, "
93
+ "please set to '^posix,sysv,self,tcp,cma,knem,xpmem'." ,
94
+ MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 , OPAL_INFO_LVL_3 , MCA_BASE_VAR_SCOPE_LOCAL ,
95
+ & opal_common_ucx .tls );
96
+
97
+ if (NULL == opal_common_ucx .devices ) {
98
+ opal_common_ucx .devices = default_devices ;
131
99
}
100
+ devices_index = mca_base_var_register (
101
+ "opal" , "opal_common" , "ucx" , "devices" ,
102
+ "List of device driver pattern names, which, if supported by UCX, will "
103
+ "bump its priority above ob1. Special values: any (any available)" ,
104
+ MCA_BASE_VAR_TYPE_STRING , NULL , 0 , 0 , OPAL_INFO_LVL_3 , MCA_BASE_VAR_SCOPE_LOCAL ,
105
+ & opal_common_ucx .devices );
132
106
133
107
if (component ) {
134
108
mca_base_var_register_synonym (verbose_index , component -> mca_project_name ,
@@ -261,8 +235,8 @@ OPAL_DECLSPEC opal_common_ucx_support_level_t opal_common_ucx_support_level(ucp_
261
235
int ret ;
262
236
#endif
263
237
264
- is_any_tl = !strcmp (* opal_common_ucx .tls , "any" );
265
- is_any_device = !strcmp (* opal_common_ucx .devices , "any" );
238
+ is_any_tl = !strcmp (opal_common_ucx .tls , "any" );
239
+ is_any_device = !strcmp (opal_common_ucx .devices , "any" );
266
240
267
241
/* Check for special value "any" */
268
242
if (is_any_tl && is_any_device ) {
@@ -273,19 +247,19 @@ OPAL_DECLSPEC opal_common_ucx_support_level_t opal_common_ucx_support_level(ucp_
273
247
274
248
#if HAVE_DECL_OPEN_MEMSTREAM
275
249
/* Split transports list */
276
- negate = ('^' == (* opal_common_ucx .tls )[0 ]);
277
- tl_list = opal_argv_split (* opal_common_ucx .tls + (negate ? 1 : 0 ), ',' );
250
+ negate = ('^' == (opal_common_ucx .tls )[0 ]);
251
+ tl_list = opal_argv_split (opal_common_ucx .tls + (negate ? 1 : 0 ), ',' );
278
252
if (tl_list == NULL ) {
279
253
MCA_COMMON_UCX_VERBOSE (1 , "failed to split tl list '%s', ucx is disabled" ,
280
- * opal_common_ucx .tls );
254
+ opal_common_ucx .tls );
281
255
goto out ;
282
256
}
283
257
284
258
/* Split devices list */
285
- device_list = opal_argv_split (* opal_common_ucx .devices , ',' );
259
+ device_list = opal_argv_split (opal_common_ucx .devices , ',' );
286
260
if (device_list == NULL ) {
287
261
MCA_COMMON_UCX_VERBOSE (1 , "failed to split devices list '%s', ucx is disabled" ,
288
- * opal_common_ucx .devices );
262
+ opal_common_ucx .devices );
289
263
goto out_free_tl_list ;
290
264
}
291
265
0 commit comments