Skip to content

Commit e572aee

Browse files
bosilca authored and
jsquyres committed
Use module names in HAN.
Improve the module selection for the up and low collective modules to allow the more user-friendly use of a module name in addition to a module number. This is a partial fix for #10438. Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
1 parent 26986f0 commit e572aee

File tree

2 files changed

+135
-61
lines changed

2 files changed

+135
-61
lines changed

ompi/mca/coll/han/coll_han.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,27 @@ struct mca_coll_han_allgather_s {
169169
};
170170
typedef struct mca_coll_han_allgather_s mca_coll_han_allgather_t;
171171

172+
typedef struct mca_coll_han_op_up_low_module_name_t {
173+
char* han_op_up_module_name;
174+
char* han_op_low_module_name;
175+
} mca_coll_han_op_up_low_module_name_t;
176+
177+
/**
178+
* The only reason we need to keep these around is because our MCA system does
179+
* not support MCA variables that do not point to existing variables (aka. where
180+
* mbv_storage does not exist until the completion of the application). Thus,
181+
* we need to keep track of the storage for all variables, even the ones we
182+
* only use to translate into a string.
183+
*/
184+
typedef struct mca_coll_han_op_module_name_t {
185+
mca_coll_han_op_up_low_module_name_t bcast;
186+
mca_coll_han_op_up_low_module_name_t reduce;
187+
mca_coll_han_op_up_low_module_name_t allreduce;
188+
mca_coll_han_op_up_low_module_name_t allgather;
189+
mca_coll_han_op_up_low_module_name_t gather;
190+
mca_coll_han_op_up_low_module_name_t scatter;
191+
} mca_coll_han_op_module_name_t;
192+
172193
/**
173194
* Structure to hold the han coll component. First it holds the
174195
* base coll component, and then holds a bunch of
@@ -213,6 +234,8 @@ typedef struct mca_coll_han_component_t {
213234
uint32_t han_scatter_up_module;
214235
/* low level module for scatter */
215236
uint32_t han_scatter_low_module;
237+
/* name of the modules */
238+
mca_coll_han_op_module_name_t han_op_module_name;
216239
/* whether we need reproducible results
217240
* (but disables topological optimisations)
218241
*/

ompi/mca/coll/han/coll_han_component.c

Lines changed: 112 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,43 @@ static int han_open(void)
101101
return mca_coll_han_init_dynamic_rules();
102102
}
103103

104-
105104
/*
106105
* Shut down the component
107106
*/
108107
static int han_close(void)
109108
{
110109
mca_coll_han_free_dynamic_rules();
110+
111+
free(mca_coll_han_component.han_op_module_name.bcast.han_op_up_module_name);
112+
mca_coll_han_component.han_op_module_name.bcast.han_op_up_module_name = NULL;
113+
free(mca_coll_han_component.han_op_module_name.bcast.han_op_low_module_name);
114+
mca_coll_han_component.han_op_module_name.bcast.han_op_low_module_name = NULL;
115+
116+
free(mca_coll_han_component.han_op_module_name.reduce.han_op_up_module_name);
117+
mca_coll_han_component.han_op_module_name.reduce.han_op_up_module_name = NULL;
118+
free(mca_coll_han_component.han_op_module_name.reduce.han_op_low_module_name);
119+
mca_coll_han_component.han_op_module_name.reduce.han_op_low_module_name = NULL;
120+
121+
free(mca_coll_han_component.han_op_module_name.allreduce.han_op_up_module_name);
122+
mca_coll_han_component.han_op_module_name.allreduce.han_op_up_module_name = NULL;
123+
free(mca_coll_han_component.han_op_module_name.allreduce.han_op_low_module_name);
124+
mca_coll_han_component.han_op_module_name.allreduce.han_op_low_module_name = NULL;
125+
126+
free(mca_coll_han_component.han_op_module_name.allgather.han_op_up_module_name);
127+
mca_coll_han_component.han_op_module_name.allgather.han_op_up_module_name = NULL;
128+
free(mca_coll_han_component.han_op_module_name.allgather.han_op_low_module_name);
129+
mca_coll_han_component.han_op_module_name.allgather.han_op_low_module_name = NULL;
130+
131+
free(mca_coll_han_component.han_op_module_name.gather.han_op_up_module_name);
132+
mca_coll_han_component.han_op_module_name.gather.han_op_up_module_name = NULL;
133+
free(mca_coll_han_component.han_op_module_name.gather.han_op_low_module_name);
134+
mca_coll_han_component.han_op_module_name.gather.han_op_low_module_name = NULL;
135+
136+
free(mca_coll_han_component.han_op_module_name.scatter.han_op_up_module_name);
137+
mca_coll_han_component.han_op_module_name.scatter.han_op_up_module_name = NULL;
138+
free(mca_coll_han_component.han_op_module_name.scatter.han_op_low_module_name);
139+
mca_coll_han_component.han_op_module_name.scatter.han_op_low_module_name = NULL;
140+
111141
return OMPI_SUCCESS;
112142
}
113143

@@ -147,6 +177,37 @@ const char* mca_coll_han_topo_lvl_to_str(TOPO_LVL_T topo_lvl)
147177
}
148178
}
149179

180+
static int
181+
mca_coll_han_query_module_from_mca(mca_base_component_t* c,
182+
const char* param_name,
183+
const char* param_doc,
184+
int info_level,
185+
uint32_t* module_id,
186+
char** storage)
187+
{
188+
char *module_name, *endptr = NULL;
189+
190+
int mod_id = COMPONENTS_COUNT;
191+
mod_id = (*module_id > (uint32_t)mod_id) ? mod_id : (int)*module_id; /* stay in range */
192+
mod_id = (mod_id < 0) ? 0 : mod_id; /* in range */
193+
194+
*storage = available_components[mod_id].component_name;
195+
196+
(void) mca_base_component_var_register(c, param_name, param_doc,
197+
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
198+
info_level,
199+
MCA_BASE_VAR_SCOPE_READONLY, storage);
200+
module_name = *storage;
201+
mod_id = strtol(module_name, &endptr, 10);
202+
if( module_name == endptr ) { /* no conversion, maybe we got a module name instead */
203+
/* Convert module name to id */
204+
mod_id = mca_coll_han_component_name_to_id(module_name);
205+
}
206+
/* Keep the module in the range */
207+
*module_id = (mod_id < 0) ? 0 : mod_id;
208+
209+
return OMPI_SUCCESS;
210+
}
150211

151212
/*
152213
* Register MCA params
@@ -177,18 +238,17 @@ static int han_register(void)
177238
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_segsize);
178239

179240
cs->han_bcast_up_module = 0;
180-
(void) mca_base_component_var_register(c, "bcast_up_module",
181-
"up level module for bcast, 0 libnbc, 1 adapt",
182-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
183-
OPAL_INFO_LVL_9,
184-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_up_module);
241+
(void) mca_coll_han_query_module_from_mca(c, "bcast_up_module",
242+
"up level module for bcast, 0 libnbc, 1 adapt",
243+
OPAL_INFO_LVL_9, &cs->han_bcast_up_module,
244+
&cs->han_op_module_name.bcast.han_op_up_module_name);
185245

186246
cs->han_bcast_low_module = 0;
187-
(void) mca_base_component_var_register(c, "bcast_low_module",
188-
"low level module for bcast, 0 tuned, 1 sm",
189-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
190-
OPAL_INFO_LVL_9,
191-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_low_module);
247+
(void) mca_coll_han_query_module_from_mca(c, "bcast_low_module",
248+
"low level module for bcast, 0 tuned, 1 sm",
249+
OPAL_INFO_LVL_9,
250+
&cs->han_bcast_low_module,
251+
&cs->han_op_module_name.bcast.han_op_low_module_name);
192252

193253
cs->han_reduce_segsize = 65536;
194254
(void) mca_base_component_var_register(c, "reduce_segsize",
@@ -198,18 +258,17 @@ static int han_register(void)
198258
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_segsize);
199259

200260
cs->han_reduce_up_module = 0;
201-
(void) mca_base_component_var_register(c, "reduce_up_module",
202-
"up level module for allreduce, 0 libnbc, 1 adapt",
203-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
204-
OPAL_INFO_LVL_9,
205-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_up_module);
261+
(void) mca_coll_han_query_module_from_mca(c, "reduce_up_module",
262+
"up level module for allreduce, 0 libnbc, 1 adapt",
263+
OPAL_INFO_LVL_9, &cs->han_reduce_up_module,
264+
&cs->han_op_module_name.reduce.han_op_up_module_name);
206265

207266
cs->han_reduce_low_module = 0;
208-
(void) mca_base_component_var_register(c, "reduce_low_module",
209-
"low level module for allreduce, 0 tuned, 1 sm",
210-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
211-
OPAL_INFO_LVL_9,
212-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_low_module);
267+
(void) mca_coll_han_query_module_from_mca(c, "reduce_low_module",
268+
"low level module for allreduce, 0 tuned, 1 sm",
269+
OPAL_INFO_LVL_9, &cs->han_reduce_low_module,
270+
&cs->han_op_module_name.reduce.han_op_low_module_name);
271+
213272
cs->han_allreduce_segsize = 65536;
214273
(void) mca_base_component_var_register(c, "allreduce_segsize",
215274
"segment size for allreduce",
@@ -218,60 +277,52 @@ static int han_register(void)
218277
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_segsize);
219278

220279
cs->han_allreduce_up_module = 0;
221-
(void) mca_base_component_var_register(c, "allreduce_up_module",
222-
"up level module for allreduce, 0 libnbc, 1 adapt",
223-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
224-
OPAL_INFO_LVL_9,
225-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_up_module);
280+
(void) mca_coll_han_query_module_from_mca(c, "allreduce_up_module",
281+
"up level module for allreduce, 0 libnbc, 1 adapt",
282+
OPAL_INFO_LVL_9, &cs->han_allreduce_up_module,
283+
&cs->han_op_module_name.allreduce.han_op_up_module_name);
226284

227285
cs->han_allreduce_low_module = 0;
228-
(void) mca_base_component_var_register(c, "allreduce_low_module",
229-
"low level module for allreduce, 0 tuned, 1 sm",
230-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
231-
OPAL_INFO_LVL_9,
232-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_low_module);
286+
(void) mca_coll_han_query_module_from_mca(c, "allreduce_low_module",
287+
"low level module for allreduce, 0 tuned, 1 sm",
288+
OPAL_INFO_LVL_9, &cs->han_allreduce_low_module,
289+
&cs->han_op_module_name.allreduce.han_op_low_module_name);
233290

234291
cs->han_allgather_up_module = 0;
235-
(void) mca_base_component_var_register(c, "allgather_up_module",
236-
"up level module for allgather, 0 libnbc, 1 adapt",
237-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
238-
OPAL_INFO_LVL_9,
239-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allgather_up_module);
292+
(void) mca_coll_han_query_module_from_mca(c, "allgather_up_module",
293+
"up level module for allgather, 0 libnbc, 1 adapt",
294+
OPAL_INFO_LVL_9, &cs->han_allgather_up_module,
295+
&cs->han_op_module_name.allgather.han_op_up_module_name);
240296

241297
cs->han_allgather_low_module = 0;
242-
(void) mca_base_component_var_register(c, "allgather_low_module",
243-
"low level module for allgather, 0 tuned, 1 sm",
244-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
245-
OPAL_INFO_LVL_9,
246-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allgather_low_module);
298+
(void) mca_coll_han_query_module_from_mca(c, "allgather_low_module",
299+
"low level module for allgather, 0 tuned, 1 sm",
300+
OPAL_INFO_LVL_9, &cs->han_allgather_low_module,
301+
&cs->han_op_module_name.allgather.han_op_low_module_name);
247302

248303
cs->han_gather_up_module = 0;
249-
(void) mca_base_component_var_register(c, "gather_up_module",
250-
"up level module for gather, 0 libnbc, 1 adapt",
251-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
252-
OPAL_INFO_LVL_9,
253-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_up_module);
304+
(void) mca_coll_han_query_module_from_mca(c, "gather_up_module",
305+
"up level module for gather, 0 libnbc, 1 adapt",
306+
OPAL_INFO_LVL_9, &cs->han_gather_up_module,
307+
&cs->han_op_module_name.gather.han_op_up_module_name);
254308

255309
cs->han_gather_low_module = 0;
256-
(void) mca_base_component_var_register(c, "gather_low_module",
257-
"low level module for gather, 0 tuned, 1 sm",
258-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
259-
OPAL_INFO_LVL_9,
260-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_low_module);
310+
(void) mca_coll_han_query_module_from_mca(c, "gather_low_module",
311+
"low level module for gather, 0 tuned, 1 sm",
312+
OPAL_INFO_LVL_9, &cs->han_gather_low_module,
313+
&cs->han_op_module_name.gather.han_op_low_module_name);
261314

262315
cs->han_scatter_up_module = 0;
263-
(void) mca_base_component_var_register(c, "scatter_up_module",
264-
"up level module for scatter, 0 libnbc, 1 adapt",
265-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
266-
OPAL_INFO_LVL_9,
267-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_scatter_up_module);
316+
(void) mca_coll_han_query_module_from_mca(c, "scatter_up_module",
317+
"up level module for scatter, 0 libnbc, 1 adapt",
318+
OPAL_INFO_LVL_9, &cs->han_scatter_up_module,
319+
&cs->han_op_module_name.scatter.han_op_up_module_name);
268320

269321
cs->han_scatter_low_module = 0;
270-
(void) mca_base_component_var_register(c, "scatter_low_module",
271-
"low level module for scatter, 0 tuned, 1 sm",
272-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
273-
OPAL_INFO_LVL_9,
274-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_scatter_low_module);
322+
(void) mca_coll_han_query_module_from_mca(c, "scatter_low_module",
323+
"low level module for scatter, 0 tuned, 1 sm",
324+
OPAL_INFO_LVL_9, &cs->han_scatter_low_module,
325+
&cs->han_op_module_name.scatter.han_op_low_module_name);
275326

276327
cs->han_reproducible = 0;
277328
(void) mca_base_component_var_register(c, "reproducible",

0 commit comments

Comments
 (0)