21
21
* and Technology (RIST). All rights reserved.
22
22
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
23
23
* Copyright (c) 2017 FUJITSU LIMITED. All rights reserved.
24
+ * Copyright (c) 2020 BULL S.A.S. All rights reserved.
24
25
* $COPYRIGHT$
25
26
*
26
27
* Additional copyrights may follow
37
38
#include "mpi.h"
38
39
#include "ompi/communicator/communicator.h"
39
40
#include "opal/util/output.h"
41
+ #include "opal/util/argv.h"
40
42
#include "opal/util/show_help.h"
41
43
#include "opal/class/opal_list.h"
42
44
#include "opal/class/opal_object.h"
43
45
#include "ompi/mca/mca.h"
44
46
#include "opal/mca/base/base.h"
45
47
#include "ompi/mca/coll/coll.h"
46
48
#include "ompi/mca/coll/base/base.h"
47
-
49
+ #include "ompi/mca/coll/base/coll_base_util.h"
48
50
49
51
/*
50
- * Local types
52
+ * Stuff for the OBJ interface
51
53
*/
52
- struct avail_coll_t {
53
- opal_list_item_t super ;
54
-
55
- int ac_priority ;
56
- mca_coll_base_module_2_3_0_t * ac_module ;
57
- const char * ac_component_name ;
58
- };
59
- typedef struct avail_coll_t avail_coll_t ;
60
-
54
+ OBJ_CLASS_INSTANCE (mca_coll_base_avail_coll_t , opal_list_item_t , NULL , NULL );
61
55
62
56
/*
63
57
* Local functions
@@ -77,12 +71,6 @@ static int query_2_0_0(const mca_coll_base_component_2_0_0_t *
77
71
int * priority ,
78
72
mca_coll_base_module_2_3_0_t * * module );
79
73
80
- /*
81
- * Stuff for the OBJ interface
82
- */
83
- static OBJ_CLASS_INSTANCE (avail_coll_t , opal_list_item_t , NULL, NULL) ;
84
-
85
-
86
74
#define COPY (module , comm , func ) \
87
75
do { \
88
76
if (NULL != module->coll_ ## func) { \
@@ -138,11 +126,14 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm)
138
126
/* FIX ME - Do some kind of collective operation to find a module
139
127
that everyone has available */
140
128
129
+ /* List to store every valid module */
130
+ comm -> c_coll -> module_list = OBJ_NEW (opal_list_t );
131
+
141
132
/* do the selection loop */
142
133
for (item = opal_list_remove_first (selectable );
143
134
NULL != item ; item = opal_list_remove_first (selectable )) {
144
135
145
- avail_coll_t * avail = (avail_coll_t * ) item ;
136
+ mca_coll_base_avail_coll_t * avail = (mca_coll_base_avail_coll_t * ) item ;
146
137
147
138
/* initialize the module */
148
139
ret = avail -> ac_module -> coll_module_enable (avail -> ac_module , comm );
@@ -153,6 +144,9 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm)
153
144
(OMPI_SUCCESS == ret ? "Enabled" : "Disabled" ) );
154
145
155
146
if (OMPI_SUCCESS == ret ) {
147
+ /* Save every component that is initialized,
148
+ * queried and enabled successfully */
149
+ opal_list_append (comm -> c_coll -> module_list , & avail -> super );
156
150
157
151
/* copy over any of the pointers */
158
152
COPY (avail -> ac_module , comm , allgather );
@@ -230,10 +224,11 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm)
230
224
COPY (avail -> ac_module , comm , neighbor_alltoallw_init );
231
225
232
226
COPY (avail -> ac_module , comm , reduce_local );
227
+ } else {
228
+ /* release the original module reference and the list item */
229
+ OBJ_RELEASE (avail -> ac_module );
230
+ OBJ_RELEASE (avail );
233
231
}
234
- /* release the original module reference and the list item */
235
- OBJ_RELEASE (avail -> ac_module );
236
- OBJ_RELEASE (avail );
237
232
}
238
233
239
234
/* Done with the list from the check_components() call so release it. */
@@ -306,8 +301,8 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm)
306
301
307
302
static int avail_coll_compare (opal_list_item_t * * a ,
308
303
opal_list_item_t * * b ) {
309
- avail_coll_t * acoll = (avail_coll_t * ) * a ;
310
- avail_coll_t * bcoll = (avail_coll_t * ) * b ;
304
+ mca_coll_base_avail_coll_t * acoll = (mca_coll_base_avail_coll_t * ) * a ;
305
+ mca_coll_base_avail_coll_t * bcoll = (mca_coll_base_avail_coll_t * ) * b ;
311
306
312
307
if (acoll -> ac_priority > bcoll -> ac_priority ) {
313
308
return 1 ;
@@ -318,6 +313,20 @@ static int avail_coll_compare (opal_list_item_t **a,
318
313
return 0 ;
319
314
}
320
315
316
/*
 * Tell whether a component name appears in a NULL-terminated, argv-style
 * array of strings.
 *
 * argv            NULL-terminated array of C strings; may itself be NULL,
 *                 in which case nothing matches.
 * component_name  Name to look for; a NULL name never matches.
 *
 * Returns 1 when some entry compares equal (strcmp) to component_name,
 * 0 otherwise.
 */
static inline int
component_in_argv(char **argv, const char *component_name)
{
    /* strcmp() on a NULL pointer is undefined behavior, so reject it early */
    if (NULL == argv || NULL == component_name) {
        return 0;
    }

    /* walk the array until the terminating NULL entry */
    for (; NULL != *argv; argv++) {
        if (0 == strcmp(component_name, *argv)) {
            return 1;
        }
    }

    return 0;
}
329
+
321
330
/*
322
331
* For each module in the list, check and see if it wants to run, and
323
332
* do the resulting priority comparison. Make a list of modules to be
@@ -327,25 +336,85 @@ static int avail_coll_compare (opal_list_item_t **a,
327
336
static opal_list_t * check_components (opal_list_t * components ,
328
337
ompi_communicator_t * comm )
329
338
{
330
- int priority ;
339
+ int priority , flag ;
331
340
const mca_base_component_t * component ;
332
341
mca_base_component_list_item_t * cli ;
333
342
mca_coll_base_module_2_3_0_t * module ;
334
343
opal_list_t * selectable ;
335
- avail_coll_t * avail ;
336
-
344
+ mca_coll_base_avail_coll_t * avail ;
345
+ char info_val [OPAL_MAX_INFO_VAL + 1 ];
346
+ char * * coll_argv = NULL , * * coll_exclude = NULL , * * coll_include = NULL ;
347
+
348
+ /* Check if this communicator comes with restrictions on the collective modules
349
+ * it wants to use. The restrictions are consistent with the MCA parameter
350
+ * to limit the collective components loaded, but it applies for each
351
+ * communicator and is provided as an info key during the communicator
352
+ * creation. Unlike the MCA param, this info key is used not to select
353
+ * components but either to prevent components from being used or to
354
+ * force a change in the component priority.
355
+ */
356
+ if ( NULL != comm -> super .s_info ) {
357
+ opal_info_get (comm -> super .s_info , "ompi_comm_coll_preference" ,
358
+ sizeof (info_val ), info_val , & flag );
359
+ if ( !flag ) {
360
+ goto proceed_to_select ;
361
+ }
362
+ coll_argv = opal_argv_split (info_val , ',' );
363
+ if (NULL == coll_argv ) {
364
+ goto proceed_to_select ;
365
+ }
366
+ int idx2 , count_include = opal_argv_count (coll_argv );
367
+ /* Allocate the coll_include argv */
368
+ coll_include = (char * * )malloc ((count_include + 1 ) * sizeof (char * ));
369
+ coll_include [count_include ] = NULL ; /* NULL terminated array */
370
+ /* Dispatch the include/exclude in the corresponding arrays */
371
+ for ( int idx = 0 ; NULL != coll_argv [idx ]; idx ++ ) {
372
+ if ( '^' == coll_argv [idx ][0 ] ) {
373
+ coll_include [idx ] = NULL ; /* NULL terminated array */
374
+
375
+ /* Allocate the coll_exclude argv */
376
+ coll_exclude = (char * * )malloc ((count_include - idx + 1 ) * sizeof (char * ));
377
+ /* save the exclude components */
378
+ for ( idx2 = idx ; NULL != coll_argv [idx2 ]; idx2 ++ ) {
379
+ coll_exclude [idx2 - idx ] = coll_argv [idx2 ];
380
+ }
381
+ coll_exclude [idx2 - idx ] = NULL ; /* NULL-terminated array */
382
+ coll_exclude [0 ] = coll_exclude [0 ] + 1 ; /* get rid of the ^ */
383
+ count_include = idx ;
384
+ break ;
385
+ }
386
+ coll_include [idx ] = coll_argv [idx ];
387
+ }
388
+ /* Reverse the order of the coll_include argv to facilitate the ordering of
389
+ * the selected components in reverse.
390
+ */
391
+ for ( idx2 = 0 ; idx2 < (count_include - 1 ); idx2 ++ ) {
392
+ char * temp = coll_include [idx2 ];
393
+ coll_include [idx2 ] = coll_include [count_include - 1 ];
394
+ coll_include [count_include - 1 ] = temp ;
395
+ count_include -- ;
396
+ }
397
+ }
398
+ proceed_to_select :
337
399
/* Make a list of the components that query successfully */
338
400
selectable = OBJ_NEW (opal_list_t );
339
401
340
402
/* Scan through the list of components */
341
403
OPAL_LIST_FOREACH (cli , & ompi_coll_base_framework .framework_components , mca_base_component_list_item_t ) {
342
404
component = cli -> cli_component ;
343
405
406
+ /* don't bother if we have this component in the exclusion list */
407
+ if ( component_in_argv (coll_exclude , component -> mca_component_name ) ) {
408
+ opal_output_verbose (10 , ompi_coll_base_framework .framework_output ,
409
+ "coll:base:comm_select: component disqualified: %s (due to communicator info key)" ,
410
+ component -> mca_component_name );
411
+ continue ;
412
+ }
344
413
priority = check_one_component (comm , component , & module );
345
414
if (priority >= 0 ) {
346
415
/* We have a component that indicated that it wants to run
347
416
by giving us a module */
348
- avail = OBJ_NEW (avail_coll_t );
417
+ avail = OBJ_NEW (mca_coll_base_avail_coll_t );
349
418
avail -> ac_priority = priority ;
350
419
avail -> ac_module = module ;
351
420
// Point to the string so we don't have to free later
@@ -376,6 +445,27 @@ static opal_list_t *check_components(opal_list_t * components,
376
445
/* Put this list in priority order */
377
446
opal_list_sort (selectable , avail_coll_compare );
378
447
448
+ /* Reorder all valid components, based not on their provided priorities but on
449
+ * the order requested in the info key. As at this point the coll_include is
450
+ * already ordered backward we can simply prepend the components.
451
+ */
452
+ mca_coll_base_avail_coll_t * item , * item_next ;
453
+ OPAL_LIST_FOREACH_SAFE (item , item_next ,
454
+ selectable , mca_coll_base_avail_coll_t ) {
455
+ if ( component_in_argv (coll_include , item -> ac_component_name ) ) {
456
+ opal_list_remove_item (selectable , & item -> super );
457
+ opal_list_prepend (selectable , & item -> super );
458
+ }
459
+ }
460
+
461
+ opal_argv_free (coll_argv );
462
+ if ( NULL != coll_exclude ) {
463
+ free (coll_exclude );
464
+ }
465
+ if ( NULL != coll_include ) {
466
+ free (coll_include );
467
+ }
468
+
379
469
/* All done */
380
470
return selectable ;
381
471
}
@@ -409,7 +499,6 @@ static int check_one_component(ompi_communicator_t * comm,
409
499
return priority ;
410
500
}
411
501
412
-
413
502
/**************************************************************************
414
503
* Query functions
415
504
**************************************************************************/
0 commit comments