14
14
* reserved.
15
15
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
16
16
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
17
+ * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights
18
+ * reserved.
17
19
* $COPYRIGHT$
18
20
*
19
21
* Additional copyrights may follow
@@ -44,8 +46,6 @@ typedef struct opened_component_t {
44
46
mca_pml_base_component_t * om_component ;
45
47
} opened_component_t ;
46
48
47
- static bool modex_reqd = false;
48
-
49
49
/**
50
50
* Function for selecting one component from all those that are
51
51
* available.
@@ -59,7 +59,7 @@ static bool modex_reqd=false;
59
59
int mca_pml_base_select (bool enable_progress_threads ,
60
60
bool enable_mpi_threads )
61
61
{
62
- int i , priority = 0 , best_priority = 0 , num_pml = 0 ;
62
+ int i , priority = 0 , best_priority = 0 , num_pml = 0 , ret = 0 ;
63
63
opal_list_item_t * item = NULL ;
64
64
mca_base_component_list_item_t * cli = NULL ;
65
65
mca_pml_base_component_t * component = NULL , * best_component = NULL ;
@@ -186,13 +186,6 @@ int mca_pml_base_select(bool enable_progress_threads,
186
186
"selected %s best priority %d\n" ,
187
187
best_component -> pmlm_version .mca_component_name , best_priority );
188
188
189
- /* if more than one PML could be considered, then we still need the
190
- * modex since we cannot know which one will be selected on all procs
191
- */
192
- if (1 < num_pml ) {
193
- modex_reqd = true;
194
- }
195
-
196
189
/* Save the winner */
197
190
198
191
mca_pml_base_selected_component = * best_component ;
@@ -287,13 +280,11 @@ int mca_pml_base_select(bool enable_progress_threads,
287
280
}
288
281
289
282
/* register winner in the modex */
290
- if (modex_reqd && 0 == OMPI_PROC_MY_NAME -> vpid ) {
291
- mca_pml_base_pml_selected (best_component -> pmlm_version .mca_component_name );
292
- }
283
+ ret = mca_pml_base_pml_selected (best_component -> pmlm_version .mca_component_name );
293
284
294
285
/* All done */
295
286
296
- return OMPI_SUCCESS ;
287
+ return ret ;
297
288
}
298
289
299
290
/* need a "commonly" named PML structure so everything ends up in the
@@ -307,50 +298,55 @@ static mca_base_component_t pml_base_component = {
307
298
};
308
299
309
300
301
+ /*
302
+ * If direct modex, then publish PML for all procs. If full modex then
303
+ * publish PML for rank 0 only. This information is used during add_procs
304
+ * to perform PML check.
305
+ * During PML check, for direct modex, compare our PML with the peer's
306
+ * PML for all procs in the add_procs call. This does not change the
307
+ * connection complexity of modex transfers, since adding the proc is
308
+ * going to get the peer information in the MTL/PML/BTL anyway.
309
+ * For full modex, compare our PML with rank 0.
310
+ * Direct Modex is performed when collect_all_data is false, as we do
311
+ * not perform a fence operation during MPI_Init if async_modex is true.
312
+ * If async_modex is false and collect_all_data is false then we do a
313
+ * zero-byte barrier and we would still require direct modex during
314
+ * add_procs
315
+ */
310
316
int
311
317
mca_pml_base_pml_selected (const char * name )
312
318
{
313
- int rc ;
319
+ int rc = 0 ;
314
320
315
- OPAL_MODEX_SEND (rc , PMIX_GLOBAL , & pml_base_component , name , strlen (name ) + 1 );
321
+ if (!opal_pmix_collect_all_data || 0 == OMPI_PROC_MY_NAME -> vpid ) {
322
+ OPAL_MODEX_SEND (rc , PMIX_GLOBAL , & pml_base_component , name ,
323
+ strlen (name ) + 1 );
324
+ }
316
325
return rc ;
317
326
}
318
327
319
- int
320
- mca_pml_base_pml_check_selected (const char * my_pml ,
321
- ompi_proc_t * * procs ,
322
- size_t nprocs )
328
+ static int
329
+ mca_pml_base_pml_check_selected_impl (const char * my_pml ,
330
+ opal_process_name_t proc_name )
323
331
{
324
332
size_t size ;
325
- int ret ;
333
+ int ret = 0 ;
326
334
char * remote_pml ;
327
- opal_process_name_t rank0 = {.jobid = ompi_proc_local ()-> super .proc_name .jobid , .vpid = 0 };
328
335
329
- /* if no modex was required by the PML, then
330
- * we can assume success
331
- */
332
- if (!modex_reqd ) {
336
+ /* if proc_name is our own name (OMPI_PROC_MY_NAME), no PML check is needed: assume success */
337
+ if (0 == opal_compare_proc (ompi_proc_local ()-> super .proc_name , proc_name )) {
333
338
opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
334
- "check:select: modex not reqd " );
339
+ "check:select: PML check not necessary on self " );
335
340
return OMPI_SUCCESS ;
336
341
}
337
-
338
- /* if we are rank=0, then we can also assume success */
339
- if (0 == OMPI_PROC_MY_NAME -> vpid ) {
340
- opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
341
- "check:select: rank=0" );
342
- return OMPI_SUCCESS ;
343
- }
344
-
345
- /* get the name of the PML module selected by rank=0 */
346
- OPAL_MODEX_RECV_STRING_OPTIONAL (ret , mca_base_component_to_string (& pml_base_component ),
347
- & rank0 , (void * * ) & remote_pml , & size );
348
-
349
- /* if this key wasn't found, then just assume all is well... */
342
+ OPAL_MODEX_RECV_STRING (ret ,
343
+ mca_base_component_to_string (& pml_base_component ),
344
+ & proc_name , (void * * ) & remote_pml , & size );
350
345
if (PMIX_ERR_NOT_FOUND == ret ) {
351
346
opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
352
- "check:select: PML modex for vpid 0 data not found" );
353
- return OMPI_SUCCESS ;
347
+ "check:select: PML modex for process %s not found" ,
348
+ OMPI_NAME_PRINT (& proc_name ));
349
+ return OMPI_ERR_NOT_FOUND ;
354
350
}
355
351
356
352
/* the remote pml returned should never be NULL if an error
@@ -359,22 +355,26 @@ mca_pml_base_pml_check_selected(const char *my_pml,
359
355
*/
360
356
if (NULL == remote_pml ) {
361
357
opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
362
- "check:select: got a NULL pml from rank=0" );
358
+ "check:select: got a NULL pml from process %s" ,
359
+ OMPI_NAME_PRINT (& proc_name ));
363
360
return OMPI_ERR_UNREACH ;
364
361
}
365
362
366
363
opal_output_verbose ( 10 , ompi_pml_base_framework .framework_output ,
367
- "check:select: checking my pml %s against rank=0 pml %s" ,
368
- my_pml , remote_pml );
364
+ "check:select: checking my pml %s against process %s"
365
+ " pml %s" , my_pml , OMPI_NAME_PRINT (& proc_name ),
366
+ remote_pml );
369
367
370
368
/* if that module doesn't match my own, return an error */
371
369
if ((size != strlen (my_pml ) + 1 ) ||
372
370
(0 != strcmp (my_pml , remote_pml ))) {
373
- char * errhost = opal_get_proc_hostname (& procs [0 ]-> super );
371
+ char * errhost ;
372
+ OPAL_MODEX_RECV_VALUE_OPTIONAL (ret , PMIX_HOSTNAME , & proc_name ,
373
+ & (errhost ), PMIX_STRING );
374
374
opal_output (0 , "%s selected pml %s, but peer %s on %s selected pml %s" ,
375
375
OMPI_NAME_PRINT (& ompi_proc_local ()-> super .proc_name ),
376
- my_pml , OMPI_NAME_PRINT (& procs [ 0 ] -> super . proc_name ),
377
- errhost ,
376
+ my_pml , OMPI_NAME_PRINT (& proc_name ),
377
+ ( NULL == errhost ) ? "unknown" : errhost ,
378
378
remote_pml );
379
379
free (remote_pml );
380
380
free (errhost );
@@ -385,3 +385,38 @@ mca_pml_base_pml_check_selected(const char *my_pml,
385
385
free (remote_pml );
386
386
return OMPI_SUCCESS ;
387
387
}
388
+
389
+ int
390
+ mca_pml_base_pml_check_selected (const char * my_pml ,
391
+ ompi_proc_t * * procs ,
392
+ size_t nprocs )
393
+ {
394
+ int ret = 0 ;
395
+ size_t i ;
396
+
397
+ if (!opal_pmix_collect_all_data ) {
398
+ /*
399
+ * If direct modex, then compare our PML with the peer's PML
400
+ * for all procs
401
+ */
402
+ for (i = 0 ; i < nprocs ; i ++ ) {
403
+ ret = mca_pml_base_pml_check_selected_impl (
404
+ my_pml ,
405
+ procs [i ]-> super .proc_name );
406
+ if (ret ) {
407
+ return ret ;
408
+ }
409
+ }
410
+ } else {
411
+ /* else if full modex compare our PML with rank 0 */
412
+ opal_process_name_t proc_name = {
413
+ .jobid = ompi_proc_local ()-> super .proc_name .jobid ,
414
+ .vpid = 0
415
+ };
416
+ ret = mca_pml_base_pml_check_selected_impl (
417
+ my_pml ,
418
+ proc_name );
419
+ }
420
+
421
+ return ret ;
422
+ }
0 commit comments