Skip to content

Commit 4f03f44

Browse files
authored
Merge pull request #7582 from dipti-kothari/pml_check
mca/pml: PML check for direct modex
2 parents de0f34e + 5418cc5 commit 4f03f44

File tree

1 file changed

+83
-48
lines changed

1 file changed

+83
-48
lines changed

ompi/mca/pml/base/pml_base_select.c

Lines changed: 83 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
* reserved.
1515
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
17+
* Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights
18+
* reserved.
1719
* $COPYRIGHT$
1820
*
1921
* Additional copyrights may follow
@@ -44,8 +46,6 @@ typedef struct opened_component_t {
4446
mca_pml_base_component_t *om_component;
4547
} opened_component_t;
4648

47-
static bool modex_reqd=false;
48-
4949
/**
5050
* Function for selecting one component from all those that are
5151
* available.
@@ -59,7 +59,7 @@ static bool modex_reqd=false;
5959
int mca_pml_base_select(bool enable_progress_threads,
6060
bool enable_mpi_threads)
6161
{
62-
int i, priority = 0, best_priority = 0, num_pml = 0;
62+
int i, priority = 0, best_priority = 0, num_pml = 0, ret = 0;
6363
opal_list_item_t *item = NULL;
6464
mca_base_component_list_item_t *cli = NULL;
6565
mca_pml_base_component_t *component = NULL, *best_component = NULL;
@@ -186,13 +186,6 @@ int mca_pml_base_select(bool enable_progress_threads,
186186
"selected %s best priority %d\n",
187187
best_component->pmlm_version.mca_component_name, best_priority);
188188

189-
/* if more than one PML could be considered, then we still need the
190-
* modex since we cannot know which one will be selected on all procs
191-
*/
192-
if (1 < num_pml) {
193-
modex_reqd = true;
194-
}
195-
196189
/* Save the winner */
197190

198191
mca_pml_base_selected_component = *best_component;
@@ -287,13 +280,11 @@ int mca_pml_base_select(bool enable_progress_threads,
287280
}
288281

289282
/* register winner in the modex */
290-
if (modex_reqd && 0 == OMPI_PROC_MY_NAME->vpid) {
291-
mca_pml_base_pml_selected(best_component->pmlm_version.mca_component_name);
292-
}
283+
ret = mca_pml_base_pml_selected(best_component->pmlm_version.mca_component_name);
293284

294285
/* All done */
295286

296-
return OMPI_SUCCESS;
287+
return ret;
297288
}
298289

299290
/* need a "commonly" named PML structure so everything ends up in the
@@ -307,50 +298,55 @@ static mca_base_component_t pml_base_component = {
307298
};
308299

309300

301+
/*
302+
* If direct modex, then publish PML for all procs. If full modex then
303+
* publish PML for rank 0 only. This information is used during add_procs
304+
* to perform PML check.
305+
* During PML check, for direct modex, compare our PML with the peer's
306+
* PML for all procs in the add_procs call. This does not change the
307+
* connection complexity of modex transfers, since adding the proc is
308+
* going to get the peer information in the MTL/PML/BTL anyway.
309+
* For full modex, compare our PML with rank 0.
310+
* Direct modex is used whenever collect_all_data is false. If
311+
* async_modex is true, no fence is performed during MPI_Init; if
312+
* async_modex is false, only a zero-byte barrier is performed. In
313+
* both cases peer data is not exchanged up front, so direct modex is
314+
* still required during add_procs.
315+
*/
310316
int
311317
mca_pml_base_pml_selected(const char *name)
312318
{
313-
int rc;
319+
int rc = 0;
314320

315-
OPAL_MODEX_SEND(rc, PMIX_GLOBAL, &pml_base_component, name, strlen(name) + 1);
321+
if (!opal_pmix_collect_all_data || 0 == OMPI_PROC_MY_NAME->vpid) {
322+
OPAL_MODEX_SEND(rc, PMIX_GLOBAL, &pml_base_component, name,
323+
strlen(name) + 1);
324+
}
316325
return rc;
317326
}
318327

319-
int
320-
mca_pml_base_pml_check_selected(const char *my_pml,
321-
ompi_proc_t **procs,
322-
size_t nprocs)
328+
static int
329+
mca_pml_base_pml_check_selected_impl(const char *my_pml,
330+
opal_process_name_t proc_name)
323331
{
324332
size_t size;
325-
int ret;
333+
int ret = 0;
326334
char *remote_pml;
327-
opal_process_name_t rank0 = {.jobid = ompi_proc_local()->super.proc_name.jobid, .vpid = 0};
328335

329-
/* if no modex was required by the PML, then
330-
* we can assume success
331-
*/
332-
if (!modex_reqd) {
336+
/* if proc_name is our own name (OMPI_PROC_MY_NAME), there is nothing to compare: assume success */
337+
if (0 == opal_compare_proc(ompi_proc_local()->super.proc_name, proc_name)) {
333338
opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
334-
"check:select: modex not reqd");
339+
"check:select: PML check not necessary on self");
335340
return OMPI_SUCCESS;
336341
}
337-
338-
/* if we are rank=0, then we can also assume success */
339-
if (0 == OMPI_PROC_MY_NAME->vpid) {
340-
opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
341-
"check:select: rank=0");
342-
return OMPI_SUCCESS;
343-
}
344-
345-
/* get the name of the PML module selected by rank=0 */
346-
OPAL_MODEX_RECV_STRING_OPTIONAL(ret, mca_base_component_to_string(&pml_base_component),
347-
&rank0, (void**) &remote_pml, &size);
348-
349-
/* if this key wasn't found, then just assume all is well... */
342+
OPAL_MODEX_RECV_STRING(ret,
343+
mca_base_component_to_string(&pml_base_component),
344+
&proc_name, (void**) &remote_pml, &size);
350345
if (PMIX_ERR_NOT_FOUND == ret) {
351346
opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
352-
"check:select: PML modex for vpid 0 data not found");
353-
return OMPI_SUCCESS;
347+
"check:select: PML modex for process %s not found",
348+
OMPI_NAME_PRINT(&proc_name));
349+
return OMPI_ERR_NOT_FOUND;
354350
}
355351

356352
/* the remote pml returned should never be NULL if an error
@@ -359,22 +355,26 @@ mca_pml_base_pml_check_selected(const char *my_pml,
359355
*/
360356
if (NULL == remote_pml) {
361357
opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
362-
"check:select: got a NULL pml from rank=0");
358+
"check:select: got a NULL pml from process %s",
359+
OMPI_NAME_PRINT(&proc_name));
363360
return OMPI_ERR_UNREACH;
364361
}
365362

366363
opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
367-
"check:select: checking my pml %s against rank=0 pml %s",
368-
my_pml, remote_pml);
364+
"check:select: checking my pml %s against process %s"
365+
" pml %s", my_pml, OMPI_NAME_PRINT(&proc_name),
366+
remote_pml);
369367

370368
/* if that module doesn't match my own, return an error */
371369
if ((size != strlen(my_pml) + 1) ||
372370
(0 != strcmp(my_pml, remote_pml))) {
373-
char *errhost = opal_get_proc_hostname(&procs[0]->super);
371+
char *errhost;
372+
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, PMIX_HOSTNAME, &proc_name,
373+
&(errhost), PMIX_STRING);
374374
opal_output(0, "%s selected pml %s, but peer %s on %s selected pml %s",
375375
OMPI_NAME_PRINT(&ompi_proc_local()->super.proc_name),
376-
my_pml, OMPI_NAME_PRINT(&procs[0]->super.proc_name),
377-
errhost,
376+
my_pml, OMPI_NAME_PRINT(&proc_name),
377+
(NULL == errhost) ? "unknown" : errhost,
378378
remote_pml);
379379
free(remote_pml);
380380
free(errhost);
@@ -385,3 +385,38 @@ mca_pml_base_pml_check_selected(const char *my_pml,
385385
free(remote_pml);
386386
return OMPI_SUCCESS;
387387
}
388+
389+
int
390+
mca_pml_base_pml_check_selected(const char *my_pml,
391+
ompi_proc_t **procs,
392+
size_t nprocs)
393+
{
394+
int ret = 0;
395+
size_t i;
396+
397+
if (!opal_pmix_collect_all_data) {
398+
/*
399+
* If direct modex, then compare our PML with the peer's PML
400+
* for all procs
401+
*/
402+
for (i = 0; i < nprocs; i++) {
403+
ret = mca_pml_base_pml_check_selected_impl(
404+
my_pml,
405+
procs[i]->super.proc_name);
406+
if (ret) {
407+
return ret;
408+
}
409+
}
410+
} else {
411+
/* else if full modex compare our PML with rank 0 */
412+
opal_process_name_t proc_name = {
413+
.jobid = ompi_proc_local()->super.proc_name.jobid,
414+
.vpid = 0
415+
};
416+
ret = mca_pml_base_pml_check_selected_impl(
417+
my_pml,
418+
proc_name);
419+
}
420+
421+
return ret;
422+
}

0 commit comments

Comments
 (0)