Skip to content

Commit cbbe67e

Browse files
authored
Merge pull request #7487 from bosilca/topic/pml_from_vpid0
Make sure the PML selection is consistent across the world.
2 parents c4d3685 + 21d7433 commit cbbe67e

File tree

2 files changed

+46
-5
lines changed

2 files changed

+46
-5
lines changed

ompi/mca/pml/base/pml_base_select.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,7 @@ mca_pml_base_pml_check_selected(const char *my_pml,
324324
size_t size;
325325
int ret;
326326
char *remote_pml;
327+
opal_process_name_t rank0 = {.jobid = ompi_proc_local()->super.proc_name.jobid, .vpid = 0};
327328

328329
/* if no modex was required by the PML, then
329330
* we can assume success
@@ -342,13 +343,13 @@ mca_pml_base_pml_check_selected(const char *my_pml,
342343
}
343344

344345
/* get the name of the PML module selected by rank=0 */
345-
OPAL_MODEX_RECV(ret, &pml_base_component,
346-
&procs[0]->super.proc_name, (void**) &remote_pml, &size);
346+
OPAL_MODEX_RECV_STRING_OPTIONAL(ret, mca_base_component_to_string(&pml_base_component),
347+
&rank0, (void**) &remote_pml, &size);
347348

348349
/* if this key wasn't found, then just assume all is well... */
349-
if (OMPI_SUCCESS != ret) {
350+
if (PMIX_ERR_NOT_FOUND != ret) {
350351
opal_output_verbose( 10, ompi_pml_base_framework.framework_output,
351-
"check:select: modex data not found");
352+
"check:select: PML modex for vpid 0 data not found");
352353
return OMPI_SUCCESS;
353354
}
354355

opal/mca/pmix/pmix-internal.h

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ typedef struct {
309309
pmix_info_t _info; \
310310
size_t _sz; \
311311
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
312-
"%s[%s:%d] MODEX RECV VALUE OPTIONAL FOR PROC %s KEY %s", \
312+
"%s[%s:%d] MODEX RECV VALUE IMMEDIATE FOR PROC %s KEY %s", \
313313
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
314314
__FILE__, __LINE__, \
315315
OPAL_NAME_PRINT(*(p)), (s))); \
@@ -365,6 +365,46 @@ typedef struct {
365365
} \
366366
} while(0);
367367

368+
/**
369+
* Provide a simplified macro for retrieving modex data
370+
* from another process:
371+
*
372+
* r - the integer return status from the modex op (int)
373+
* s - string key (char*)
374+
* p - pointer to the opal_process_name_t of the proc that posted
375+
* the data (opal_process_name_t*)
376+
* d - pointer to a location wherein the data object
377+
* is to be returned (char**)
378+
* sz - pointer to a location wherein the number of bytes
379+
* in the data object can be returned (size_t*)
380+
*/
381+
#define OPAL_MODEX_RECV_STRING_OPTIONAL(r, s, p, d, sz) \
382+
do { \
383+
pmix_proc_t _proc; \
384+
pmix_value_t *_kv = NULL; \
385+
pmix_info_t _info; \
386+
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
387+
"%s[%s:%d] MODEX RECV STRING OPTIONAL FOR PROC %s KEY %s", \
388+
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
389+
__FILE__, __LINE__, \
390+
OPAL_NAME_PRINT(*(p)), (s))); \
391+
*(d) = NULL; \
392+
*(sz) = 0; \
393+
OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \
394+
PMIX_INFO_LOAD(&_info, PMIX_OPTIONAL, NULL, PMIX_BOOL); \
395+
(r) = PMIx_Get(&(_proc), (s), &(_info), 1, &(_kv)); \
396+
if (NULL == _kv) { \
397+
(r) = PMIX_ERR_NOT_FOUND; \
398+
} else if (PMIX_SUCCESS == (r)) { \
399+
*(d) = (uint8_t*)_kv->data.bo.bytes; \
400+
*(sz) = _kv->data.bo.size; \
401+
_kv->data.bo.bytes = NULL; /* protect the data */ \
402+
} \
403+
if (NULL != _kv) { \
404+
PMIX_VALUE_RELEASE(_kv); \
405+
} \
406+
} while(0);
407+
368408
/**
369409
* Provide a simplified macro for retrieving modex data
370410
* from another process:

0 commit comments

Comments
 (0)