Skip to content

Commit d1be5bd

Browse files
committed
PMIx_Group_construct - handle error returns
The error return code from PMIx_Group_construct was not being handled at all. This PR fixes this issue. In cases where the process is running without a PMIx server, for instance in a singleton program which has not invoked MPI_Comm_spawn, and it uses MPI_Comm_create_from_group, the following error help message is emitted: -------------------------------------------------------------------------- Your application has invoked an MPI function that is not supported in this environment. MPI function: MPI_Comm_from_group/MPI_Intercomm_from_groups Reason: PMIx server unreachable -------------------------------------------------------------------------- If a PMIx implementation is being used that does not support PMIx_Group_construct, the following help message is emitted if the code invokes MPI_Comm_create_from_group or MPI_Intercomm_create_from_groups: -------------------------------------------------------------------------- Your application has invoked an MPI function that is not supported in this environment. MPI function: MPI_Comm_from_group/MPI_Intercomm_from_groups Reason: PMIx server does not support PMIx Group operations -------------------------------------------------------------------------- Related to #10736 Signed-off-by: Howard Pritchard <howardp@lanl.gov>
1 parent 14bc2ed commit d1be5bd

File tree

1 file changed

+54
-8
lines changed

1 file changed

+54
-8
lines changed

ompi/communicator/comm_cid.c

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -310,11 +310,13 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
310310
{
311311
pmix_info_t pinfo, *results = NULL;
312312
size_t nresults;
313-
opal_process_name_t *name_array;
313+
opal_process_name_t *name_array = NULL;
314314
char *tag = NULL;
315-
size_t proc_count, cid_base = 0UL;
315+
size_t proc_count;
316+
size_t cid_base;
316317
int rc, leader_rank;
317-
pmix_proc_t *procs;
318+
int ret = OMPI_SUCCESS;
319+
pmix_proc_t *procs = NULL;
318320

319321
rc = ompi_group_to_proc_name_array (newcomm->c_local_group, &name_array, &proc_count);
320322
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
@@ -345,20 +347,64 @@ static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_commu
345347

346348
rc = PMIx_Group_construct(tag, procs, proc_count, &pinfo, 1, &results, &nresults);
347349
PMIX_INFO_DESTRUCT(&pinfo);
350+
if(PMIX_SUCCESS != rc) {
351+
char msg_string[1024];
352+
switch (rc) {
353+
case PMIX_ERR_UNREACH:
354+
sprintf(msg_string,"PMIx server unreachable");
355+
opal_show_help("help-comm.txt",
356+
"MPI function not supported",
357+
true,
358+
"MPI_Comm_from_group/MPI_Intercomm_from_groups",
359+
msg_string);
360+
361+
ret = MPI_ERR_UNSUPPORTED_OPERATION;
362+
break;
363+
case PMIX_ERR_NOT_SUPPORTED:
364+
sprintf(msg_string,"PMIx server does not support PMIx Group operations");
365+
opal_show_help("help-comm.txt",
366+
"MPI function not supported",
367+
true,
368+
"MPI_Comm_from_group/MPI_Intercomm_from_groups",
369+
msg_string);
370+
ret = MPI_ERR_UNSUPPORTED_OPERATION;
371+
break;
372+
default:
373+
ret = opal_pmix_convert_status(rc);
374+
break;
375+
}
376+
goto fn_exit;
377+
}
348378

349379
if (NULL != results) {
350380
PMIX_VALUE_GET_NUMBER(rc, &results[0].value, cid_base, size_t);
351-
PMIX_INFO_FREE(results, nresults);
352381
}
353382

354-
PMIX_PROC_FREE(procs, proc_count);
355-
free (name_array);
356-
357383
rc = PMIx_Group_destruct (tag, NULL, 0);
384+
if(PMIX_SUCCESS != rc) {
385+
ret = opal_pmix_convert_status(rc);
386+
goto fn_exit;
387+
}
358388

359389
ompi_comm_extended_cid_block_initialize (new_block, cid_base, 0, 0);
360390

361-
return OMPI_SUCCESS;
391+
fn_exit:
392+
if (NULL != results) {
393+
PMIX_INFO_FREE(results, nresults);
394+
results = NULL;
395+
}
396+
397+
if(NULL != procs) {
398+
PMIX_PROC_FREE(procs, proc_count);
399+
procs = NULL;
400+
}
401+
402+
if(NULL != name_array) {
403+
free (name_array);
404+
name_array = NULL;
405+
}
406+
407+
return ret;
362408
}
363409

364410
static int ompi_comm_nextcid_ext_nb (ompi_communicator_t *newcomm, ompi_communicator_t *comm,

0 commit comments

Comments
 (0)