
Commit ba0bce4

Ask for the modex of all connected jobs.

The original code was merging the local modex with the modex of the
local processes of the first jobid only. This led to incorrect, and
mismatched, information among processes when connecting processes from
multiple jobids (such as when merging a second spawn). This patch
iterates over all the jobids on the list of "to connect" processes and
adds their information to the local modex.

Fixes #11724.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
1 parent 8514e71 commit ba0bce4
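
The shape of the fix can be sketched independently of the OMPI internals: take the jobid of the first pending process, fetch that job's modex once, drain every pending process with the same jobid, and repeat until the list is empty. Below is a minimal sketch of that grouping loop in plain C, with hypothetical names (pending_proc_t, fetch_modex_for_job, complete_proc) standing in for the real opal_list_t/PMIx machinery:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-ins for the real OMPI structures; the actual code
     * works on an opal_list_t of ompi_dpm_proct_caddy_t items. */
    typedef struct pending_proc {
        uint32_t jobid;               /* job this process belongs to */
        uint32_t vpid;                /* rank within that job */
        struct pending_proc *next;
    } pending_proc_t;

    /* Assumed helper: query the modex (e.g. PMIX_LOCAL_PEERS) for one jobid. */
    static void fetch_modex_for_job(uint32_t jobid)
    {
        printf("modex query for jobid %u\n", jobid);
    }

    /* Assumed helper: finish setting up one process from that modex. */
    static void complete_proc(pending_proc_t *p)
    {
        printf("  completed proc %u.%u\n", p->jobid, p->vpid);
    }

    /* Drain the pending list one jobid at a time, as the patch does: each
     * outer iteration queries the modex for exactly one job, then unlinks
     * every pending proc that belongs to it. */
    static void connect_all_jobs(pending_proc_t **pending)
    {
        while (*pending != NULL) {
            uint32_t jobid = (*pending)->jobid;  /* job of the first pending proc */
            fetch_modex_for_job(jobid);          /* once per job, not once total */

            /* Pointer-to-pointer walk so we can unlink while iterating,
             * mirroring the switch to OPAL_LIST_FOREACH_SAFE. */
            for (pending_proc_t **pp = pending; *pp != NULL; ) {
                pending_proc_t *p = *pp;
                if (p->jobid == jobid) {
                    *pp = p->next;               /* unlink from the pending list */
                    complete_proc(p);
                } else {
                    pp = &p->next;               /* different job: keep for a later pass */
                }
            }
        }
    }

    int main(void)
    {
        /* Two jobs interleaved, as after a second spawn-and-merge. */
        pending_proc_t c = { 7, 1, NULL }, b = { 9, 0, &c }, a = { 7, 0, &b };
        pending_proc_t *pending = &a;
        connect_all_jobs(&pending);
        return 0;
    }

The design point is that the per-job modex query moves inside the outer loop, so a connect involving N jobids triggers N queries instead of reusing the first job's answer for every process.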


ompi/dpm/dpm.c

Lines changed: 69 additions & 55 deletions
@@ -259,6 +259,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
      * fail. */
     if (0 >= rportlen) {
         rc = rportlen;
+        /* no need to free here, the root has already done it and everyone else has not yet allocated the rport array */
         goto exit;
     }

@@ -406,72 +407,85 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
         OPAL_LIST_DESTRUCT(&rlist);
         goto exit;
     }
-    if (0 < opal_list_get_size(&ilist)) {
-        uint32_t *peer_ranks = NULL;
+    if (!opal_list_is_empty(&ilist)) {
         int prn, nprn = 0;
         char *val;
-        uint16_t u16;
         opal_process_name_t wildcard_rank;
+        i = 0; /* start from the begining */
+
         /* convert the list of new procs to a proc_t array */
         new_proc_list = (ompi_proc_t**)calloc(opal_list_get_size(&ilist),
                                               sizeof(ompi_proc_t *));
-        /* get the list of local peers for the new procs */
-        cd = (ompi_dpm_proct_caddy_t*)opal_list_get_first(&ilist);
-        proc = cd->p;
-        wildcard_rank.jobid = proc->super.proc_name.jobid;
-        wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid;
-        /* retrieve the local peers */
-        OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_PEERS,
-                                       &wildcard_rank, &val, PMIX_STRING);
-        if (OPAL_SUCCESS == rc && NULL != val) {
-            char **peers = opal_argv_split(val, ',');
-            free(val);
-            nprn = opal_argv_count(peers);
-            peer_ranks = (uint32_t*)calloc(nprn, sizeof(uint32_t));
-            for (prn = 0; NULL != peers[prn]; prn++) {
-                peer_ranks[prn] = strtoul(peers[prn], NULL, 10);
-            }
-            opal_argv_free(peers);
-        }
-
-        i = 0;
-        OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
-            proc = cd->p;
-            new_proc_list[i] = proc ;
-            /* ompi_proc_complete_init_single() initializes and optionally retrieves
-             * OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. since we can live without
-             * them, we are just fine */
-            ompi_proc_complete_init_single(proc);
-            /* if this proc is local, then get its locality */
-            if (NULL != peer_ranks) {
-                for (prn=0; prn < nprn; prn++) {
-                    if (peer_ranks[prn] == proc->super.proc_name.vpid) {
-                        /* get their locality string */
-                        val = NULL;
-                        OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING,
-                                                        &proc->super.proc_name, &val, PMIX_STRING);
-                        if (OPAL_SUCCESS == rc && NULL != ompi_process_info.locality) {
-                            u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality, val);
-                            free(val);
-                        } else {
-                            /* all we can say is that it shares our node */
-                            u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
-                        }
-                        proc->super.proc_flags = u16;
-                        /* save the locality for later */
-                        OPAL_PMIX_CONVERT_NAME(&pxproc, &proc->super.proc_name);
-                        pval.type = PMIX_UINT16;
-                        pval.data.uint16 = proc->super.proc_flags;
-                        PMIx_Store_internal(&pxproc, PMIX_LOCALITY, &pval);
-                        break;
-                    }
-                }
-            }
-            ++i;
-        }
-        if (NULL != peer_ranks) {
-            free(peer_ranks);
-        }
+        /* Extract the modex info for the first proc on the ilist, and then
+         * remove all processors in the same jobid from the list by getting
+         * their connection information and moving them into the proc array.
+         */
+        do {
+            uint32_t *local_ranks_in_jobid = NULL;
+            ompi_dpm_proct_caddy_t* next = NULL;
+            cd = (ompi_dpm_proct_caddy_t*)opal_list_get_first(&ilist);
+            proc = cd->p;
+            wildcard_rank.jobid = proc->super.proc_name.jobid;
+            wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid;
+            /* retrieve the local peers for the specified jobid */
+            OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_PEERS,
+                                           &wildcard_rank, &val, PMIX_STRING);
+            if (OPAL_SUCCESS == rc && NULL != val) {
+                char **peers = opal_argv_split(val, ',');
+                free(val);
+                nprn = opal_argv_count(peers);
+                local_ranks_in_jobid = (uint32_t*)calloc(nprn, sizeof(uint32_t));
+                for (prn = 0; NULL != peers[prn]; prn++) {
+                    local_ranks_in_jobid[prn] = strtoul(peers[prn], NULL, 10);
+                }
+                opal_argv_free(peers);
+            }
+
+            OPAL_LIST_FOREACH_SAFE(cd, next, &ilist, ompi_dpm_proct_caddy_t) {
+                proc = cd->p;
+                if( proc->super.proc_name.jobid != wildcard_rank.jobid )
+                    continue;  /* not a proc from this jobid */
+
+                new_proc_list[i] = proc;
+                opal_list_remove_item(&ilist, (opal_list_item_t*)cd);  // TODO: do we need to release cd ?
+                OBJ_RELEASE(cd);
+                /* ompi_proc_complete_init_single() initializes and optionally retrieves
+                 * OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. since we can live without
+                 * them, we are just fine */
+                ompi_proc_complete_init_single(proc);
+                /* if this proc is local, then get its locality */
+                if (NULL != local_ranks_in_jobid) {
+                    uint16_t u16;
+                    for (prn=0; prn < nprn; prn++) {
+                        if (local_ranks_in_jobid[prn] == proc->super.proc_name.vpid) {
+                            /* get their locality string */
+                            val = NULL;
+                            OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING,
+                                                            &proc->super.proc_name, &val, PMIX_STRING);
+                            if (OPAL_SUCCESS == rc && NULL != ompi_process_info.locality) {
+                                u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality, val);
+                                free(val);
+                            } else {
+                                /* all we can say is that it shares our node */
+                                u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
+                            }
+                            proc->super.proc_flags = u16;
+                            /* save the locality for later */
+                            OPAL_PMIX_CONVERT_NAME(&pxproc, &proc->super.proc_name);
+                            pval.type = PMIX_UINT16;
+                            pval.data.uint16 = proc->super.proc_flags;
+                            PMIx_Store_internal(&pxproc, PMIX_LOCALITY, &pval);
+                            break;
+                        }
+                    }
+                }
+                ++i;
+            }
+            if (NULL != local_ranks_in_jobid) {
+                free(local_ranks_in_jobid);
+            }
+        } while (!opal_list_is_empty(&ilist));
+
         /* call add_procs on the new ones */
         rc = MCA_PML_CALL(add_procs(new_proc_list, opal_list_get_size(&ilist)));
         free(new_proc_list);
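
For reference, PMIX_LOCAL_PEERS arrives as a comma-separated string of vpids (for example "0,2,5"), which the patched loop splits with opal_argv_split and converts with strtoul. Below is a self-contained sketch of the same parsing step without the opal_argv helpers; parse_local_peers is an illustrative name, not an OMPI function:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Parse a PMIX_LOCAL_PEERS-style string ("0,2,5") into a uint32_t array.
     * Returns the number of ranks parsed; *out is heap-allocated, caller frees. */
    static int parse_local_peers(const char *val, uint32_t **out)
    {
        char *copy = strdup(val);
        int count = 1;
        for (const char *c = val; *c != '\0'; c++) {
            if (*c == ',') count++;        /* upper bound on the number of ranks */
        }
        *out = calloc(count, sizeof(uint32_t));
        int i = 0;
        for (char *tok = strtok(copy, ","); tok != NULL; tok = strtok(NULL, ",")) {
            (*out)[i++] = (uint32_t)strtoul(tok, NULL, 10);
        }
        free(copy);
        return i;
    }

    int main(void)
    {
        uint32_t *ranks = NULL;
        int n = parse_local_peers("0,2,5", &ranks);
        for (int i = 0; i < n; i++) {
            printf("local peer vpid: %u\n", ranks[i]);
        }
        free(ranks);
        return 0;
    }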
