Skip to content

Commit 8e656d9

Browse files
authored
Merge pull request #11736 from bosilca/topic/fix_multi_spawn
Retrieve the modex of all jobs during accept/connect
2 parents 8514e71 + ba0bce4 commit 8e656d9

File tree

1 file changed

+69
-55
lines changed

1 file changed

+69
-55
lines changed

ompi/dpm/dpm.c

Lines changed: 69 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
259259
* fail. */
260260
if (0 >= rportlen) {
261261
rc = rportlen;
262+
/* no need to free here, the root has already done it and everyone else has not yet allocated the rport array */
262263
goto exit;
263264
}
264265

@@ -406,72 +407,85 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
406407
OPAL_LIST_DESTRUCT(&rlist);
407408
goto exit;
408409
}
409-
if (0 < opal_list_get_size(&ilist)) {
410-
uint32_t *peer_ranks = NULL;
410+
if (!opal_list_is_empty(&ilist)) {
411411
int prn, nprn = 0;
412412
char *val;
413-
uint16_t u16;
414413
opal_process_name_t wildcard_rank;
414+
i = 0; /* start from the beginning */
415+
415416
/* convert the list of new procs to a proc_t array */
416417
new_proc_list = (ompi_proc_t**)calloc(opal_list_get_size(&ilist),
417418
sizeof(ompi_proc_t *));
418-
/* get the list of local peers for the new procs */
419-
cd = (ompi_dpm_proct_caddy_t*)opal_list_get_first(&ilist);
420-
proc = cd->p;
421-
wildcard_rank.jobid = proc->super.proc_name.jobid;
422-
wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid;
423-
/* retrieve the local peers */
424-
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_PEERS,
425-
&wildcard_rank, &val, PMIX_STRING);
426-
if (OPAL_SUCCESS == rc && NULL != val) {
427-
char **peers = opal_argv_split(val, ',');
428-
free(val);
429-
nprn = opal_argv_count(peers);
430-
peer_ranks = (uint32_t*)calloc(nprn, sizeof(uint32_t));
431-
for (prn = 0; NULL != peers[prn]; prn++) {
432-
peer_ranks[prn] = strtoul(peers[prn], NULL, 10);
433-
}
434-
opal_argv_free(peers);
435-
}
436-
437-
i = 0;
438-
OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
419+
/* Extract the modex info for the first proc on the ilist, and then
420+
 * remove all processes in the same jobid from the list by getting
421+
* their connection information and moving them into the proc array.
422+
*/
423+
do {
424+
uint32_t *local_ranks_in_jobid = NULL;
425+
ompi_dpm_proct_caddy_t* next = NULL;
426+
cd = (ompi_dpm_proct_caddy_t*)opal_list_get_first(&ilist);
439427
proc = cd->p;
440-
new_proc_list[i] = proc ;
441-
/* ompi_proc_complete_init_single() initializes and optionally retrieves
442-
* OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. since we can live without
443-
* them, we are just fine */
444-
ompi_proc_complete_init_single(proc);
445-
/* if this proc is local, then get its locality */
446-
if (NULL != peer_ranks) {
447-
for (prn=0; prn < nprn; prn++) {
448-
if (peer_ranks[prn] == proc->super.proc_name.vpid) {
449-
/* get their locality string */
450-
val = NULL;
451-
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING,
452-
&proc->super.proc_name, &val, PMIX_STRING);
453-
if (OPAL_SUCCESS == rc && NULL != ompi_process_info.locality) {
454-
u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality, val);
455-
free(val);
456-
} else {
457-
/* all we can say is that it shares our node */
458-
u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
428+
wildcard_rank.jobid = proc->super.proc_name.jobid;
429+
wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid;
430+
/* retrieve the local peers for the specified jobid */
431+
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_PEERS,
432+
&wildcard_rank, &val, PMIX_STRING);
433+
if (OPAL_SUCCESS == rc && NULL != val) {
434+
char **peers = opal_argv_split(val, ',');
435+
free(val);
436+
nprn = opal_argv_count(peers);
437+
local_ranks_in_jobid = (uint32_t*)calloc(nprn, sizeof(uint32_t));
438+
for (prn = 0; NULL != peers[prn]; prn++) {
439+
local_ranks_in_jobid[prn] = strtoul(peers[prn], NULL, 10);
440+
}
441+
opal_argv_free(peers);
442+
}
443+
444+
OPAL_LIST_FOREACH_SAFE(cd, next, &ilist, ompi_dpm_proct_caddy_t) {
445+
proc = cd->p;
446+
if( proc->super.proc_name.jobid != wildcard_rank.jobid )
447+
continue; /* not a proc from this jobid */
448+
449+
new_proc_list[i] = proc;
450+
opal_list_remove_item(&ilist, (opal_list_item_t*)cd); /* caddy no longer needed; released immediately below */
451+
OBJ_RELEASE(cd);
452+
/* ompi_proc_complete_init_single() initializes and optionally retrieves
453+
* OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. since we can live without
454+
* them, we are just fine */
455+
ompi_proc_complete_init_single(proc);
456+
/* if this proc is local, then get its locality */
457+
if (NULL != local_ranks_in_jobid) {
458+
uint16_t u16;
459+
for (prn=0; prn < nprn; prn++) {
460+
if (local_ranks_in_jobid[prn] == proc->super.proc_name.vpid) {
461+
/* get their locality string */
462+
val = NULL;
463+
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING,
464+
&proc->super.proc_name, &val, PMIX_STRING);
465+
if (OPAL_SUCCESS == rc && NULL != ompi_process_info.locality) {
466+
u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality, val);
467+
free(val);
468+
} else {
469+
/* all we can say is that it shares our node */
470+
u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
471+
}
472+
proc->super.proc_flags = u16;
473+
/* save the locality for later */
474+
OPAL_PMIX_CONVERT_NAME(&pxproc, &proc->super.proc_name);
475+
pval.type = PMIX_UINT16;
476+
pval.data.uint16 = proc->super.proc_flags;
477+
PMIx_Store_internal(&pxproc, PMIX_LOCALITY, &pval);
478+
break;
459479
}
460-
proc->super.proc_flags = u16;
461-
/* save the locality for later */
462-
OPAL_PMIX_CONVERT_NAME(&pxproc, &proc->super.proc_name);
463-
pval.type = PMIX_UINT16;
464-
pval.data.uint16 = proc->super.proc_flags;
465-
PMIx_Store_internal(&pxproc, PMIX_LOCALITY, &pval);
466-
break;
467480
}
468481
}
482+
++i;
469483
}
470-
++i;
471-
}
472-
if (NULL != peer_ranks) {
473-
free(peer_ranks);
474-
}
484+
if (NULL != local_ranks_in_jobid) {
485+
free(local_ranks_in_jobid);
486+
}
487+
} while (!opal_list_is_empty(&ilist));
488+
475489
/* call add_procs on the new ones */
476490
rc = MCA_PML_CALL(add_procs(new_proc_list, opal_list_get_size(&ilist)));
477491
free(new_proc_list);

0 commit comments

Comments
 (0)