Skip to content

Commit 8ca4d7c

Browse files
committed
Fix singleton operations
You cannot daemonize the "prte" executable when spawning it to support a singleton, as that will cause things to hang. Also fix IO forwarding through the singleton for the spawned child procs by correcting a mistake that caused the IOF request attributes to be overlooked when constructing the job info for the PMIx_Spawn call. Includes an update to the PMIx and PRRTE submodule pointers to pick up a couple of relevant corrections there. See: openpmix/prrte#1621 openpmix/openpmix#2881 Signed-off-by: Ralph Castain <rhc@pmix.org>
1 parent 5bbc591 commit 8ca4d7c

File tree

3 files changed

+15
-16
lines changed

3 files changed

+15
-16
lines changed

3rd-party/openpmix

Submodule openpmix updated 164 files

3rd-party/prrte

Submodule prrte updated 107 files

ompi/dpm/dpm.c

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
* Copyright (c) 2014-2020 Research Organization for Information Science
2121
* and Technology (RIST). All rights reserved.
2222
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
23-
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
23+
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
2424
* Copyright (c) 2018-2022 Triad National Security, LLC. All rights
2525
* reserved.
2626
* Copyright (c) 2022 IBM Corporation. All rights reserved.
@@ -1595,18 +1595,6 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
15951595
opal_list_append(&job_info, &info->super);
15961596
}
15971597

1598-
/* spawn procs */
1599-
ninfo = opal_list_get_size(&job_info);
1600-
if (0 < ninfo) {
1601-
PMIX_INFO_CREATE(pinfo, ninfo);
1602-
n = 0;
1603-
OPAL_LIST_FOREACH(info, &job_info, opal_info_item_t) {
1604-
PMIX_INFO_XFER(&pinfo[n], &info->info);
1605-
++n;
1606-
}
1607-
}
1608-
OPAL_LIST_DESTRUCT(&job_info);
1609-
16101598
if (opal_process_info.is_singleton) {
16111599
/* The GDS 'hash' component is known to work for singleton, so
16121600
* recommend it. The user may set this envar to override the setting.
@@ -1645,6 +1633,18 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
16451633
opal_argv_free(dash_host);
16461634
}
16471635

1636+
/* spawn procs */
1637+
ninfo = opal_list_get_size(&job_info);
1638+
if (0 < ninfo) {
1639+
PMIX_INFO_CREATE(pinfo, ninfo);
1640+
n = 0;
1641+
OPAL_LIST_FOREACH(info, &job_info, opal_info_item_t) {
1642+
PMIX_INFO_XFER(&pinfo[n], &info->info);
1643+
++n;
1644+
}
1645+
}
1646+
OPAL_LIST_DESTRUCT(&job_info);
1647+
16481648
pret = PMIx_Spawn(pinfo, ninfo, apps, count, nspace);
16491649
rc = opal_pmix_convert_status(pret);
16501650
if (NULL != pinfo) {
@@ -2046,7 +2046,6 @@ static int start_dvm(char **hostfiles, char **dash_host)
20462046
opal_asprintf(&tmp, "%d", death_pipe[0]);
20472047
opal_argv_append_nosize(&args, tmp);
20482048
free(tmp);
2049-
opal_argv_append_nosize(&args, "--daemonize");
20502049

20512050
/* Fork off the child */
20522051
pid = fork();

0 commit comments

Comments
 (0)