Skip to content

Commit 650754e

Browse files
hppritcha authored and awlauria committed
PMIx_Connect usage: add optional timeout
Add an MCA parameter that can be used to set a timeout on the PMIx_Connect operation used to support MPI_Comm_accept/connect and relatives. Related to #8958. Signed-off-by: Howard Pritchard <hppritcha@gmail.com> (cherry picked from commit 038291a)
1 parent 62f2e6f commit 650754e

File tree

3 files changed

+19
-2
lines changed

3 files changed

+19
-2
lines changed

ompi/dpm/dpm.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
* and Technology (RIST). All rights reserved.
2222
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
2323
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
24+
* Copyright (c) 2021 Triad National Security, LLC. All rights
25+
* reserved.
2426
* $COPYRIGHT$
2527
*
2628
* Additional copyrights may follow
@@ -104,7 +106,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
104106
bool dense, isnew;
105107
opal_process_name_t pname;
106108
opal_list_t ilist, mlist, rlist;
107-
pmix_info_t info;
109+
pmix_info_t info, tinfo;
108110
pmix_value_t pval;
109111
pmix_pdata_t pdat;
110112
pmix_proc_t *procs, pxproc;
@@ -374,7 +376,10 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
374376
/* tell the host RTE to connect us - this will download
375377
* all known data for the nspace's of participating procs
376378
* so that add_procs will not result in a slew of lookups */
377-
pret = PMIx_Connect(procs, nprocs, NULL, 0);
379+
PMIX_INFO_CONSTRUCT(&tinfo);
380+
PMIX_INFO_LOAD(&tinfo, PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);
381+
pret = PMIx_Connect(procs, nprocs, &tinfo, 1);
382+
PMIX_INFO_DESTRUCT(&tinfo);
378383
PMIX_PROC_FREE(procs, nprocs);
379384
rc = opal_pmix_convert_status(pret);
380385
if (OPAL_SUCCESS != rc) {

ompi/runtime/ompi_mpi_params.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ bool ompi_mpi_compat_mpi3 = false;
8686

8787
char *ompi_mpi_spc_attach_string = NULL;
8888
bool ompi_mpi_spc_dump_enabled = false;
89+
uint32_t ompi_pmix_connect_timeout;
8990

9091
static bool show_default_mca_params = false;
9192
static bool show_file_mca_params = false;
@@ -382,6 +383,13 @@ int ompi_mpi_register_params(void)
382383
&ompi_mpi_spc_dump_enabled);
383384
#endif // SPC_ENABLE
384385

386+
ompi_pmix_connect_timeout = 0; /* infinite timeout - see PMIx standard */
387+
(void) mca_base_var_register ("ompi", "mpi", NULL, "pmix_connect_timeout",
388+
"Timeout(secs) for calls to PMIx_Connect. Default is no timeout.",
389+
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
390+
0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
391+
&ompi_pmix_connect_timeout);
392+
385393
return OMPI_SUCCESS;
386394
}
387395

ompi/runtime/params.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,10 @@ OMPI_DECLSPEC extern char * ompi_mpi_spc_attach_string;
175175
*/
176176
OMPI_DECLSPEC extern bool ompi_mpi_spc_dump_enabled;
177177

178+
/**
179+
* Timeout in seconds for calls to PMIx_Connect (default 0, no timeout)
180+
*/
181+
OMPI_DECLSPEC extern uint32_t ompi_pmix_connect_timeout;
178182

179183
/**
180184
* Register MCA parameters used by the MPI layer.

0 commit comments

Comments
 (0)