Skip to content

Commit e26592e

Browse files
authored
Merge pull request #8998 from rhc54/topic/osub
Pass oversubscribe status to MPI layer
2 parents 6236284 + 2b335ed commit e26592e

File tree

7 files changed

+27
-17
lines changed

7 files changed

+27
-17
lines changed

3rd-party/prrte

Submodule prrte updated 52 files

ompi/runtime/ompi_mpi_init.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,11 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
560 560
OMPI_TIMING_IMPORT_OPAL("rte_init");
561 561

562 562
ompi_rte_initialized = true;
563+
/* if we are oversubscribed, then set yield_when_idle
564+
* accordingly */
565+
if (ompi_mpi_oversubscribed) {
566+
ompi_mpi_yield_when_idle = true;
567+
}
563 568

564 569
/* Register the default errhandler callback */
565 570
/* we want to go first */

ompi/runtime/ompi_mpi_params.c

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
22 22
* and Technology (RIST). All rights reserved.
23 23
* Copyright (c) 2021 Triad National Security, LLC. All rights
24 24
* reserved.
25+
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
25 26
* $COPYRIGHT$
26 27
*
27 28
* Additional copyrights may follow
@@ -91,7 +92,7 @@ static bool show_default_mca_params = false;
91 92
static bool show_file_mca_params = false;
92 93
static bool show_enviro_mca_params = false;
93 94
static bool show_override_mca_params = false;
94-
static bool ompi_mpi_oversubscribe = false;
95+
bool ompi_mpi_oversubscribed = false;
95 96

96 97
#if OPAL_ENABLE_FT_MPI
97 98
int ompi_ftmpi_output_handle = 0;
@@ -147,20 +148,7 @@ int ompi_mpi_register_params(void)
147 148
ompi_mpi_param_check = false;
148 149
}
149 150

150-
/*
151-
* opal_progress: decide whether to yield and the event library
152-
* tick rate
153-
*/
154-
ompi_mpi_oversubscribe = false;
155-
(void) mca_base_var_register("ompi", "mpi", NULL, "oversubscribe",
156-
"Internal MCA parameter set by the runtime environment when oversubscribing nodes",
157-
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
158-
OPAL_INFO_LVL_9,
159-
MCA_BASE_VAR_SCOPE_READONLY,
160-
&ompi_mpi_oversubscribe);
161-
162 151
/* yield if the node is oversubscribed and allow users to override */
163-
ompi_mpi_yield_when_idle |= ompi_mpi_oversubscribe;
164 152
(void) mca_base_var_register("ompi", "mpi", NULL, "yield_when_idle",
165 153
"Yield the processor when waiting for MPI communication (for MPI processes, will default to 1 when oversubscribing nodes)",
166 154
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,

ompi/runtime/ompi_rte.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,16 @@ int ompi_rte_init(int *pargc, char ***pargv)
871 871
}
872 872
}
873 873

874+
#ifdef PMIX_NODE_OVERSUBSCRIBED
875+
pname.jobid = opal_process_info.my_name.jobid;
876+
pname.vpid = OPAL_VPID_WILDCARD;
877+
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, PMIX_NODE_OVERSUBSCRIBED, &pname,
878+
NULL, PMIX_BOOL);
879+
if (PMIX_SUCCESS == ret) {
880+
ompi_mpi_oversubscribed = true;
881+
}
882+
#endif
883+
874 884
return OPAL_SUCCESS;
875 885

876 886
error:

ompi/runtime/params.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
18 18
* Copyright (c) 2013 Intel, Inc. All rights reserved
19 19
* Copyright (c) 2021 Triad National Security, LLC. All rights
20 20
* reserved.
21+
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
21 22
* $COPYRIGHT$
22 23
*
23 24
* Additional copyrights may follow
@@ -198,6 +199,12 @@ OMPI_DECLSPEC int ompi_mpi_register_params(void);
198 199
*/
199 200
int ompi_show_all_mca_params(int32_t, int, char *);
200 201

202+
/**
203+
* Set by checking PMIx to see if we are running in an oversubscribed
204+
* environment or not.
205+
*/
206+
OMPI_DECLSPEC extern bool ompi_mpi_oversubscribed;
207+
201 208
END_C_DECLS
202 209

203 210
#endif /* OMPI_RUNTIME_PARAMS_H */

opal/mca/pmix/pmix-internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ typedef struct {
301 301
(r) = PMIX_ERR_NOT_FOUND; \
302 302
} else if (_kv->type != (t)) { \
303 303
(r) = PMIX_ERR_TYPE_MISMATCH; \
304-
} else if (PMIX_SUCCESS == (r)) { \
304+
} else if (PMIX_SUCCESS == (r) && NULL != (d)) { \
305 305
PMIX_VALUE_UNLOAD((r), _kv, (void **) (d), &_sz); \
306 306
} \
307 307
if (NULL != _kv) { \

0 commit comments

Comments
 (0)