Skip to content

Commit ee3e7f5

Browse files
authored
Merge pull request #9026 from rhc54/cmr50/osub
v5.0.x: Pass oversubscribe status to MPI layer
2 parents 27dee70 + a75847e commit ee3e7f5

File tree

7 files changed

+33 -19 lines changed

7 files changed

+33 -19 lines changed

ompi/runtime/ompi_mpi_init.c

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -559,6 +559,11 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
559559
OMPI_TIMING_IMPORT_OPAL("rte_init");
560560

561561
ompi_rte_initialized = true;
562+
/* if we are oversubscribed, then set yield_when_idle
563+
* accordingly */
564+
if (ompi_mpi_oversubscribed) {
565+
ompi_mpi_yield_when_idle = true;
566+
}
562567

563568
/* Register the default errhandler callback */
564569
/* we want to go first */

ompi/runtime/ompi_mpi_params.c

Lines changed: 6 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,11 @@
1818
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
1919
* Copyright (c) 2015 Mellanox Technologies, Inc.
2020
* All rights reserved.
21-
* Copyright (c) 2016-2019 Research Organization for Information Science
22-
* and Technology (RIST). All rights reserved.
21+
* Copyright (c) 2016-2021 Research Organization for Information Science
22+
* and Technology (RIST). All rights reserved.
23+
* Copyright (c) 2021 Triad National Security, LLC. All rights
24+
* reserved.
25+
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
2326
* $COPYRIGHT$
2427
*
2528
* Additional copyrights may follow
@@ -88,7 +91,7 @@ static bool show_default_mca_params = false;
8891
static bool show_file_mca_params = false;
8992
static bool show_enviro_mca_params = false;
9093
static bool show_override_mca_params = false;
91-
static bool ompi_mpi_oversubscribe = false;
94+
bool ompi_mpi_oversubscribed = false;
9295

9396
#if OPAL_ENABLE_FT_MPI
9497
int ompi_ftmpi_output_handle = 0;
@@ -144,20 +147,7 @@ int ompi_mpi_register_params(void)
144147
ompi_mpi_param_check = false;
145148
}
146149

147-
/*
148-
* opal_progress: decide whether to yield and the event library
149-
* tick rate
150-
*/
151-
ompi_mpi_oversubscribe = false;
152-
(void) mca_base_var_register("ompi", "mpi", NULL, "oversubscribe",
153-
"Internal MCA parameter set by the runtime environment when oversubscribing nodes",
154-
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
155-
OPAL_INFO_LVL_9,
156-
MCA_BASE_VAR_SCOPE_READONLY,
157-
&ompi_mpi_oversubscribe);
158-
159150
/* yield if the node is oversubscribed and allow users to override */
160-
ompi_mpi_yield_when_idle |= ompi_mpi_oversubscribe;
161151
(void) mca_base_var_register("ompi", "mpi", NULL, "yield_when_idle",
162152
"Yield the processor when waiting for MPI communication (for MPI processes, will default to 1 when oversubscribing nodes)",
163153
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,

ompi/runtime/ompi_rte.c

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -871,6 +871,16 @@ int ompi_rte_init(int *pargc, char ***pargv)
871871
}
872872
}
873873

874+
#ifdef PMIX_NODE_OVERSUBSCRIBED
875+
pname.jobid = opal_process_info.my_name.jobid;
876+
pname.vpid = OPAL_VPID_WILDCARD;
877+
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, PMIX_NODE_OVERSUBSCRIBED, &pname,
878+
NULL, PMIX_BOOL);
879+
if (PMIX_SUCCESS == ret) {
880+
ompi_mpi_oversubscribed = true;
881+
}
882+
#endif
883+
874884
return OPAL_SUCCESS;
875885

876886
error:

ompi/runtime/params.h

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,9 @@
1616
* Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
1717
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
1818
* Copyright (c) 2013 Intel, Inc. All rights reserved
19+
* Copyright (c) 2021 Triad National Security, LLC. All rights
20+
* reserved.
21+
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
1922
* $COPYRIGHT$
2023
*
2124
* Additional copyrights may follow
@@ -192,6 +195,12 @@ OMPI_DECLSPEC int ompi_mpi_register_params(void);
192195
*/
193196
int ompi_show_all_mca_params(int32_t, int, char *);
194197

198+
/**
199+
* Set by checking PMIx to see if we are running in an oversubscribed
200+
* environment or not.
201+
*/
202+
OMPI_DECLSPEC extern bool ompi_mpi_oversubscribed;
203+
195204
END_C_DECLS
196205

197206
#endif /* OMPI_RUNTIME_PARAMS_H */

opal/mca/pmix/pmix-internal.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -298,7 +298,7 @@ typedef struct {
298298
(r) = PMIX_ERR_NOT_FOUND; \
299299
} else if (_kv->type != (t)) { \
300300
(r) = PMIX_ERR_TYPE_MISMATCH; \
301-
} else if (PMIX_SUCCESS == (r)) { \
301+
} else if (PMIX_SUCCESS == (r) && NULL != (d)) { \
302302
PMIX_VALUE_UNLOAD((r), _kv, (void **) (d), &_sz); \
303303
} \
304304
if (NULL != _kv) { \

0 commit comments

Comments
 (0)