Skip to content

Commit 767ba1a

Browse files
committed
Allow for optimized scale launches
Many environments do not face the problem of inconsistent pml component selection. Yet the current pml implementation requires either that a modex be executed so that each process can check its pml selection against that of rank=0, or that each process engage in a very expensive all-to-all direct modex exchange of the pml selection. Provide an opportunity for users and system admins to bypass both of these methods by setting an MCA param (via one of the several OMPI-supported methods) that disables the check. This has a significant launch performance impact for most large-scale systems, but we will leave the param set to default to executing the check, since those who need it are probably not savvy enough to know that they do. Signed-off-by: Ralph Castain <rhc@pmix.org>
1 parent a75f933 commit 767ba1a

File tree

3 files changed

+15
-0
lines changed

3 files changed

+15
-0
lines changed

ompi/mca/pml/base/base.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ OMPI_DECLSPEC int mca_pml_base_revoke_comm(struct ompi_communicator_t *comm, boo
7272
OMPI_DECLSPEC extern mca_pml_base_component_t mca_pml_base_selected_component;
7373
OMPI_DECLSPEC extern mca_pml_base_module_t mca_pml;
7474
OMPI_DECLSPEC extern opal_pointer_array_t mca_pml_base_pml;
75+
OMPI_DECLSPEC extern bool ompi_pml_base_check_pml;
7576

7677
END_C_DECLS
7778

ompi/mca/pml/base/pml_base_frame.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ mca_pml_base_module_t mca_pml = {
9898
mca_pml_base_component_t mca_pml_base_selected_component = {{0}};
9999
opal_pointer_array_t mca_pml_base_pml = {{0}};
100100
char *ompi_pml_base_bsend_allocator_name = NULL;
101+
bool ompi_pml_base_check_pml = true;
101102

102103
#if !MCA_ompi_pml_DIRECT_CALL
103104
static char *ompi_pml_base_wrapper = NULL;
@@ -158,6 +159,15 @@ static int mca_pml_base_register(mca_base_register_flag_t flags)
158159
ompi_pml_base_warn_dep_cancel_send_level);
159160
}
160161
#endif
162+
163+
ompi_pml_base_check_pml = true;
164+
(void) mca_base_var_register("ompi", "pml", "base", "check_pml",
165+
"Whether to check the pml selections to ensure they all match",
166+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
167+
OPAL_INFO_LVL_9,
168+
MCA_BASE_VAR_SCOPE_READONLY,
169+
&ompi_pml_base_check_pml);
170+
161171
return OMPI_SUCCESS;
162172
}
163173

ompi/mca/pml/base/pml_base_select.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,10 @@ mca_pml_base_pml_check_selected(const char *my_pml,
346346
int ret = 0;
347347
size_t i;
348348

349+
if (!ompi_pml_base_check_pml) {
350+
return OMPI_SUCCESS;
351+
}
352+
349353
if (!opal_pmix_collect_all_data) {
350354
/*
351355
* If direct modex, then compare our PML with the peer's PML

0 commit comments

Comments
 (0)