Skip to content

Commit 6af82f0

Browse files
authored
Merge pull request #13312 from bwbarrett/bugfix/rdma-when-mtl-fix
osc: Fix rdma component when not using ob1
2 parents 4038fd6 + 4215325 commit 6af82f0

File tree

5 files changed

+41
-6
lines changed

5 files changed

+41
-6
lines changed

ompi/instance/instance.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,10 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
536536
return ompi_instance_print_error ("mca_pml_base_select() failed", ret);
537537
}
538538

539+
if (OMPI_SUCCESS != (ret = ompi_osc_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
540+
return ompi_instance_print_error ("ompi_osc_base_find_available() failed", ret);
541+
}
542+
539543
OMPI_TIMING_IMPORT_OPAL("orte_init");
540544
OMPI_TIMING_NEXT("rte_init-commit");
541545

@@ -617,10 +621,6 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
617621
return ompi_instance_print_error ("mca_coll_base_find_available() failed", ret);
618622
}
619623

620-
if (OMPI_SUCCESS != (ret = ompi_osc_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
621-
return ompi_instance_print_error ("ompi_osc_base_find_available() failed", ret);
622-
}
623-
624624
/* io and topo components are not selected here -- see comment
625625
above about the io and topo frameworks being loaded lazily */
626626

@@ -654,7 +654,8 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
654654
return ompi_instance_print_error ("ompi_attr_create_predefined_keyvals() failed", ret);
655655
}
656656

657-
if (mca_pml_base_requires_world ()) {
657+
if (mca_pml_base_requires_world() ||
658+
mca_osc_base_requires_world()) {
658659
/* need to set up comm world for this instance -- XXX -- FIXME -- probably won't always
659660
* be the case. */
660661
if (OMPI_SUCCESS != (ret = ompi_comm_init_mpi3 ())) {
@@ -699,7 +700,8 @@ static int ompi_mpi_instance_init_common (int argc, char **argv)
699700
/* some btls/mtls require we call add_procs with all procs in the job.
700701
* since the btls/mtls have no visibility here it is up to the pml (or osc) to
701702
* convey this requirement */
702-
if (mca_pml_base_requires_world ()) {
703+
if (mca_pml_base_requires_world() ||
704+
mca_osc_base_requires_world()) {
703705
if (NULL == (procs = ompi_proc_world (&nprocs))) {
704706
return ompi_instance_print_error ("ompi_proc_get_allocated () failed", ret);
705707
}

ompi/mca/osc/base/osc_base_init.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
#include "ompi/communicator/communicator.h"
3131
#include "ompi/win/win.h"
3232

33+
/* Defaults to false; an osc component sets this to true during its init when
 * instance setup has to bring up the world (read through
 * mca_osc_base_requires_world()). */
bool ompi_osc_base_requires_world = false;
34+
3335
int
3436
ompi_osc_base_select(ompi_win_t *win,
3537
void **base,

ompi/mca/osc/osc.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ struct ompi_datatype_t;
5353
struct ompi_op_t;
5454
struct ompi_request_t;
5555

56+
57+
extern bool ompi_osc_base_requires_world;
58+
5659
/* ******************************************************************** */
5760

5861

@@ -419,6 +422,11 @@ typedef ompi_osc_base_module_4_0_0_t ompi_osc_base_module_t;
419422

420423
/* ******************************************************************** */
421424

425+
/**
 * Report whether an osc component has requested world setup.
 *
 * @return the current value of ompi_osc_base_requires_world.
 */
static inline bool mca_osc_base_requires_world (void)
426+
{
427+
return ompi_osc_base_requires_world;
428+
}
429+
422430

423431
END_C_DECLS
424432

ompi/mca/osc/portals4/osc_portals4_component.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,8 @@ component_init(bool enable_progress_threads, bool enable_mpi_threads)
349349
return ret;
350350
}
351351

352+
ompi_osc_base_requires_world = true;
353+
352354
return OMPI_SUCCESS;
353355
}
354356

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,27 @@ static int ompi_osc_rdma_component_init (bool enable_progress_threads,
345345
__FILE__, __LINE__, ret);
346346
}
347347

348+
ret = mca_bml_base_init(enable_progress_threads, enable_mpi_threads);
349+
if (OPAL_SUCCESS != ret) {
350+
opal_output_verbose(1, ompi_osc_base_framework.framework_output,
351+
"%s:%d: bml_base_init() failed: %d",
352+
__FILE__, __LINE__, ret);
353+
return ret;
354+
}
355+
356+
/* check if any btls do not support dynamic add_procs */
357+
mca_btl_base_selected_module_t* selected_btl;
358+
OPAL_LIST_FOREACH(selected_btl, &mca_btl_base_modules_initialized,
359+
mca_btl_base_selected_module_t) {
360+
mca_btl_base_module_t *btl = selected_btl->btl_module;
361+
362+
if (btl->btl_flags & MCA_BTL_FLAGS_SINGLE_ADD_PROCS) {
363+
ompi_osc_base_requires_world = true;
364+
break;
365+
}
366+
367+
}
368+
348369
return ret;
349370
}
350371

0 commit comments

Comments
 (0)