|
21 | 21 | * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
|
22 | 22 | * Copyright (c) 2019 Research Organization for Information Science
|
23 | 23 | * and Technology (RIST). All rights reserved.
|
24 |
| - * Copyright (c) 2020 Google, LLC. All rights reserved. |
| 24 | + * Copyright (c) 2020-2021 Google, LLC. All rights reserved. |
25 | 25 | * $COPYRIGHT$
|
26 | 26 | *
|
27 | 27 | * Additional copyrights may follow
|
@@ -815,6 +815,34 @@ static int ompi_osc_rdma_query_mtls (void)
|
815 | 815 | return -1;
|
816 | 816 | }
|
817 | 817 |
|
| 818 | +/** |
| 819 | + * @brief ensure that all local procs are added to the bml |
| 820 | + * |
| 821 | + * The sm btl requires that all local procs be added to work correctly. If pml/ob1 |
| 822 | + * was not selected then we can't rely on this property. Since osc/rdma may use |
| 823 | + * btl/sm we need to ensure that btl/sm is set up correctly. This function will |
| 824 | + * only (potentially) call add_procs on local procs. |
| 825 | + */ |
| 826 | +static void ompi_osc_rdma_ensure_local_add_procs (void) |
| 827 | +{ |
| 828 | + size_t nprocs; |
| 829 | + ompi_proc_t** procs = ompi_proc_get_allocated (&nprocs); |
| 830 | + if (NULL == procs) { |
| 831 | + /* weird, this should have caused MPI_Init to fail */ |
| 832 | + return; |
| 833 | + } |
| 834 | + |
| 835 | + for (size_t proc_index = 0 ; proc_index < nprocs ; ++proc_index) { |
| 836 | + ompi_proc_t *proc = procs[proc_index]; |
| 837 | + if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) { |
| 838 | + /* this will cause add_proc to get called if it has not already been called */ |
| 839 | + (void) mca_bml_base_get_endpoint (proc); |
| 840 | + } |
| 841 | + } |
| 842 | + |
| 843 | + free(procs); |
| 844 | +} |
| 845 | + |
818 | 846 | static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl)
|
819 | 847 | {
|
820 | 848 | struct mca_btl_base_module_t **possible_btls = NULL;
|
@@ -859,6 +887,9 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
|
859 | 887 | return OMPI_SUCCESS;
|
860 | 888 | }
|
861 | 889 |
|
| 890 | + /* if osc/rdma gets selected we need to ensure that all local procs have been added */ |
| 891 | + ompi_osc_rdma_ensure_local_add_procs (); |
| 892 | + |
862 | 893 | for (int rank = 0 ; rank < comm_size ; ++rank) {
|
863 | 894 | ompi_proc_t *proc = ompi_comm_peer_lookup (comm, rank);
|
864 | 895 | mca_bml_base_endpoint_t *endpoint;
|
|
0 commit comments