@@ -813,6 +813,7 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
813
813
{
814
814
struct mca_btl_base_module_t * * possible_btls = NULL ;
815
815
int comm_size = ompi_comm_size (comm );
816
+ int comm_rank = ompi_comm_rank (comm );
816
817
int rc = OMPI_SUCCESS , max_btls = 0 ;
817
818
unsigned int selected_latency = INT_MAX ;
818
819
struct mca_btl_base_module_t * selected_btl = NULL ;
@@ -852,10 +853,11 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
852
853
return OMPI_SUCCESS ;
853
854
}
854
855
855
- for (int i = 0 ; i < comm_size ; ++ i ) {
856
- ompi_proc_t * proc = ompi_comm_peer_lookup (comm , i );
856
+ for (int rank = 0 ; rank < comm_size ; ++ rank ) {
857
+ ompi_proc_t * proc = ompi_comm_peer_lookup (comm , rank );
857
858
mca_bml_base_endpoint_t * endpoint ;
858
859
int num_btls , prev_max ;
860
+ bool found_btl = false;
859
861
860
862
endpoint = mca_bml_base_get_endpoint (proc );
861
863
if (NULL == endpoint ) {
@@ -901,23 +903,30 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
901
903
for (int j = 0 ; j < max_btls ; ++ j ) {
902
904
if (endpoint -> btl_rdma .bml_btls [i_btl ].btl == possible_btls [j ]) {
903
905
++ btl_counts [j ];
906
+ found_btl = true;
904
907
break ;
905
908
} else if (NULL == possible_btls [j ]) {
906
909
possible_btls [j ] = endpoint -> btl_rdma .bml_btls [i_btl ].btl ;
907
910
btl_counts [j ] = 1 ;
911
+ found_btl = true;
908
912
break ;
909
913
}
910
914
}
911
915
}
912
916
}
917
+
918
+ /* any non-local rank must have a usable btl; the calling rank (comm_rank) is exempt from this check */
919
+ if (!found_btl && comm_rank != rank ) {
920
+ /* no btl = no rdma/atomics */
921
+ rc = OMPI_ERR_UNREACH ;
922
+ break ;
923
+ }
913
924
}
914
925
915
926
if (OMPI_SUCCESS != rc ) {
916
927
free (possible_btls );
917
928
free (btl_counts );
918
-
919
- /* no btl = no rdma/atomics */
920
- return OMPI_ERR_NOT_AVAILABLE ;
929
+ return rc ;
921
930
}
922
931
923
932
for (int i = 0 ; i < max_btls ; ++ i ) {
0 commit comments