Skip to content

Commit eebc451

Browse files
committed
osc/rdma: fail query_btls if no endpoint for non-local peer is found
Signed-off-by: Joseph Schuchart <schuchart@hlrs.de>
1 parent 7702dfc commit eebc451

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -813,6 +813,7 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
813813
{
814814
struct mca_btl_base_module_t **possible_btls = NULL;
815815
int comm_size = ompi_comm_size (comm);
816+
int comm_rank = ompi_comm_rank (comm);
816817
int rc = OMPI_SUCCESS, max_btls = 0;
817818
unsigned int selected_latency = INT_MAX;
818819
struct mca_btl_base_module_t *selected_btl = NULL;
@@ -852,10 +853,11 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
852853
return OMPI_SUCCESS;
853854
}
854855

855-
for (int i = 0 ; i < comm_size ; ++i) {
856-
ompi_proc_t *proc = ompi_comm_peer_lookup (comm, i);
856+
for (int rank = 0 ; rank < comm_size ; ++rank) {
857+
ompi_proc_t *proc = ompi_comm_peer_lookup (comm, rank);
857858
mca_bml_base_endpoint_t *endpoint;
858859
int num_btls, prev_max;
860+
bool found_btl = false;
859861

860862
endpoint = mca_bml_base_get_endpoint (proc);
861863
if (NULL == endpoint) {
@@ -901,23 +903,30 @@ static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_b
901903
for (int j = 0 ; j < max_btls ; ++j) {
902904
if (endpoint->btl_rdma.bml_btls[i_btl].btl == possible_btls[j]) {
903905
++btl_counts[j];
906+
found_btl = true;
904907
break;
905908
} else if (NULL == possible_btls[j]) {
906909
possible_btls[j] = endpoint->btl_rdma.bml_btls[i_btl].btl;
907910
btl_counts[j] = 1;
911+
found_btl = true;
908912
break;
909913
}
910914
}
911915
}
912916
}
917+
918+
/* any non-local rank must have a usable btl */
919+
if (!found_btl && comm_rank != rank) {
920+
/* no btl = no rdma/atomics */
921+
rc = OMPI_ERR_UNREACH;
922+
break;
923+
}
913924
}
914925

915926
if (OMPI_SUCCESS != rc) {
916927
free (possible_btls);
917928
free (btl_counts);
918-
919-
/* no btl = no rdma/atomics */
920-
return OMPI_ERR_NOT_AVAILABLE;
929+
return rc;
921930
}
922931

923932
for (int i = 0 ; i < max_btls ; ++i) {

0 commit comments

Comments
 (0)