|
2 | 2 | /*
|
3 | 3 | * Copyright (c) 2018 Los Alamos National Security, LLC. All rights
|
4 | 4 | * reserved.
|
| 5 | + * Copyright (c) 2018 Triad National Security, LLC. All rights |
| 6 | + * reserved. |
5 | 7 | * $COPYRIGHT$
|
6 | 8 | *
|
7 | 9 | * Additional copyrights may follow
|
@@ -137,11 +139,26 @@ static void mca_btl_uct_connection_ep_destruct (mca_btl_uct_connection_ep_t *ep)
|
/* Class instance for mca_btl_uct_connection_ep_t. The arguments are, in order:
 * the type, opal_object_t, mca_btl_uct_connection_ep_construct and
 * mca_btl_uct_connection_ep_destruct.
 * NOTE(review): presumably (type, parent class, constructor, destructor) --
 * confirm against the OBJ_CLASS_INSTANCE macro definition. */
137 | 139 | OBJ_CLASS_INSTANCE(mca_btl_uct_connection_ep_t, opal_object_t, mca_btl_uct_connection_ep_construct,
|
138 | 140 | mca_btl_uct_connection_ep_destruct);
|
139 | 141 |
|
/* Completion object used to wait for a flush of the connection endpoint.
 * It pairs a UCT completion with a flag that the waiting code polls. */
| 142 | +struct mca_btl_uct_conn_completion_t { |
/* Embedded UCT completion. Must remain the first member: the flush callback
 * casts the uct_completion_t pointer it receives back to this struct type. */
| 143 | + uct_completion_t super; |
/* Set to true by mca_btl_uct_endpoint_flush_complete(); polled in the
 * progress loop of mca_btl_uct_endpoint_send_conn_req(). */
| 144 | + volatile bool complete; |
| 145 | +}; |
| 146 | +typedef struct mca_btl_uct_conn_completion_t mca_btl_uct_conn_completion_t; |
| 147 | + |
/* Flush completion callback: marks the enclosing
 * mca_btl_uct_conn_completion_t as complete.
 *
 * self   - pointer to the `super` member of a mca_btl_uct_conn_completion_t
 * status - completion status; not inspected by this callback
 *
 * NOTE(review): mca_btl_uct_endpoint_send_conn_req() spins on `complete`
 * whenever uct_ep_flush() returns UCS_OK or UCS_INPROGRESS. If UCT does not
 * invoke this callback when the flush completes immediately (UCS_OK), that
 * loop would never terminate -- confirm against the UCT documentation. */
| 148 | +static void mca_btl_uct_endpoint_flush_complete (uct_completion_t *self, ucs_status_t status) |
| 149 | +{ |
/* valid because `super` is the first member of mca_btl_uct_conn_completion_t */
| 150 | + mca_btl_uct_conn_completion_t *completion = (mca_btl_uct_conn_completion_t *) self; |
| 151 | + BTL_VERBOSE(("connection flush complete")); |
/* releases the waiter polling this flag */
| 152 | + completion->complete = true; |
| 153 | +} |
| 154 | + |
140 | 155 | static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mca_btl_base_endpoint_t *endpoint,
|
141 | 156 | mca_btl_uct_device_context_t *conn_tl_context,
|
142 | 157 | mca_btl_uct_conn_req_t *request, size_t request_length)
|
143 | 158 | {
|
144 | 159 | mca_btl_uct_connection_ep_t *conn_ep = endpoint->conn_ep;
|
| 160 | + mca_btl_uct_conn_completion_t completion = {.super = {.count = 1, .func = mca_btl_uct_endpoint_flush_complete}, |
| 161 | + .complete = false}; |
145 | 162 | ucs_status_t ucs_status;
|
146 | 163 |
|
147 | 164 | BTL_VERBOSE(("sending connection request to peer. context id: %d, type: %d, length: %" PRIsize_t,
|
@@ -170,10 +187,18 @@ static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mc
|
170 | 187 | } while (1);
|
171 | 188 |
|
172 | 189 | /* for now we just wait for the connection request to complete before continuing */
|
173 |
| - do { |
174 |
| - ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, NULL); |
175 |
| - mca_btl_uct_context_progress (conn_tl_context); |
176 |
| - } while (UCS_INPROGRESS == ucs_status); |
| 190 | + ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, &completion.super); |
| 191 | + if (UCS_OK != ucs_status && UCS_INPROGRESS != ucs_status) { |
| 192 | + /* NTH: I don't know if this path is needed. For some networks we must use a completion. */ |
| 193 | + do { |
| 194 | + ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, NULL); |
| 195 | + mca_btl_uct_context_progress (conn_tl_context); |
| 196 | + } while (UCS_INPROGRESS == ucs_status); |
| 197 | + } else { |
| 198 | + do { |
| 199 | + mca_btl_uct_context_progress (conn_tl_context); |
| 200 | + } while (!completion.complete); |
| 201 | + } |
177 | 202 |
|
178 | 203 | opal_mutex_lock (&endpoint->ep_lock);
|
179 | 204 |
|
@@ -284,8 +309,8 @@ int mca_btl_uct_endpoint_connect (mca_btl_uct_module_t *uct_btl, mca_btl_uct_end
|
284 | 309 | void *ep_addr, int tl_index)
|
285 | 310 | {
|
286 | 311 | mca_btl_uct_tl_endpoint_t *tl_endpoint = endpoint->uct_eps[context_id] + tl_index;
|
287 |
| - mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_rdma_context_specific (uct_btl, context_id); |
288 | 312 | mca_btl_uct_tl_t *tl = (tl_index == uct_btl->rdma_tl->tl_index) ? uct_btl->rdma_tl : uct_btl->am_tl;
|
| 313 | + mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_tl_context_specific (uct_btl, tl, context_id); |
289 | 314 | uint8_t *rdma_tl_data = NULL, *conn_tl_data = NULL, *am_tl_data = NULL, *tl_data;
|
290 | 315 | mca_btl_uct_connection_ep_t *conn_ep = NULL;
|
291 | 316 | mca_btl_uct_modex_t *modex;
|
|
0 commit comments