Skip to content

Commit 2a128b1

Browse files
committed
btl/uct: clean up retain/release of the connection endpoint
Connection endpoints are created to assist with connection connect-to-endpoint UCT endpoints. In most situations it is created and destroyed for each thread context but it may be used by multiple simultaneous threads. The code retains and releases the endpoint at multiple points where needed. This is overkill. This commit updates the code to retain once per connecting thread (or the original creation) and then releases when the connection is complete. Signed-off-by: Nathan Hjelm <hjelmn@google.com>
1 parent f99cbcd commit 2a128b1

File tree

2 files changed

+17
-26
lines changed

2 files changed

+17
-26
lines changed

opal/mca/btl/uct/btl_uct_endpoint.c

Lines changed: 13 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ static void mca_btl_uct_endpoint_destruct(mca_btl_uct_endpoint_t *endpoint)
3838
}
3939

4040
OBJ_DESTRUCT(&endpoint->ep_lock);
41+
if (endpoint->conn_ep) {
42+
OBJ_RELEASE(endpoint->conn_ep);
43+
}
4144
}
4245

4346
OBJ_CLASS_INSTANCE(mca_btl_uct_endpoint_t, opal_object_t, mca_btl_uct_endpoint_construct,
@@ -206,7 +209,6 @@ static int mca_btl_uct_endpoint_send_conn_req(mca_btl_uct_module_t *uct_btl,
206209
mca_btl_uct_conn_req_t *request,
207210
size_t request_length)
208211
{
209-
mca_btl_uct_connection_ep_t *conn_ep = endpoint->conn_ep;
210212
mca_btl_uct_conn_completion_t completion
211213
= {.super = {.count = 1, .func = mca_btl_uct_endpoint_flush_complete}, .complete = false};
212214
ucs_status_t ucs_status;
@@ -215,15 +217,13 @@ static int mca_btl_uct_endpoint_send_conn_req(mca_btl_uct_module_t *uct_btl,
215217
("sending connection request to peer. context id: %d, type: %d, length: %" PRIsize_t,
216218
request->context_id, request->type, request_length));
217219

218-
OBJ_RETAIN(endpoint->conn_ep);
219-
220220
/* need to drop the lock to avoid hold-and-wait */
221221
opal_mutex_unlock(&endpoint->ep_lock);
222222

223223
do {
224224
MCA_BTL_UCT_CONTEXT_SERIALIZE(conn_tl_context, {
225-
ucs_status = uct_ep_am_short(conn_ep->uct_ep, MCA_BTL_UCT_CONNECT_RDMA, request->type,
226-
request, request_length);
225+
ucs_status = uct_ep_am_short(endpoint->conn_ep->uct_ep, MCA_BTL_UCT_CONNECT_RDMA,
226+
request->type, request, request_length);
227227
});
228228
if (OPAL_LIKELY(UCS_OK == ucs_status)) {
229229
break;
@@ -238,11 +238,11 @@ static int mca_btl_uct_endpoint_send_conn_req(mca_btl_uct_module_t *uct_btl,
238238
} while (1);
239239

240240
/* for now we just wait for the connection request to complete before continuing */
241-
ucs_status = uct_ep_flush(conn_ep->uct_ep, 0, &completion.super);
241+
ucs_status = uct_ep_flush(endpoint->conn_ep->uct_ep, 0, &completion.super);
242242
if (UCS_OK != ucs_status && UCS_INPROGRESS != ucs_status) {
243243
/* NTH: I don't know if this path is needed. For some networks we must use a completion. */
244244
do {
245-
ucs_status = uct_ep_flush(conn_ep->uct_ep, 0, NULL);
245+
ucs_status = uct_ep_flush(endpoint->conn_ep->uct_ep, 0, NULL);
246246
mca_btl_uct_context_progress(conn_tl_context);
247247
} while (UCS_INPROGRESS == ucs_status);
248248
} else {
@@ -253,8 +253,6 @@ static int mca_btl_uct_endpoint_send_conn_req(mca_btl_uct_module_t *uct_btl,
253253

254254
opal_mutex_lock(&endpoint->ep_lock);
255255

256-
OBJ_RELEASE(endpoint->conn_ep);
257-
258256
return OPAL_SUCCESS;
259257
}
260258

@@ -265,7 +263,6 @@ static int mca_btl_uct_endpoint_send_connection_data(
265263
{
266264
mca_btl_uct_tl_t *conn_tl = uct_btl->conn_tl;
267265
mca_btl_uct_device_context_t *conn_tl_context = conn_tl->uct_dev_contexts[0];
268-
mca_btl_uct_connection_ep_t *conn_ep = endpoint->conn_ep;
269266
uct_device_addr_t *device_addr = NULL;
270267
uct_iface_addr_t *iface_addr;
271268
ucs_status_t ucs_status;
@@ -274,31 +271,33 @@ static int mca_btl_uct_endpoint_send_connection_data(
274271

275272
BTL_VERBOSE(("connecting endpoint to remote endpoint"));
276273

277-
if (NULL == conn_ep) {
274+
if (NULL == endpoint->conn_ep) {
278275
BTL_VERBOSE(("creating a temporary endpoint for handling connections to %p",
279276
opal_process_name_print(endpoint->ep_proc->proc_name)));
280277

281278
iface_addr = (uct_iface_addr_t *) conn_tl_data;
282279
device_addr = (uct_device_addr_t *) ((uintptr_t) conn_tl_data
283280
+ MCA_BTL_UCT_TL_ATTR(conn_tl, 0).iface_addr_len);
284281

285-
endpoint->conn_ep = conn_ep = OBJ_NEW(mca_btl_uct_connection_ep_t);
286-
if (OPAL_UNLIKELY(NULL == conn_ep)) {
282+
endpoint->conn_ep = OBJ_NEW(mca_btl_uct_connection_ep_t);
283+
if (OPAL_UNLIKELY(NULL == endpoint->conn_ep)) {
287284
return OPAL_ERR_OUT_OF_RESOURCE;
288285
}
289286

290287
/* create a temporary endpoint for setting up the rdma endpoint */
291288
MCA_BTL_UCT_CONTEXT_SERIALIZE(conn_tl_context, {
292289
ucs_status = mca_btl_uct_ep_create_connected_compat(conn_tl_context->uct_iface,
293290
device_addr, iface_addr,
294-
&conn_ep->uct_ep);
291+
&endpoint->conn_ep->uct_ep);
295292
});
296293
if (UCS_OK != ucs_status) {
297294
BTL_VERBOSE(
298295
("could not create an endpoint for forming connection to remote peer. code = %d",
299296
ucs_status));
300297
return OPAL_ERROR;
301298
}
299+
} else {
300+
OBJ_RETAIN(endpoint->conn_ep);
302301
}
303302

304303
size_t request_length = sizeof(mca_btl_uct_conn_req_t)
@@ -394,7 +393,6 @@ int mca_btl_uct_endpoint_connect(mca_btl_uct_module_t *uct_btl, mca_btl_uct_endp
394393
mca_btl_uct_device_context_t *tl_context
395394
= mca_btl_uct_module_get_tl_context_specific(uct_btl, tl, context_id);
396395
uint8_t *rdma_tl_data = NULL, *conn_tl_data = NULL, *am_tl_data = NULL, *tl_data;
397-
mca_btl_uct_connection_ep_t *conn_ep = NULL;
398396
mca_btl_uct_modex_t *modex;
399397
uint8_t *modex_data;
400398
size_t msg_size;
@@ -477,19 +475,8 @@ int mca_btl_uct_endpoint_connect(mca_btl_uct_module_t *uct_btl, mca_btl_uct_endp
477475

478476
} while (0);
479477

480-
/* to avoid a possible hold-and wait deadlock. destroy the endpoint after dropping the endpoint
481-
* lock. */
482-
if (endpoint->conn_ep && 1 == endpoint->conn_ep->super.obj_reference_count) {
483-
conn_ep = endpoint->conn_ep;
484-
endpoint->conn_ep = NULL;
485-
}
486-
487478
opal_mutex_unlock(&endpoint->ep_lock);
488479

489-
if (conn_ep) {
490-
OBJ_RELEASE(conn_ep);
491-
}
492-
493480
BTL_VERBOSE(("endpoint%s ready for use", (OPAL_ERR_OUT_OF_RESOURCE != rc) ? "" : " not yet"));
494481

495482
return rc;

opal/mca/btl/uct/btl_uct_endpoint.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ static inline void mca_btl_uct_endpoint_set_flag(mca_btl_uct_module_t *module, m
124124
frag->ready = true;
125125
}
126126
}
127+
128+
if (endpoint->conn_ep) {
129+
OBJ_RELEASE(endpoint->conn_ep);
130+
}
127131
}
128132
}
129133

0 commit comments

Comments
 (0)