Skip to content

Commit 72296e1

Browse files
author
Tomislav Janjusic
committed
opal/common/ucx:
-mutex lock/unlock suggestions -common destructor/cleanup Co-authored-with: Artem Y. Polyakov <artemp@mellanox.com> Signed-off-by: Tomislav Janjusic <tomislavj@mellanox.com>
1 parent 27ba4b6 commit 72296e1

File tree

2 files changed

+16
-31
lines changed

2 files changed

+16
-31
lines changed

opal/mca/common/ucx/common_ucx_wpool.c

Lines changed: 12 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -305,20 +305,6 @@ opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool)
305305
return completed;
306306
}
307307

308-
// static int
309-
// _wpool_list_return_item(opal_common_ucx_wpool_t *wpool, opal_list_t *idle_list, opal_list_t *active_list,
310-
// opal_common_ucx_winfo_t *winfo)
311-
// {
312-
// opal_list_item_t *tmp = NULL;
313-
// opal_mutex_lock(&wpool->mutex);
314-
// tmp = opal_list_remove_item(active_list, &(winfo->self->super));
315-
// assert(NULL != tmp);
316-
// opal_list_append(idle_list, &(winfo->self->super));
317-
// opal_mutex_unlock(&wpool->mutex);
318-
//
319-
// return OPAL_SUCCESS;
320-
// }
321-
322308
static int
323309
_wpool_list_put(opal_common_ucx_wpool_t *wpool, opal_list_t *list,
324310
opal_common_ucx_winfo_t *winfo)
@@ -358,15 +344,6 @@ _wpool_list_get(opal_common_ucx_wpool_t *wpool, opal_list_t *list)
358344
OBJ_RELEASE(item);
359345
}
360346
return winfo;
361-
362-
/*XXX*/
363-
// opal_mutex_lock(&wpool->mutex);
364-
// _winfo_list_item_t *item = (_winfo_list_item_t *)opal_list_remove_first(list);
365-
// opal_mutex_unlock(&wpool->mutex);
366-
// winfo = item->ptr;
367-
// OBJ_DESTRUCT(item);
368-
//
369-
// return winfo;
370347
}
371348

372349
static opal_common_ucx_winfo_t *
@@ -407,7 +384,7 @@ opal_common_ucx_wpctx_create(opal_common_ucx_wpool_t *wpool, int comm_size,
407384
opal_common_ucx_ctx_t *ctx = calloc(1, sizeof(*ctx));
408385
int ret = OPAL_SUCCESS;
409386

410-
OBJ_CONSTRUCT(&ctx->mutex, opal_recursive_mutex_t);
387+
OBJ_CONSTRUCT(&ctx->mutex, opal_mutex_t);
411388
OBJ_CONSTRUCT(&ctx->ctx_records, opal_list_t);
412389

413390
ctx->wpool = wpool;
@@ -492,6 +469,7 @@ int opal_common_ucx_wpmem_create(opal_common_ucx_ctx_t *ctx,
492469
mem->mem_addrs = NULL;
493470
mem->mem_displs = NULL;
494471

472+
OBJ_CONSTRUCT(&mem->mutex, opal_mutex_t);
495473
OBJ_CONSTRUCT(&mem->mem_records, opal_list_t);
496474

497475
ret = _comm_ucx_wpmem_map(ctx->wpool, mem_base, mem_size, &mem->memh,
@@ -639,7 +617,9 @@ _tlocal_ctx_rec_cleanup(_ctx_record_t *ctx_rec)
639617
* Remove the context record from the ctx list */
640618
opal_list_remove_item(&wpool->active_workers, &winfo->self->super);
641619
opal_list_prepend(&wpool->idle_workers, &winfo->self->super);
620+
opal_mutex_lock(&ctx_rec->gctx->mutex);
642621
opal_list_remove_item(&ctx_rec->gctx->ctx_records, &ctx_rec->super);
622+
opal_mutex_unlock(&ctx_rec->gctx->mutex);
643623

644624
opal_mutex_unlock(&wpool->mutex);
645625

@@ -705,6 +685,7 @@ static int _tlocal_ctx_connect(_ctx_record_t *ctx_rec, int target)
705685
if (status != UCS_OK) {
706686
opal_mutex_unlock(&winfo->mutex);
707687
MCA_COMMON_UCX_VERBOSE(1, "ucp_ep_create failed: %d", status);
688+
opal_mutex_unlock(&winfo->mutex);
708689
return OPAL_ERROR;
709690
}
710691
opal_mutex_unlock(&winfo->mutex);
@@ -733,9 +714,9 @@ _tlocal_mem_rec_cleanup(_mem_record_t *mem_rec)
733714
free(mem_rec->rkeys);
734715

735716
/* Remove item from the list */
736-
opal_mutex_lock(&mem_rec->gmem->ctx->mutex);
717+
opal_mutex_lock(&mem_rec->gmem->mutex);
737718
opal_list_remove_item(&mem_rec->gmem->mem_records, &mem_rec->super);
738-
opal_mutex_unlock(&mem_rec->gmem->ctx->mutex);
719+
opal_mutex_unlock(&mem_rec->gmem->mutex);
739720

740721
OBJ_RELEASE(mem_rec);
741722

@@ -750,9 +731,9 @@ static _mem_record_t *_tlocal_add_mem_rec(opal_common_ucx_wpmem_t *mem, _ctx_rec
750731
return NULL;
751732
}
752733

753-
opal_mutex_lock(&ctx_rec->gctx->mutex);
734+
opal_mutex_lock(&mem->mutex);
754735
opal_list_append(&mem->mem_records, &mem_rec->super);
755-
opal_mutex_unlock(&ctx_rec->gctx->mutex);
736+
opal_mutex_unlock(&mem->mutex);
756737

757738

758739
mem_rec->gmem = mem;
@@ -777,8 +758,10 @@ _tlocal_mem_create_rkey(_mem_record_t *mem_rec, ucp_ep_h ep, int target)
777758
int displ = gmem->mem_displs[target];
778759
ucs_status_t status;
779760

761+
opal_mutex_lock(&mem_rec->winfo->mutex);
780762
status = ucp_ep_rkey_unpack(ep, &gmem->mem_addrs[displ],
781763
&mem_rec->rkeys[target]);
764+
opal_mutex_unlock(&mem_rec->winfo->mutex);
782765
if (status != UCS_OK) {
783766
MCA_COMMON_UCX_VERBOSE(1, "ucp_ep_rkey_unpack failed: %d", status);
784767
return OPAL_ERROR;
@@ -890,9 +873,9 @@ opal_common_ucx_wpmem_flush(opal_common_ucx_wpmem_t *mem,
890873
(NULL == winfo->endpoints[target])) {
891874
continue;
892875
}
893-
opal_mutex_lock(&winfo->mutex);
894876
rc = opal_common_ucx_winfo_flush(winfo, target, OPAL_COMMON_UCX_FLUSH_B,
895877
scope, NULL);
878+
opal_mutex_lock(&winfo->mutex);
896879
switch (scope) {
897880
case OPAL_COMMON_UCX_SCOPE_WORKER:
898881
winfo->global_inflight_ops = 0;

opal/mca/common/ucx/common_ucx_wpool.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ BEGIN_C_DECLS
3838
typedef struct {
3939
/* Ref counting & locking*/
4040
int refcnt;
41-
opal_recursive_mutex_t mutex;
41+
opal_mutex_t mutex;
4242

4343
/* UCX data */
4444
ucp_context_h ucp_ctx;
@@ -61,7 +61,7 @@ typedef struct {
6161
* Context is bound to a particular Worker Pool object.
6262
*/
6363
typedef struct {
64-
opal_recursive_mutex_t mutex;
64+
opal_mutex_t mutex;
6565

6666
/* the reference to a Worker pool this context belongs to*/
6767
opal_common_ucx_wpool_t *wpool;
@@ -89,6 +89,8 @@ typedef struct {
8989
* be possible to have one context for multiple windows.
9090
*/
9191
typedef struct {
92+
opal_mutex_t mutex;
93+
9294
/* reference context to which memory region belongs */
9395
opal_common_ucx_ctx_t *ctx;
9496

0 commit comments

Comments
 (0)