Skip to content

Commit 4962651

Browse files
authored
Merge pull request #5366 from hoopoepg/topic/mca-common-ucx-unify-2
MCA/COMMON/UCX: minor unification of del_proces calls
2 parents bd5cd62 + 13331ba commit 4962651

File tree

5 files changed

+46
-23
lines changed

5 files changed

+46
-23
lines changed

config/ompi_check_ucx.m4

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[
108108
[AC_DEFINE([HAVE_UCP_TAG_SEND_NBR],[1],
109109
[have ucp_tag_send_nbr()])], [],
110110
[#include <ucp/api/ucp.h>])
111-
AC_CHECK_DECLS([ucp_ep_flush_nb, ucp_worker_flush_nb, ucp_request_check_status],
111+
AC_CHECK_DECLS([ucp_ep_flush_nb, ucp_worker_flush_nb,
112+
ucp_request_check_status, ucp_put_nb, ucp_get_nb],
112113
[], [],
113114
[#include <ucp/api/ucp.h>])
114115
CPPFLAGS=$old_CPPFLAGS

ompi/mca/pml/ucx/pml_ucx.c

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "opal/mca/pmix/pmix.h"
1919
#include "ompi/message/message.h"
2020
#include "ompi/mca/pml/base/pml_base_bsend.h"
21+
#include "opal/mca/common/ucx/common_ucx.h"
2122
#include "pml_ucx_request.h"
2223

2324
#include <inttypes.h>
@@ -374,29 +375,19 @@ static void mca_pml_ucx_waitall(void **reqs, int *count_p)
374375

375376
PML_UCX_VERBOSE(2, "waiting for %d disconnect requests", *count_p);
376377
for (i = 0; i < *count_p; ++i) {
377-
do {
378-
opal_progress();
379-
status = ucp_request_test(reqs[i], NULL);
380-
} while (status == UCS_INPROGRESS);
378+
status = opal_common_ucx_wait_request(reqs[i], ompi_pml_ucx.ucp_worker);
381379
if (status != UCS_OK) {
382380
PML_UCX_ERROR("disconnect request failed: %s",
383381
ucs_status_string(status));
384382
}
385-
ucp_request_free(reqs[i]);
386383
reqs[i] = NULL;
387384
}
388385

389386
*count_p = 0;
390387
}
391388

392-
static void mca_pml_fence_complete_cb(int status, void *fenced)
393-
{
394-
*(int*)fenced = 1;
395-
}
396-
397389
int mca_pml_ucx_del_procs(struct ompi_proc_t **procs, size_t nprocs)
398390
{
399-
volatile int fenced = 0;
400391
ompi_proc_t *proc;
401392
int num_reqs;
402393
size_t max_reqs;
@@ -447,10 +438,7 @@ int mca_pml_ucx_del_procs(struct ompi_proc_t **procs, size_t nprocs)
447438
mca_pml_ucx_waitall(dreqs, &num_reqs);
448439
free(dreqs);
449440

450-
opal_pmix.fence_nb(NULL, 0, mca_pml_fence_complete_cb, (void*)&fenced);
451-
while (!fenced) {
452-
ucp_worker_progress(ompi_pml_ucx.ucp_worker);
453-
}
441+
opal_common_ucx_mca_pmix_fence(ompi_pml_ucx.ucp_worker);
454442

455443
return OMPI_SUCCESS;
456444
}

opal/mca/common/ucx/common_ucx.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "common_ucx.h"
1313
#include "opal/mca/base/mca_base_var.h"
14+
#include "opal/mca/pmix/pmix.h"
1415

1516
/***********************************************************************/
1617

@@ -36,3 +37,19 @@ OPAL_DECLSPEC void opal_common_ucx_mca_register(void)
3637
void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status)
3738
{
3839
}
40+
41+
static void opal_common_ucx_mca_fence_complete_cb(int status, void *fenced)
42+
{
43+
*(int*)fenced = 1;
44+
}
45+
46+
OPAL_DECLSPEC void opal_common_ucx_mca_pmix_fence(ucp_worker_h worker)
47+
{
48+
volatile int fenced = 0;
49+
50+
opal_pmix.fence_nb(NULL, 0, opal_common_ucx_mca_fence_complete_cb, (void*)&fenced);
51+
while (!fenced) {
52+
ucp_worker_progress(worker);
53+
}
54+
}
55+

opal/mca/common/ucx/common_ucx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ extern int opal_common_ucx_progress_iterations;
2727

2828
OPAL_DECLSPEC void opal_common_ucx_mca_register(void);
2929
OPAL_DECLSPEC void opal_common_ucx_empty_complete_cb(void *request, ucs_status_t status);
30+
OPAL_DECLSPEC void opal_common_ucx_mca_pmix_fence(ucp_worker_h worker);
3031

3132
static inline
3233
ucs_status_t opal_common_ucx_wait_request(ucs_status_ptr_t request, ucp_worker_h worker)

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,11 @@ static void mca_spml_ucx_waitall(void **reqs, int *count_p)
103103

104104
SPML_VERBOSE(10, "waiting for %d disconnect requests", *count_p);
105105
for (i = 0; i < *count_p; ++i) {
106-
do {
107-
opal_progress();
108-
status = ucp_request_test(reqs[i], NULL);
109-
} while (status == UCS_INPROGRESS);
106+
status = opal_common_ucx_wait_request(reqs[i], mca_spml_ucx.ucp_worker);
110107
if (status != UCS_OK) {
111108
SPML_ERROR("disconnect request failed: %s",
112109
ucs_status_string(status));
113110
}
114-
ucp_request_release(reqs[i]);
115111
reqs[i] = NULL;
116112
}
117113

@@ -175,8 +171,9 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs)
175171
mca_spml_ucx_waitall(dreqs, &num_reqs);
176172
free(dreqs);
177173

178-
opal_pmix.fence(NULL, 0);
174+
opal_common_ucx_mca_pmix_fence(mca_spml_ucx.ucp_worker);
179175
free(mca_spml_ucx.ucp_peers);
176+
mca_spml_ucx.ucp_peers = NULL;
180177
return OSHMEM_SUCCESS;
181178
}
182179

@@ -560,10 +557,20 @@ int mca_spml_ucx_get(void *src_addr, size_t size, void *dst_addr, int src)
560557
void *rva;
561558
ucs_status_t status;
562559
spml_ucx_mkey_t *ucx_mkey;
560+
#if HAVE_DECL_UCP_GET_NB
561+
ucs_status_ptr_t request;
562+
#endif
563563

564564
ucx_mkey = mca_spml_ucx_get_mkey(src, src_addr, &rva, &mca_spml_ucx);
565+
#if HAVE_DECL_UCP_GET_NB
566+
request = ucp_get_nb(mca_spml_ucx.ucp_peers[src].ucp_conn, dst_addr, size,
567+
(uint64_t)rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb);
568+
/* TODO: replace wait_request by opal_common_ucx_wait_request_opal_status */
569+
status = opal_common_ucx_wait_request(request, mca_spml_ucx.ucp_worker);
570+
#else
565571
status = ucp_get(mca_spml_ucx.ucp_peers[src].ucp_conn, dst_addr, size,
566572
(uint64_t)rva, ucx_mkey->rkey);
573+
#endif
567574

568575
return ucx_status_to_oshmem(status);
569576
}
@@ -586,11 +593,20 @@ int mca_spml_ucx_put(void* dst_addr, size_t size, void* src_addr, int dst)
586593
void *rva;
587594
ucs_status_t status;
588595
spml_ucx_mkey_t *ucx_mkey;
596+
#if HAVE_DECL_UCP_PUT_NB
597+
ucs_status_ptr_t request;
598+
#endif
589599

590600
ucx_mkey = mca_spml_ucx_get_mkey(dst, dst_addr, &rva, &mca_spml_ucx);
601+
#if HAVE_DECL_UCP_PUT_NB
602+
request = ucp_put_nb(mca_spml_ucx.ucp_peers[dst].ucp_conn, src_addr, size,
603+
(uint64_t)rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb);
604+
/* TODO: replace wait_request by opal_common_ucx_wait_request_opal_status */
605+
status = opal_common_ucx_wait_request(request, mca_spml_ucx.ucp_worker);
606+
#else
591607
status = ucp_put(mca_spml_ucx.ucp_peers[dst].ucp_conn, src_addr, size,
592608
(uint64_t)rva, ucx_mkey->rkey);
593-
609+
#endif
594610
return ucx_status_to_oshmem(status);
595611
}
596612

0 commit comments

Comments
 (0)