Skip to content

Commit 781ac54

Browse files
committed
Allow MPI_WIN_SHARED_QUERY on regular windows
MPI 4.0 introduced allows applications to query regular windows for shared memory. This patch enables it for osc/rdma and osc/ucx and otherwise makes sure we fail gracefully if the component does not provide the query callback. For osc/rdma, this is currently supported only for allocated windows but could later be extended to windows with application-provided memory through xpmem. Signed-off-by: Joseph Schuchart <joseph.schuchart@stonybrook.edu>
1 parent 42b17ae commit 781ac54

File tree

3 files changed

+122
-13
lines changed

3 files changed

+122
-13
lines changed

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@
6969
#include "ompi/mca/bml/base/base.h"
7070
#include "ompi/mca/mtl/base/base.h"
7171

72+
static int ompi_osc_rdma_shared_query(struct ompi_win_t *win, int rank, size_t *size,
73+
ptrdiff_t *disp_unit, void *baseptr);
7274
static int ompi_osc_rdma_component_register (void);
7375
static int ompi_osc_rdma_component_init (bool enable_progress_threads, bool enable_mpi_threads);
7476
static int ompi_osc_rdma_component_finalize (void);
@@ -112,6 +114,7 @@ ompi_osc_rdma_component_t mca_osc_rdma_component = {
112114
};
113115

114116
ompi_osc_base_module_t ompi_osc_rdma_module_rdma_template = {
117+
.osc_win_shared_query = ompi_osc_rdma_shared_query,
115118
.osc_win_attach = ompi_osc_rdma_attach,
116119
.osc_win_detach = ompi_osc_rdma_detach,
117120
.osc_free = ompi_osc_rdma_free,
@@ -876,7 +879,7 @@ static void ompi_osc_rdma_ensure_local_add_procs (void)
876879
/* this will cause add_proc to get called if it has not already been called */
877880
(void) mca_bml_base_get_endpoint (proc);
878881
}
879-
}
882+
}
880883

881884
free(procs);
882885
}
@@ -1610,3 +1613,58 @@ ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, const char *key, cons
16101613
*/
16111614
return module->no_locks ? "true" : "false";
16121615
}
1616+
1617+
int ompi_osc_rdma_shared_query(
1618+
struct ompi_win_t *win, int rank, size_t *size,
1619+
ptrdiff_t *disp_unit, void *baseptr)
1620+
{
1621+
ompi_osc_rdma_peer_t *peer;
1622+
int actual_rank = rank;
1623+
ompi_osc_rdma_module_t *module = GET_MODULE(win);
1624+
1625+
peer = ompi_osc_rdma_module_peer (module, actual_rank);
1626+
if (NULL == peer) {
1627+
return OMPI_ERR_BAD_PARAM;
1628+
}
1629+
1630+
/* currently only supported for allocated windows */
1631+
if (MPI_WIN_FLAVOR_ALLOCATE != module->flavor) {
1632+
return OMPI_ERR_NOT_SUPPORTED;
1633+
}
1634+
1635+
if (!ompi_osc_rdma_peer_local_base(peer)) {
1636+
return OMPI_ERR_NOT_SUPPORTED;
1637+
}
1638+
1639+
if (MPI_PROC_NULL == rank) {
1640+
/* iterate until we find a rank that has a non-zero size */
1641+
for (int i = 0 ; i < ompi_comm_size(module->comm) ; ++i) {
1642+
peer = ompi_osc_rdma_module_peer (module, i);
1643+
ompi_osc_rdma_peer_extended_t *ex_peer = (ompi_osc_rdma_peer_extended_t *) peer;
1644+
if (!ompi_osc_rdma_peer_local_base(peer)) {
1645+
continue;
1646+
} else if (module->same_size && ex_peer->super.base) {
1647+
break;
1648+
} else if (ex_peer->size > 0) {
1649+
break;
1650+
}
1651+
}
1652+
}
1653+
1654+
if (module->same_size && module->same_disp_unit) {
1655+
*size = module->size;
1656+
*disp_unit = module->disp_unit;
1657+
ompi_osc_rdma_peer_basic_t *ex_peer = (ompi_osc_rdma_peer_basic_t *) peer;
1658+
*((void**) baseptr) = (void *) (intptr_t)ex_peer->base;
1659+
} else {
1660+
ompi_osc_rdma_peer_extended_t *ex_peer = (ompi_osc_rdma_peer_extended_t *) peer;
1661+
if (ex_peer->super.base != 0) {
1662+
/* we know the base of the peer */
1663+
*((void**) baseptr) = (void *) (intptr_t)ex_peer->super.base;
1664+
*size = ex_peer->size;
1665+
*disp_unit = ex_peer->disp_unit;
1666+
return OMPI_SUCCESS;
1667+
}
1668+
}
1669+
return OMPI_ERR_NOT_SUPPORTED;
1670+
}

ompi/mca/osc/ucx/osc_ucx_component.c

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -465,30 +465,70 @@ static const char* ompi_osc_ucx_set_no_lock_info(opal_infosubscriber_t *obj, con
465465
return module->no_locks ? "true" : "false";
466466
}
467467

468+
static int ompi_osc_ucx_shared_query_peer(ompi_osc_ucx_module_t *module, int rank, size_t *size,
469+
ptrdiff_t *disp_unit, void *baseptr) {
470+
471+
ucp_ep_h *dflt_ep;
472+
ucp_ep_h ep; // ignored
473+
OSC_UCX_GET_DEFAULT_EP(dflt_ep, module, peer); // TODO: needed?
474+
ucs_status_t status;
475+
opal_common_ucx_winfo_t *winfo; // ignored
476+
rc = opal_common_ucx_tlocal_fetch(module->mem, peer, &ep, &rkey, &winfo, dflt_ep);
477+
if (OMPI_SUCCESS != rc) {
478+
return rc;
479+
}
480+
uint64_t raddr;
481+
void *addr_p;
482+
if (UCS_OK != ucp_rkey_ptr(rkey, module->addrs[peer], &addr_p)) {
483+
return OMPI_ERR_NOT_AVAILABLE;
484+
}
485+
*size = module->sizes[i];
486+
*((void**) baseptr) = (void *)module->shmem_addrs[i];
487+
*disp_unit = module->disp_units[i];
488+
489+
return OMPI_SUCCESS;
490+
}
491+
468492
int ompi_osc_ucx_shared_query(struct ompi_win_t *win, int rank, size_t *size,
469493
ptrdiff_t *disp_unit, void *baseptr)
470494
{
471495
ompi_osc_ucx_module_t *module =
472496
(ompi_osc_ucx_module_t*) win->w_osc_module;
473497

498+
*size = 0;
499+
*((void**) baseptr) = NULL;
500+
*disp_unit = 0;
501+
474502
if (module->flavor != MPI_WIN_FLAVOR_SHARED) {
475-
return MPI_ERR_WIN;
476-
}
477503

478-
if (MPI_PROC_NULL != rank) {
504+
if (MPI_PROC_NULL == rank) {
505+
for (int i = 0 ; i < ompi_comm_size(module->comm) ; ++i) {
506+
if (0 != module->sizes[i]) {
507+
if (OMPI_SUCCESS == ompi_osc_ucx_shared_query_peer(module, i, size, disp_unit, baseptr)) {
508+
return OMPI_SUCCESS;
509+
}
510+
}
511+
}
512+
} else {
513+
if (0 != module->sizes[i]) {
514+
if (OMPI_SUCCESS == ompi_osc_ucx_shared_query_peer(module, i, size, disp_unit, baseptr)) {
515+
return OMPI_SUCCESS;
516+
}
517+
}
518+
}
519+
return OMPI_ERR_NOT_SUPPORTED;
520+
521+
} else if (MPI_PROC_NULL != rank) { // shared memory window with given rank
479522
*size = module->sizes[rank];
480523
*((void**) baseptr) = (void *)module->shmem_addrs[rank];
481524
if (module->disp_unit == -1) {
482525
*disp_unit = module->disp_units[rank];
483526
} else {
484527
*disp_unit = module->disp_unit;
485528
}
486-
} else {
529+
} else { // shared memory window with MPI_PROC_NULL
487530
int i = 0;
488531

489-
*size = 0;
490-
*((void**) baseptr) = NULL;
491-
*disp_unit = 0;
492532
for (i = 0 ; i < ompi_comm_size(module->comm) ; ++i) {
493533
if (0 != module->sizes[i]) {
494534
*size = module->sizes[i];

ompi/mpi/c/win_shared_query.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ static const char FUNC_NAME[] = "MPI_Win_shared_query";
3434

3535
int MPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit, void *baseptr)
3636
{
37-
int rc;
3837
size_t tsize;
3938
ptrdiff_t du;
39+
int rc = OMPI_SUCCESS;
4040

4141
if (MPI_PARAM_CHECK) {
4242
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
@@ -48,12 +48,23 @@ int MPI_Win_shared_query(MPI_Win win, int rank, MPI_Aint *size, int *disp_unit,
4848
}
4949
}
5050

51+
rc = OMPI_ERR_NOT_SUPPORTED;
52+
5153
if (NULL != win->w_osc_module->osc_win_shared_query) {
5254
rc = win->w_osc_module->osc_win_shared_query(win, rank, &tsize, &du, baseptr);
53-
*size = tsize;
54-
*disp_unit = du;
55-
} else {
56-
rc = MPI_ERR_RMA_FLAVOR;
55+
if (OMPI_SUCCESS == rc) {
56+
*size = tsize;
57+
*disp_unit = du;
58+
}
59+
}
60+
61+
if (OMPI_ERR_NOT_SUPPORTED == rc) {
62+
/* gracefully bail out */
63+
*size = 0;
64+
*disp_unit = 0;
65+
*(void**) baseptr = NULL;
66+
rc = MPI_SUCCESS; // don't raise an error if the function is not supported
5767
}
68+
5869
OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME);
5970
}

0 commit comments

Comments
 (0)