Skip to content

Commit f8fd37d

Browse files
authored
Merge pull request #9570 from hoopoepg/topic/added-strong-sync-infra-v5.0
SPML/UCX: added strong sync for fence - v5.0
2 parents 1ee4092 + 1ed08ca commit f8fd37d

File tree

4 files changed

+107
-22
lines changed

4 files changed

+107
-22
lines changed

config/ompi_check_ucx.m4

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[
123123
[#include <ucp/api/ucp.h>])
124124
AC_CHECK_DECLS([ucp_ep_flush_nb, ucp_worker_flush_nb,
125125
ucp_request_check_status, ucp_put_nb, ucp_get_nb,
126-
ucp_put_nbx, ucp_get_nbx, ucp_atomic_op_nbx],
126+
ucp_put_nbx, ucp_get_nbx, ucp_atomic_op_nbx,
127+
ucp_ep_flush_nbx],
127128
[], [],
128129
[#include <ucp/api/ucp.h>])
129130
AC_CHECK_DECLS([ucm_test_events,

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 78 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default = {
8585
.ucp_worker = NULL,
8686
.ucp_peers = NULL,
8787
.options = 0,
88-
.synchronized_quiet = false
88+
.synchronized_quiet = false,
89+
.strong_sync = SPML_UCX_STRONG_ORDERING_NONE
8990
};
9091

9192
#if HAVE_DECL_UCP_ATOMIC_OP_NBX
@@ -404,7 +405,7 @@ int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs)
404405
{
405406
int res;
406407

407-
if (mca_spml_ucx.synchronized_quiet) {
408+
if (mca_spml_ucx_is_strong_ordering(ctx)) {
408409
ctx->put_proc_indexes = malloc(nprocs * sizeof(*ctx->put_proc_indexes));
409410
if (NULL == ctx->put_proc_indexes) {
410411
return OSHMEM_ERR_OUT_OF_RESOURCE;
@@ -426,7 +427,7 @@ int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs)
426427

427428
int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx)
428429
{
429-
if (mca_spml_ucx.synchronized_quiet && ctx->put_proc_indexes) {
430+
if (mca_spml_ucx_is_strong_ordering(ctx) && ctx->put_proc_indexes) {
430431
OBJ_DESTRUCT(&ctx->put_op_bitmap);
431432
free(ctx->put_proc_indexes);
432433
}
@@ -844,6 +845,7 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx
844845
ucx_ctx->ucp_worker = calloc(1, sizeof(ucp_worker_h));
845846
ucx_ctx->ucp_workers = 1;
846847
ucx_ctx->synchronized_quiet = mca_spml_ucx_ctx_default.synchronized_quiet;
848+
ucx_ctx->strong_sync = mca_spml_ucx_ctx_default.strong_sync;
847849

848850
params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
849851
if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE || options & SHMEM_CTX_PRIVATE || options & SHMEM_CTX_SERIALIZED) {
@@ -1178,14 +1180,81 @@ int mca_spml_ucx_put_nb_wprogress(shmem_ctx_t ctx, void* dst_addr, size_t size,
11781180
return ucx_status_to_oshmem_nb(status);
11791181
}
11801182

1183+
/**
 * Run the configured strong-synchronization operation against every peer
 * recorded in the context's put_proc_indexes list.
 *
 * For each of the first put_proc_count entries of put_proc_indexes, the
 * operation selected by the context's strong_sync mode is issued (a
 * non-blocking get, a blocking get, or an endpoint flush). After a successful
 * operation the peer's bit in put_op_bitmap is cleared. Once all peers have
 * been processed, put_proc_count is reset to 0.
 *
 * @param ctx  SHMEM context handle; cast to mca_spml_ucx_ctx_t internally.
 *
 * @return OSHMEM_SUCCESS when every recorded peer was processed. On the first
 *         failing operation oshmem_shmem_abort(-1) is called and that
 *         operation's error code is returned.
 */
static int mca_spml_ucx_strong_sync(shmem_ctx_t ctx)
{
    mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
    /* Destination buffer for the dummy get. It is static, presumably so that
     * the target of a non-blocking get stays valid after this function
     * returns. */
    static int flush_get_data;
    unsigned i;
    int ret;
    int idx;
#if HAVE_DECL_UCP_EP_FLUSH_NBX || HAVE_DECL_UCP_EP_FLUSH_NB
    ucs_status_ptr_t request;
#else
    ucs_status_t status;
#endif

    for (i = 0; i < ucx_ctx->put_proc_count; i++) {
        idx = ucx_ctx->put_proc_indexes[i];

        switch (ucx_ctx->strong_sync) {
        case SPML_UCX_STRONG_ORDERING_NONE:
            /* fallthrough: NONE shares the non-blocking get path (this is the
             * mode in effect when only synchronized_quiet is enabled) */
        case SPML_UCX_STRONG_ORDERING_GETNB:
            /* read sizeof(int) bytes from the base of the peer's service segment */
            ret = mca_spml_ucx_get_nb(ctx,
                                      ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
                                      sizeof(flush_get_data), &flush_get_data, idx, NULL);
            break;
        case SPML_UCX_STRONG_ORDERING_GET:
            ret = mca_spml_ucx_get(ctx,
                                   ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
                                   sizeof(flush_get_data), &flush_get_data, idx);
            break;
        /* The case label is deliberately outside the preprocessor conditional:
         * every build must route FLUSH mode to one of the flush variants below
         * instead of dropping into the default branch. */
        case SPML_UCX_STRONG_ORDERING_FLUSH:
#if HAVE_DECL_UCP_EP_FLUSH_NBX
            request = ucp_ep_flush_nbx(ucx_ctx->ucp_peers[idx].ucp_conn,
                                       &mca_spml_ucx_request_param_b);
            ret = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker[0], "ucp_flush_nbx");
#elif HAVE_DECL_UCP_EP_FLUSH_NB
            request = ucp_ep_flush_nb(ucx_ctx->ucp_peers[idx].ucp_conn, 0, opal_common_ucx_empty_complete_cb);
            ret = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker[0], "ucp_flush_nb");
#else
            status = ucp_ep_flush(ucx_ctx->ucp_peers[idx].ucp_conn);
            ret = (status == UCS_OK) ? OPAL_SUCCESS : OPAL_ERROR;
#endif
            break;
        default:
            /* unknown mode: nothing is issued for this peer */
            ret = OMPI_SUCCESS;
            break;
        }

        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            oshmem_shmem_abort(-1);
            return ret;
        }

        opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx);
    }

    ucx_ctx->put_proc_count = 0;
    return OSHMEM_SUCCESS;
}
1240+
11811241
int mca_spml_ucx_fence(shmem_ctx_t ctx)
11821242
{
1243+
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
11831244
ucs_status_t err;
1245+
int ret;
11841246
unsigned int i = 0;
1185-
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
11861247

11871248
opal_atomic_wmb();
11881249

1250+
if (ucx_ctx->strong_sync != SPML_UCX_STRONG_ORDERING_NONE) {
1251+
ret = mca_spml_ucx_strong_sync(ctx);
1252+
if (ret != OSHMEM_SUCCESS) {
1253+
oshmem_shmem_abort(-1);
1254+
return ret;
1255+
}
1256+
}
1257+
11891258
for (i=0; i < ucx_ctx->ucp_workers; i++) {
11901259
if (ucx_ctx->ucp_worker[i] != NULL) {
11911260
err = ucp_worker_fence(ucx_ctx->ucp_worker[i]);
@@ -1201,26 +1270,16 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx)
12011270

12021271
int mca_spml_ucx_quiet(shmem_ctx_t ctx)
12031272
{
1204-
int flush_get_data;
12051273
int ret;
12061274
unsigned i;
1207-
int idx;
12081275
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
12091276

1210-
if (mca_spml_ucx.synchronized_quiet) {
1211-
for (i = 0; i < ucx_ctx->put_proc_count; i++) {
1212-
idx = ucx_ctx->put_proc_indexes[i];
1213-
ret = mca_spml_ucx_get_nb(ctx,
1214-
ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
1215-
sizeof(flush_get_data), &flush_get_data, idx, NULL);
1216-
if (OMPI_SUCCESS != ret) {
1217-
oshmem_shmem_abort(-1);
1218-
return ret;
1219-
}
1220-
1221-
opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx);
1277+
if (ucx_ctx->synchronized_quiet) {
1278+
ret = mca_spml_ucx_strong_sync(ctx);
1279+
if (ret != OSHMEM_SUCCESS) {
1280+
oshmem_shmem_abort(-1);
1281+
return ret;
12221282
}
1223-
ucx_ctx->put_proc_count = 0;
12241283
}
12251284

12261285
opal_atomic_wmb();

oshmem/mca/spml/ucx/spml_ucx.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@ BEGIN_C_DECLS
4848
#define SPML_UCX_TRANSP_CNT 1
4949
#define SPML_UCX_SERVICE_SEG 0
5050

51+
/*
 * Strong-ordering modes; the values are compared against the strong_sync
 * field of a UCX SPML context.
 */
enum {
    /* don't use strong ordering */
    SPML_UCX_STRONG_ORDERING_NONE  = 0,
    /* use non-blocking read to provide ordering */
    SPML_UCX_STRONG_ORDERING_GETNB = 1,
    /* use blocking read to provide ordering */
    SPML_UCX_STRONG_ORDERING_GET   = 2,
    /* flush EP to provide ordering */
    SPML_UCX_STRONG_ORDERING_FLUSH = 3
};
57+
5158
/**
5259
* UCX SPML module
5360
*/
@@ -80,6 +87,7 @@ struct mca_spml_ucx_ctx {
8087
int *put_proc_indexes;
8188
unsigned put_proc_count;
8289
bool synchronized_quiet;
90+
int strong_sync;
8391
};
8492
typedef struct mca_spml_ucx_ctx mca_spml_ucx_ctx_t;
8593

@@ -114,7 +122,6 @@ struct mca_spml_ucx {
114122
mca_spml_ucx_ctx_t *aux_ctx;
115123
pthread_spinlock_t async_lock;
116124
int aux_refcnt;
117-
bool synchronized_quiet;
118125
unsigned long nb_progress_thresh_global;
119126
unsigned long nb_put_progress_thresh;
120127
unsigned long nb_get_progress_thresh;
@@ -294,9 +301,15 @@ static inline int ucx_status_to_oshmem_nb(ucs_status_t status)
294301
#endif
295302
}
296303

304+
/**
 * Tell whether a context has any form of strong ordering enabled.
 *
 * @param ctx  UCX SPML context to inspect.
 *
 * @return 1 when synchronized_quiet is set or strong_sync differs from
 *         SPML_UCX_STRONG_ORDERING_NONE, otherwise 0.
 */
static inline int mca_spml_ucx_is_strong_ordering(mca_spml_ucx_ctx_t *ctx)
{
    if (ctx->synchronized_quiet) {
        return 1;
    }

    return SPML_UCX_STRONG_ORDERING_NONE != ctx->strong_sync;
}
309+
297310
static inline void mca_spml_ucx_remote_op_posted(mca_spml_ucx_ctx_t *ctx, int dst)
298311
{
299-
if (OPAL_UNLIKELY(ctx->synchronized_quiet)) {
312+
if (OPAL_UNLIKELY(mca_spml_ucx_is_strong_ordering(ctx))) {
300313
if (!opal_bitmap_is_set_bit(&ctx->put_op_bitmap, dst)) {
301314
ctx->put_proc_indexes[ctx->put_proc_count++] = dst;
302315
opal_bitmap_set_bit(&ctx->put_op_bitmap, dst);

oshmem/mca/spml/ucx/spml_ucx_component.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,11 @@ static int mca_spml_ucx_component_register(void)
161161
"Use synchronized quiet on shmem_quiet or shmem_barrier_all operations",
162162
&mca_spml_ucx_ctx_default.synchronized_quiet);
163163

164+
mca_spml_ucx_param_register_int("strong_sync", 0,
165+
"Use strong synchronization on shmem_quiet, shmem_fence or shmem_barrier_all operations: "
166+
"0 - don't do strong synchronization, 1 - use non blocking get, 2 - use blocking get, 3 - use flush operation",
167+
&mca_spml_ucx_ctx_default.strong_sync);
168+
164169
mca_spml_ucx_param_register_ulong("nb_progress_thresh_global", 0,
165170
"Number of nb_put or nb_get operations before ucx progress is triggered. Disabled by default (0). Setting this value will override nb_put/get_progress_thresh.",
166171
&mca_spml_ucx.nb_progress_thresh_global);
@@ -383,7 +388,14 @@ mca_spml_ucx_component_init(int* priority,
383388
if (OSHMEM_SUCCESS != spml_ucx_init())
384389
return NULL ;
385390

391+
if ((mca_spml_ucx_ctx_default.strong_sync < SPML_UCX_STRONG_ORDERING_NONE) ||
392+
(mca_spml_ucx_ctx_default.strong_sync > SPML_UCX_STRONG_ORDERING_FLUSH)) {
393+
SPML_UCX_ERROR("incorrect value of strong_sync parameter: %d",
394+
mca_spml_ucx_ctx_default.strong_sync);
395+
}
396+
386397
SPML_UCX_VERBOSE(50, "*** ucx initialized ****");
398+
387399
return &mca_spml_ucx.super;
388400
}
389401

0 commit comments

Comments
 (0)