Skip to content

Commit 4db61cd

Browse files
authored
Merge pull request #9437 from hoopoepg/topic/added-strong-sync-infra
SPML/UCX: added strong sync for fence
2 parents 0a8942b + 78185f9 commit 4db61cd

File tree

4 files changed

+108
-25
lines changed

4 files changed

+108
-25
lines changed

config/ompi_check_ucx.m4

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[
123123
[#include <ucp/api/ucp.h>])
124124
AC_CHECK_DECLS([ucp_ep_flush_nb, ucp_worker_flush_nb,
125125
ucp_request_check_status, ucp_put_nb, ucp_get_nb,
126-
ucp_put_nbx, ucp_get_nbx, ucp_atomic_op_nbx],
126+
ucp_put_nbx, ucp_get_nbx, ucp_atomic_op_nbx,
127+
ucp_ep_flush_nbx],
127128
[], [],
128129
[#include <ucp/api/ucp.h>])
129130
AC_CHECK_DECLS([ucm_test_events,

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 78 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -78,14 +78,15 @@ mca_spml_ucx_t mca_spml_ucx = {
7878
.num_disconnect = 1,
7979
.heap_reg_nb = 0,
8080
.enabled = 0,
81-
.get_mkey_slow = NULL
81+
.get_mkey_slow = NULL,
82+
.synchronized_quiet = false,
83+
.strong_sync = SPML_UCX_STRONG_ORDERING_NONE
8284
};
8385

8486
mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default = {
8587
.ucp_worker = NULL,
8688
.ucp_peers = NULL,
87-
.options = 0,
88-
.synchronized_quiet = false
89+
.options = 0
8990
};
9091

9192
#ifdef HAVE_UCP_REQUEST_PARAM_T
@@ -401,7 +402,7 @@ int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs)
401402
{
402403
int res;
403404

404-
if (mca_spml_ucx.synchronized_quiet) {
405+
if (mca_spml_ucx_is_strong_ordering()) {
405406
ctx->put_proc_indexes = malloc(nprocs * sizeof(*ctx->put_proc_indexes));
406407
if (NULL == ctx->put_proc_indexes) {
407408
return OSHMEM_ERR_OUT_OF_RESOURCE;
@@ -423,7 +424,7 @@ int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs)
423424

424425
int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx)
425426
{
426-
if (mca_spml_ucx.synchronized_quiet && ctx->put_proc_indexes) {
427+
if (mca_spml_ucx_is_strong_ordering() && ctx->put_proc_indexes) {
427428
OBJ_DESTRUCT(&ctx->put_op_bitmap);
428429
free(ctx->put_proc_indexes);
429430
}
@@ -840,7 +841,6 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx
840841
ucx_ctx->options = options;
841842
ucx_ctx->ucp_worker = calloc(1, sizeof(ucp_worker_h));
842843
ucx_ctx->ucp_workers = 1;
843-
ucx_ctx->synchronized_quiet = mca_spml_ucx_ctx_default.synchronized_quiet;
844844

845845
params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
846846
if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE || options & SHMEM_CTX_PRIVATE || options & SHMEM_CTX_SERIALIZED) {
@@ -1175,14 +1175,81 @@ int mca_spml_ucx_put_nb_wprogress(shmem_ctx_t ctx, void* dst_addr, size_t size,
11751175
return ucx_status_to_oshmem_nb(status);
11761176
}
11771177

1178+
static int mca_spml_ucx_strong_sync(shmem_ctx_t ctx)
1179+
{
1180+
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
1181+
ucs_status_ptr_t request;
1182+
static int flush_get_data;
1183+
unsigned i;
1184+
int ret;
1185+
int idx;
1186+
#if !(HAVE_DECL_UCP_EP_FLUSH_NBX || HAVE_DECL_UCP_EP_FLUSH_NB)
1187+
ucs_status_t status;
1188+
#endif
1189+
1190+
for (i = 0; i < ucx_ctx->put_proc_count; i++) {
1191+
idx = ucx_ctx->put_proc_indexes[i];
1192+
1193+
switch (mca_spml_ucx.strong_sync) {
1194+
case SPML_UCX_STRONG_ORDERING_NONE:
1195+
case SPML_UCX_STRONG_ORDERING_GETNB:
1196+
ret = mca_spml_ucx_get_nb(ctx,
1197+
ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
1198+
sizeof(flush_get_data), &flush_get_data, idx, NULL);
1199+
break;
1200+
case SPML_UCX_STRONG_ORDERING_GET:
1201+
ret = mca_spml_ucx_get(ctx,
1202+
ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
1203+
sizeof(flush_get_data), &flush_get_data, idx);
1204+
break;
1205+
#if HAVE_DECL_UCP_EP_FLUSH_NBX
1206+
case SPML_UCX_STRONG_ORDERING_FLUSH:
1207+
request = ucp_ep_flush_nbx(ucx_ctx->ucp_peers[idx].ucp_conn,
1208+
&mca_spml_ucx_request_param_b);
1209+
ret = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker[0], "ucp_flush_nbx");
1210+
#elif HAVE_DECL_UCP_EP_FLUSH_NB
1211+
request = ucp_ep_flush_nb(ucx_ctx->ucp_peers[idx].ucp_conn, 0, opal_common_ucx_empty_complete_cb);
1212+
ret = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker[0], "ucp_flush_nb");
1213+
#else
1214+
status = ucp_ep_flush(ucx_ctx->ucp_peers[idx].ucp_conn);
1215+
ret = (status == UCS_OK) ? OPAL_SUCCESS : OPAL_ERROR;
1216+
#endif
1217+
break;
1218+
default:
1219+
/* unknown mode */
1220+
ret = OMPI_SUCCESS;
1221+
break;
1222+
}
1223+
1224+
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1225+
oshmem_shmem_abort(-1);
1226+
return ret;
1227+
}
1228+
1229+
opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx);
1230+
}
1231+
1232+
ucx_ctx->put_proc_count = 0;
1233+
return OSHMEM_SUCCESS;
1234+
}
1235+
11781236
int mca_spml_ucx_fence(shmem_ctx_t ctx)
11791237
{
1238+
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
11801239
ucs_status_t err;
1240+
int ret;
11811241
unsigned int i = 0;
1182-
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
11831242

11841243
opal_atomic_wmb();
11851244

1245+
if (mca_spml_ucx.strong_sync != SPML_UCX_STRONG_ORDERING_NONE) {
1246+
ret = mca_spml_ucx_strong_sync(ctx);
1247+
if (ret != OSHMEM_SUCCESS) {
1248+
oshmem_shmem_abort(-1);
1249+
return ret;
1250+
}
1251+
}
1252+
11861253
for (i=0; i < ucx_ctx->ucp_workers; i++) {
11871254
if (ucx_ctx->ucp_worker[i] != NULL) {
11881255
err = ucp_worker_fence(ucx_ctx->ucp_worker[i]);
@@ -1198,26 +1265,16 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx)
11981265

11991266
int mca_spml_ucx_quiet(shmem_ctx_t ctx)
12001267
{
1201-
int flush_get_data;
12021268
int ret;
12031269
unsigned i;
1204-
int idx;
12051270
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
12061271

12071272
if (mca_spml_ucx.synchronized_quiet) {
1208-
for (i = 0; i < ucx_ctx->put_proc_count; i++) {
1209-
idx = ucx_ctx->put_proc_indexes[i];
1210-
ret = mca_spml_ucx_get_nb(ctx,
1211-
ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
1212-
sizeof(flush_get_data), &flush_get_data, idx, NULL);
1213-
if (OMPI_SUCCESS != ret) {
1214-
oshmem_shmem_abort(-1);
1215-
return ret;
1216-
}
1217-
1218-
opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx);
1273+
ret = mca_spml_ucx_strong_sync(ctx);
1274+
if (ret != OSHMEM_SUCCESS) {
1275+
oshmem_shmem_abort(-1);
1276+
return ret;
12191277
}
1220-
ucx_ctx->put_proc_count = 0;
12211278
}
12221279

12231280
opal_atomic_wmb();

oshmem/mca/spml/ucx/spml_ucx.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@ BEGIN_C_DECLS
4848
#define SPML_UCX_TRANSP_CNT 1
4949
#define SPML_UCX_SERVICE_SEG 0
5050

51+
enum {
52+
SPML_UCX_STRONG_ORDERING_NONE = 0, /* don't use strong ordering */
53+
SPML_UCX_STRONG_ORDERING_GETNB = 1, /* use non-blocking read to provide ordering */
54+
SPML_UCX_STRONG_ORDERING_GET = 2, /* use blocking read to provide ordering*/
55+
SPML_UCX_STRONG_ORDERING_FLUSH = 3 /* flush EP to provide ordering */
56+
};
57+
5158
/**
5259
* UCX SPML module
5360
*/
@@ -79,7 +86,6 @@ struct mca_spml_ucx_ctx {
7986
unsigned int ucp_workers;
8087
int *put_proc_indexes;
8188
unsigned put_proc_count;
82-
bool synchronized_quiet;
8389
};
8490
typedef struct mca_spml_ucx_ctx mca_spml_ucx_ctx_t;
8591

@@ -115,6 +121,7 @@ struct mca_spml_ucx {
115121
pthread_spinlock_t async_lock;
116122
int aux_refcnt;
117123
bool synchronized_quiet;
124+
int strong_sync;
118125
unsigned long nb_progress_thresh_global;
119126
unsigned long nb_put_progress_thresh;
120127
unsigned long nb_get_progress_thresh;
@@ -295,9 +302,15 @@ static inline int ucx_status_to_oshmem_nb(ucs_status_t status)
295302
#endif
296303
}
297304

305+
static inline int mca_spml_ucx_is_strong_ordering(void)
306+
{
307+
return (mca_spml_ucx.strong_sync != SPML_UCX_STRONG_ORDERING_NONE) ||
308+
mca_spml_ucx.synchronized_quiet;
309+
}
310+
298311
static inline void mca_spml_ucx_remote_op_posted(mca_spml_ucx_ctx_t *ctx, int dst)
299312
{
300-
if (OPAL_UNLIKELY(ctx->synchronized_quiet)) {
313+
if (OPAL_UNLIKELY(mca_spml_ucx_is_strong_ordering())) {
301314
if (!opal_bitmap_is_set_bit(&ctx->put_op_bitmap, dst)) {
302315
ctx->put_proc_indexes[ctx->put_proc_count++] = dst;
303316
opal_bitmap_set_bit(&ctx->put_op_bitmap, dst);

oshmem/mca/spml/ucx/spml_ucx_component.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,12 @@ static int mca_spml_ucx_component_register(void)
159159

160160
mca_spml_ucx_param_register_bool("synchronized_quiet", 0,
161161
"Use synchronized quiet on shmem_quiet or shmem_barrier_all operations",
162-
&mca_spml_ucx_ctx_default.synchronized_quiet);
162+
&mca_spml_ucx.synchronized_quiet);
163+
164+
mca_spml_ucx_param_register_int("strong_sync", 0,
165+
"Use strong synchronization on shmem_quiet, shmem_fence or shmem_barrier_all operations: "
166+
"0 - don't do strong synchronization, 1 - use non blocking get, 2 - use blocking get, 3 - use flush operation",
167+
&mca_spml_ucx.strong_sync);
163168

164169
mca_spml_ucx_param_register_ulong("nb_progress_thresh_global", 0,
165170
"Number of nb_put or nb_get operations before ucx progress is triggered. Disabled by default (0). Setting this value will override nb_put/get_progress_thresh.",
@@ -384,6 +389,13 @@ mca_spml_ucx_component_init(int* priority,
384389
return NULL ;
385390

386391
SPML_UCX_VERBOSE(50, "*** ucx initialized ****");
392+
393+
if ((mca_spml_ucx.strong_sync < SPML_UCX_STRONG_ORDERING_NONE) ||
394+
(mca_spml_ucx.strong_sync > SPML_UCX_STRONG_ORDERING_FLUSH)) {
395+
SPML_UCX_ERROR("incorrect value of strong_sync parameter: %d",
396+
mca_spml_ucx.strong_sync);
397+
}
398+
387399
return &mca_spml_ucx.super;
388400
}
389401

0 commit comments

Comments
 (0)