Skip to content

Commit 5f37fe6

Browse files
authored
Merge pull request #6694 from hoopoepg/topic/pci-flush-on-quiet
SPML/UCX: added synchronized flush on quiet
2 parents 2469f6c + 0b10841 commit 5f37fe6

File tree

5 files changed

+115
-4
lines changed

5 files changed

+115
-4
lines changed

oshmem/mca/atomic/ucx/atomic_ucx_cswap.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ int mca_atomic_ucx_cswap(shmem_ctx_t ctx,
4545
UCP_ATOMIC_FETCH_OP_CSWAP, cond, prev, size,
4646
rva, ucx_mkey->rkey,
4747
opal_common_ucx_empty_complete_cb);
48+
49+
if (OPAL_LIKELY(!UCS_PTR_IS_ERR(status_ptr))) {
50+
mca_spml_ucx_remote_op_posted(ucx_ctx, pe);
51+
}
52+
4853
return opal_common_ucx_wait_request(status_ptr, ucx_ctx->ucp_worker,
4954
"ucp_atomic_fetch_nb");
5055
}

oshmem/mca/atomic/ucx/atomic_ucx_module.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ int mca_atomic_ucx_op(shmem_ctx_t ctx,
5151
status = ucp_atomic_post(ucx_ctx->ucp_peers[pe].ucp_conn,
5252
op, value, size, rva,
5353
ucx_mkey->rkey);
54+
55+
if (OPAL_LIKELY(UCS_OK == status)) {
56+
mca_spml_ucx_remote_op_posted(ucx_ctx, pe);
57+
}
58+
5459
return ucx_status_to_oshmem(status);
5560
}
5661

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ mca_spml_ucx_t mca_spml_ucx = {
7777
.num_disconnect = 1,
7878
.heap_reg_nb = 0,
7979
.enabled = 0,
80-
.get_mkey_slow = NULL
80+
.get_mkey_slow = NULL,
81+
.synchronized_quiet = false
8182
};
8283

8384
mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default = {
@@ -213,6 +214,40 @@ static void dump_address(int pe, char *addr, size_t len)
213214

214215
static char spml_ucx_transport_ids[1] = { 0 };
215216

217+
/**
 * Set up the per-context bookkeeping that records which peers have had a
 * remote operation posted since the last quiet.
 *
 * Does nothing unless mca_spml_ucx.synchronized_quiet is enabled. When it is
 * enabled, allocates ctx->put_proc_indexes with room for nprocs entries,
 * constructs and initializes ctx->put_op_bitmap for nprocs bits, and resets
 * ctx->put_proc_count to zero.
 *
 * @param ctx     context whose bookkeeping fields are initialized
 * @param nprocs  number of peers the index array and bitmap must cover
 *
 * @return OSHMEM_SUCCESS on success (or when the feature is disabled),
 *         OSHMEM_ERR_OUT_OF_RESOURCE if the index array cannot be allocated,
 *         otherwise the error code reported by opal_bitmap_init(). On any
 *         failure ctx->put_proc_indexes is left NULL and the bitmap is
 *         destructed, so nothing is left for the caller to release.
 */
int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs)
{
    int res;

    if (mca_spml_ucx.synchronized_quiet) {
        /* calloc rejects an overflowing nprocs * element-size product */
        ctx->put_proc_indexes = calloc(nprocs, sizeof(*ctx->put_proc_indexes));
        if (NULL == ctx->put_proc_indexes) {
            return OSHMEM_ERR_OUT_OF_RESOURCE;
        }

        OBJ_CONSTRUCT(&ctx->put_op_bitmap, opal_bitmap_t);
        res = opal_bitmap_init(&ctx->put_op_bitmap, nprocs);
        if (OPAL_SUCCESS != res) {
            /* Pair the OBJ_CONSTRUCT above: mca_spml_ucx_clear_put_op_mask()
             * skips the bitmap once put_proc_indexes is NULL, so it has to be
             * torn down here. */
            OBJ_DESTRUCT(&ctx->put_op_bitmap);
            free(ctx->put_proc_indexes);
            ctx->put_proc_indexes = NULL;
            return res;
        }

        ctx->put_proc_count = 0;
    }

    return OSHMEM_SUCCESS;
}
240+
241+
/**
 * Release the bookkeeping set up by mca_spml_ucx_init_put_op_mask().
 *
 * Acts only when mca_spml_ucx.synchronized_quiet is enabled and
 * ctx->put_proc_indexes is non-NULL: destructs ctx->put_op_bitmap and frees
 * the index array. The pointer and count are reset afterwards so that a
 * repeated call on the same context (this function is invoked from both
 * error paths and regular context cleanup) is a harmless no-op instead of a
 * double destruct / double free.
 *
 * @param ctx  context whose bookkeeping is released
 *
 * @return always OSHMEM_SUCCESS
 */
int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx)
{
    if (mca_spml_ucx.synchronized_quiet && ctx->put_proc_indexes) {
        OBJ_DESTRUCT(&ctx->put_op_bitmap);
        free(ctx->put_proc_indexes);
        /* Mark as released: the guard above keys off this pointer. */
        ctx->put_proc_indexes = NULL;
        ctx->put_proc_count   = 0;
    }

    return OSHMEM_SUCCESS;
}
250+
216251
int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs)
217252
{
218253
size_t i, j, n;
@@ -232,6 +267,11 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs)
232267
goto error;
233268
}
234269

270+
rc = mca_spml_ucx_init_put_op_mask(&mca_spml_ucx_ctx_default, nprocs);
271+
if (OSHMEM_SUCCESS != rc) {
272+
goto error;
273+
}
274+
235275
err = ucp_worker_get_address(mca_spml_ucx_ctx_default.ucp_worker, &wk_local_addr, &wk_addr_len);
236276
if (err != UCS_OK) {
237277
goto error;
@@ -294,6 +334,8 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs)
294334
free(mca_spml_ucx.remote_addrs_tbl[i]);
295335
}
296336
}
337+
338+
mca_spml_ucx_clear_put_op_mask(&mca_spml_ucx_ctx_default);
297339
if (mca_spml_ucx_ctx_default.ucp_peers)
298340
free(mca_spml_ucx_ctx_default.ucp_peers);
299341
if (mca_spml_ucx.remote_addrs_tbl)
@@ -581,6 +623,11 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx
581623
goto error;
582624
}
583625

626+
rc = mca_spml_ucx_init_put_op_mask(ucx_ctx, nprocs);
627+
if (OSHMEM_SUCCESS != rc) {
628+
goto error2;
629+
}
630+
584631
for (i = 0; i < nprocs; i++) {
585632
ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
586633
ep_params.address = (ucp_address_t *)(mca_spml_ucx.remote_addrs_tbl[i]);
@@ -619,6 +666,8 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx
619666
}
620667
}
621668

669+
mca_spml_ucx_clear_put_op_mask(ucx_ctx);
670+
622671
if (ucx_ctx->ucp_peers)
623672
free(ucx_ctx->ucp_peers);
624673

@@ -713,6 +762,7 @@ int mca_spml_ucx_put(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_add
713762
void *rva;
714763
spml_ucx_mkey_t *ucx_mkey;
715764
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
765+
int res;
716766
#if HAVE_DECL_UCP_PUT_NB
717767
ucs_status_ptr_t request;
718768
#else
@@ -723,12 +773,18 @@ int mca_spml_ucx_put(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_add
723773
#if HAVE_DECL_UCP_PUT_NB
724774
request = ucp_put_nb(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size,
725775
(uint64_t)rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb);
726-
return opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker, "ucp_put_nb");
776+
res = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker, "ucp_put_nb");
727777
#else
728778
status = ucp_put(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size,
729779
(uint64_t)rva, ucx_mkey->rkey);
730-
return ucx_status_to_oshmem(status);
780+
res = ucx_status_to_oshmem(status);
731781
#endif
782+
783+
if (OPAL_LIKELY(OSHMEM_SUCCESS == res)) {
784+
mca_spml_ucx_remote_op_posted(ucx_ctx, dst);
785+
}
786+
787+
return res;
732788
}
733789

734790
int mca_spml_ucx_put_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_addr, int dst, void **handle)
@@ -742,6 +798,10 @@ int mca_spml_ucx_put_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_
742798
status = ucp_put_nbi(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size,
743799
(uint64_t)rva, ucx_mkey->rkey);
744800

801+
if (OPAL_LIKELY(status >= 0)) {
802+
mca_spml_ucx_remote_op_posted(ucx_ctx, dst);
803+
}
804+
745805
return ucx_status_to_oshmem_nb(status);
746806
}
747807

@@ -765,9 +825,28 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx)
765825

766826
int mca_spml_ucx_quiet(shmem_ctx_t ctx)
767827
{
828+
int flush_get_data;
768829
int ret;
830+
unsigned i;
831+
int idx;
769832
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
770833

834+
if (mca_spml_ucx.synchronized_quiet) {
835+
for (i = 0; i < ucx_ctx->put_proc_count; i++) {
836+
idx = ucx_ctx->put_proc_indexes[i];
837+
ret = mca_spml_ucx_get_nb(ctx,
838+
ucx_ctx->ucp_peers[idx].mkeys->super.super.va_base,
839+
sizeof(flush_get_data), &flush_get_data, idx, NULL);
840+
if (OMPI_SUCCESS != ret) {
841+
oshmem_shmem_abort(-1);
842+
return ret;
843+
}
844+
845+
opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx);
846+
}
847+
ucx_ctx->put_proc_count = 0;
848+
}
849+
771850
opal_atomic_wmb();
772851

773852
ret = opal_common_ucx_worker_flush(ucx_ctx->ucp_worker);

oshmem/mca/spml/ucx/spml_ucx.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
#include "opal/class/opal_free_list.h"
3535
#include "opal/class/opal_list.h"
36+
#include "opal/class/opal_bitmap.h"
3637

3738
#include "opal/mca/common/ucx/common_ucx.h"
3839

@@ -69,6 +70,9 @@ struct mca_spml_ucx_ctx {
6970
ucp_worker_h ucp_worker;
7071
ucp_peer_t *ucp_peers;
7172
long options;
73+
opal_bitmap_t put_op_bitmap;
74+
int *put_proc_indexes;
75+
unsigned put_proc_count;
7276
};
7377
typedef struct mca_spml_ucx_ctx mca_spml_ucx_ctx_t;
7478

@@ -103,7 +107,7 @@ struct mca_spml_ucx {
103107
mca_spml_ucx_ctx_t *aux_ctx;
104108
pthread_spinlock_t async_lock;
105109
int aux_refcnt;
106-
110+
bool synchronized_quiet;
107111
};
108112
typedef struct mca_spml_ucx mca_spml_ucx_t;
109113

@@ -170,6 +174,9 @@ extern int spml_ucx_ctx_progress(void);
170174
extern int spml_ucx_progress_aux_ctx(void);
171175
void mca_spml_ucx_async_cb(int fd, short event, void *cbdata);
172176

177+
int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs);
178+
int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx);
179+
173180
static inline void mca_spml_ucx_aux_lock(void)
174181
{
175182
if (mca_spml_ucx.async_progress) {
@@ -224,6 +231,16 @@ static inline int ucx_status_to_oshmem_nb(ucs_status_t status)
224231
#endif
225232
}
226233

234+
/**
 * Note that a remote operation was posted to peer @p dst on @p ctx.
 *
 * Has no effect unless mca_spml_ucx.synchronized_quiet is enabled. Otherwise,
 * the first time a given peer is seen its index is appended to
 * ctx->put_proc_indexes and its bit is set in ctx->put_op_bitmap; later calls
 * for the same peer find the bit already set and return without changes.
 *
 * @param ctx  context on which the operation was posted
 * @param dst  index of the destination peer
 */
static inline void mca_spml_ucx_remote_op_posted(mca_spml_ucx_ctx_t *ctx, int dst)
{
    /* Feature disabled: nothing to record. */
    if (!OPAL_UNLIKELY(mca_spml_ucx.synchronized_quiet)) {
        return;
    }

    /* Peer already recorded. */
    if (opal_bitmap_is_set_bit(&ctx->put_op_bitmap, dst)) {
        return;
    }

    ctx->put_proc_indexes[ctx->put_proc_count] = dst;
    ctx->put_proc_count++;
    opal_bitmap_set_bit(&ctx->put_op_bitmap, dst);
}
243+
227244
#define MCA_SPML_UCX_CTXS_ARRAY_SIZE 64
228245
#define MCA_SPML_UCX_CTXS_ARRAY_INC 64
229246

oshmem/mca/spml/ucx/spml_ucx_component.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,10 @@ static int mca_spml_ucx_component_register(void)
128128
"Asynchronous progress tick granularity (in usec)",
129129
&mca_spml_ucx.async_tick);
130130

131+
mca_spml_ucx_param_register_bool("synchronized_quiet", 0,
132+
"Use synchronized quiet on shmem_quiet or shmem_barrier_all operations",
133+
&mca_spml_ucx.synchronized_quiet);
134+
131135
opal_common_ucx_mca_var_register(&mca_spml_ucx_component.spmlm_version);
132136

133137
return OSHMEM_SUCCESS;
@@ -329,6 +333,7 @@ static void _ctx_cleanup(mca_spml_ucx_ctx_t *ctx)
329333
mca_spml_ucx.num_disconnect,
330334
ctx->ucp_worker);
331335
free(del_procs);
336+
mca_spml_ucx_clear_put_op_mask(ctx);
332337
free(ctx->ucp_peers);
333338
}
334339

0 commit comments

Comments
 (0)