@@ -78,14 +78,15 @@ mca_spml_ucx_t mca_spml_ucx = {
     .num_disconnect = 1,
     .heap_reg_nb = 0,
     .enabled = 0,
-    .get_mkey_slow = NULL
+    .get_mkey_slow = NULL,
+    .synchronized_quiet = false,
+    .strong_sync = SPML_UCX_STRONG_ORDERING_NONE
 };
 
 mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default = {
     .ucp_worker = NULL,
     .ucp_peers = NULL,
-    .options = 0,
-    .synchronized_quiet = false
+    .options = 0
 };
 
 #ifdef HAVE_UCP_REQUEST_PARAM_T
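Note: the `strong_sync` field and the `SPML_UCX_STRONG_ORDERING_*` constants used throughout this patch are declared in the header, which is not part of this excerpt. A minimal sketch of what that declaration presumably looks like; the exact values and comments are assumptions inferred from how the constants are used below:

    /* Sketch only -- assumed header-side declarations (spml_ucx.h), not shown in this diff. */
    enum {
        SPML_UCX_STRONG_ORDERING_NONE  = 0,  /* no extra per-peer synchronization       */
        SPML_UCX_STRONG_ORDERING_GETNB = 1,  /* drain each peer with a non-blocking get */
        SPML_UCX_STRONG_ORDERING_GET   = 2,  /* drain each peer with a blocking get     */
        SPML_UCX_STRONG_ORDERING_FLUSH = 3   /* drain each peer with ucp_ep_flush*      */
    };

The patch also moves `synchronized_quiet` from the per-context `mca_spml_ucx_ctx_t` to the component-wide `mca_spml_ucx_t`, which is why `mca_spml_ucx_ctx_default` above no longer initializes it.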
@@ -401,7 +402,7 @@ int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs)
 {
     int res;
 
-    if (mca_spml_ucx.synchronized_quiet) {
+    if (mca_spml_ucx_is_strong_ordering()) {
         ctx->put_proc_indexes = malloc(nprocs * sizeof(*ctx->put_proc_indexes));
         if (NULL == ctx->put_proc_indexes) {
             return OSHMEM_ERR_OUT_OF_RESOURCE;
@@ -423,7 +424,7 @@ int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs)
 
 int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx)
 {
-    if (mca_spml_ucx.synchronized_quiet && ctx->put_proc_indexes) {
+    if (mca_spml_ucx_is_strong_ordering() && ctx->put_proc_indexes) {
         OBJ_DESTRUCT(&ctx->put_op_bitmap);
         free(ctx->put_proc_indexes);
     }
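`mca_spml_ucx_is_strong_ordering()` replaces the direct `synchronized_quiet` checks above but is defined outside this excerpt. It is presumably a small inline helper that reports whether per-peer put tracking is needed at all; a sketch under that assumption:

    /* Sketch only -- assumed helper (likely a static inline in spml_ucx.h).
     * The put-op mask must be maintained whenever quiet/fence has to drain
     * outstanding puts per peer: either for synchronized quiet or for any
     * strong-ordering mode. */
    static inline int mca_spml_ucx_is_strong_ordering(void)
    {
        return mca_spml_ucx.synchronized_quiet ||
               (mca_spml_ucx.strong_sync != SPML_UCX_STRONG_ORDERING_NONE);
    }

This is why `mca_spml_ucx_init_put_op_mask()` now allocates the index array for the strong-ordering modes as well, not only for synchronized quiet.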
@@ -840,7 +841,6 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx
     ucx_ctx->options = options;
     ucx_ctx->ucp_worker = calloc(1, sizeof(ucp_worker_h));
     ucx_ctx->ucp_workers = 1;
-    ucx_ctx->synchronized_quiet = mca_spml_ucx_ctx_default.synchronized_quiet;
 
     params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
     if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE || options & SHMEM_CTX_PRIVATE || options & SHMEM_CTX_SERIALIZED) {
@@ -1175,14 +1175,81 @@ int mca_spml_ucx_put_nb_wprogress(shmem_ctx_t ctx, void* dst_addr, size_t size,
     return ucx_status_to_oshmem_nb(status);
 }
 
+static int mca_spml_ucx_strong_sync(shmem_ctx_t ctx)
+{
+    mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
+    ucs_status_ptr_t request;
+    static int flush_get_data;
+    unsigned i;
+    int ret;
+    int idx;
+#if !(HAVE_DECL_UCP_EP_FLUSH_NBX || HAVE_DECL_UCP_EP_FLUSH_NB)
+    ucs_status_t status;
+#endif
+
+    for (i = 0; i < ucx_ctx->put_proc_count; i++) {
+        idx = ucx_ctx->put_proc_indexes[i];
+
+        switch (mca_spml_ucx.strong_sync) {
+        case SPML_UCX_STRONG_ORDERING_NONE:
+        case SPML_UCX_STRONG_ORDERING_GETNB:
+            ret = mca_spml_ucx_get_nb(ctx,
+                                      ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
+                                      sizeof(flush_get_data), &flush_get_data, idx, NULL);
+            break;
+        case SPML_UCX_STRONG_ORDERING_GET:
+            ret = mca_spml_ucx_get(ctx,
+                                   ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
+                                   sizeof(flush_get_data), &flush_get_data, idx);
+            break;
+        case SPML_UCX_STRONG_ORDERING_FLUSH:
+#if HAVE_DECL_UCP_EP_FLUSH_NBX
+            request = ucp_ep_flush_nbx(ucx_ctx->ucp_peers[idx].ucp_conn,
+                                       &mca_spml_ucx_request_param_b);
+            ret = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker[0], "ucp_flush_nbx");
+#elif HAVE_DECL_UCP_EP_FLUSH_NB
+            request = ucp_ep_flush_nb(ucx_ctx->ucp_peers[idx].ucp_conn, 0, opal_common_ucx_empty_complete_cb);
+            ret = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker[0], "ucp_flush_nb");
+#else
+            status = ucp_ep_flush(ucx_ctx->ucp_peers[idx].ucp_conn);
+            ret = (status == UCS_OK) ? OPAL_SUCCESS : OPAL_ERROR;
+#endif
+            break;
+        default:
+            /* unknown mode */
+            ret = OMPI_SUCCESS;
+            break;
+        }
+
+        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+            oshmem_shmem_abort(-1);
+            return ret;
+        }
+
+        opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx);
+    }
+
+    ucx_ctx->put_proc_count = 0;
+    return OSHMEM_SUCCESS;
+}
+
 int mca_spml_ucx_fence(shmem_ctx_t ctx)
 {
+    mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
     ucs_status_t err;
+    int ret;
     unsigned int i = 0;
-    mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
 
     opal_atomic_wmb();
 
+    if (mca_spml_ucx.strong_sync != SPML_UCX_STRONG_ORDERING_NONE) {
+        ret = mca_spml_ucx_strong_sync(ctx);
+        if (ret != OSHMEM_SUCCESS) {
+            oshmem_shmem_abort(-1);
+            return ret;
+        }
+    }
+
     for (i = 0; i < ucx_ctx->ucp_workers; i++) {
         if (ucx_ctx->ucp_worker[i] != NULL) {
             err = ucp_worker_fence(ucx_ctx->ucp_worker[i]);
@@ -1198,26 +1265,16 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx)
 
 int mca_spml_ucx_quiet(shmem_ctx_t ctx)
 {
-    int flush_get_data;
     int ret;
     unsigned i;
-    int idx;
     mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
 
     if (mca_spml_ucx.synchronized_quiet) {
-        for (i = 0; i < ucx_ctx->put_proc_count; i++) {
-            idx = ucx_ctx->put_proc_indexes[i];
-            ret = mca_spml_ucx_get_nb(ctx,
-                                      ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
-                                      sizeof(flush_get_data), &flush_get_data, idx, NULL);
-            if (OMPI_SUCCESS != ret) {
-                oshmem_shmem_abort(-1);
-                return ret;
-            }
-
-            opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx);
+        ret = mca_spml_ucx_strong_sync(ctx);
+        if (ret != OSHMEM_SUCCESS) {
+            oshmem_shmem_abort(-1);
+            return ret;
         }
-        ucx_ctx->put_proc_count = 0;
     }
 
     opal_atomic_wmb();
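Not shown in this excerpt is how the new mode is selected at runtime. In Open MPI components this is normally done through an MCA variable registered in the component source; a hedged sketch of such a registration follows, where the variable name, help string, and the `mca_spml_ucx_component.spmlm_version` handle are assumptions rather than taken from this diff:

    /* Sketch only -- possible registration in spml_ucx_component.c (assumed). */
    mca_spml_ucx.strong_sync = SPML_UCX_STRONG_ORDERING_NONE;
    (void) mca_base_component_var_register(&mca_spml_ucx_component.spmlm_version,
                                           "strong_sync",
                                           "Strong ordering mode for fence/quiet: "
                                           "0 none, 1 non-blocking get, 2 blocking get, 3 ep flush",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_3,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_spml_ucx.strong_sync);

With such a parameter in place, `mca_spml_ucx_fence()` and `mca_spml_ucx_quiet()` above only take the `mca_spml_ucx_strong_sync()` path when the user opts into one of the non-default modes.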