@@ -85,7 +85,8 @@ mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default = {
     .ucp_worker = NULL,
     .ucp_peers = NULL,
     .options = 0,
-    .synchronized_quiet = false
+    .synchronized_quiet = false,
+    .strong_sync = SPML_UCX_STRONG_ORDERING_NONE
 };

 #if HAVE_DECL_UCP_ATOMIC_OP_NBX
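
Note: the new strong_sync field and the SPML_UCX_STRONG_ORDERING_* constants referenced in this diff come from spml_ucx.h, which is not shown here. A minimal sketch of what that declaration is assumed to look like (only the constant names appear in this diff; the typedef name and ordering of values below are illustrative):

/* Assumed ordering-mode constants backing ucx_ctx->strong_sync; the real
 * declaration lives in spml_ucx.h and may differ in name and values. */
typedef enum {
    SPML_UCX_STRONG_ORDERING_NONE,  /* keep the default (weak) fence semantics */
    SPML_UCX_STRONG_ORDERING_GETNB, /* sync each touched peer with a non-blocking get */
    SPML_UCX_STRONG_ORDERING_GET,   /* sync each touched peer with a blocking get */
    SPML_UCX_STRONG_ORDERING_FLUSH  /* sync each touched peer with an endpoint flush */
} mca_spml_ucx_strong_ordering_t;
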
@@ -404,7 +405,7 @@ int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs)
 {
     int res;

-    if (mca_spml_ucx.synchronized_quiet) {
+    if (mca_spml_ucx_is_strong_ordering(ctx)) {
         ctx->put_proc_indexes = malloc(nprocs * sizeof(*ctx->put_proc_indexes));
         if (NULL == ctx->put_proc_indexes) {
             return OSHMEM_ERR_OUT_OF_RESOURCE;
@@ -426,7 +427,7 @@ int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs)

 int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx)
 {
-    if (mca_spml_ucx.synchronized_quiet && ctx->put_proc_indexes) {
+    if (mca_spml_ucx_is_strong_ordering(ctx) && ctx->put_proc_indexes) {
         OBJ_DESTRUCT(&ctx->put_op_bitmap);
         free(ctx->put_proc_indexes);
     }
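
Both hunks above replace the global mca_spml_ucx.synchronized_quiet check with the per-context predicate mca_spml_ucx_is_strong_ordering(), whose definition is not part of this diff. A plausible sketch, assuming it simply reports whether the context has to track outstanding puts per peer (for synchronized quiet or any strong ordering mode):

/* Sketch only: the real helper is defined elsewhere (presumably spml_ucx.h).
 * Assumed to be true whenever per-peer put tracking is needed. */
static inline int mca_spml_ucx_is_strong_ordering(mca_spml_ucx_ctx_t *ctx)
{
    return ctx->synchronized_quiet ||
           (ctx->strong_sync != SPML_UCX_STRONG_ORDERING_NONE);
}
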
@@ -844,6 +845,7 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx
     ucx_ctx->ucp_worker = calloc(1, sizeof(ucp_worker_h));
     ucx_ctx->ucp_workers = 1;
     ucx_ctx->synchronized_quiet = mca_spml_ucx_ctx_default.synchronized_quiet;
+    ucx_ctx->strong_sync = mca_spml_ucx_ctx_default.strong_sync;

     params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
     if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE || options & SHMEM_CTX_PRIVATE || options & SHMEM_CTX_SERIALIZED) {
@@ -1178,14 +1180,81 @@ int mca_spml_ucx_put_nb_wprogress(shmem_ctx_t ctx, void* dst_addr, size_t size,
     return ucx_status_to_oshmem_nb(status);
 }

+static int mca_spml_ucx_strong_sync(shmem_ctx_t ctx)
+{
+    mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
+    ucs_status_ptr_t request;
+    static int flush_get_data;
+    unsigned i;
+    int ret;
+    int idx;
+#if !(HAVE_DECL_UCP_EP_FLUSH_NBX || HAVE_DECL_UCP_EP_FLUSH_NB)
+    ucs_status_t status;
+#endif
+
+    for (i = 0; i < ucx_ctx->put_proc_count; i++) {
+        idx = ucx_ctx->put_proc_indexes[i];
+
+        switch (ucx_ctx->strong_sync) {
+        case SPML_UCX_STRONG_ORDERING_NONE:
+        case SPML_UCX_STRONG_ORDERING_GETNB:
+            ret = mca_spml_ucx_get_nb(ctx,
+                                      ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
+                                      sizeof(flush_get_data), &flush_get_data, idx, NULL);
+            break;
+        case SPML_UCX_STRONG_ORDERING_GET:
+            ret = mca_spml_ucx_get(ctx,
+                                   ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
+                                   sizeof(flush_get_data), &flush_get_data, idx);
+            break;
+#if HAVE_DECL_UCP_EP_FLUSH_NBX
+        case SPML_UCX_STRONG_ORDERING_FLUSH:
+            request = ucp_ep_flush_nbx(ucx_ctx->ucp_peers[idx].ucp_conn,
+                                       &mca_spml_ucx_request_param_b);
+            ret = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker[0], "ucp_flush_nbx");
+#elif HAVE_DECL_UCP_EP_FLUSH_NB
+            request = ucp_ep_flush_nb(ucx_ctx->ucp_peers[idx].ucp_conn, 0, opal_common_ucx_empty_complete_cb);
+            ret = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker[0], "ucp_flush_nb");
+#else
+            status = ucp_ep_flush(ucx_ctx->ucp_peers[idx].ucp_conn);
+            ret = (status == UCS_OK) ? OPAL_SUCCESS : OPAL_ERROR;
+#endif
+            break;
+        default:
+            /* unknown mode */
+            ret = OMPI_SUCCESS;
+            break;
+        }
+
+        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+            oshmem_shmem_abort(-1);
+            return ret;
+        }
+
+        opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx);
+    }
+
+    ucx_ctx->put_proc_count = 0;
+    return OSHMEM_SUCCESS;
+}
+
 int mca_spml_ucx_fence(shmem_ctx_t ctx)
 {
+    mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
     ucs_status_t err;
+    int ret;
     unsigned int i = 0;
-    mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;

     opal_atomic_wmb();

+    if (ucx_ctx->strong_sync != SPML_UCX_STRONG_ORDERING_NONE) {
+        ret = mca_spml_ucx_strong_sync(ctx);
+        if (ret != OSHMEM_SUCCESS) {
+            oshmem_shmem_abort(-1);
+            return ret;
+        }
+    }
+
     for (i = 0; i < ucx_ctx->ucp_workers; i++) {
         if (ucx_ctx->ucp_worker[i] != NULL) {
             err = ucp_worker_fence(ucx_ctx->ucp_worker[i]);
@@ -1201,26 +1270,16 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx)

 int mca_spml_ucx_quiet(shmem_ctx_t ctx)
 {
-    int flush_get_data;
     int ret;
     unsigned i;
-    int idx;
     mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;

-    if (mca_spml_ucx.synchronized_quiet) {
-        for (i = 0; i < ucx_ctx->put_proc_count; i++) {
-            idx = ucx_ctx->put_proc_indexes[i];
-            ret = mca_spml_ucx_get_nb(ctx,
-                                      ucx_ctx->ucp_peers[idx].mkeys[SPML_UCX_SERVICE_SEG]->super.super.va_base,
-                                      sizeof(flush_get_data), &flush_get_data, idx, NULL);
-            if (OMPI_SUCCESS != ret) {
-                oshmem_shmem_abort(-1);
-                return ret;
-            }
-
-            opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx);
+    if (ucx_ctx->synchronized_quiet) {
+        ret = mca_spml_ucx_strong_sync(ctx);
+        if (ret != OSHMEM_SUCCESS) {
+            oshmem_shmem_abort(-1);
+            return ret;
         }
-        ucx_ctx->put_proc_count = 0;
     }

     opal_atomic_wmb();
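
For context on what the new path affects at the application level: shmem_fence() and shmem_quiet() are dispatched to mca_spml_ucx_fence() and mca_spml_ucx_quiet(), so an ordinary ordered-put pattern like the sketch below is the kind of code whose ordering guarantee the strong-sync flush targets. This is a generic OpenSHMEM example, not taken from this diff, and it assumes a run with at least two PEs:

/* PE 0 writes a payload and then a flag to PE 1; the fence keeps the flag
 * from overtaking the payload. With spml/ucx this fence lands in
 * mca_spml_ucx_fence() above. */
#include <shmem.h>

static int data = 0;   /* symmetric: lives in the data segment on every PE */
static int flag = 0;

int main(void)
{
    shmem_init();

    if (shmem_my_pe() == 0 && shmem_n_pes() > 1) {
        int one = 1;
        shmem_int_put(&data, &one, 1, 1);   /* payload to PE 1 */
        shmem_fence();                      /* order the two puts to PE 1 */
        shmem_int_put(&flag, &one, 1, 1);   /* flag follows the payload */
    } else if (shmem_my_pe() == 1) {
        shmem_int_wait_until(&flag, SHMEM_CMP_EQ, 1);
        /* data == 1 is guaranteed here by the fence on PE 0 */
    }

    shmem_finalize();
    return 0;
}
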