@@ -78,14 +78,15 @@ mca_spml_ucx_t mca_spml_ucx = {
78
78
.num_disconnect = 1 ,
79
79
.heap_reg_nb = 0 ,
80
80
.enabled = 0 ,
81
- .get_mkey_slow = NULL
81
+ .get_mkey_slow = NULL ,
82
+ .synchronized_quiet = false,
83
+ .strong_sync = SPML_UCX_STRONG_ORDERING_NONE
82
84
};
83
85
84
86
/*
 * Default SPML UCX context object: no worker array, no peer table and no
 * context options set.
 *
 * In this diff the per-context .synchronized_quiet initializer is removed
 * from this object; the same hunk adds a .synchronized_quiet field to the
 * mca_spml_ucx component initializer above.
 */
mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default = {
85
87
.ucp_worker = NULL ,
86
88
.ucp_peers = NULL ,
87
- .options = 0 ,
88
- .synchronized_quiet = false
89
+ .options = 0
89
90
};
90
91
91
92
#if HAVE_DECL_UCP_ATOMIC_OP_NBX
@@ -404,7 +405,7 @@ int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs)
404
405
{
405
406
int res ;
406
407
407
- if (mca_spml_ucx . synchronized_quiet ) {
408
+ if (mca_spml_ucx_is_strong_ordering () ) {
408
409
ctx -> put_proc_indexes = malloc (nprocs * sizeof (* ctx -> put_proc_indexes ));
409
410
if (NULL == ctx -> put_proc_indexes ) {
410
411
return OSHMEM_ERR_OUT_OF_RESOURCE ;
@@ -426,7 +427,7 @@ int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs)
426
427
427
428
int mca_spml_ucx_clear_put_op_mask (mca_spml_ucx_ctx_t * ctx )
428
429
{
429
- if (mca_spml_ucx . synchronized_quiet && ctx -> put_proc_indexes ) {
430
+ if (mca_spml_ucx_is_strong_ordering () && ctx -> put_proc_indexes ) {
430
431
OBJ_DESTRUCT (& ctx -> put_op_bitmap );
431
432
free (ctx -> put_proc_indexes );
432
433
}
@@ -843,7 +844,6 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx
843
844
ucx_ctx -> options = options ;
844
845
ucx_ctx -> ucp_worker = calloc (1 , sizeof (ucp_worker_h ));
845
846
ucx_ctx -> ucp_workers = 1 ;
846
- ucx_ctx -> synchronized_quiet = mca_spml_ucx_ctx_default .synchronized_quiet ;
847
847
848
848
params .field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE ;
849
849
if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE || options & SHMEM_CTX_PRIVATE || options & SHMEM_CTX_SERIALIZED ) {
@@ -1178,14 +1178,81 @@ int mca_spml_ucx_put_nb_wprogress(shmem_ctx_t ctx, void* dst_addr, size_t size,
1178
1178
return ucx_status_to_oshmem_nb (status );
1179
1179
}
1180
1180
1181
+ static int mca_spml_ucx_strong_sync (shmem_ctx_t ctx )
1182
+ {
1183
+ mca_spml_ucx_ctx_t * ucx_ctx = (mca_spml_ucx_ctx_t * )ctx ;
1184
+ ucs_status_ptr_t request ;
1185
+ static int flush_get_data ;
1186
+ unsigned i ;
1187
+ int ret ;
1188
+ int idx ;
1189
+ #if !(HAVE_DECL_UCP_EP_FLUSH_NBX || HAVE_DECL_UCP_EP_FLUSH_NB )
1190
+ ucs_status_t status ;
1191
+ #endif
1192
+
1193
+ for (i = 0 ; i < ucx_ctx -> put_proc_count ; i ++ ) {
1194
+ idx = ucx_ctx -> put_proc_indexes [i ];
1195
+
1196
+ switch (mca_spml_ucx .strong_sync ) {
1197
+ case SPML_UCX_STRONG_ORDERING_NONE :
1198
+ case SPML_UCX_STRONG_ORDERING_GETNB :
1199
+ ret = mca_spml_ucx_get_nb (ctx ,
1200
+ ucx_ctx -> ucp_peers [idx ].mkeys [SPML_UCX_SERVICE_SEG ]-> super .super .va_base ,
1201
+ sizeof (flush_get_data ), & flush_get_data , idx , NULL );
1202
+ break ;
1203
+ case SPML_UCX_STRONG_ORDERING_GET :
1204
+ ret = mca_spml_ucx_get (ctx ,
1205
+ ucx_ctx -> ucp_peers [idx ].mkeys [SPML_UCX_SERVICE_SEG ]-> super .super .va_base ,
1206
+ sizeof (flush_get_data ), & flush_get_data , idx );
1207
+ break ;
1208
+ #if HAVE_DECL_UCP_EP_FLUSH_NBX
1209
+ case SPML_UCX_STRONG_ORDERING_FLUSH :
1210
+ request = ucp_ep_flush_nbx (ucx_ctx -> ucp_peers [idx ].ucp_conn ,
1211
+ & mca_spml_ucx_request_param_b );
1212
+ ret = opal_common_ucx_wait_request (request , ucx_ctx -> ucp_worker [0 ], "ucp_flush_nbx" );
1213
+ #elif HAVE_DECL_UCP_EP_FLUSH_NB
1214
+ request = ucp_ep_flush_nb (ucx_ctx -> ucp_peers [idx ].ucp_conn , 0 , opal_common_ucx_empty_complete_cb );
1215
+ ret = opal_common_ucx_wait_request (request , ucx_ctx -> ucp_worker [0 ], "ucp_flush_nb" );
1216
+ #else
1217
+ status = ucp_ep_flush (ucx_ctx -> ucp_peers [idx ].ucp_conn );
1218
+ ret = (status == UCS_OK ) ? OPAL_SUCCESS : OPAL_ERROR ;
1219
+ #endif
1220
+ break ;
1221
+ default :
1222
+ /* unknown mode */
1223
+ ret = OMPI_SUCCESS ;
1224
+ break ;
1225
+ }
1226
+
1227
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
1228
+ oshmem_shmem_abort (-1 );
1229
+ return ret ;
1230
+ }
1231
+
1232
+ opal_bitmap_clear_bit (& ucx_ctx -> put_op_bitmap , idx );
1233
+ }
1234
+
1235
+ ucx_ctx -> put_proc_count = 0 ;
1236
+ return OSHMEM_SUCCESS ;
1237
+ }
1238
+
1181
1239
int mca_spml_ucx_fence (shmem_ctx_t ctx )
1182
1240
{
1241
+ mca_spml_ucx_ctx_t * ucx_ctx = (mca_spml_ucx_ctx_t * )ctx ;
1183
1242
ucs_status_t err ;
1243
+ int ret ;
1184
1244
unsigned int i = 0 ;
1185
- mca_spml_ucx_ctx_t * ucx_ctx = (mca_spml_ucx_ctx_t * )ctx ;
1186
1245
1187
1246
opal_atomic_wmb ();
1188
1247
1248
+ if (mca_spml_ucx .strong_sync != SPML_UCX_STRONG_ORDERING_NONE ) {
1249
+ ret = mca_spml_ucx_strong_sync (ctx );
1250
+ if (ret != OSHMEM_SUCCESS ) {
1251
+ oshmem_shmem_abort (-1 );
1252
+ return ret ;
1253
+ }
1254
+ }
1255
+
1189
1256
for (i = 0 ; i < ucx_ctx -> ucp_workers ; i ++ ) {
1190
1257
if (ucx_ctx -> ucp_worker [i ] != NULL ) {
1191
1258
err = ucp_worker_fence (ucx_ctx -> ucp_worker [i ]);
@@ -1201,26 +1268,16 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx)
1201
1268
1202
1269
int mca_spml_ucx_quiet (shmem_ctx_t ctx )
1203
1270
{
1204
- int flush_get_data ;
1205
1271
int ret ;
1206
1272
unsigned i ;
1207
- int idx ;
1208
1273
mca_spml_ucx_ctx_t * ucx_ctx = (mca_spml_ucx_ctx_t * )ctx ;
1209
1274
1210
1275
if (mca_spml_ucx .synchronized_quiet ) {
1211
- for (i = 0 ; i < ucx_ctx -> put_proc_count ; i ++ ) {
1212
- idx = ucx_ctx -> put_proc_indexes [i ];
1213
- ret = mca_spml_ucx_get_nb (ctx ,
1214
- ucx_ctx -> ucp_peers [idx ].mkeys [SPML_UCX_SERVICE_SEG ]-> super .super .va_base ,
1215
- sizeof (flush_get_data ), & flush_get_data , idx , NULL );
1216
- if (OMPI_SUCCESS != ret ) {
1217
- oshmem_shmem_abort (-1 );
1218
- return ret ;
1219
- }
1220
-
1221
- opal_bitmap_clear_bit (& ucx_ctx -> put_op_bitmap , idx );
1276
+ ret = mca_spml_ucx_strong_sync (ctx );
1277
+ if (ret != OSHMEM_SUCCESS ) {
1278
+ oshmem_shmem_abort (-1 );
1279
+ return ret ;
1222
1280
}
1223
- ucx_ctx -> put_proc_count = 0 ;
1224
1281
}
1225
1282
1226
1283
opal_atomic_wmb ();
0 commit comments