Skip to content

Commit c4c9945

Browse files
committed
SPML/UCX: CR comments p1
Signed-off-by: Mikhail Brinskii <mikhailb@mellanox.com>
1 parent 2ef5bd8 commit c4c9945

File tree

4 files changed

+41
-21
lines changed

4 files changed

+41
-21
lines changed

oshmem/include/shmemx.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,20 @@ OSHMEM_DECLSPEC void shmemx_int16_prod_to_all(int16_t *target, const int16_t *so
168168
OSHMEM_DECLSPEC void shmemx_int32_prod_to_all(int32_t *target, const int32_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int32_t *pWrk, long *pSync);
169169
OSHMEM_DECLSPEC void shmemx_int64_prod_to_all(int64_t *target, const int64_t *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int64_t *pWrk, long *pSync);
170170

171-
/* Alltoall put with atomic counter increase */
172-
OSHMEM_DECLSPEC void shmemx_put_with_long_inc_all(void *target, const void *source, size_t size, long *counter);
171+
/* shmemx_alltoall_global_nb is a nonblocking collective routine, where each PE
172+
* exchanges “size” bytes of data with all other PEs in the OpenSHMEM job.
173+
174+
* @param dest A symmetric data object that is large enough to receive
175+
* “size” bytes of data.
176+
* @param source A symmetric data object that contains “size” bytes of data
177+
* for each PE in the OpenSHMEM job.
178+
* @param size The number of bytes to be sent to each PE in the job.
179+
* @param counter A symmetric data object to be atomically incremented after
180+
* the target buffer is updated.
181+
*
182+
* @return OSHMEM_SUCCESS or failure status.
183+
*/
184+
OSHMEM_DECLSPEC void shmemx_alltoall_global_nb(void *dest, const void *source, size_t size, long *counter);
173185

174186
/*
175187
* Backward compatibility section

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -630,12 +630,12 @@ int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx)
630630
mca_spml_ucx_ctx_t *ucx_ctx;
631631
int rc;
632632

633-
/* Take a lock controlling aux context. AUX context may set specific
633+
/* Take a lock controlling context creation. AUX context may set specific
634634
* UCX parameters affecting worker creation, which are not needed for
635635
* regular contexts. */
636-
mca_spml_ucx_aux_lock();
636+
pthread_mutex_lock(&mca_spml_ucx.ctx_create_mutex);
637637
rc = mca_spml_ucx_ctx_create_common(options, &ucx_ctx);
638-
mca_spml_ucx_aux_unlock();
638+
pthread_mutex_unlock(&mca_spml_ucx.ctx_create_mutex);
639639
if (rc != OSHMEM_SUCCESS) {
640640
return rc;
641641
}
@@ -813,6 +813,7 @@ int mca_spml_ucx_send(void* buf,
813813
return rc;
814814
}
815815

816+
/* this can be called with request==NULL in case of immediate completion */
816817
static void mca_spml_ucx_put_all_complete_cb(void *request, ucs_status_t status)
817818
{
818819
if (mca_spml_ucx.async_progress && (--mca_spml_ucx.aux_refcnt == 0)) {
@@ -838,14 +839,14 @@ static int mca_spml_ucx_create_aux_ctx(void)
838839
rand_dci_supp = UCX_VERSION(major, minor, rel_number) >= UCX_VERSION(1, 6, 0);
839840

840841
if (rand_dci_supp) {
841-
opal_setenv("UCX_DC_TX_POLICY", "rand", 1, &environ);
842+
pthread_mutex_lock(&mca_spml_ucx.ctx_create_mutex);
842843
opal_setenv("UCX_DC_MLX5_TX_POLICY", "rand", 1, &environ);
843844
}
844845

845846
rc = mca_spml_ucx_ctx_create_common(SHMEM_CTX_PRIVATE, &mca_spml_ucx.aux_ctx);
846847

847848
if (rand_dci_supp) {
848-
opal_unsetenv("UCX_DC_TX_POLICY", &environ);
849+
pthread_mutex_unlock(&mca_spml_ucx.ctx_create_mutex);
849850
opal_unsetenv("UCX_DC_MLX5_TX_POLICY", &environ);
850851
}
851852

@@ -871,14 +872,13 @@ int mca_spml_ucx_put_all_nb(void *dest, const void *source, size_t size, long *c
871872
}
872873
}
873874

874-
if (!mca_spml_ucx.aux_refcnt) {
875+
if (mca_spml_ucx.aux_refcnt++ == 0) {
875876
tv.tv_sec = 0;
876877
tv.tv_usec = mca_spml_ucx.async_tick;
877878
opal_event_evtimer_add(mca_spml_ucx.tick_event, &tv);
878879
opal_progress_register(spml_ucx_progress_aux_ctx);
879880
}
880881
ctx = (shmem_ctx_t)mca_spml_ucx.aux_ctx;
881-
++mca_spml_ucx.aux_refcnt;
882882
} else {
883883
ctx = oshmem_ctx_default;
884884
}

oshmem/mca/spml/ucx/spml_ucx.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ struct mca_spml_ucx {
9494
mca_spml_ucx_ctx_array_t idle_array;
9595
int priority; /* component priority */
9696
shmem_internal_mutex_t internal_mutex;
97+
pthread_mutex_t ctx_create_mutex;
9798
/* Fields controlling aux context for put_all_nb SPML routine */
9899
bool async_progress;
99100
int async_tick;
@@ -169,16 +170,18 @@ extern int spml_ucx_ctx_progress(void);
169170
extern int spml_ucx_progress_aux_ctx(void);
170171
void mca_spml_ucx_async_cb(int fd, short event, void *cbdata);
171172

172-
static inline int mca_spml_ucx_aux_lock(void)
173+
static inline void mca_spml_ucx_aux_lock(void)
173174
{
174-
return mca_spml_ucx.async_progress ?
175-
pthread_spin_lock(&mca_spml_ucx.async_lock) : 0;
175+
if (mca_spml_ucx.async_progress) {
176+
pthread_spin_lock(&mca_spml_ucx.async_lock);
177+
}
176178
}
177179

178-
static inline int mca_spml_ucx_aux_unlock(void)
180+
static inline void mca_spml_ucx_aux_unlock(void)
179181
{
180-
return mca_spml_ucx.async_progress ?
181-
pthread_spin_unlock(&mca_spml_ucx.async_lock) : 0;
182+
if (mca_spml_ucx.async_progress) {
183+
pthread_spin_unlock(&mca_spml_ucx.async_lock);
184+
}
182185
}
183186

184187
static void mca_spml_ucx_cache_mkey(mca_spml_ucx_ctx_t *ucx_ctx, sshmem_mkey_t *mkey, uint32_t segno, int dst_pe)

oshmem/mca/spml/ucx/spml_ucx_component.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -150,18 +150,20 @@ int spml_ucx_default_progress(void)
150150

151151
int spml_ucx_progress_aux_ctx(void)
152152
{
153+
unsigned count;
154+
153155
if (OPAL_UNLIKELY(!mca_spml_ucx.aux_ctx)) {
154-
return 1;
156+
return 0;
155157
}
156158

157159
if (pthread_spin_trylock(&mca_spml_ucx.async_lock)) {
158-
return 1;
160+
return 0;
159161
}
160162

161-
ucp_worker_progress(mca_spml_ucx.aux_ctx->ucp_worker);
163+
count = ucp_worker_progress(mca_spml_ucx.aux_ctx->ucp_worker);
162164
pthread_spin_unlock(&mca_spml_ucx.async_lock);
163165

164-
return 1;
166+
return count;
165167
}
166168

167169
void mca_spml_ucx_async_cb(int fd, short event, void *cbdata)
@@ -240,6 +242,7 @@ static int spml_ucx_init(void)
240242
sizeof(mca_spml_ucx_ctx_t *));
241243

242244
SHMEM_MUTEX_INIT(mca_spml_ucx.internal_mutex);
245+
pthread_mutex_init(&mca_spml_ucx.ctx_create_mutex, NULL);
243246

244247
wkr_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
245248
if (oshmem_mpi_thread_requested == SHMEM_THREAD_MULTIPLE) {
@@ -265,7 +268,7 @@ static int spml_ucx_init(void)
265268
if (mca_spml_ucx.async_progress) {
266269
pthread_spin_init(&mca_spml_ucx.async_lock, 0);
267270
mca_spml_ucx.async_event_base = opal_progress_thread_init(NULL);
268-
if (NULL == mca_spml_ucx.async_event_base) {
271+
if (NULL == mca_spml_ucx.async_event_base) {
269272
SPML_UCX_ERROR("failed to init async progress thread");
270273
return OSHMEM_ERROR;
271274
}
@@ -274,6 +277,7 @@ static int spml_ucx_init(void)
274277
opal_event_set(mca_spml_ucx.async_event_base, mca_spml_ucx.tick_event,
275278
-1, EV_PERSIST, mca_spml_ucx_async_cb, NULL);
276279
}
280+
277281
mca_spml_ucx.aux_ctx = NULL;
278282
mca_spml_ucx.aux_refcnt = 0;
279283

@@ -342,8 +346,8 @@ static int mca_spml_ucx_component_fini(void)
342346
return OSHMEM_SUCCESS; /* never selected.. return success.. */
343347

344348
if (mca_spml_ucx.async_progress) {
345-
opal_event_evtimer_del(mca_spml_ucx.tick_event);
346349
opal_progress_thread_finalize(NULL);
350+
opal_event_evtimer_del(mca_spml_ucx.tick_event);
347351
if (mca_spml_ucx.aux_ctx != NULL) {
348352
_ctx_cleanup(mca_spml_ucx.aux_ctx);
349353
}
@@ -408,6 +412,7 @@ static int mca_spml_ucx_component_fini(void)
408412
free(mca_spml_ucx.aux_ctx);
409413

410414
SHMEM_MUTEX_DESTROY(mca_spml_ucx.internal_mutex);
415+
pthread_mutex_destroy(&mca_spml_ucx.ctx_create_mutex);
411416

412417
if (mca_spml_ucx.ucp_context) {
413418
ucp_cleanup(mca_spml_ucx.ucp_context);

0 commit comments

Comments
 (0)