Skip to content

Commit b00209e

Browse files
xinzhao3 authored and Tomislav Janjusic committed
Revert "OMPI/OSHMEM: bug-fix: store mkeys for each oshmem ctx."
This reverts commit f1b095c. Signed-off-by: Tomislav Janjusic <tomislavj@mellanox.com>
1 parent ad8c842 commit b00209e

File tree

13 files changed

+54
-90
lines changed

13 files changed

+54
-90
lines changed

oshmem/mca/atomic/ucx/atomic_ucx_cswap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ int mca_atomic_ucx_cswap(shmem_ctx_t ctx,
4040
assert(NULL != prev);
4141

4242
*prev = value;
43-
ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self);
43+
ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self);
4444
status_ptr = ucp_atomic_fetch_nb(ucx_ctx->ucp_peers[pe].ucp_conn,
4545
UCP_ATOMIC_FETCH_OP_CSWAP, cond, prev, size,
4646
rva, ucx_mkey->rkey,

oshmem/mca/atomic/ucx/atomic_ucx_module.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ int mca_atomic_ucx_op(shmem_ctx_t ctx,
4747

4848
assert((8 == size) || (4 == size));
4949

50-
ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self);
50+
ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self);
5151
status = ucp_atomic_post(ucx_ctx->ucp_peers[pe].ucp_conn,
5252
op, value, size, rva,
5353
ucx_mkey->rkey);
@@ -70,7 +70,7 @@ int mca_atomic_ucx_fop(shmem_ctx_t ctx,
7070

7171
assert((8 == size) || (4 == size));
7272

73-
ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self);
73+
ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, pe, target, (void *)&rva, mca_spml_self);
7474
status_ptr = ucp_atomic_fetch_nb(ucx_ctx->ucp_peers[pe].ucp_conn,
7575
op, value, prev, size,
7676
rva, ucx_mkey->rkey,

oshmem/mca/memheap/base/base.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,7 @@ void memheap_oob_destruct(void);
6969
OSHMEM_DECLSPEC int mca_memheap_base_is_symmetric_addr(const void* va);
7070
OSHMEM_DECLSPEC sshmem_mkey_t *mca_memheap_base_get_mkey(void* va,
7171
int tr_id);
72-
OSHMEM_DECLSPEC sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(shmem_ctx_t ctx,
73-
map_segment_t *s,
72+
OSHMEM_DECLSPEC sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s,
7473
int pe,
7574
void* va,
7675
int btl_id,
@@ -244,8 +243,7 @@ static inline map_segment_t *memheap_find_va(void* va)
244243
return s;
245244
}
246245

247-
static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(shmem_ctx_t ctx,
248-
int pe,
246+
static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(int pe,
249247
void* va,
250248
int btl_id,
251249
void** rva)
@@ -275,7 +273,7 @@ static inline sshmem_mkey_t *mca_memheap_base_get_cached_mkey(shmem_ctx_t ctx,
275273
return mkey;
276274
}
277275

278-
return mca_memheap_base_get_cached_mkey_slow(ctx, s, pe, va, btl_id, rva);
276+
return mca_memheap_base_get_cached_mkey_slow(s, pe, va, btl_id, rva);
279277
}
280278

281279
static inline int mca_memheap_base_num_transports(void)

oshmem/mca/memheap/base/memheap_base_mkey.c

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ struct oob_comm {
5555
oob_comm_request_t req_pool[MEMHEAP_RECV_REQS_MAX];
5656
opal_list_t req_list;
5757
int is_inited;
58-
shmem_ctx_t ctx;
5958
};
6059

6160
mca_memheap_map_t* memheap_map = NULL;
@@ -67,7 +66,7 @@ static int send_buffer(int pe, opal_buffer_t *msg);
6766
static int oshmem_mkey_recv_cb(void);
6867

6968
/* pickup list of rkeys and remote va */
70-
static int memheap_oob_get_mkeys(shmem_ctx_t ctx, int pe,
69+
static int memheap_oob_get_mkeys(int pe,
7170
uint32_t va_seg_num,
7271
sshmem_mkey_t *mkey);
7372

@@ -143,7 +142,7 @@ static void memheap_attach_segment(sshmem_mkey_t *mkey, int tr_id)
143142
}
144143

145144

146-
static void unpack_remote_mkeys(shmem_ctx_t ctx, opal_buffer_t *msg, int remote_pe)
145+
static void unpack_remote_mkeys(opal_buffer_t *msg, int remote_pe)
147146
{
148147
int32_t cnt;
149148
int32_t n;
@@ -183,7 +182,7 @@ static void unpack_remote_mkeys(shmem_ctx_t ctx, opal_buffer_t *msg, int remote_
183182
} else {
184183
memheap_oob.mkeys[tr_id].u.key = MAP_SEGMENT_SHM_INVALID;
185184
}
186-
MCA_SPML_CALL(rmkey_unpack(ctx, &memheap_oob.mkeys[tr_id], memheap_oob.segno, remote_pe, tr_id));
185+
MCA_SPML_CALL(rmkey_unpack(&memheap_oob.mkeys[tr_id], memheap_oob.segno, remote_pe, tr_id));
187186
}
188187

189188
MEMHEAP_VERBOSE(5,
@@ -243,7 +242,7 @@ static void do_recv(int source_pe, opal_buffer_t* buffer)
243242
case MEMHEAP_RKEY_RESP:
244243
MEMHEAP_VERBOSE(5, "*** RKEY RESP");
245244
OPAL_THREAD_LOCK(&memheap_oob.lck);
246-
unpack_remote_mkeys(memheap_oob.ctx, buffer, source_pe);
245+
unpack_remote_mkeys(buffer, source_pe);
247246
memheap_oob.mkeys_rcvd = MEMHEAP_RKEY_RESP;
248247
opal_condition_broadcast(&memheap_oob.cond);
249248
OPAL_THREAD_UNLOCK(&memheap_oob.lck);
@@ -456,14 +455,14 @@ static int send_buffer(int pe, opal_buffer_t *msg)
456455
return rc;
457456
}
458457

459-
static int memheap_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t seg, sshmem_mkey_t *mkeys)
458+
static int memheap_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
460459
{
461460
opal_buffer_t *msg;
462461
uint8_t cmd;
463462
int i;
464463
int rc;
465464

466-
if (OSHMEM_SUCCESS == MCA_SPML_CALL(oob_get_mkeys(ctx, pe, seg, mkeys))) {
465+
if (OSHMEM_SUCCESS == MCA_SPML_CALL(oob_get_mkeys(pe, seg, mkeys))) {
467466
for (i = 0; i < memheap_map->num_transports; i++) {
468467
MEMHEAP_VERBOSE(5,
469468
"MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d %s",
@@ -479,7 +478,6 @@ static int memheap_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t seg, sshmem_m
479478
memheap_oob.mkeys = mkeys;
480479
memheap_oob.segno = seg;
481480
memheap_oob.mkeys_rcvd = 0;
482-
memheap_oob.ctx = ctx;
483481

484482
msg = OBJ_NEW(opal_buffer_t);
485483
if (!msg) {
@@ -647,7 +645,7 @@ void mca_memheap_modex_recv_all(void)
647645
}
648646
memheap_oob.mkeys = s->mkeys_cache[i];
649647
memheap_oob.segno = j;
650-
unpack_remote_mkeys(oshmem_ctx_default, msg, i);
648+
unpack_remote_mkeys(msg, i);
651649
}
652650
}
653651

@@ -676,8 +674,7 @@ void mca_memheap_modex_recv_all(void)
676674
}
677675
}
678676

679-
sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(shmem_ctx_t ctx,
680-
map_segment_t *s,
677+
sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(map_segment_t *s,
681678
int pe,
682679
void* va,
683680
int btl_id,
@@ -695,7 +692,7 @@ sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(shmem_ctx_t ctx,
695692
if (!s->mkeys_cache[pe])
696693
return NULL ;
697694

698-
rc = memheap_oob_get_mkeys(ctx, pe,
695+
rc = memheap_oob_get_mkeys(pe,
699696
s - memheap_map->mem_segs,
700697
s->mkeys_cache[pe]);
701698
if (OSHMEM_SUCCESS != rc)

oshmem/mca/spml/base/base.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,11 @@ OSHMEM_DECLSPEC int mca_spml_base_test(void* addr,
7272
void* value,
7373
int datatype,
7474
int *out_value);
75-
OSHMEM_DECLSPEC int mca_spml_base_oob_get_mkeys(shmem_ctx_t ctx,
76-
int pe,
75+
OSHMEM_DECLSPEC int mca_spml_base_oob_get_mkeys(int pe,
7776
uint32_t seg,
7877
sshmem_mkey_t *mkeys);
7978

80-
OSHMEM_DECLSPEC void mca_spml_base_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t seg, int pe, int tr_id);
79+
OSHMEM_DECLSPEC void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t seg, int pe, int tr_id);
8180
OSHMEM_DECLSPEC void mca_spml_base_rmkey_free(sshmem_mkey_t *mkey);
8281
OSHMEM_DECLSPEC void *mca_spml_base_rmkey_ptr(const void *dst_addr, sshmem_mkey_t *mkey, int pe);
8382

oshmem/mca/spml/base/spml_base.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,12 +247,12 @@ int mca_spml_base_wait_nb(void* handle)
247247
return OSHMEM_SUCCESS;
248248
}
249249

250-
int mca_spml_base_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t segno, sshmem_mkey_t *mkeys)
250+
int mca_spml_base_oob_get_mkeys(int pe, uint32_t segno, sshmem_mkey_t *mkeys)
251251
{
252252
return OSHMEM_ERROR;
253253
}
254254

255-
void mca_spml_base_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id)
255+
void mca_spml_base_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id)
256256
{
257257
}
258258

oshmem/mca/spml/ikrit/spml_ikrit.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ int mca_spml_ikrit_put_simple(void* dst_addr,
149149
void* src_addr,
150150
int dst);
151151

152-
static void mca_spml_ikrit_cache_mkeys(shmem_ctx_t ctx, sshmem_mkey_t *, uint32_t seg, int remote_pe, int tr_id);
152+
static void mca_spml_ikrit_cache_mkeys(sshmem_mkey_t *, uint32_t seg, int remote_pe, int tr_id);
153153

154154
static mxm_mem_key_t *mca_spml_ikrit_get_mkey_slow(int pe, void *va, int ptl_id, void **rva);
155155

@@ -185,7 +185,7 @@ mca_spml_ikrit_t mca_spml_ikrit = {
185185
mca_spml_ikrit_get_mkey_slow
186186
};
187187

188-
static void mca_spml_ikrit_cache_mkeys(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t seg, int dst_pe, int tr_id)
188+
static void mca_spml_ikrit_cache_mkeys(sshmem_mkey_t *mkey, uint32_t seg, int dst_pe, int tr_id)
189189
{
190190
mxm_peer_t *peer;
191191

@@ -504,7 +504,7 @@ sshmem_mkey_t *mca_spml_ikrit_register(void* addr,
504504
my_rank, i, addr, (unsigned long long)size,
505505
mca_spml_base_mkey2str(&mkeys[i]));
506506

507-
mca_spml_ikrit_cache_mkeys(NULL, &mkeys[i], memheap_find_segnum(addr), my_rank, i);
507+
mca_spml_ikrit_cache_mkeys(&mkeys[i], memheap_find_segnum(addr), my_rank, i);
508508
}
509509
*count = MXM_PTL_LAST;
510510

@@ -548,7 +548,7 @@ int mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys)
548548

549549
}
550550

551-
int mca_spml_ikrit_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t seg, sshmem_mkey_t *mkeys)
551+
int mca_spml_ikrit_oob_get_mkeys(int pe, uint32_t seg, sshmem_mkey_t *mkeys)
552552
{
553553
int ptl;
554554

@@ -567,7 +567,7 @@ int mca_spml_ikrit_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t seg, sshmem_m
567567
mkeys[ptl].len = 0;
568568
mkeys[ptl].va_base = mca_memheap_seg2base_va(seg);
569569
mkeys[ptl].u.key = MAP_SEGMENT_SHM_INVALID;
570-
mca_spml_ikrit_cache_mkeys(NULL, &mkeys[ptl], seg, pe, ptl);
570+
mca_spml_ikrit_cache_mkeys(&mkeys[ptl], seg, pe, ptl);
571571
return OSHMEM_SUCCESS;
572572
}
573573

oshmem/mca/spml/ikrit/spml_ikrit.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ extern sshmem_mkey_t *mca_spml_ikrit_register(void* addr,
182182
uint64_t shmid,
183183
int *count);
184184
extern int mca_spml_ikrit_deregister(sshmem_mkey_t *mkeys);
185-
extern int mca_spml_ikrit_oob_get_mkeys(shmem_ctx_t ctx, int pe,
185+
extern int mca_spml_ikrit_oob_get_mkeys(int pe,
186186
uint32_t segno,
187187
sshmem_mkey_t *mkeys);
188188

oshmem/mca/spml/spml.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ typedef int (*mca_spml_base_module_test_fn_t)(void* addr,
132132
*
133133
* @param mkey remote mkey
134134
*/
135-
typedef void (*mca_spml_base_module_mkey_unpack_fn_t)(shmem_ctx_t ctx, sshmem_mkey_t *, uint32_t segno, int remote_pe, int tr_id);
135+
typedef void (*mca_spml_base_module_mkey_unpack_fn_t)(sshmem_mkey_t *, uint32_t segno, int remote_pe, int tr_id);
136136

137137
/**
138138
* If possible, get a pointer to the remote memory described by the mkey
@@ -180,7 +180,7 @@ typedef int (*mca_spml_base_module_deregister_fn_t)(sshmem_mkey_t *mkeys);
180180
*
181181
* @return OSHMEM_SUCCESS if keys are found
182182
*/
183-
typedef int (*mca_spml_base_module_oob_get_mkeys_fn_t)(shmem_ctx_t ctx, int pe,
183+
typedef int (*mca_spml_base_module_oob_get_mkeys_fn_t)(int pe,
184184
uint32_t seg,
185185
sshmem_mkey_t *mkeys);
186186

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
#endif
4343

4444
static
45-
spml_ucx_mkey_t * mca_spml_ucx_get_mkey_slow(shmem_ctx_t ctx, int pe, void *va, void **rva);
45+
spml_ucx_mkey_t * mca_spml_ucx_get_mkey_slow(int pe, void *va, void **rva);
4646

4747
mca_spml_ucx_t mca_spml_ucx = {
4848
.super = {
@@ -308,11 +308,11 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs)
308308

309309

310310
static
311-
spml_ucx_mkey_t * mca_spml_ucx_get_mkey_slow(shmem_ctx_t ctx, int pe, void *va, void **rva)
311+
spml_ucx_mkey_t * mca_spml_ucx_get_mkey_slow(int pe, void *va, void **rva)
312312
{
313313
sshmem_mkey_t *r_mkey;
314314

315-
r_mkey = mca_memheap_base_get_cached_mkey(ctx, pe, va, 0, rva);
315+
r_mkey = mca_memheap_base_get_cached_mkey(pe, va, 0, rva);
316316
if (OPAL_UNLIKELY(!r_mkey)) {
317317
SPML_UCX_ERROR("pe=%d: %p is not address of symmetric variable",
318318
pe, va);
@@ -350,24 +350,31 @@ void *mca_spml_ucx_rmkey_ptr(const void *dst_addr, sshmem_mkey_t *mkey, int pe)
350350
#endif
351351
}
352352

353-
void mca_spml_ucx_rmkey_unpack(shmem_ctx_t ctx, sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id)
353+
static void mca_spml_ucx_cache_mkey(mca_spml_ucx_ctx_t *ucx_ctx, sshmem_mkey_t *mkey, uint32_t segno, int dst_pe)
354+
{
355+
ucp_peer_t *peer;
356+
357+
peer = &(ucx_ctx->ucp_peers[dst_pe]);
358+
mkey_segment_init(&peer->mkeys[segno].super, mkey, segno);
359+
}
360+
361+
void mca_spml_ucx_rmkey_unpack(sshmem_mkey_t *mkey, uint32_t segno, int pe, int tr_id)
354362
{
355363
spml_ucx_mkey_t *ucx_mkey;
356-
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
357364
ucs_status_t err;
358365

359-
ucx_mkey = &ucx_ctx->ucp_peers[pe].mkeys[segno].key;
366+
ucx_mkey = &mca_spml_ucx_ctx_default.ucp_peers[pe].mkeys[segno].key;
360367

361-
err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[pe].ucp_conn,
362-
mkey->u.data,
368+
err = ucp_ep_rkey_unpack(mca_spml_ucx_ctx_default.ucp_peers[pe].ucp_conn,
369+
mkey->u.data,
363370
&ucx_mkey->rkey);
364371
if (UCS_OK != err) {
365372
SPML_UCX_ERROR("failed to unpack rkey: %s", ucs_status_string(err));
366373
goto error_fatal;
367374
}
368375

369376
mkey->spml_context = ucx_mkey;
370-
mca_spml_ucx_cache_mkey(ucx_ctx, mkey, segno, pe);
377+
mca_spml_ucx_cache_mkey(&mca_spml_ucx_ctx_default, mkey, segno, pe);
371378
return;
372379

373380
error_fatal:
@@ -628,7 +635,7 @@ int mca_spml_ucx_get(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_add
628635
ucs_status_t status;
629636
#endif
630637

631-
ucx_mkey = mca_spml_ucx_get_mkey(ctx, src, src_addr, &rva, &mca_spml_ucx);
638+
ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, src, src_addr, &rva, &mca_spml_ucx);
632639
#if HAVE_DECL_UCP_GET_NB
633640
request = ucp_get_nb(ucx_ctx->ucp_peers[src].ucp_conn, dst_addr, size,
634641
(uint64_t)rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb);
@@ -647,7 +654,7 @@ int mca_spml_ucx_get_nb(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_
647654
spml_ucx_mkey_t *ucx_mkey;
648655
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
649656

650-
ucx_mkey = mca_spml_ucx_get_mkey(ctx, src, src_addr, &rva, &mca_spml_ucx);
657+
ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, src, src_addr, &rva, &mca_spml_ucx);
651658
status = ucp_get_nbi(ucx_ctx->ucp_peers[src].ucp_conn, dst_addr, size,
652659
(uint64_t)rva, ucx_mkey->rkey);
653660

@@ -665,7 +672,7 @@ int mca_spml_ucx_put(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_add
665672
ucs_status_t status;
666673
#endif
667674

668-
ucx_mkey = mca_spml_ucx_get_mkey(ctx, dst, dst_addr, &rva, &mca_spml_ucx);
675+
ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, dst, dst_addr, &rva, &mca_spml_ucx);
669676
#if HAVE_DECL_UCP_PUT_NB
670677
request = ucp_put_nb(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size,
671678
(uint64_t)rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb);
@@ -684,7 +691,7 @@ int mca_spml_ucx_put_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_
684691
spml_ucx_mkey_t *ucx_mkey;
685692
mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
686693

687-
ucx_mkey = mca_spml_ucx_get_mkey(ctx, dst, dst_addr, &rva, &mca_spml_ucx);
694+
ucx_mkey = mca_spml_ucx_get_mkey(ucx_ctx, dst, dst_addr, &rva, &mca_spml_ucx);
688695
status = ucp_put_nbi(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size,
689696
(uint64_t)rva, ucx_mkey->rkey);
690697

0 commit comments

Comments
 (0)