Skip to content

Commit d9498e4

Browse files
sb17v and artpol84 committed
oshmem: Fix mkey release for remote segments
The bug was introduced in PR #9058 and caused rmkey_free to fail when cleaning up service data for remote PEs.
This commit enables memheap_find_segnum to find a segment by virtual address (va) for both local and remote PEs.

Co-authored-by: Artem Y. Polyakov <artemp@nvidia.com>
Signed-off-by: Subhadeep Bhattacharya <subhadeepb@nvidia.com>
Signed-off-by: Artem Polyakov <artemp@nvidia.com>
1 parent da9b92a commit d9498e4

File tree

2 files changed

+49
-10
lines changed

2 files changed

+49
-10
lines changed

oshmem/mca/memheap/base/base.h

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -176,13 +176,35 @@ static inline int memheap_is_va_in_segment(void *va, int segno)
176176
return map_segment_is_va_in(&memheap_find_seg(segno)->super, va);
177177
}
178178

179-
static inline int memheap_find_segnum(void *va)
179+
static inline int memheap_find_segnum(void *va, int pe)
180180
{
181181
int i;
182-
183-
for (i = 0; i < mca_memheap_base_map.n_segments; i++) {
184-
if (memheap_is_va_in_segment(va, i)) {
185-
return i;
182+
int my_pe = oshmem_my_proc_id();
183+
184+
if (pe == my_pe) {
185+
/* Find segment number for local segment using va_base
186+
* TODO: Merge local and remote segment information in mkeys_cache
187+
*/
188+
for (i = 0; i < mca_memheap_base_map.n_segments; i++) {
189+
if (memheap_is_va_in_segment(va, i)) {
190+
return i;
191+
}
192+
}
193+
} else {
194+
/* Find segment number for remote segments using va_base */
195+
for (i = 0; i < mca_memheap_base_map.n_segments; i++) {
196+
map_segment_t *seg = memheap_find_seg(i);
197+
if (seg) {
198+
sshmem_mkey_t **mkeys_cache = seg->mkeys_cache;
199+
if (mkeys_cache) {
200+
if (mkeys_cache[pe]) {
201+
if ((va >= mkeys_cache[pe]->va_base) &&
202+
(va < mkeys_cache[pe]->va_base + mkeys_cache[pe]->len)) {
203+
return i;
204+
}
205+
}
206+
}
207+
}
186208
}
187209
}
188210
return MEMHEAP_SEG_INVALID;

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -571,7 +571,13 @@ void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey, int pe)
571571
if (!mkey->spml_context) {
572572
return;
573573
}
574-
segno = memheap_find_segnum(mkey->va_base);
574+
segno = memheap_find_segnum(mkey->va_base, pe);
575+
if (MEMHEAP_SEG_INVALID == segno) {
576+
SPML_UCX_ERROR("mca_spml_ucx_rmkey_free failed because of invalid "
577+
"segment number: %d\n", segno);
578+
return;
579+
}
580+
575581
ucx_mkey = (spml_ucx_mkey_t *)(mkey->spml_context);
576582
rc = mca_spml_ucx_ctx_mkey_del(&mca_spml_ucx_ctx_default, pe, segno, ucx_mkey);
577583
if (OSHMEM_SUCCESS != rc) {
@@ -673,7 +679,12 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr,
673679
return NULL;
674680
}
675681

676-
segno = memheap_find_segnum(addr);
682+
segno = memheap_find_segnum(addr, my_pe);
683+
if (MEMHEAP_SEG_INVALID == segno) {
684+
SPML_UCX_ERROR("mca_spml_ucx_register failed because of invalid "
685+
"segment number: %d\n", segno);
686+
return NULL;
687+
}
677688
mem_seg = memheap_find_seg(segno);
678689

679690
/* if possible use mem handle already created by ucx allocator */
@@ -747,13 +758,19 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys)
747758
return OSHMEM_SUCCESS;
748759

749760
mem_seg = memheap_find_va(mkeys[SPML_UCX_TRANSP_IDX].va_base);
750-
ucx_mkey = (spml_ucx_mkey_t*)mkeys[SPML_UCX_TRANSP_IDX].spml_context;
751-
segno = memheap_find_segnum(mkeys[SPML_UCX_TRANSP_IDX].va_base);
752-
753761
if (OPAL_UNLIKELY(NULL == mem_seg)) {
754762
return OSHMEM_ERROR;
755763
}
756764

765+
segno = memheap_find_segnum(mkeys[SPML_UCX_TRANSP_IDX].va_base, my_pe);
766+
if (MEMHEAP_SEG_INVALID == segno) {
767+
SPML_UCX_ERROR("mca_spml_ucx_deregister failed because of invalid "
768+
"segment number: %d\n", segno);
769+
return OSHMEM_ERROR;
770+
}
771+
772+
ucx_mkey = (spml_ucx_mkey_t*)mkeys[SPML_UCX_TRANSP_IDX].spml_context;
773+
757774
if (MAP_SEGMENT_ALLOC_UCX != mem_seg->type) {
758775
ucp_mem_unmap(mca_spml_ucx.ucp_context, ucx_mkey->mem_h);
759776
}

0 commit comments

Comments
 (0)