Skip to content

Commit 557f672

Browse files
authored
Merge pull request #9336 from sb17v/topic/master/oshmem/fix_assertion_error_on_oshmem_local_vpids_cleanup_v2
oshmem: fix assertion error on oshmem local vpids cleanup
2 parents 231a122 + ad01c4a commit 557f672

File tree

3 files changed: +50 -12 lines changed

oshmem/mca/memheap/base/base.h

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,35 @@ static inline int memheap_is_va_in_segment(void *va, int segno)
176176
return map_segment_is_va_in(&memheap_find_seg(segno)->super, va);
177177
}
178178

179-
static inline int memheap_find_segnum(void *va)
179+
static inline int memheap_find_segnum(void *va, int pe)
180180
{
181181
int i;
182-
183-
for (i = 0; i < mca_memheap_base_map.n_segments; i++) {
184-
if (memheap_is_va_in_segment(va, i)) {
185-
return i;
182+
int my_pe = oshmem_my_proc_id();
183+
184+
if (pe == my_pe) {
185+
/* Find segment number for local segment using va_base
186+
* TODO: Merge local and remote segment information in mkeys_cache
187+
*/
188+
for (i = 0; i < mca_memheap_base_map.n_segments; i++) {
189+
if (memheap_is_va_in_segment(va, i)) {
190+
return i;
191+
}
192+
}
193+
} else {
194+
/* Find segment number for remote segments using va_base */
195+
for (i = 0; i < mca_memheap_base_map.n_segments; i++) {
196+
map_segment_t *seg = memheap_find_seg(i);
197+
if (seg) {
198+
sshmem_mkey_t **mkeys_cache = seg->mkeys_cache;
199+
if (mkeys_cache) {
200+
if (mkeys_cache[pe]) {
201+
if ((va >= mkeys_cache[pe]->va_base) &&
202+
(va < mkeys_cache[pe]->va_base + mkeys_cache[pe]->len)) {
203+
return i;
204+
}
205+
}
206+
}
207+
}
186208
}
187209
}
188210
return MEMHEAP_SEG_INVALID;

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,13 @@ void mca_spml_ucx_rmkey_free(sshmem_mkey_t *mkey, int pe)
574574
if (!mkey->spml_context) {
575575
return;
576576
}
577-
segno = memheap_find_segnum(mkey->va_base);
577+
segno = memheap_find_segnum(mkey->va_base, pe);
578+
if (MEMHEAP_SEG_INVALID == segno) {
579+
SPML_UCX_ERROR("mca_spml_ucx_rmkey_free failed because of invalid "
580+
"segment number: %d\n", segno);
581+
return;
582+
}
583+
578584
ucx_mkey = (spml_ucx_mkey_t *)(mkey->spml_context);
579585
rc = mca_spml_ucx_ctx_mkey_del(&mca_spml_ucx_ctx_default, pe, segno, ucx_mkey);
580586
if (OSHMEM_SUCCESS != rc) {
@@ -676,7 +682,12 @@ sshmem_mkey_t *mca_spml_ucx_register(void* addr,
676682
return NULL;
677683
}
678684

679-
segno = memheap_find_segnum(addr);
685+
segno = memheap_find_segnum(addr, my_pe);
686+
if (MEMHEAP_SEG_INVALID == segno) {
687+
SPML_UCX_ERROR("mca_spml_ucx_register failed because of invalid "
688+
"segment number: %d\n", segno);
689+
return NULL;
690+
}
680691
mem_seg = memheap_find_seg(segno);
681692

682693
/* if possible use mem handle already created by ucx allocator */
@@ -750,13 +761,19 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys)
750761
return OSHMEM_SUCCESS;
751762

752763
mem_seg = memheap_find_va(mkeys[SPML_UCX_TRANSP_IDX].va_base);
753-
ucx_mkey = (spml_ucx_mkey_t*)mkeys[SPML_UCX_TRANSP_IDX].spml_context;
754-
segno = memheap_find_segnum(mkeys[SPML_UCX_TRANSP_IDX].va_base);
755-
756764
if (OPAL_UNLIKELY(NULL == mem_seg)) {
757765
return OSHMEM_ERROR;
758766
}
759767

768+
segno = memheap_find_segnum(mkeys[SPML_UCX_TRANSP_IDX].va_base, my_pe);
769+
if (MEMHEAP_SEG_INVALID == segno) {
770+
SPML_UCX_ERROR("mca_spml_ucx_deregister failed because of invalid "
771+
"segment number: %d\n", segno);
772+
return OSHMEM_ERROR;
773+
}
774+
775+
ucx_mkey = (spml_ucx_mkey_t*)mkeys[SPML_UCX_TRANSP_IDX].spml_context;
776+
760777
if (MAP_SEGMENT_ALLOC_UCX != mem_seg->type) {
761778
ucp_mem_unmap(mca_spml_ucx.ucp_context, ucx_mkey->mem_h);
762779
}

oshmem/proc/proc.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ int oshmem_proc_group_finalize(void)
179179
}
180180
}
181181

182+
OBJ_DESTRUCT(&_oshmem_local_vpids);
182183
OBJ_DESTRUCT(&oshmem_group_array);
183184

184185
oshmem_group_cache_destroy();
@@ -265,8 +266,6 @@ oshmem_proc_group_destroy_internal(oshmem_group_t* group, int scoll_unselect)
265266
mca_scoll_base_group_unselect(group);
266267
}
267268

268-
/* Destroy proc array */
269-
OBJ_DESTRUCT(&_oshmem_local_vpids);
270269
if (group->proc_vpids) {
271270
free(group->proc_vpids);
272271
}

0 commit comments

Comments
 (0)