Skip to content

Commit 7856a25

Browse files
committed
osc/rdma: Tighten up concurrent memory region access.
There were some instances where the exclusive lock needed some tightening around the region structure. Signed-off-by: Austen Lauria <awlauria@us.ibm.com>
1 parent b82b267 commit 7856a25

File tree

1 file changed

+20
-14
lines changed

1 file changed

+20
-14
lines changed

ompi/mca/osc/rdma/osc_rdma_dynamic.c

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
185185
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach: %s, %p, %lu", win->w_name, base, (unsigned long) len);
186186

187187
OPAL_THREAD_LOCK(&module->lock);
188+
ompi_osc_rdma_lock_acquire_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
188189

189190
region_count = module->state->region_count & 0xffffffffL;
190191
region_id = module->state->region_count >> 32;
@@ -208,16 +209,11 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
208209
/* validates that the region does not overlap with an existing region even if they are on the same page */
209210
ret = ompi_osc_rdma_add_attachment (module->dynamic_handles[region_index], (intptr_t) base, len);
210211
OPAL_THREAD_UNLOCK(&module->lock);
212+
ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
211213
/* no need to invalidate remote caches */
212214
return ret;
213215
}
214216

215-
/* region is in flux */
216-
module->state->region_count = -1;
217-
opal_atomic_wmb ();
218-
219-
ompi_osc_rdma_lock_acquire_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
220-
221217
/* do a binary seach for where the region should be inserted */
222218
if (region_count) {
223219
region = find_insertion_point ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1,
@@ -252,6 +248,7 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
252248
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
253249
OPAL_THREAD_UNLOCK(&module->lock);
254250
OBJ_RELEASE(rdma_region_handle);
251+
ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
255252
return OMPI_ERR_RMA_ATTACH;
256253
}
257254

@@ -274,9 +271,9 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
274271
}
275272
#endif
276273

277-
opal_atomic_mb ();
278274
/* the region state has changed */
279275
module->state->region_count = ((region_id + 1) << 32) | (region_count + 1);
276+
opal_atomic_wmb ();
280277

281278
ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
282279
OPAL_THREAD_UNLOCK(&module->lock);
@@ -303,6 +300,9 @@ int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)
303300

304301
OPAL_THREAD_LOCK(&module->lock);
305302

303+
/* lock the region so it can't change while a peer is reading it */
304+
ompi_osc_rdma_lock_acquire_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));
305+
306306
/* the upper 4 bytes of the region count are an instance counter */
307307
region_count = module->state->region_count & 0xffffffffL;
308308
region_id = module->state->region_count >> 32;
@@ -326,23 +326,22 @@ int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)
326326
if (region_index == region_count) {
327327
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "could not find dynamic memory attachment for %p", base);
328328
OPAL_THREAD_UNLOCK(&module->lock);
329+
ompi_osc_rdma_lock_release_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));
329330
return OMPI_ERR_BASE;
330331
}
331332

332333
if (!opal_list_is_empty (&rdma_region_handle->attachments)) {
333334
/* another region is referencing this attachment */
335+
OPAL_THREAD_UNLOCK(&module->lock);
336+
ompi_osc_rdma_lock_release_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));
334337
return OMPI_SUCCESS;
335338
}
336339

337-
/* lock the region so it can't change while a peer is reading it */
338-
ompi_osc_rdma_lock_acquire_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));
339-
340340
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "detaching dynamic memory region {%p, %p} from index %d",
341341
base, (void *)((intptr_t) base + region->len), region_index);
342342

343343
if (module->selected_btl->btl_register_mem) {
344344
ompi_osc_rdma_deregister (module, rdma_region_handle->btl_handle);
345-
346345
}
347346

348347
if (region_index < region_count - 1) {
@@ -357,6 +356,7 @@ int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)
357356
module->dynamic_handles[region_count - 1] = NULL;
358357

359358
module->state->region_count = ((region_id + 1) << 32) | (region_count - 1);
359+
opal_atomic_wmb ();
360360

361361
ompi_osc_rdma_lock_release_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));
362362

@@ -460,14 +460,18 @@ int ompi_osc_rdma_find_dynamic_region (ompi_osc_rdma_module_t *module, ompi_osc_
460460
ompi_osc_rdma_peer_dynamic_t *dy_peer = (ompi_osc_rdma_peer_dynamic_t *) peer;
461461
intptr_t bound = (intptr_t) base + len;
462462
ompi_osc_rdma_region_t *regions;
463-
int ret, region_count;
463+
int ret = OMPI_SUCCESS, region_count;
464464

465465
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "locating dynamic memory region matching: {%" PRIx64 ", %" PRIx64 "}"
466466
" (len %lu)", base, base + len, (unsigned long) len);
467467

468+
OPAL_THREAD_LOCK(&module->lock);
469+
// Make sure region isn't being touched.
470+
ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
468471
if (!ompi_osc_rdma_peer_local_state (peer)) {
469472
ret = ompi_osc_rdma_refresh_dynamic_region (module, dy_peer);
470473
if (OMPI_SUCCESS != ret) {
474+
ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
471475
return ret;
472476
}
473477

@@ -481,9 +485,11 @@ int ompi_osc_rdma_find_dynamic_region (ompi_osc_rdma_module_t *module, ompi_osc_
481485

482486
*region = ompi_osc_rdma_find_region_containing (regions, 0, region_count - 1, (intptr_t) base, bound, module->region_size, NULL);
483487
if (!*region) {
484-
return OMPI_ERR_RMA_RANGE;
488+
ret = OMPI_ERR_RMA_RANGE;
485489
}
490+
OPAL_THREAD_UNLOCK(&module->lock);
491+
ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
486492

487493
/* round a matching region */
488-
return OMPI_SUCCESS;
494+
return ret;
489495
}

0 commit comments

Comments
 (0)