Skip to content

Commit 94fed3b

Browse files
committed
osc/rdma: Clean up use_cpu_atomics behaivor
The use_cpu_atomics member of the rdma module was only used during initialization, so there was no reason for it to be a module member. Also clean up the initialization logic for the field (and therefore, whether or not the shared state optimization is used). Previously, it appeared to be possible to use with alternate btls across multiple nodes (ie, we tested the GLOB value on the btls), but the use_cpu_atomics field was initialized to false in the multi-node case and so they would never actually be enabled. Make that more obvious, rather than try to enable the optimization, to reduce the risk of introducing new datapath bugs. Signed-off-by: Brian Barrett <bbarrett@amazon.com>
1 parent 6e2af18 commit 94fed3b

File tree

2 files changed

+23
-20
lines changed

2 files changed

+23
-20
lines changed

ompi/mca/osc/rdma/osc_rdma.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,9 +148,6 @@ struct ompi_osc_rdma_module_t {
148148
/** value of same_size info key for this window */
149149
bool same_size;
150150

151-
/** CPU atomics can be used */
152-
bool use_cpu_atomics;
153-
154151
/** passive-target synchronization will not be used in this window */
155152
bool no_locks;
156153

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ static int ompi_osc_rdma_initialize_region (ompi_osc_rdma_module_t *module, void
427427
return OMPI_SUCCESS;
428428
}
429429

430-
static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, size_t size)
430+
static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, size_t size, bool use_cpu_atomics)
431431
{
432432
size_t total_size, local_rank_array_size, leader_peer_data_size, base_data_size;
433433
ompi_osc_rdma_peer_t *my_peer;
@@ -507,7 +507,7 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s
507507
my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
508508
my_peer->state = (uint64_t) (uintptr_t) module->state;
509509

510-
if (module->use_cpu_atomics) {
510+
if (use_cpu_atomics) {
511511
/* all peers are local or it is safe to mix cpu and nic atomics */
512512
my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE;
513513
} else {
@@ -526,7 +526,7 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s
526526
ex_peer->size = size;
527527
}
528528

529-
if (!module->use_cpu_atomics) {
529+
if (!use_cpu_atomics) {
530530
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
531531
/* base is local and cpu atomics are available */
532532
ex_peer->super.base_handle = module->state_handle;
@@ -570,6 +570,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
570570
struct _local_data *temp;
571571
char *data_file;
572572
size_t memory_alignment = module->memory_alignment;
573+
bool use_cpu_atomics;
573574

574575
shared_comm = module->shared_comm;
575576

@@ -578,21 +579,26 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
578579

579580
/* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */
580581
module->single_node = local_size == global_size;
581-
module->use_cpu_atomics = module->single_node;
582582

583-
if (!module->single_node) {
584-
if (module->use_accelerated_btl) {
585-
module->use_cpu_atomics = !!(module->accelerated_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
586-
} else {
587-
for (int i = 0 ; i < module->alternate_btl_count ; ++i) {
588-
module->use_cpu_atomics &= !!(module->alternate_btls[i]->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
589-
}
590-
}
583+
if (module->single_node) {
584+
use_cpu_atomics = true;
585+
} else if (module->use_accelerated_btl) {
586+
use_cpu_atomics = !!(module->accelerated_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
587+
} else {
588+
/* using the shared state optimization that is enabled by
589+
* being able to use cpu atomics was never enabled for
590+
* alternate btls, due to a previous bug in the enablement
591+
* logic when alternate btls were first supported. It is
592+
* likely that this optimization could work with sufficient
593+
* testing, but for now, always disable to not introduce new
594+
* correctness risks.
595+
*/
596+
use_cpu_atomics = false;
591597
}
592598

593599
if (1 == local_size) {
594600
/* no point using a shared segment if there are no other processes on this node */
595-
return allocate_state_single (module, base, size);
601+
return allocate_state_single (module, base, size, use_cpu_atomics);
596602
}
597603

598604
opal_output_verbose(MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output,
@@ -771,7 +777,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
771777
ex_peer = (ompi_osc_rdma_peer_extended_t *) peer;
772778

773779
/* set up peer state */
774-
if (module->use_cpu_atomics) {
780+
if (use_cpu_atomics) {
775781
/* all peers are local or it is safe to mix cpu and nic atomics */
776782
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE;
777783
peer->state = (osc_rdma_counter_t) peer_state;
@@ -796,7 +802,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
796802
}
797803

798804
if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor && MPI_WIN_FLAVOR_CREATE != module->flavor &&
799-
!module->use_cpu_atomics && temp[i].size && i > 0) {
805+
!use_cpu_atomics && temp[i].size && i > 0) {
800806
/* use the local leader's endpoint */
801807
peer->data_endpoint = local_leader->data_endpoint;
802808
peer->data_btl_index = local_leader->data_btl_index;
@@ -805,7 +811,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
805811
ompi_osc_module_add_peer (module, peer);
806812

807813
if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor) {
808-
if (module->use_cpu_atomics && peer_rank == my_rank) {
814+
if (use_cpu_atomics && peer_rank == my_rank) {
809815
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
810816
}
811817
/* nothing more to do */
@@ -821,7 +827,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
821827
ex_peer->size = temp[i].size;
822828
}
823829

824-
if (module->use_cpu_atomics && (MPI_WIN_FLAVOR_ALLOCATE == module->flavor || peer_rank == my_rank)) {
830+
if (use_cpu_atomics && (MPI_WIN_FLAVOR_ALLOCATE == module->flavor || peer_rank == my_rank)) {
825831
/* base is local and cpu atomics are available */
826832
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
827833
ex_peer->super.base = (uintptr_t) module->segment_base + offset;

0 commit comments

Comments
 (0)