20
20
* Copyright (c) 2022 Cisco Systems, Inc. All rights reserved.
21
21
* Copyright (c) Amazon.com, Inc. or its affiliates.
22
22
* All rights reserved.
23
+ * Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
23
24
* $COPYRIGHT$
24
25
*
25
26
* Additional copyrights may follow
@@ -983,14 +984,7 @@ int ompi_coll_base_allreduce_intra_redscat_allgather(
983
984
"coll:base:allreduce_intra_redscat_allgather: rank %d/%d" ,
984
985
rank , comm_size ));
985
986
986
- /* Find nearest power-of-two less than or equal to comm_size */
987
- int nsteps = opal_hibit (comm_size , comm -> c_cube_dim + 1 ); /* ilog2(comm_size) */
988
- if (-1 == nsteps ) {
989
- return MPI_ERR_ARG ;
990
- }
991
- int nprocs_pof2 = 1 << nsteps ; /* flp2(comm_size) */
992
-
993
- if (count < (size_t ) nprocs_pof2 || !ompi_op_is_commute (op )) {
987
+ if (!ompi_op_is_commute (op )) {
994
988
OPAL_OUTPUT ((ompi_coll_base_framework .framework_output ,
995
989
"coll:base:allreduce_intra_redscat_allgather: rank %d/%d "
996
990
"count %zu switching to basic linear allreduce" ,
@@ -999,6 +993,12 @@ int ompi_coll_base_allreduce_intra_redscat_allgather(
999
993
op , comm , module );
1000
994
}
1001
995
996
+ /* Find nearest power-of-two less than or equal to comm_size */
997
+ int nsteps = opal_hibit (comm_size , comm -> c_cube_dim + 1 ); /* ilog2(comm_size) */
998
+ if (-1 == nsteps ) {
999
+ return MPI_ERR_ARG ;
1000
+ }
1001
+ int nprocs_pof2 = 1 << nsteps ; /* flp2(comm_size) */
1002
1002
int err = MPI_SUCCESS ;
1003
1003
ptrdiff_t lb , extent , dsize , gap = 0 ;
1004
1004
ompi_datatype_get_extent (dtype , & lb , & extent );
0 commit comments