@@ -112,15 +112,14 @@ static int nbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI
112
112
return OMPI_ERR_OUT_OF_RESOURCE ;
113
113
}
114
114
115
+ alg = NBC_ARED_RING ; /* default generic selection */
115
116
/* algorithm selection */
116
117
int nprocs_pof2 = opal_next_poweroftwo (p ) >> 1 ;
117
118
if (libnbc_iallreduce_algorithm == 0 ) {
118
119
if (p < 4 || size * count < 65536 || !ompi_op_is_commute (op ) || inplace ) {
119
120
alg = NBC_ARED_BINOMIAL ;
120
121
} else if (count >= nprocs_pof2 && ompi_op_is_commute (op )) {
121
122
alg = NBC_ARED_REDSCAT_ALLGATHER ;
122
- } else {
123
- alg = NBC_ARED_RING ;
124
123
}
125
124
} else {
126
125
if (libnbc_iallreduce_algorithm == 1 )
@@ -131,8 +130,6 @@ static int nbc_allreduce_init(const void* sendbuf, void* recvbuf, int count, MPI
131
130
alg = NBC_ARED_REDSCAT_ALLGATHER ;
132
131
else if (libnbc_iallreduce_algorithm == 4 )
133
132
alg = NBC_ARED_RDBL ;
134
- else
135
- alg = NBC_ARED_RING ;
136
133
}
137
134
#ifdef NBC_CACHE_SCHEDULE
138
135
/* search schedule in communicator specific tree */
@@ -633,38 +630,37 @@ static inline int allred_sched_recursivedoubling(int rank, int p, const void *se
633
630
return OMPI_SUCCESS ;
634
631
}
635
632
636
- static inline int allred_sched_ring (int r , int p , int count , MPI_Datatype datatype , const void * sendbuf , void * recvbuf , MPI_Op op ,
637
- int size , int ext , NBC_Schedule * schedule , void * tmpbuf ) {
633
+ static inline int
634
+ allred_sched_ring (int r , int p ,
635
+ int count , MPI_Datatype datatype , const void * sendbuf , void * recvbuf ,
636
+ MPI_Op op , int size , int ext , NBC_Schedule * schedule , void * tmpbuf )
637
+ {
638
638
int segsize , * segsizes , * segoffsets ; /* segment sizes and offsets per segment (number of segments == number of nodes) */
639
- int speer , rpeer ; /* send and recvpeer */
639
+ int speer , rpeer ; /* send and recv peers */
640
640
int res = OMPI_SUCCESS ;
641
641
642
- if (count == 0 ) {
642
+ if (0 == count ) {
643
643
return OMPI_SUCCESS ;
644
644
}
645
645
646
- segsizes = (int * ) malloc (sizeof (int ) * p );
647
- segoffsets = (int * ) malloc (sizeof (int ) * p );
648
- if (NULL == segsizes || NULL == segoffsets ) {
649
- free (segsizes );
650
- free (segoffsets );
646
+ segsizes = (int * ) malloc ((2 * p + 1 ) * sizeof (int ));
647
+ if (NULL == segsizes ) {
651
648
return OMPI_ERR_OUT_OF_RESOURCE ;
652
649
}
650
+ segoffsets = segsizes + p ;
653
651
654
- segsize = (count + p - 1 ) / p ; /* size of the segments */
652
+ segsize = count / p ; /* size of the segments across the last ranks.
653
+ The remainder will be evenly distributed across the smaller ranks */
655
654
656
655
segoffsets [0 ] = 0 ;
657
- for (int i = 0 , mycount = count ; i < p ; ++ i ) {
658
- mycount -= segsize ;
656
+ for (int i = 0 , mycount = count % p ; i < p ; ++ i ) {
659
657
segsizes [i ] = segsize ;
660
- if ( mycount < 0 ) {
661
- segsizes [i ] = segsize + mycount ;
662
- mycount = 0 ;
658
+ if ( mycount > 0 ) { /* We have leftover elements to distribute, one per segment */
659
+ segsizes [i ]++ ;
660
+ mycount -- ;
663
661
}
664
662
665
- if (i ) {
666
- segoffsets [i ] = segoffsets [i - 1 ] + segsizes [i - 1 ];
667
- }
663
+ segoffsets [i + 1 ] = segoffsets [i ] + segsizes [i ];
668
664
}
669
665
670
666
/* reduce peers */
@@ -786,28 +782,29 @@ static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datat
786
782
}
787
783
788
784
if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
789
- break ;
785
+ goto free_and_return ;
790
786
}
791
-
792
- res = NBC_Sched_recv ((char * ) recvbuf + roffset , false, segsizes [relement ], datatype , rpeer ,
793
- schedule , true);
794
- if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
795
- break ;
787
+ if ( recvbuf != sendbuf ) { /* check for MPI_IN_PLACE */
788
+ res = NBC_Sched_recv ((char * ) recvbuf + roffset , false, segsizes [relement ], datatype , rpeer ,
789
+ schedule , true);
790
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
791
+ goto free_and_return ;
792
+ }
793
+ res = NBC_Sched_op ((char * ) sendbuf + roffset , false, (char * ) recvbuf + roffset , false,
794
+ segsizes [relement ], datatype , op , schedule , true);
795
+ } else {
796
+ res = NBC_Sched_recv ((char * ) tmpbuf , false, segsizes [relement ], datatype , rpeer ,
797
+ schedule , true);
798
+ if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
799
+ goto free_and_return ;
800
+ }
801
+ res = NBC_Sched_op ((char * ) tmpbuf , false, (char * ) recvbuf + roffset , false,
802
+ segsizes [relement ], datatype , op , schedule , true);
796
803
}
797
-
798
- res = NBC_Sched_op ((char * ) sendbuf + roffset , false, (char * ) recvbuf + roffset , false,
799
- segsizes [relement ], datatype , op , schedule , true);
800
804
if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
801
- break ;
805
+ goto free_and_return ;
802
806
}
803
807
}
804
-
805
- if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
806
- free (segsizes );
807
- free (segoffsets );
808
- return res ;
809
- }
810
-
811
808
for (int round = p - 1 ; round < 2 * p - 2 ; ++ round ) {
812
809
int selement = (r + 1 - round + 2 * p /*2*p avoids negative mod*/ )%p ; /* the element I am sending */
813
810
int soffset = segoffsets [selement ]* ext ;
@@ -819,16 +816,14 @@ static inline int allred_sched_ring (int r, int p, int count, MPI_Datatype datat
819
816
if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
820
817
break ;
821
818
}
822
-
823
819
res = NBC_Sched_recv ((char * ) recvbuf + roffset , false, segsizes [relement ], datatype , rpeer ,
824
820
schedule , true);
825
821
if (OPAL_UNLIKELY (OMPI_SUCCESS != res )) {
826
822
break ;
827
823
}
828
824
}
829
-
825
+ free_and_return :
830
826
free (segsizes );
831
- free (segoffsets );
832
827
833
828
return res ;
834
829
}
0 commit comments