@@ -172,7 +172,10 @@ mca_coll_han_reduce_intra(const void *sbuf,
172
172
mca_coll_task_t * t_next_seg = OBJ_NEW (mca_coll_task_t );
173
173
/* Set up t_next_seg task arguments */
174
174
t -> cur_task = t_next_seg ;
175
- t -> sbuf = (char * ) t -> sbuf + extent * t -> seg_count ;
175
+ if (t -> sbuf != MPI_IN_PLACE ) {
176
+ t -> sbuf = (char * ) t -> sbuf + extent * t -> seg_count ;
177
+ }
178
+
176
179
if (up_rank == root_up_rank ) {
177
180
t -> rbuf = (char * ) t -> rbuf + extent * t -> seg_count ;
178
181
}
@@ -242,6 +245,7 @@ int mca_coll_han_reduce_t1_task(void *task_args) {
242
245
if (next_seg <= t -> num_segments - 1 ) {
243
246
int tmp_count = t -> seg_count ;
244
247
char * tmp_rbuf = NULL ;
248
+ char * tmp_sbuf = NULL ;
245
249
if (next_seg == t -> num_segments - 1 && t -> last_seg_count != t -> seg_count ) {
246
250
tmp_count = t -> last_seg_count ;
247
251
}
@@ -250,7 +254,10 @@ int mca_coll_han_reduce_t1_task(void *task_args) {
250
254
} else if (NULL != t -> rbuf ) {
251
255
tmp_rbuf = (char * )t -> rbuf + extent * t -> seg_count ;
252
256
}
253
- t -> low_comm -> c_coll -> coll_reduce ((char * ) t -> sbuf + extent * t -> seg_count ,
257
+
258
+ tmp_sbuf = (t -> sbuf == MPI_IN_PLACE ) ? MPI_IN_PLACE : (char * )t -> sbuf + extent * t -> seg_count ;
259
+
260
+ t -> low_comm -> c_coll -> coll_reduce ((char * ) tmp_sbuf ,
254
261
(char * ) tmp_rbuf , tmp_count ,
255
262
t -> dtype , t -> op , t -> root_low_rank , t -> low_comm ,
256
263
t -> low_comm -> c_coll -> coll_reduce_module );
0 commit comments