Skip to content

Commit 71ce6aa

Browse files
author
William Zhang
committed
coll/han: Fix simple gather in place
The simple gather path never picked up the MPI_IN_PLACE handling that had been added to the non-simple path, which caused data corruption when the root passed MPI_IN_PLACE as its send buffer. Signed-off-by: William Zhang <wilzhang@amazon.com>
1 parent ca3a7d9 commit 71ce6aa

File tree

1 file changed

+15
-12
lines changed

1 file changed

+15
-12
lines changed

ompi/mca/coll/han/coll_han_gather.c

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -251,18 +251,16 @@ int mca_coll_han_gather_lg_task(void *task_args)
251251
&rgap);
252252
tmp_buf = (char *) malloc(rsize);
253253
tmp_rbuf = tmp_buf - rgap;
254-
if (t->w_rank == t->root) {
255-
if (MPI_IN_PLACE == t->sbuf) {
256-
ptrdiff_t rextent;
257-
ompi_datatype_type_extent(dtype, &rextent);
258-
ptrdiff_t block_size = rextent * (ptrdiff_t)count;
259-
ptrdiff_t src_shift = block_size * t->w_rank;
260-
ptrdiff_t dest_shift = block_size * low_rank;
261-
ompi_datatype_copy_content_same_ddt(dtype,
262-
(ptrdiff_t)count,
263-
tmp_rbuf + dest_shift,
264-
(char *)t->rbuf + src_shift);
265-
}
254+
if (t->w_rank == t->root && MPI_IN_PLACE == t->sbuf) {
255+
ptrdiff_t rextent;
256+
ompi_datatype_type_extent(dtype, &rextent);
257+
ptrdiff_t block_size = rextent * (ptrdiff_t)count;
258+
ptrdiff_t src_shift = block_size * t->w_rank;
259+
ptrdiff_t dest_shift = block_size * low_rank;
260+
ompi_datatype_copy_content_same_ddt(dtype,
261+
(ptrdiff_t)count,
262+
tmp_rbuf + dest_shift,
263+
(char *)t->rbuf + src_shift);
266264
}
267265
}
268266

@@ -405,6 +403,11 @@ mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
405403
char *reorder_buf = NULL; // allocated memory
406404
char *reorder_buf_start = NULL; // start of the data
407405
if (w_rank == root) {
406+
if (MPI_IN_PLACE == sbuf) {
407+
ptrdiff_t rextent;
408+
ompi_datatype_type_extent(rdtype, &rextent);
409+
sbuf = rbuf + rextent * (ptrdiff_t)rcount * w_rank;
410+
}
408411
if (han_module->is_mapbycore) {
409412
reorder_buf_start = (char *)rbuf;
410413
} else {

0 commit comments

Comments
 (0)