Skip to content

Commit c00f8b8

Browse files
authored
Merge pull request #11666 from bosilca/topic/fix_reordering_in_han_gather
Fix reordering of received data in the gather
2 parents b245ce4 + 2a93d97 commit c00f8b8

File tree

1 file changed

+14
-59
lines changed

1 file changed

+14
-59
lines changed

ompi/mca/coll/han/coll_han_gather.c

Lines changed: 14 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -189,32 +189,11 @@ mca_coll_han_gather_intra(const void *sbuf, int scount,
189189

190190
ompi_request_wait(&temp_request, MPI_STATUS_IGNORE);
191191

192-
/* Suppose, the expected message is 0 1 2 3 4 5 6 7 but the processes are
193-
* mapped on 2 nodes, for example |0 2 4 6| |1 3 5 7|. The messages from
194-
* low gather will be 0 2 4 6 and 1 3 5 7.
195-
* So the upper gather result is 0 2 4 6 1 3 5 7 which must be reordered.
196-
* The 3rd element (4) must be recopied at the 4th place. In general, the
197-
* i-th element must be recopied at the place given by the i-th entry of the
198-
* topology, which is topo[i*topolevel +1]
199-
*/
200192
/* reorder rbuf based on rank */
201193
if (w_rank == root && !han_module->is_mapbycore) {
202-
ptrdiff_t rextent;
203-
ompi_datatype_type_extent(rdtype, &rextent);
204-
for (int i = 0 ; i < w_size ; i++) {
205-
OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output,
206-
"[%d]: Han Gather copy from %d to %d\n",
207-
w_rank,
208-
i * 2 + 1,
209-
topo[i * 2 + 1]));
210-
ptrdiff_t block_size = rextent * (ptrdiff_t)rcount;
211-
ptrdiff_t src_shift = block_size * i;
212-
ptrdiff_t dest_shift = block_size * (ptrdiff_t)topo[i * 2 + 1];
213-
ompi_datatype_copy_content_same_ddt(rdtype,
214-
(ptrdiff_t)rcount,
215-
reorder_rbuf + src_shift,
216-
(char *)rbuf + dest_shift);
217-
}
194+
ompi_coll_han_reorder_gather(reorder_buf,
195+
rbuf, rcount, rdtype,
196+
comm, topo);
218197
free(reorder_buf);
219198
}
220199

@@ -227,15 +206,8 @@ int mca_coll_han_gather_lg_task(void *task_args)
227206
mca_coll_han_gather_args_t *t = (mca_coll_han_gather_args_t *) task_args;
228207
OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output, "[%d] Han Gather: lg\n",
229208
t->w_rank));
230-
ompi_datatype_t *dtype;
231-
size_t count;
232-
if (t->w_rank == t->root) {
233-
dtype = t->rdtype;
234-
count = t->rcount;
235-
} else {
236-
dtype = t->sdtype;
237-
count = t->scount;
238-
}
209+
ompi_datatype_t* dtype = (t->w_rank == t->root) ? t->rdtype : t->sdtype;
210+
size_t count = (t->w_rank == t->root) ? t->rcount : t->scount;
239211

240212
/* If the process is one of the node leader */
241213
char *tmp_buf = NULL;
@@ -299,15 +271,8 @@ int mca_coll_han_gather_ug_task(void *task_args)
299271
OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output,
300272
"[%d] Han Gather: ug noop\n", t->w_rank));
301273
} else {
302-
ompi_datatype_t *dtype;
303-
size_t count;
304-
if (t->w_rank == t->root) {
305-
dtype = t->rdtype;
306-
count = t->rcount;
307-
} else {
308-
dtype = t->sdtype;
309-
count = t->scount;
310-
}
274+
ompi_datatype_t* dtype = (t->w_rank == t->root) ? t->rdtype : t->sdtype;
275+
size_t count = (t->w_rank == t->root) ? t->rcount : t->scount;
311276

312277

313278
int low_size = ompi_comm_size(t->low_comm);
@@ -375,17 +340,9 @@ mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
375340

376341
ompi_communicator_t *low_comm = han_module->sub_comm[INTRA_NODE];
377342
ompi_communicator_t *up_comm = han_module->sub_comm[INTER_NODE];
378-
ompi_datatype_t *dtype;
379-
size_t count;
380-
381-
if (w_rank == root) {
382-
dtype = rdtype;
383-
count = rcount;
384-
} else {
385-
dtype = sdtype;
386-
count = scount;
387-
}
388343

344+
ompi_datatype_t* dtype = (w_rank == root) ? rdtype : sdtype;
345+
size_t count = (w_rank == root) ? rcount : scount;
389346

390347
/* Get the 'virtual ranks' mapping corresponding to the communicators */
391348
int *vranks = han_module->cached_vranks;
@@ -403,10 +360,10 @@ mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
403360
char *reorder_buf = NULL; // allocated memory
404361
char *reorder_buf_start = NULL; // start of the data
405362
if (w_rank == root) {
406-
if (MPI_IN_PLACE == sbuf) {
363+
if (MPI_IN_PLACE == sbuf) {
407364
ptrdiff_t rextent;
408365
ompi_datatype_type_extent(rdtype, &rextent);
409-
sbuf = rbuf + rextent * (ptrdiff_t)rcount * w_rank;
366+
sbuf = (char*)rbuf + rextent * (ptrdiff_t)rcount * w_rank;
410367
}
411368
if (han_module->is_mapbycore) {
412369
reorder_buf_start = (char *)rbuf;
@@ -507,13 +464,11 @@ ompi_coll_han_reorder_gather(const void *sbuf,
507464
int w_size = ompi_comm_size(comm);
508465
ptrdiff_t rextent;
509466
ompi_datatype_type_extent(dtype, &rextent);
467+
const ptrdiff_t block_size = rextent * (ptrdiff_t)count;
510468
for ( i = 0; i < w_size; i++ ) {
511469
OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output,
512-
"[%d]: Future reorder from %d to %d\n",
513-
w_rank,
514-
i * topolevel + 1,
515-
topo[i * topolevel + 1]));
516-
ptrdiff_t block_size = rextent * (ptrdiff_t)count;
470+
"[%d]: HAN Gather reorder from %d to %d\n",
471+
w_rank, i, topo[i * topolevel + 1]));
517472
ptrdiff_t src_shift = block_size * i;
518473
ptrdiff_t dest_shift = block_size * (ptrdiff_t)topo[i * topolevel + 1];
519474
ompi_datatype_copy_content_same_ddt(dtype,

0 commit comments

Comments
 (0)