@@ -189,32 +189,11 @@ mca_coll_han_gather_intra(const void *sbuf, int scount,
189
189
190
190
ompi_request_wait (& temp_request , MPI_STATUS_IGNORE );
191
191
192
- /* Suppose, the expected message is 0 1 2 3 4 5 6 7 but the processes are
193
- * mapped on 2 nodes, for example |0 2 4 6| |1 3 5 7|. The messages from
194
- * low gather will be 0 2 4 6 and 1 3 5 7.
195
- * So the upper gather result is 0 2 4 6 1 3 5 7 which must be reordered.
196
- * The 3rd element (4) must be recopied at the 4th place. In general, the
197
- * i-th element must be recopied at the place given by the i-th entry of the
198
- * topology, which is topo[i*topolevel +1]
199
- */
200
192
/* reorder rbuf based on rank */
201
193
if (w_rank == root && !han_module -> is_mapbycore ) {
202
- ptrdiff_t rextent ;
203
- ompi_datatype_type_extent (rdtype , & rextent );
204
- for (int i = 0 ; i < w_size ; i ++ ) {
205
- OPAL_OUTPUT_VERBOSE ((30 , mca_coll_han_component .han_output ,
206
- "[%d]: Han Gather copy from %d to %d\n" ,
207
- w_rank ,
208
- i * 2 + 1 ,
209
- topo [i * 2 + 1 ]));
210
- ptrdiff_t block_size = rextent * (ptrdiff_t )rcount ;
211
- ptrdiff_t src_shift = block_size * i ;
212
- ptrdiff_t dest_shift = block_size * (ptrdiff_t )topo [i * 2 + 1 ];
213
- ompi_datatype_copy_content_same_ddt (rdtype ,
214
- (ptrdiff_t )rcount ,
215
- reorder_rbuf + src_shift ,
216
- (char * )rbuf + dest_shift );
217
- }
194
+ ompi_coll_han_reorder_gather (reorder_buf ,
195
+ rbuf , rcount , rdtype ,
196
+ comm , topo );
218
197
free (reorder_buf );
219
198
}
220
199
@@ -227,15 +206,8 @@ int mca_coll_han_gather_lg_task(void *task_args)
227
206
mca_coll_han_gather_args_t * t = (mca_coll_han_gather_args_t * ) task_args ;
228
207
OPAL_OUTPUT_VERBOSE ((30 , mca_coll_han_component .han_output , "[%d] Han Gather: lg\n" ,
229
208
t -> w_rank ));
230
- ompi_datatype_t * dtype ;
231
- size_t count ;
232
- if (t -> w_rank == t -> root ) {
233
- dtype = t -> rdtype ;
234
- count = t -> rcount ;
235
- } else {
236
- dtype = t -> sdtype ;
237
- count = t -> scount ;
238
- }
209
+ ompi_datatype_t * dtype = (t -> w_rank == t -> root ) ? t -> rdtype : t -> sdtype ;
210
+ size_t count = (t -> w_rank == t -> root ) ? t -> rcount : t -> scount ;
239
211
240
212
/* If the process is one of the node leader */
241
213
char * tmp_buf = NULL ;
@@ -299,15 +271,8 @@ int mca_coll_han_gather_ug_task(void *task_args)
299
271
OPAL_OUTPUT_VERBOSE ((30 , mca_coll_han_component .han_output ,
300
272
"[%d] Han Gather: ug noop\n" , t -> w_rank ));
301
273
} else {
302
- ompi_datatype_t * dtype ;
303
- size_t count ;
304
- if (t -> w_rank == t -> root ) {
305
- dtype = t -> rdtype ;
306
- count = t -> rcount ;
307
- } else {
308
- dtype = t -> sdtype ;
309
- count = t -> scount ;
310
- }
274
+ ompi_datatype_t * dtype = (t -> w_rank == t -> root ) ? t -> rdtype : t -> sdtype ;
275
+ size_t count = (t -> w_rank == t -> root ) ? t -> rcount : t -> scount ;
311
276
312
277
313
278
int low_size = ompi_comm_size (t -> low_comm );
@@ -375,17 +340,9 @@ mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
375
340
376
341
ompi_communicator_t * low_comm = han_module -> sub_comm [INTRA_NODE ];
377
342
ompi_communicator_t * up_comm = han_module -> sub_comm [INTER_NODE ];
378
- ompi_datatype_t * dtype ;
379
- size_t count ;
380
-
381
- if (w_rank == root ) {
382
- dtype = rdtype ;
383
- count = rcount ;
384
- } else {
385
- dtype = sdtype ;
386
- count = scount ;
387
- }
388
343
344
+ ompi_datatype_t * dtype = (w_rank == root ) ? rdtype : sdtype ;
345
+ size_t count = (w_rank == root ) ? rcount : scount ;
389
346
390
347
/* Get the 'virtual ranks' mapping corresponding to the communicators */
391
348
int * vranks = han_module -> cached_vranks ;
@@ -403,10 +360,10 @@ mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
403
360
char * reorder_buf = NULL ; // allocated memory
404
361
char * reorder_buf_start = NULL ; // start of the data
405
362
if (w_rank == root ) {
406
- if (MPI_IN_PLACE == sbuf ) {
363
+ if (MPI_IN_PLACE == sbuf ) {
407
364
ptrdiff_t rextent ;
408
365
ompi_datatype_type_extent (rdtype , & rextent );
409
- sbuf = rbuf + rextent * (ptrdiff_t )rcount * w_rank ;
366
+ sbuf = ( char * ) rbuf + rextent * (ptrdiff_t )rcount * w_rank ;
410
367
}
411
368
if (han_module -> is_mapbycore ) {
412
369
reorder_buf_start = (char * )rbuf ;
@@ -507,13 +464,11 @@ ompi_coll_han_reorder_gather(const void *sbuf,
507
464
int w_size = ompi_comm_size (comm );
508
465
ptrdiff_t rextent ;
509
466
ompi_datatype_type_extent (dtype , & rextent );
467
+ const ptrdiff_t block_size = rextent * (ptrdiff_t )count ;
510
468
for ( i = 0 ; i < w_size ; i ++ ) {
511
469
OPAL_OUTPUT_VERBOSE ((30 , mca_coll_han_component .han_output ,
512
- "[%d]: Future reorder from %d to %d\n" ,
513
- w_rank ,
514
- i * topolevel + 1 ,
515
- topo [i * topolevel + 1 ]));
516
- ptrdiff_t block_size = rextent * (ptrdiff_t )count ;
470
+ "[%d]: HAN Gather reorder from %d to %d\n" ,
471
+ w_rank , i , topo [i * topolevel + 1 ]));
517
472
ptrdiff_t src_shift = block_size * i ;
518
473
ptrdiff_t dest_shift = block_size * (ptrdiff_t )topo [i * topolevel + 1 ];
519
474
ompi_datatype_copy_content_same_ddt (dtype ,
0 commit comments