39
39
#include "ompi/mca/coll/base/coll_base_functions.h"
40
40
#include "coll_base_topo.h"
41
41
#include "coll_base_util.h"
42
+ #include "opal/util/minmax.h"
42
43
43
44
/*
44
45
* We want to minimize the amount of temporary memory needed while allowing as many ranks
45
46
* to exchange data simultaneously. We use a variation of the ring algorithm, where in a
46
- * single step a process echange the data with both neighbors at distance k (on the left
47
+ * single step a process exchanges data with both neighbors at distance k (on the left
47
48
* and the right on a logical ring topology). With this approach we need to pack the data
48
49
* for only one of the two neighbors, as we can then use the original buffer (and datatype
49
50
* and count) to send the data to the other.
@@ -58,16 +59,22 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
58
59
ptrdiff_t extent ;
59
60
ompi_request_t * req = MPI_REQUEST_NULL ;
60
61
char * tmp_buffer ;
61
- size_t packed_size = 0 , max_size ;
62
+ size_t packed_size = 0 , max_size , type_size ;
62
63
opal_convertor_t convertor ;
63
64
64
65
/* Initialize. */
65
66
66
67
size = ompi_comm_size (comm );
67
68
rank = ompi_comm_rank (comm );
69
+ ompi_datatype_type_size (rdtype , & type_size );
68
70
69
- ompi_datatype_type_size (rdtype , & max_size );
70
- max_size *= rcounts [rank ];
71
+ for (i = 0 , max_size = 0 ; i < size ; ++ i ) {
72
+ if (i == rank ) {
73
+ continue ;
74
+ }
75
+ packed_size = rcounts [i ] * type_size ;
76
+ max_size = opal_max (packed_size , max_size );
77
+ }
71
78
72
79
/* Easy way out */
73
80
if ((1 == size ) || (0 == max_size ) ) {
0 commit comments