@@ -66,12 +66,14 @@ ompi_coll_base_scatter_intra_binomial(
66
66
int root , struct ompi_communicator_t * comm ,
67
67
mca_coll_base_module_t * module )
68
68
{
69
- int line = -1 , rank , vrank , size , err ;
70
- char * ptmp , * tempbuf = NULL ;
71
- MPI_Status status ;
72
69
mca_coll_base_module_t * base_module = (mca_coll_base_module_t * )module ;
73
70
mca_coll_base_comm_t * data = base_module -> base_data ;
74
- ptrdiff_t sextent , rextent , ssize , rsize , sgap = 0 , rgap = 0 ;
71
+ int line = -1 , rank , vrank , size , err , packed_size , curr_count ;
72
+ char * ptmp , * tempbuf = NULL ;
73
+ size_t max_data , packed_sizet ;
74
+ opal_convertor_t convertor ;
75
+ ptrdiff_t sextent ;
76
+ MPI_Status status ;
75
77
76
78
size = ompi_comm_size (comm );
77
79
rank = ompi_comm_rank (comm );
@@ -89,99 +91,95 @@ ompi_coll_base_scatter_intra_binomial(
89
91
vrank = (rank - root + size ) % size ;
90
92
ptmp = (char * )rbuf ; /* by default suppose leaf nodes, just use rbuf */
91
93
92
- if (rank == root ) {
94
+ if ( vrank % 2 ) { /* leaves */
95
+ /* recv from parent on leaf nodes */
96
+ err = MCA_PML_CALL (recv (rbuf , rcount , rdtype , bmtree -> tree_prev ,
97
+ MCA_COLL_BASE_TAG_SCATTER , comm , & status ));
98
+ if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
99
+ return MPI_SUCCESS ;
100
+
101
+ }
102
+ OBJ_CONSTRUCT ( & convertor , opal_convertor_t );
103
+ if (rank == root ) { /* root and non-leafs */
93
104
ompi_datatype_type_extent (sdtype , & sextent );
94
- ssize = opal_datatype_span (& sdtype -> super , (int64_t )scount * size , & sgap );
95
- if (0 == root ) {
96
- /* root on 0, just use the send buffer */
97
- ptmp = (char * )sbuf ;
98
- if (rbuf != MPI_IN_PLACE ) {
99
- /* local copy to rbuf */
100
- err = ompi_datatype_sndrcv (sbuf , scount , sdtype ,
101
- rbuf , rcount , rdtype );
102
- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
103
- }
104
- } else {
105
- /* root is not on 0, allocate temp buffer for send */
106
- tempbuf = (char * )malloc (ssize );
105
+ ptmp = (char * )sbuf ; /* if root == 0, just use the send buffer */
106
+ if (0 != root ) {
107
+ opal_convertor_copy_and_prepare_for_send ( ompi_mpi_local_convertor , & (sdtype -> super ),
108
+ scount * size , sbuf , 0 , & convertor );
109
+ opal_convertor_get_packed_size ( & convertor , & packed_sizet );
110
+ packed_size = (int )packed_sizet ;
111
+ packed_sizet = packed_sizet / size ;
112
+ ptmp = tempbuf = (char * )malloc (packed_size );
107
113
if (NULL == tempbuf ) {
108
114
err = OMPI_ERR_OUT_OF_RESOURCE ; line = __LINE__ ; goto err_hndl ;
109
115
}
110
- ptmp = tempbuf - sgap ;
111
-
112
- /* and rotate data so they will eventually in the right place */
113
- err = ompi_datatype_copy_content_same_ddt (sdtype , (ptrdiff_t )scount * (ptrdiff_t )(size - root ),
114
- ptmp , (char * ) sbuf + sextent * (ptrdiff_t )root * (ptrdiff_t )scount );
115
- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
116
-
117
- err = ompi_datatype_copy_content_same_ddt (sdtype , (ptrdiff_t )scount * (ptrdiff_t )root ,
118
- ptmp + sextent * (ptrdiff_t )scount * (ptrdiff_t )(size - root ), (char * )sbuf );
119
- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
120
-
121
- if (rbuf != MPI_IN_PLACE ) {
122
- /* local copy to rbuf */
123
- err = ompi_datatype_sndrcv (ptmp , scount , sdtype ,
124
- rbuf , rcount , rdtype );
125
- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
126
- }
116
+ /* rotate data so they will eventually be in the right place */
117
+ struct iovec iov [1 ];
118
+ uint32_t iov_size = 1 ;
119
+
120
+ iov [0 ].iov_base = ptmp + (ptrdiff_t )(size - root ) * packed_sizet ;
121
+ iov [0 ].iov_len = max_data = packed_sizet * (ptrdiff_t )root ;
122
+ opal_convertor_pack (& convertor , iov , & iov_size , & max_data );
123
+
124
+ iov [0 ].iov_base = ptmp ;
125
+ iov [0 ].iov_len = max_data = packed_sizet * (ptrdiff_t )(size - root );
126
+ opal_convertor_pack (& convertor , iov , & iov_size , & max_data );
127
+ OBJ_DESTRUCT (& convertor );
128
+
129
+ sdtype = MPI_PACKED ;
130
+ sextent = 1 ; /* bytes */
131
+ scount = packed_size / size ;
127
132
}
128
- } else if (!(vrank % 2 )) {
129
- /* non-root, non-leaf nodes, allocate temp buffer for recv
130
- * the most we need is rcount*size/2 */
131
- ompi_datatype_type_extent (rdtype , & rextent );
132
- rsize = opal_datatype_span (& rdtype -> super , (int64_t )rcount * size , & rgap );
133
- tempbuf = (char * )malloc (rsize / 2 );
133
+ curr_count = scount * size ;
134
+ } else { /* (!(vrank % 2)) */
135
+ opal_convertor_copy_and_prepare_for_send ( ompi_mpi_local_convertor , & (rdtype -> super ),
136
+ rcount , NULL , 0 , & convertor );
137
+ opal_convertor_get_packed_size ( & convertor , & packed_sizet );
138
+ scount = (int )packed_sizet ;
139
+
140
+ sdtype = MPI_PACKED ; /* default to MPI_PACKED as the send type */
141
+ packed_size = scount * (size + 1 )/2 ; /* non-root, non-leaf nodes, allocate temp buffer for recv
142
+ * the most we need is rcount*size/2 */
143
+ ptmp = tempbuf = (char * )malloc (packed_size );
134
144
if (NULL == tempbuf ) {
135
145
err = OMPI_ERR_OUT_OF_RESOURCE ; line = __LINE__ ; goto err_hndl ;
136
146
}
137
- ptmp = tempbuf - rgap ;
138
- sdtype = rdtype ;
139
- scount = rcount ;
140
- sextent = rextent ;
141
- }
142
147
143
- int curr_count = (rank == root ) ? scount * size : 0 ;
144
- if (!(vrank % 2 )) {
145
- if (rank != root ) {
146
- /* recv from parent on non-root */
147
- err = MCA_PML_CALL (recv (ptmp , (ptrdiff_t )rcount * (ptrdiff_t )size , rdtype , bmtree -> tree_prev ,
148
- MCA_COLL_BASE_TAG_SCATTER , comm , & status ));
149
- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
150
-
151
- /* Get received count */
152
- size_t rdtype_size ;
153
- ompi_datatype_type_size (rdtype , & rdtype_size );
154
- curr_count = (int )(status ._ucount / rdtype_size );
155
-
156
- /* local copy to rbuf */
157
- err = ompi_datatype_sndrcv (ptmp , scount , sdtype ,
158
- rbuf , rcount , rdtype );
159
- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
160
- }
161
- /* send to children on all non-leaf */
162
- for (int i = bmtree -> tree_nextsize - 1 ; i >= 0 ; i -- ) {
163
- /* figure out how much data I have to send to this child */
164
- int vchild = (bmtree -> tree_next [i ] - root + size ) % size ;
165
- int send_count = vchild - vrank ;
166
- if (send_count > size - vchild )
167
- send_count = size - vchild ;
168
- send_count *= scount ;
169
- err = MCA_PML_CALL (send (ptmp + (ptrdiff_t )(curr_count - send_count ) * sextent ,
170
- send_count , sdtype , bmtree -> tree_next [i ],
171
- MCA_COLL_BASE_TAG_SCATTER ,
172
- MCA_PML_BASE_SEND_STANDARD , comm ));
173
- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
174
- curr_count -= send_count ;
175
- }
176
- if (NULL != tempbuf )
177
- free (tempbuf );
178
- } else {
179
- /* recv from parent on leaf nodes */
180
- err = MCA_PML_CALL (recv (ptmp , rcount , rdtype , bmtree -> tree_prev ,
148
+ /* recv from parent on non-root */
149
+ err = MCA_PML_CALL (recv (ptmp , (ptrdiff_t )packed_size , MPI_PACKED , bmtree -> tree_prev ,
181
150
MCA_COLL_BASE_TAG_SCATTER , comm , & status ));
182
151
if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
152
+
153
+ /* Get received count */
154
+ curr_count = (int )status ._ucount ; /* no need for conversion, work in bytes */
155
+ sextent = 1 ; /* bytes */
183
156
}
184
157
158
+ if (rbuf != MPI_IN_PLACE ) { /* local copy to rbuf */
159
+ err = ompi_datatype_sndrcv (ptmp , scount , sdtype ,
160
+ rbuf , rcount , rdtype );
161
+ if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
162
+ }
163
+
164
+ /* send to children on all non-leaf */
165
+ for (int i = bmtree -> tree_nextsize - 1 ; i >= 0 ; i -- ) {
166
+ /* figure out how much data I have to send to this child */
167
+ int vchild = (bmtree -> tree_next [i ] - root + size ) % size ;
168
+ int send_count = vchild - vrank ;
169
+ if (send_count > size - vchild )
170
+ send_count = size - vchild ;
171
+ send_count *= scount ;
172
+
173
+ err = MCA_PML_CALL (send (ptmp + (ptrdiff_t )(curr_count - send_count ) * sextent ,
174
+ send_count , sdtype , bmtree -> tree_next [i ],
175
+ MCA_COLL_BASE_TAG_SCATTER ,
176
+ MCA_PML_BASE_SEND_STANDARD , comm ));
177
+ if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
178
+ curr_count -= send_count ;
179
+ }
180
+ if (NULL != tempbuf )
181
+ free (tempbuf );
182
+
185
183
return MPI_SUCCESS ;
186
184
187
185
err_hndl :
0 commit comments