Skip to content

Commit 6510f64

Browse files
Merge pull request #8875 from ggouaillardet/topic/sendrecv_replace_big
MPI_Sendrecv_replace: correctly handle large data
2 parents 5a2c9ed + 6a11873 commit 6510f64

File tree

1 file changed

+44
-11
lines changed

1 file changed

+44
-11
lines changed

ompi/mpi/c/sendrecv_replace.c

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
1212
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
13-
* Copyright (c) 2015 Research Organization for Information Science
14-
* and Technology (RIST). All rights reserved.
13+
* Copyright (c) 2015-2021 Research Organization for Information Science
14+
* and Technology (RIST). All rights reserved.
1515
* Copyright (c) 2017 IBM Corporation. All rights reserved.
1616
* $COPYRIGHT$
1717
*
@@ -47,7 +47,11 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype,
4747
MPI_Comm comm, MPI_Status *status)
4848

4949
{
50+
ompi_request_t* req;
5051
int rc = MPI_SUCCESS;
52+
#if OPAL_ENABLE_FT_MPI
53+
int rcs = MPI_SUCCESS;
54+
#endif
5155

5256
SPC_RECORD(OMPI_SPC_SENDRECV_REPLACE, 1);
5357

@@ -104,7 +108,6 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype,
104108
struct iovec iov = { .iov_base = packed_data, .iov_len = sizeof(packed_data) };
105109
size_t packed_size, max_data;
106110
uint32_t iov_count;
107-
ompi_status_public_t recv_status;
108111
ompi_proc_t* proc = ompi_comm_peer_lookup(comm, dest);
109112
if(proc == NULL) {
110113
rc = MPI_ERR_RANK;
@@ -116,7 +119,7 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype,
116119
opal_convertor_copy_and_prepare_for_send( proc->super.proc_convertor, &(datatype->super),
117120
count, buf, 0, &convertor );
118121

119-
/* setup a buffer for recv */
122+
/* set up a temporary buffer to send */
120123
opal_convertor_get_packed_size( &convertor, &packed_size );
121124
if( packed_size > sizeof(packed_data) ) {
122125
rc = PMPI_Alloc_mem(packed_size, MPI_INFO_NULL, &iov.iov_base);
@@ -130,15 +133,45 @@ int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype,
130133
iov_count = 1;
131134
rc = opal_convertor_pack(&convertor, &iov, &iov_count, &max_data);
132135

133-
/* recv into temporary buffer */
134-
rc = PMPI_Sendrecv( iov.iov_base, packed_size, MPI_PACKED, dest, sendtag, buf, count,
135-
datatype, source, recvtag, comm, &recv_status );
136+
/* receive into the buffer */
137+
rc = MCA_PML_CALL(irecv(buf, count, datatype,
138+
source, recvtag, comm, &req));
139+
if(OMPI_SUCCESS != rc) {
140+
goto cleanup_and_return;
141+
}
136142

137-
cleanup_and_return:
138-
/* return status to user */
139-
if(status != MPI_STATUS_IGNORE) {
140-
*status = recv_status;
143+
/* send from the temporary buffer */
144+
rc = MCA_PML_CALL(send(iov.iov_base, packed_size, MPI_PACKED, dest,
145+
sendtag, MCA_PML_BASE_SEND_STANDARD, comm));
146+
#if OPAL_ENABLE_FT_MPI
147+
/* If ULFM is enabled we need to wait for the posted receive to
148+
* complete, hence we cannot return here */
149+
rcs = rc;
150+
#else
151+
if(OMPI_SUCCESS != rc) {
152+
goto cleanup_and_return;
153+
}
154+
#endif /* OPAL_ENABLE_FT_MPI */
155+
156+
rc = ompi_request_wait(&req, status);
157+
#if OPAL_ENABLE_FT_MPI
158+
/* Sendrecv_replace never returns ERR_PROC_FAILED_PENDING because it is
159+
* blocking. Lets complete now that irecv and promote the error
160+
* to ERR_PROC_FAILED */
161+
if( OPAL_UNLIKELY(MPI_ERR_PROC_FAILED_PENDING == rc) ) {
162+
ompi_request_cancel(req);
163+
ompi_request_wait(&req, MPI_STATUS_IGNORE);
164+
rc = MPI_ERR_PROC_FAILED;
141165
}
166+
#endif
167+
168+
#if OPAL_ENABLE_FT_MPI
169+
if( OPAL_UNLIKELY(MPI_SUCCESS != rcs && MPI_SUCCESS == rc) ) {
170+
rc = rcs;
171+
}
172+
#endif
173+
174+
cleanup_and_return:
142175

143176
/* release resources */
144177
if(packed_size > sizeof(packed_data)) {

0 commit comments

Comments
 (0)