@@ -93,12 +93,21 @@ int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
93
93
rc = MCA_PML_CALL (send (sendbuf , sendcount , sendtype , dest ,
94
94
sendtag , MCA_PML_BASE_SEND_STANDARD , comm ));
95
95
#if OPAL_ENABLE_FT_MPI
96
- /* If ULFM is enabled we need to wait for the posted receive to
97
- * complete, hence we cannot return here */
98
- rcs = rc ;
99
- #else
96
+ if (OPAL_UNLIKELY (MPI_ERR_PROC_FAILED == rc )) {
97
+ /* If this is a recoverable error (e.g., ULFM error class),
98
+ * we need to wait for the posted receive to complete so that the
99
+ * receive buffer doesn't get updated after the completion of the call.
100
+ * Hence we cannot return immediately; we need to wait on the recv
101
+ * req first. */
102
+ rcs = rc ;
103
+ }
104
+ else /* else intentionally spills outside ifdef */
105
+ #endif
106
+ /* If the error semantics do not guarantee the completion of the wait on
107
+ * the recv-req for that error class, we just invoke the errhandler asap
108
+ * to avoid hanging. Note that in this case we are returning the recv
109
+ * buffer in an undefined state and the application may not recover. */
100
110
OMPI_ERRHANDLER_CHECK (rc , comm , rc , FUNC_NAME );
101
- #endif /* OPAL_ENABLE_FT_MPI */
102
111
}
103
112
104
113
if (source != MPI_PROC_NULL ) { /* wait for recv */
0 commit comments