Skip to content

Commit d6c573d

Browse files
authored
Merge pull request #9128 from abouteiller/bugfix/err-not-in-status
Correct some operations modifying status.MPI_ERROR when it is disallowed
2 parents 2f1bd62 + 3797220 commit d6c573d

21 files changed

+155
-118
lines changed

ompi/mca/mtl/ofi/mtl_ofi.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
* Copyright (c) 2018-2020 Amazon.com, Inc. or its affiliates. All rights
88
* reserved.
99
* Copyright (c) 2021 Cisco Systems, Inc. All rights reserved
10+
* Copyright (c) 2021 The University of Tennessee and The University
11+
* of Tennessee Research Foundation. All rights
12+
* reserved.
1013
* $COPYRIGHT$
1114
*
1215
* Additional copyrights may follow
@@ -1214,7 +1217,7 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
12141217
*flag = ofi_req.match_state;
12151218
if (1 == *flag) {
12161219
if (MPI_STATUS_IGNORE != status) {
1217-
*status = ofi_req.status;
1220+
OMPI_COPY_STATUS(status, ofi_req.status, false);
12181221
}
12191222
}
12201223

@@ -1306,7 +1309,7 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
13061309
*matched = ofi_req->match_state;
13071310
if (1 == *matched) {
13081311
if (MPI_STATUS_IGNORE != status) {
1309-
*status = ofi_req->status;
1312+
OMPI_COPY_STATUS(status, ofi_req->status, false);
13101313
}
13111314

13121315
(*message) = ompi_message_alloc();

ompi/mca/mtl/portals4/mtl_portals4_probe.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2010 The University of Tennessee and The University
5+
* Copyright (c) 2004-2021 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -120,7 +120,9 @@ ompi_mtl_portals4_iprobe(struct mca_mtl_base_module_t* mtl,
120120

121121
*flag = request.found_match;
122122
if (1 == *flag) {
123-
*status = request.status;
123+
if (MPI_STATUS_IGNORE != status) {
124+
OMPI_COPY_STATUS(status, request.status, false);
125+
}
124126
}
125127

126128
return OMPI_SUCCESS;
@@ -198,7 +200,9 @@ ompi_mtl_portals4_improbe(struct mca_mtl_base_module_t *mtl,
198200

199201
*matched = request.found_match;
200202
if (1 == *matched) {
201-
*status = request.status;
203+
if (MPI_STATUS_IGNORE != status) {
204+
OMPI_COPY_STATUS(status, request.status, false);
205+
}
202206

203207
(*message) = ompi_message_alloc();
204208
if (NULL == (*message)) {

ompi/mca/pml/cm/pml_cm.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* Copyright (c) 2004-2006 The Regents of the University of California.
33
* All rights reserved.
4-
* Copyright (c) 2004-2007 The University of Tennessee and The University
4+
* Copyright (c) 2004-2021 The University of Tennessee and The University
55
* of Tennessee Research Foundation. All rights
66
* reserved.
77
* Copyright (c) 2015 Research Organization for Information Science
@@ -212,8 +212,8 @@ mca_pml_cm_recv(void *addr,
212212

213213
ompi_request_wait_completion(&req.req_ompi);
214214

215-
if (NULL != status) { /* return status */
216-
*status = req.req_ompi.req_status;
215+
if (MPI_STATUS_IGNORE != status) {
216+
OMPI_COPY_STATUS(status, req.req_ompi.req_status, false);
217217
}
218218
ret = req.req_ompi.req_status.MPI_ERROR;
219219
OBJ_DESTRUCT(&convertor);
@@ -548,8 +548,8 @@ mca_pml_cm_mrecv(void *buf,
548548

549549
ompi_request_wait_completion(&recvreq->req_base.req_ompi);
550550

551-
if (NULL != status) { /* return status */
552-
*status = recvreq->req_base.req_ompi.req_status;
551+
if (MPI_STATUS_IGNORE != status) {
552+
OMPI_COPY_STATUS(status, recvreq->req_base.req_ompi.req_status, false);
553553
}
554554
ret = recvreq->req_base.req_ompi.req_status.MPI_ERROR;
555555
ompi_request_free( (ompi_request_t**)&recvreq );

ompi/mca/pml/ob1/pml_ob1_iprobe.c

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2016 The University of Tennessee and The University
5+
* Copyright (c) 2004-2021 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -40,10 +40,10 @@ int mca_pml_ob1_iprobe(int src,
4040
MCA_PML_OB1_RECV_REQUEST_START(&recvreq);
4141

4242
if( REQUEST_COMPLETE( &(recvreq.req_recv.req_base.req_ompi)) ) {
43-
if( NULL != status ) {
44-
*status = recvreq.req_recv.req_base.req_ompi.req_status;
45-
}
4643
rc = recvreq.req_recv.req_base.req_ompi.req_status.MPI_ERROR;
44+
if( MPI_STATUS_IGNORE != status ) {
45+
OMPI_COPY_STATUS(status, recvreq.req_recv.req_base.req_ompi.req_status, false);
46+
}
4747
*matched = 1;
4848
} else {
4949
*matched = 0;
@@ -71,8 +71,8 @@ int mca_pml_ob1_probe(int src,
7171

7272
ompi_request_wait_completion(&recvreq.req_recv.req_base.req_ompi);
7373
rc = recvreq.req_recv.req_base.req_ompi.req_status.MPI_ERROR;
74-
if (NULL != status) {
75-
*status = recvreq.req_recv.req_base.req_ompi.req_status;
74+
if( MPI_STATUS_IGNORE != status ) {
75+
OMPI_COPY_STATUS(status, recvreq.req_recv.req_base.req_ompi.req_status, false);
7676
}
7777

7878
MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv );
@@ -107,17 +107,16 @@ mca_pml_ob1_improbe(int src,
107107
MCA_PML_OB1_RECV_REQUEST_START(recvreq);
108108

109109
if( REQUEST_COMPLETE( &(recvreq->req_recv.req_base.req_ompi)) ) {
110-
if( NULL != status ) {
111-
*status = recvreq->req_recv.req_base.req_ompi.req_status;
110+
rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
111+
if( MPI_STATUS_IGNORE != status ) {
112+
OMPI_COPY_STATUS(status, recvreq->req_recv.req_base.req_ompi.req_status, false);
112113
}
113114
*matched = 1;
114115

115116
(*message)->comm = comm;
116117
(*message)->req_ptr = recvreq;
117118
(*message)->peer = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
118119
(*message)->count = recvreq->req_recv.req_base.req_ompi.req_status._ucount;
119-
120-
rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
121120
} else {
122121
*matched = 0;
123122

@@ -162,9 +161,8 @@ mca_pml_ob1_mprobe(int src,
162161

163162
ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi);
164163
rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
165-
166-
if( NULL != status ) {
167-
*status = recvreq->req_recv.req_base.req_ompi.req_status;
164+
if( MPI_STATUS_IGNORE != status ) {
165+
OMPI_COPY_STATUS(status, recvreq->req_recv.req_base.req_ompi.req_status, false);
168166
}
169167

170168
if( OMPI_SUCCESS == rc ) {

ompi/mca/pml/ob1/pml_ob1_irecv.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2016 The University of Tennessee and The University
6+
* Copyright (c) 2004-2021 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -145,8 +145,8 @@ int mca_pml_ob1_recv(void *addr,
145145
);
146146
}
147147

148-
if (NULL != status) { /* return status */
149-
*status = recvreq->req_recv.req_base.req_ompi.req_status;
148+
if (MPI_STATUS_IGNORE != status) {
149+
OMPI_COPY_STATUS(status, recvreq->req_recv.req_base.req_ompi.req_status, false);
150150
}
151151

152152
rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
@@ -362,8 +362,8 @@ mca_pml_ob1_mrecv( void *buf,
362362

363363
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
364364

365-
if (NULL != status) { /* return status */
366-
*status = recvreq->req_recv.req_base.req_ompi.req_status;
365+
if (MPI_STATUS_IGNORE != status) {
366+
OMPI_COPY_STATUS(status, recvreq->req_recv.req_base.req_ompi.req_status, false);
367367
}
368368
rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
369369
#if OPAL_ENABLE_FT_MPI

ompi/mca/pml/ucx/pml_ucx.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (C) 2001-2011 Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
3-
* Copyright (c) 2016-2020 The University of Tennessee and The University
3+
* Copyright (c) 2016-2021 The University of Tennessee and The University
44
* of Tennessee Research Foundation. All rights
55
* reserved.
66
* Copyright (c) 2018-2019 Research Organization for Information Science
@@ -633,7 +633,7 @@ int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src
633633
MCA_COMMON_UCX_PROGRESS_LOOP(ompi_pml_ucx.ucp_worker) {
634634
status = ucp_request_test(req, &info);
635635
if (status != UCS_INPROGRESS) {
636-
result = mca_pml_ucx_set_recv_status_safe(mpi_status, status, &info);
636+
result = mca_pml_ucx_set_recv_status_public(mpi_status, status, &info);
637637

638638
#if SPC_ENABLE == 1
639639
size_t dt_size;
@@ -977,7 +977,7 @@ int mca_pml_ucx_iprobe(int src, int tag, struct ompi_communicator_t* comm,
977977
0, &info);
978978
if (ucp_msg != NULL) {
979979
*matched = 1;
980-
mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info);
980+
mca_pml_ucx_set_recv_status_public(mpi_status, UCS_OK, &info);
981981
} else {
982982
(++progress_count % opal_common_ucx.progress_iterations) ?
983983
(void)ucp_worker_progress(ompi_pml_ucx.ucp_worker) : opal_progress();
@@ -1001,7 +1001,7 @@ int mca_pml_ucx_probe(int src, int tag, struct ompi_communicator_t* comm,
10011001
ucp_msg = ucp_tag_probe_nb(ompi_pml_ucx.ucp_worker, ucp_tag,
10021002
ucp_tag_mask, 0, &info);
10031003
if (ucp_msg != NULL) {
1004-
mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info);
1004+
mca_pml_ucx_set_recv_status_public(mpi_status, UCS_OK, &info);
10051005
return OMPI_SUCCESS;
10061006
}
10071007
}
@@ -1026,7 +1026,7 @@ int mca_pml_ucx_improbe(int src, int tag, struct ompi_communicator_t* comm,
10261026
PML_UCX_MESSAGE_NEW(comm, ucp_msg, &info, message);
10271027
PML_UCX_VERBOSE(8, "got message %p (%p)", (void*)*message, (void*)ucp_msg);
10281028
*matched = 1;
1029-
mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info);
1029+
mca_pml_ucx_set_recv_status_public(mpi_status, UCS_OK, &info);
10301030
} else {
10311031
(++progress_count % opal_common_ucx.progress_iterations) ?
10321032
(void)ucp_worker_progress(ompi_pml_ucx.ucp_worker) : opal_progress();
@@ -1052,7 +1052,7 @@ int mca_pml_ucx_mprobe(int src, int tag, struct ompi_communicator_t* comm,
10521052
if (ucp_msg != NULL) {
10531053
PML_UCX_MESSAGE_NEW(comm, ucp_msg, &info, message);
10541054
PML_UCX_VERBOSE(8, "got message %p (%p)", (void*)*message, (void*)ucp_msg);
1055-
mca_pml_ucx_set_recv_status_safe(mpi_status, UCS_OK, &info);
1055+
mca_pml_ucx_set_recv_status_public(mpi_status, UCS_OK, &info);
10561056
return OMPI_SUCCESS;
10571057
}
10581058
}

ompi/mca/pml/ucx/pml_ucx_request.h

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
3-
* Copyright (c) 2016 The University of Tennessee and The University
3+
* Copyright (c) 2016-2021 The University of Tennessee and The University
44
* of Tennessee Research Foundation. All rights
55
* reserved.
66
* $COPYRIGHT$
@@ -149,6 +149,12 @@ static inline void mca_pml_ucx_request_reset(ompi_request_t *req)
149149
req->req_complete = REQUEST_PENDING;
150150
}
151151

152+
/* Use when setting a request's status field.
153+
* Note that a new function 'mca_mpl_ucx_set_send_status_public' shall
154+
* be created and used instead if updating a publicly visible status becomes
155+
* necessary (i.e., the status argument in an user-visible procedure), see the
156+
* recv_status case below for rationale.
157+
*/
152158
__opal_attribute_always_inline__
153159
static inline void mca_pml_ucx_set_send_status(ompi_status_public_t* mpi_status,
154160
ucs_status_t status)
@@ -163,6 +169,11 @@ static inline void mca_pml_ucx_set_send_status(ompi_status_public_t* mpi_status,
163169
}
164170
}
165171

172+
/* Use when setting a request's status field.
173+
* Note that the next function 'mca_mpl_ucx_set_recv_status_public' shall
174+
* be used instead when updating a publicly visible status (i.e., the
175+
* status argument in an user-visible procedure).
176+
*/
166177
static inline int mca_pml_ucx_set_recv_status(ompi_status_public_t* mpi_status,
167178
ucs_status_t ucp_status,
168179
const ucp_tag_recv_info_t *info)
@@ -178,6 +189,10 @@ static inline int mca_pml_ucx_set_recv_status(ompi_status_public_t* mpi_status,
178189
mpi_status->_ucount = info->length;
179190
} else if (ucp_status == UCS_ERR_MESSAGE_TRUNCATED) {
180191
mpi_status->MPI_ERROR = MPI_ERR_TRUNCATE;
192+
mpi_status->MPI_SOURCE = PML_UCX_TAG_GET_SOURCE(tag);
193+
mpi_status->MPI_TAG = PML_UCX_TAG_GET_MPI_TAG(tag);
194+
mpi_status->_cancelled = false;
195+
mpi_status->_ucount = info->length;
181196
} else if (ucp_status == UCS_ERR_CANCELED) {
182197
mpi_status->MPI_ERROR = MPI_SUCCESS;
183198
mpi_status->_cancelled = true;
@@ -188,16 +203,41 @@ static inline int mca_pml_ucx_set_recv_status(ompi_status_public_t* mpi_status,
188203
return mpi_status->MPI_ERROR;
189204
}
190205

191-
static inline int mca_pml_ucx_set_recv_status_safe(ompi_status_public_t* mpi_status,
206+
/* Use when setting a publicly visible status (i.e., the status argument in an
207+
* user-visible procedure).
208+
* Except in procedures that return MPI_ERR_IN_STATUS, the MPI_ERROR
209+
* field of a status object shall never be modified
210+
* See MPI-1.1 doc, sec 3.2.5, p.22
211+
*/
212+
static inline int mca_pml_ucx_set_recv_status_public(ompi_status_public_t* mpi_status,
192213
ucs_status_t ucp_status,
193214
const ucp_tag_recv_info_t *info)
194215
{
195216
if (mpi_status != MPI_STATUS_IGNORE) {
196-
return mca_pml_ucx_set_recv_status(mpi_status, ucp_status, info);
197-
} else if (OPAL_LIKELY(ucp_status == UCS_OK) || (ucp_status == UCS_ERR_CANCELED)) {
198-
return UCS_OK;
217+
if (OPAL_LIKELY(ucp_status == UCS_OK)) {
218+
uint64_t tag = info->sender_tag;
219+
mpi_status->MPI_SOURCE = PML_UCX_TAG_GET_SOURCE(tag);
220+
mpi_status->MPI_TAG = PML_UCX_TAG_GET_MPI_TAG(tag);
221+
mpi_status->_cancelled = false;
222+
mpi_status->_ucount = info->length;
223+
return MPI_SUCCESS;
224+
} else if (ucp_status == UCS_ERR_MESSAGE_TRUNCATED) {
225+
uint64_t tag = info->sender_tag;
226+
mpi_status->MPI_SOURCE = PML_UCX_TAG_GET_SOURCE(tag);
227+
mpi_status->MPI_TAG = PML_UCX_TAG_GET_MPI_TAG(tag);
228+
mpi_status->_cancelled = false;
229+
mpi_status->_ucount = info->length;
230+
return MPI_ERR_TRUNCATE;
231+
} else if (ucp_status == UCS_ERR_CANCELED) {
232+
mpi_status->_cancelled = true;
233+
return MPI_SUCCESS;
234+
} else {
235+
return MPI_ERR_INTERN;
236+
}
199237
} else if (ucp_status == UCS_ERR_MESSAGE_TRUNCATED) {
200238
return MPI_ERR_TRUNCATE;
239+
} else if (OPAL_LIKELY(ucp_status == UCS_OK) || (ucp_status == UCS_ERR_CANCELED)) {
240+
return MPI_SUCCESS;
201241
}
202242

203243
return MPI_ERR_INTERN;

ompi/mpi/c/improbe.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
/*
22
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
3-
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
3+
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
44
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
55
* Copyright (c) 2015 Research Organization for Information Science
66
* and Technology (RIST). All rights reserved.
7-
* Copyright (c) 2020 The University of Tennessee and The University
7+
* Copyright (c) 2020-2021 The University of Tennessee and The University
88
* of Tennessee Research Foundation. All rights
99
* reserved.
1010
* $COPYRIGHT$
@@ -61,7 +61,7 @@ int MPI_Improbe(int source, int tag, MPI_Comm comm, int *flag,
6161

6262
if (MPI_PROC_NULL == source) {
6363
if (MPI_STATUS_IGNORE != status) {
64-
*status = ompi_request_empty.req_status;
64+
OMPI_COPY_STATUS(status, ompi_request_empty.req_status, false);
6565
/* Per MPI-1, the MPI_ERROR field is not defined for
6666
single-completion calls */
6767
MEMCHECKER(

ompi/mpi/c/iprobe.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2020 The University of Tennessee and The University
5+
* Copyright (c) 2004-2021 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
@@ -67,7 +67,7 @@ int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status
6767
if (MPI_PROC_NULL == source) {
6868
*flag = 1;
6969
if (MPI_STATUS_IGNORE != status) {
70-
*status = ompi_request_empty.req_status;
70+
OMPI_COPY_STATUS(status, ompi_request_empty.req_status, false);
7171
/*
7272
* Per MPI-1, the MPI_ERROR field is not defined for single-completion calls
7373
*/

ompi/mpi/c/mprobe.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
/*
22
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
3-
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
3+
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
44
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
55
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
66
* Copyright (c) 2015 Research Organization for Information Science
77
* and Technology (RIST). All rights reserved.
8-
* Copyright (c) 2020 The University of Tennessee and The University
8+
* Copyright (c) 2020-2021 The University of Tennessee and The University
99
* of Tennessee Research Foundation. All rights
1010
* reserved.
1111
* $COPYRIGHT$
@@ -62,7 +62,7 @@ int MPI_Mprobe(int source, int tag, MPI_Comm comm,
6262

6363
if (MPI_PROC_NULL == source) {
6464
if (MPI_STATUS_IGNORE != status) {
65-
*status = ompi_request_empty.req_status;
65+
OMPI_COPY_STATUS(status, ompi_request_empty.req_status, false);
6666
/* Per MPI-1, the MPI_ERROR field is not defined for
6767
single-completion calls */
6868
MEMCHECKER(

0 commit comments

Comments
 (0)