@@ -991,10 +991,20 @@ __opal_attribute_always_inline__ static inline int
991
991
ompi_mtl_ofi_probe_error_callback (struct fi_cq_err_entry * error ,
992
992
ompi_mtl_ofi_request_t * ofi_req )
993
993
{
994
- ofi_req -> status .MPI_ERROR = MPI_ERR_INTERN ;
995
994
ofi_req -> completion_count -- ;
996
995
997
- return OMPI_SUCCESS ;
996
+ /*
997
+ * Receives posted with FI_PEEK (alone or combined with other flags such as FI_CLAIM) will get an error
998
+ * completion with FI_ENOMSG. This just indicates the lack of a match for
999
+ * the probe and is not an error case. All other error cases are
1000
+ * provider-internal errors and should be flagged as such.
1001
+ */
1002
+ if (error -> err == FI_ENOMSG )
1003
+ return OMPI_SUCCESS ;
1004
+
1005
+ ofi_req -> status .MPI_ERROR = MPI_ERR_INTERN ;
1006
+
1007
+ return OMPI_ERROR ;
998
1008
}
999
1009
1000
1010
__opal_attribute_always_inline__ static inline int
@@ -1039,7 +1049,6 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
1039
1049
/**
1040
1050
* fi_trecvmsg with FI_PEEK:
1041
1051
* Initiate a search for a match in the hardware or software queue.
1042
- * The search can complete immediately with -ENOMSG.
1043
1052
* If successful, libfabric will enqueue a context entry into the completion
1044
1053
* queue to make the search nonblocking. This code will poll until the
1045
1054
* entry is enqueued.
@@ -1060,13 +1069,7 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
1060
1069
ofi_req .match_state = 0 ;
1061
1070
1062
1071
MTL_OFI_RETRY_UNTIL_DONE (fi_trecvmsg (ompi_mtl_ofi .ofi_ctxt [ctxt_id ].rx_ep , & msg , msgflags ), ret );
1063
- if (- FI_ENOMSG == ret ) {
1064
- /**
1065
- * The search request completed but no matching message was found.
1066
- */
1067
- * flag = 0 ;
1068
- return OMPI_SUCCESS ;
1069
- } else if (OPAL_UNLIKELY (0 > ret )) {
1072
+ if (OPAL_UNLIKELY (0 > ret )) {
1070
1073
MTL_OFI_LOG_FI_ERR (ret , "fi_trecvmsg failed" );
1071
1074
return ompi_mtl_ofi_get_error (ret );
1072
1075
}
@@ -1136,7 +1139,6 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
1136
1139
/**
1137
1140
* fi_trecvmsg with FI_PEEK and FI_CLAIM:
1138
1141
* Initiate a search for a match in the hardware or software queue.
1139
- * The search can complete immediately with -ENOMSG.
1140
1142
* If successful, libfabric will enqueue a context entry into the completion
1141
1143
* queue to make the search nonblocking. This code will poll until the
1142
1144
* entry is enqueued.
@@ -1158,14 +1160,7 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
1158
1160
ofi_req -> mask_bits = mask_bits ;
1159
1161
1160
1162
MTL_OFI_RETRY_UNTIL_DONE (fi_trecvmsg (ompi_mtl_ofi .ofi_ctxt [ctxt_id ].rx_ep , & msg , msgflags ), ret );
1161
- if (- FI_ENOMSG == ret ) {
1162
- /**
1163
- * The search request completed but no matching message was found.
1164
- */
1165
- * matched = 0 ;
1166
- free (ofi_req );
1167
- return OMPI_SUCCESS ;
1168
- } else if (OPAL_UNLIKELY (0 > ret )) {
1163
+ if (OPAL_UNLIKELY (0 > ret )) {
1169
1164
MTL_OFI_LOG_FI_ERR (ret , "fi_trecvmsg failed" );
1170
1165
free (ofi_req );
1171
1166
return ompi_mtl_ofi_get_error (ret );
0 commit comments