3
3
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
4
4
* University Research and Technology
5
5
* Corporation. All rights reserved.
6
- * Copyright (c) 2004-2020 The University of Tennessee and The University
6
+ * Copyright (c) 2004-2022 The University of Tennessee and The University
7
7
* of Tennessee Research Foundation. All rights
8
8
* reserved.
9
9
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -202,6 +202,43 @@ int mca_pml_ob1_enable(bool enable)
202
202
return OMPI_SUCCESS ;
203
203
}
204
204
205
+ static const char *
206
+ mca_pml_ob1_set_allow_overtake (opal_infosubscriber_t * obj ,
207
+ const char * key ,
208
+ const char * value )
209
+ {
210
+ ompi_communicator_t * ompi_comm = (ompi_communicator_t * ) obj ;
211
+ bool allow_overtake_was_set = OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE (ompi_comm );
212
+
213
+ /* As we keep the out-of-sequence messages ordered by their sequence, as a receiver we
214
+ * can just move the previously considered out-of-order messages into the unexpected queue,
215
+ * and we maintain some form of logical consistency with the message order.
216
+ */
217
+ if (opal_str_to_bool (value )) {
218
+ if (!allow_overtake_was_set ) {
219
+ ompi_comm -> c_flags |= OMPI_COMM_ASSERT_ALLOW_OVERTAKE ;
220
+ mca_pml_ob1_merge_cant_match (ompi_comm );
221
+ }
222
+ return "true" ;
223
+ }
224
+ if (allow_overtake_was_set ) {
225
+ /* However, in the case we are trying to turn off allow_overtake, it is not clear what
226
+ * should be done with the previous messages that are pending on our peers, nor with
227
+ * the messages currently in the network. Similarly, if one process turns off allow
228
+ * overtake, before any potential sender start sending valid sequence numbers there
229
+ * is no way to order the messages in a sensible order.
230
+ * The possible solution is cumbersome, it would force a network quiescence followed by
231
+ * a synchronization of all processes in the communicator, and then all peers will
232
+ * start sending messages starting with sequence number 0.
233
+ * A lot of code for minimal benefit, especially taking in account that the MPI standard
234
+ * does not define this. Instead, refuse to disable allow overtake, and at least the
235
+ * user has the opportunity to check if we accepted to change it.
236
+ */
237
+ return "true" ;
238
+ }
239
+ return "false" ;
240
+ }
241
+
205
242
int mca_pml_ob1_add_comm (ompi_communicator_t * comm )
206
243
{
207
244
/* allocate pml specific comm data */
@@ -221,11 +258,14 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
221
258
}
222
259
223
260
ompi_comm_assert_subscribe (comm , OMPI_COMM_ASSERT_NO_ANY_SOURCE );
224
- ompi_comm_assert_subscribe (comm , OMPI_COMM_ASSERT_ALLOW_OVERTAKE );
225
261
226
262
mca_pml_ob1_comm_init_size (pml_comm , comm -> c_remote_group -> grp_proc_count );
227
263
comm -> c_pml_comm = pml_comm ;
228
264
265
+ /* Register the subscriber alert for the mpi_assert_allow_overtaking info. */
266
+ opal_infosubscribe_subscribe (& comm -> super , "mpi_assert_allow_overtaking" ,
267
+ "false" , mca_pml_ob1_set_allow_overtake );
268
+
229
269
/* Grab all related messages from the non_existing_communicator pending queue */
230
270
OPAL_LIST_FOREACH_SAFE (frag , next_frag , & mca_pml_ob1 .non_existing_communicator_pending , mca_pml_ob1_recv_frag_t ) {
231
271
hdr = & frag -> hdr .hdr_match ;
0 commit comments