3
3
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4
4
* University Research and Technology
5
5
* Corporation. All rights reserved.
6
- * Copyright (c) 2004-2019 The University of Tennessee and The University
6
+ * Copyright (c) 2004-2022 The University of Tennessee and The University
7
7
* of Tennessee Research Foundation. All rights
8
8
* reserved.
9
9
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
@@ -370,7 +370,7 @@ int mca_pml_ob1_revoke_comm( struct ompi_communicator_t* ompi_comm, bool coll_on
370
370
/* note this is not an ompi_proc, but a ob1_comm_proc, thus we don't
371
371
* use ompi_proc_is_sentinel to verify if initialized. */
372
372
if ( NULL == proc ) continue ;
373
- /* remove the frag from the unexpected list, add to the nack list
373
+ /* remove the frag from the unexpected list, add to the nack list
374
374
* so that we can send the nack as needed to remote cancel the send
375
375
* from outside the match lock.
376
376
*/
@@ -385,7 +385,7 @@ int mca_pml_ob1_revoke_comm( struct ompi_communicator_t* ompi_comm, bool coll_on
385
385
}
386
386
}
387
387
/* same for the cantmatch queue/heap; this list is more complicated
388
- * Keep it simple: we pop all of the complex list, put the bad items
388
+ * Keep it simple: we pop all of the complex list, put the bad items
389
389
* in the nack_list, and keep the good items in the keep_list;
390
390
* then we reinsert the good items in the cantmatch heaplist */
391
391
mca_pml_ob1_recv_frag_t * frag ;
@@ -519,7 +519,7 @@ void mca_pml_ob1_recv_frag_callback_match (mca_btl_base_module_t *btl,
519
519
}
520
520
#endif
521
521
522
- if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE (comm_ptr )) {
522
+ if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE (comm_ptr ) || 0 > hdr -> hdr_tag ) {
523
523
/* get sequence number of next message that can be processed.
524
524
* If this frag is out of sequence, queue it up in the list
525
525
* now as we still have the lock.
@@ -637,6 +637,38 @@ void mca_pml_ob1_recv_frag_callback_match (mca_btl_base_module_t *btl,
637
637
}
638
638
}
639
639
640
+ /**
641
+ * Merge all out of sequence fragments into the matching queue, as if they were received now.
642
+ */
643
+ int mca_pml_ob1_merge_cant_match ( ompi_communicator_t * ompi_comm )
644
+ {
645
+ mca_pml_ob1_comm_t * pml_comm = (mca_pml_ob1_comm_t * )ompi_comm -> c_pml_comm ;
646
+ mca_pml_ob1_recv_frag_t * frag , * frags_cant_match ;
647
+ mca_pml_ob1_comm_proc_t * proc ;
648
+ int cnt = 0 ;
649
+
650
+ for (uint32_t i = 0 ; i < pml_comm -> num_procs ; i ++ ) {
651
+ if ((NULL == (proc = pml_comm -> procs [i ])) || (NULL != proc -> frags_cant_match )) {
652
+ continue ;
653
+ }
654
+
655
+ OB1_MATCHING_LOCK (& pml_comm -> matching_lock );
656
+ /* Acquire all cant_match frags from the peer */
657
+ frags_cant_match = proc -> frags_cant_match ;
658
+ proc -> frags_cant_match = NULL ;
659
+ while (NULL != (frag = remove_head_from_ordered_list (& frags_cant_match ))) {
660
+ /* mca_pml_ob1_recv_frag_match_proc() will release the lock. */
661
+ mca_pml_ob1_recv_frag_match_proc (frag -> btl , ompi_comm , proc ,
662
+ & frag -> hdr .hdr_match ,
663
+ frag -> segments , frag -> num_segments ,
664
+ frag -> hdr .hdr_match .hdr_common .hdr_type , frag );
665
+ OB1_MATCHING_LOCK (& pml_comm -> matching_lock );
666
+ cnt ++ ;
667
+ }
668
+ }
669
+ OB1_MATCHING_UNLOCK (& pml_comm -> matching_lock );
670
+ return cnt ;
671
+ }
640
672
641
673
void mca_pml_ob1_recv_frag_callback_rndv (mca_btl_base_module_t * btl ,
642
674
const mca_btl_base_receive_descriptor_t * descriptor )
@@ -1092,7 +1124,7 @@ static int mca_pml_ob1_recv_frag_match (mca_btl_base_module_t *btl,
1092
1124
frag_msg_seq = hdr -> hdr_seq ;
1093
1125
next_msg_seq_expected = (uint16_t )proc -> expected_sequence ;
1094
1126
1095
- if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE (comm_ptr )) {
1127
+ if (!OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE (comm_ptr ) || 0 > hdr -> hdr_tag ) {
1096
1128
/* If the sequence number is wrong, queue it up for later. */
1097
1129
if (OPAL_UNLIKELY (frag_msg_seq != next_msg_seq_expected )) {
1098
1130
mca_pml_ob1_recv_frag_t * frag ;
0 commit comments