@@ -417,6 +417,26 @@ ompi_mtl_ofi_send_callback(struct fi_cq_tagged_entry *wc,
417
417
return OMPI_SUCCESS ;
418
418
}
419
419
420
+
421
+ /*
422
+ * special send callback for excid send operation.
423
+ * Since the send excid operation cannot block
424
+ * waiting for completion of the send operation,
425
+ * we have to free the internal message buffer allocated
426
+ * as part of the excid operation here as well as the
427
+ * request itself.
428
+ */
429
+ __opal_attribute_always_inline__ static inline int
430
+ ompi_mtl_ofi_send_excid_callback (struct fi_cq_tagged_entry * wc ,
431
+ ompi_mtl_ofi_request_t * ofi_req )
432
+ {
433
+ assert (ofi_req -> completion_count > 0 );
434
+ free (ofi_req -> buffer );
435
+ ofi_req -> completion_count -- ; /* no one's waiting on this */
436
+ free (ofi_req );
437
+ return OMPI_SUCCESS ;
438
+ }
439
+
420
440
__opal_attribute_always_inline__ static inline int
421
441
ompi_mtl_ofi_send_error_callback (struct fi_cq_err_entry * error ,
422
442
ompi_mtl_ofi_request_t * ofi_req )
@@ -668,6 +688,13 @@ ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req,
668
688
return OMPI_SUCCESS ;
669
689
}
670
690
691
+ /*
692
+ * this routine is invoked in the case of communicators which are not using a
693
+ * global cid, i.e. those created using MPI_Comm_create_from_group/
694
+ * MPI_Intercomm_create_from_groups in order to exchange the local cid used
695
+ * by the sender for this supplied communicator. This function is only invoked
696
+ * for the first message sent to a given receiver.
697
+ */
671
698
static int
672
699
ompi_mtl_ofi_send_excid (struct mca_mtl_base_module_t * mtl ,
673
700
struct ompi_communicator_t * comm ,
@@ -676,14 +703,26 @@ ompi_mtl_ofi_send_excid(struct mca_mtl_base_module_t *mtl,
676
703
bool is_send )
677
704
{
678
705
ssize_t ret = OMPI_SUCCESS ;
679
- ompi_mtl_ofi_request_t * ofi_req = malloc ( sizeof ( ompi_mtl_ofi_request_t )) ;
706
+ ompi_mtl_ofi_request_t * ofi_req = NULL ;
680
707
int ctxt_id = 0 ;
681
- mca_mtl_ofi_cid_hdr_t * start = malloc ( sizeof ( mca_mtl_ofi_cid_hdr_t )) ;
708
+ mca_mtl_ofi_cid_hdr_t * start = NULL ;
682
709
ompi_proc_t * ompi_proc = NULL ;
683
710
mca_mtl_ofi_endpoint_t * endpoint = NULL ;
684
711
fi_addr_t sep_peer_fiaddr = 0 ;
685
712
mca_mtl_comm_t * mtl_comm ;
686
713
714
+ ofi_req = (ompi_mtl_ofi_request_t * )malloc (sizeof (ompi_mtl_ofi_request_t ));
715
+ if (NULL == ofi_req ) {
716
+ ret = OMPI_ERR_OUT_OF_RESOURCE ;
717
+ goto fn_exit ;
718
+ }
719
+
720
+ start = (mca_mtl_ofi_cid_hdr_t * )malloc (sizeof (mca_mtl_ofi_cid_hdr_t ));
721
+ if (NULL == start ) {
722
+ ret = OMPI_ERR_OUT_OF_RESOURCE ;
723
+ goto fn_exit ;
724
+ }
725
+
687
726
mtl_comm = comm -> c_mtl_comm ;
688
727
689
728
ctxt_id = 0 ;
@@ -692,8 +731,9 @@ ompi_mtl_ofi_send_excid(struct mca_mtl_base_module_t *mtl,
692
731
/**
693
732
* Create a send request and start it; do not wait for it to complete.
694
733
*/
695
- ofi_req -> event_callback = ompi_mtl_ofi_send_callback ;
734
+ ofi_req -> event_callback = ompi_mtl_ofi_send_excid_callback ;
696
735
ofi_req -> error_callback = ompi_mtl_ofi_send_error_callback ;
736
+ ofi_req -> buffer = start ;
697
737
698
738
ompi_proc = ompi_comm_peer_lookup (comm , dest );
699
739
endpoint = ompi_mtl_ofi_get_endpoint (mtl , ompi_proc );
@@ -719,12 +759,10 @@ ompi_mtl_ofi_send_excid(struct mca_mtl_base_module_t *mtl,
719
759
opal_show_help ("help-mtl-ofi.txt" ,
720
760
"message too big" , false,
721
761
length , endpoint -> mtl_ofi_module -> max_msg_size );
722
- return OMPI_ERROR ;
762
+ ret = OMPI_ERROR ;
763
+ goto fn_exit ;
723
764
}
724
765
725
- if (OPAL_UNLIKELY (ofi_req -> status .MPI_ERROR != OMPI_SUCCESS ))
726
- return ofi_req -> status .MPI_ERROR ;
727
-
728
766
if (ompi_mtl_ofi .max_inject_size >= length ) {
729
767
if (ofi_cq_data ) {
730
768
MTL_OFI_RETRY_UNTIL_DONE (fi_injectdata (ompi_mtl_ofi .ofi_ctxt [0 ].tx_ep ,
@@ -743,8 +781,6 @@ ompi_mtl_ofi_send_excid(struct mca_mtl_base_module_t *mtl,
743
781
ofi_cq_data ? "fi_injectdata failed"
744
782
: "fi_inject failed" );
745
783
746
- ofi_req -> status .MPI_ERROR = ompi_mtl_ofi_get_error (ret );
747
- return ofi_req -> status .MPI_ERROR ;
748
784
}
749
785
} else {
750
786
ofi_req -> completion_count = 1 ;
@@ -768,11 +804,20 @@ ompi_mtl_ofi_send_excid(struct mca_mtl_base_module_t *mtl,
768
804
MTL_OFI_LOG_FI_ERR (ret ,
769
805
ofi_cq_data ? "fi_tsenddata failed"
770
806
: "fi_tsend failed" );
771
- ofi_req -> status .MPI_ERROR = ompi_mtl_ofi_get_error (ret );
772
807
}
773
808
}
774
809
775
- return ofi_req -> status .MPI_ERROR ;
810
+ ret = ompi_mtl_ofi_get_error (ret );
811
+ ofi_req -> status .MPI_ERROR = ret ;
812
+
813
+ fn_exit :
814
+
815
+ if ((OMPI_SUCCESS != ret ) || (ofi_req -> completion_count == 0 )) {
816
+ if (NULL != ofi_req ) free (ofi_req );
817
+ if (NULL != start ) free (start );
818
+ }
819
+
820
+ return ret ;
776
821
}
777
822
778
823
__opal_attribute_always_inline__ static inline int
0 commit comments