@@ -2,7 +2,7 @@
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         Corporation. All rights reserved.
- * Copyright (c) 2004-2017 The University of Tennessee and The University
+ * Copyright (c) 2004-2016 The University of Tennessee and The University
  *                         of Tennessee Research Foundation. All rights
  *                         reserved.
  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -387,7 +387,6 @@ mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint,
 {
     int ret = mca_btl_tcp_send_blocking(btl_endpoint->endpoint_sd, data, size);
     if (ret < 0) {
-        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
     }
     return ret;
@@ -537,30 +536,20 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
     btl_endpoint->endpoint_sd = -1;
     /**
      * If we keep failing to connect to the peer let the caller know about
-     * this situation by triggering the callback on all pending fragments and
-     * reporting the error. The upper layer has then the opportunity to
-     * re-route or re-schedule the fragments.
+     * this situation by triggering all the pending fragments callback and
+     * reporting the error.
      */
     if (MCA_BTL_TCP_FAILED == btl_endpoint->endpoint_state) {
         mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_send_frag;
         if (NULL == frag)
             frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
         while (NULL != frag) {
             frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, OPAL_ERR_UNREACH);
-            if (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) {
-                MCA_BTL_TCP_FRAG_RETURN(frag);
-            }
+
             frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
         }
-        btl_endpoint->endpoint_send_frag = NULL;
-        /* Let's report the error upstream */
-        if (NULL != btl_endpoint->endpoint_btl->tcp_error_cb) {
-            btl_endpoint->endpoint_btl->tcp_error_cb((mca_btl_base_module_t*)btl_endpoint->endpoint_btl, 0,
-                                                     btl_endpoint->endpoint_proc->proc_opal, "Socket closed");
-        }
-    } else {
-        btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
     }
+    btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
 }

 /*
@@ -617,6 +606,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
         opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail",
                        true, opal_process_info.nodename,
                        getpid(), "did not receive entire connect ACK from peer");
+
         return OPAL_ERR_BAD_PARAM;
     }
     if (0 != strncmp(hs_msg.magic_id, mca_btl_tcp_magic_id_string, len)) {
@@ -636,7 +626,6 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
     if (0 != opal_compare_proc(btl_proc->proc_opal->proc_name, guid)) {
         BTL_ERROR(("received unexpected process identifier %s",
                    OPAL_NAME_PRINT(guid)));
-        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
         return OPAL_ERR_UNREACH;
     }
@@ -843,7 +832,6 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_en
                    opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
                    ((struct sockaddr_in*) &endpoint_addr)->sin_port,
                    strerror(opal_socket_errno), opal_socket_errno));
-        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
         return OPAL_ERROR;
     }
@@ -860,7 +848,6 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_en
                        getpid(), msg,
                        strerror(opal_socket_errno), opal_socket_errno);
         free(msg);
-        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
         return OPAL_ERROR;
     }
@@ -932,15 +919,12 @@ static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user)
                 OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
                 MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "connected");
             }
-            else if (OPAL_ERR_BAD_PARAM == rc
-                     || OPAL_ERROR == rc) {
+            else if (OPAL_ERR_BAD_PARAM == rc) {
                 /* If we get a BAD_PARAM, it means that it probably wasn't
                    an OMPI process on the other end of the socket (e.g.,
-                   the magic string ID failed). recv_connect_ack already cleaned
-                   up the socket. */
-                /* If we get OPAL_ERROR, the other end closed the connection
-                 * because it has initiated a symetrical connexion on its end.
-                 * recv_connect_ack already cleaned up the socket. */
+                   the magic string ID failed). So we can probably just
+                   close the socket and ignore this connection. */
+                CLOSE_THE_SOCKET(sd);
             }
             else {
                 /* Otherwise, it probably *was* an OMPI peer process on
@@ -1079,8 +1063,6 @@ static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user)
             opal_event_del(&btl_endpoint->endpoint_send_event);
         }
         break;
-    case MCA_BTL_TCP_FAILED:
-        break;
     default:
         BTL_ERROR(("invalid connection state (%d)", btl_endpoint->endpoint_state));
         MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, true, "event_del(send) [endpoint_send_handler:error]");
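
For readers outside the Open MPI tree, the flow that mca_btl_tcp_endpoint_close() settles on in this change can be sketched in isolation: when an endpoint is closed while in the FAILED state, each pending fragment's completion callback is invoked with an unreachable-peer error, and the endpoint then transitions to CLOSED. The sketch below is a minimal, self-contained model of that pattern; every type and function name in it (ep_state_t, frag_t, endpoint_t, endpoint_close, frag_done) is a hypothetical stand-in invented for illustration, not part of the real mca_btl_tcp API, and the -1 status is a placeholder for OPAL_ERR_UNREACH.

/* Minimal sketch of the close-and-drain pattern shown in the
 * mca_btl_tcp_endpoint_close() hunk above. All names here are
 * hypothetical; none of them exist in Open MPI. */
#include <stdio.h>
#include <stddef.h>

typedef enum { EP_CONNECTED, EP_FAILED, EP_CLOSED } ep_state_t;

/* A queued fragment with a completion callback, standing in for
 * mca_btl_tcp_frag_t and its des_cbfunc. */
typedef struct frag {
    struct frag *next;
    void (*cbfunc)(struct frag *frag, int status);
} frag_t;

/* A stripped-down endpoint, standing in for mca_btl_base_endpoint_t. */
typedef struct endpoint {
    int         sd;       /* socket descriptor, -1 once closed      */
    ep_state_t  state;
    frag_t     *pending;  /* fragments still queued for this peer   */
} endpoint_t;

/* Mirrors the post-change flow: if the endpoint already failed, report
 * the error to every pending fragment, then mark the endpoint closed. */
static void endpoint_close(endpoint_t *ep)
{
    ep->sd = -1;
    if (EP_FAILED == ep->state) {
        for (frag_t *frag = ep->pending; NULL != frag; frag = frag->next) {
            frag->cbfunc(frag, -1);   /* -1 stands in for OPAL_ERR_UNREACH */
        }
        ep->pending = NULL;
    }
    ep->state = EP_CLOSED;            /* unconditionally, as in the new code */
}

static void frag_done(struct frag *frag, int status)
{
    (void)frag;
    printf("fragment completed with status %d\n", status);
}

int main(void)
{
    frag_t one = { .next = NULL, .cbfunc = frag_done };
    endpoint_t ep = { .sd = 42, .state = EP_FAILED, .pending = &one };
    endpoint_close(&ep);   /* drains the fragment, then marks the endpoint closed */
    return EP_CLOSED == ep.state ? 0 : 1;
}

The sketch compiles as a standalone C program; running it reports the single queued fragment as completed with the error status and exits 0 once the endpoint is CLOSED.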