@@ -2,7 +2,7 @@
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         Corporation. All rights reserved.
- * Copyright (c) 2004-2017 The University of Tennessee and The University
+ * Copyright (c) 2004-2016 The University of Tennessee and The University
  *                         of Tennessee Research Foundation. All rights
  *                         reserved.
  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -388,7 +388,6 @@ mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint,
 {
     int ret = mca_btl_tcp_send_blocking(btl_endpoint->endpoint_sd, data, size);
     if (ret < 0) {
-        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
     }
     return ret;
@@ -539,30 +538,20 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
     btl_endpoint->endpoint_sd = -1;
     /**
      * If we keep failing to connect to the peer let the caller know about
-     * this situation by triggering the callback on all pending fragments and
-     * reporting the error. The upper layer has then the opportunity to
-     * re-route or re-schedule the fragments.
+     * this situation by triggering all the pending fragments callback and
+     * reporting the error.
      */
     if( MCA_BTL_TCP_FAILED == btl_endpoint->endpoint_state ) {
         mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_send_frag;
         if( NULL == frag )
             frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
         while( NULL != frag ) {
             frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, OPAL_ERR_UNREACH);
-            if( frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ) {
-                MCA_BTL_TCP_FRAG_RETURN(frag);
-            }
+
             frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
         }
-        btl_endpoint->endpoint_send_frag = NULL;
-        /* Let's report the error upstream */
-        if (NULL != btl_endpoint->endpoint_btl->tcp_error_cb) {
-            btl_endpoint->endpoint_btl->tcp_error_cb((mca_btl_base_module_t*)btl_endpoint->endpoint_btl, 0,
-                                                     btl_endpoint->endpoint_proc->proc_opal, "Socket closed");
-        }
-    } else {
-        btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
     }
+    btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
 }
 
 /*
@@ -619,6 +608,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
         opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail",
                        true, opal_process_info.nodename,
                        getpid(), "did not receive entire connect ACK from peer");
+
         return OPAL_ERR_BAD_PARAM;
     }
     if (0 != strncmp(hs_msg.magic_id, mca_btl_tcp_magic_id_string, len)) {
@@ -638,7 +628,6 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
     if (0 != opal_compare_proc(btl_proc->proc_opal->proc_name, guid)) {
         BTL_ERROR(("received unexpected process identifier %s",
                    OPAL_NAME_PRINT(guid)));
-        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
         return OPAL_ERR_UNREACH;
     }
@@ -845,7 +834,6 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_en
                    opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
                    ((struct sockaddr_in*) &endpoint_addr)->sin_port,
                    strerror(opal_socket_errno), opal_socket_errno));
-        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
         return OPAL_ERROR;
     }
@@ -862,7 +850,6 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_en
                        getpid(), msg,
                        strerror(opal_socket_errno), opal_socket_errno);
         free(msg);
-        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
         return OPAL_ERROR;
     }
@@ -934,15 +921,12 @@ static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user)
                 OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
                 MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "connected");
             }
-            else if (OPAL_ERR_BAD_PARAM == rc
-                     || OPAL_ERROR == rc) {
+            else if (OPAL_ERR_BAD_PARAM == rc) {
                 /* If we get a BAD_PARAM, it means that it probably wasn't
                    an OMPI process on the other end of the socket (e.g.,
-                   the magic string ID failed). recv_connect_ack already cleaned
-                   up the socket. */
-                /* If we get OPAL_ERROR, the other end closed the connection
-                 * because it has initiated a symetrical connexion on its end.
-                 * recv_connect_ack already cleaned up the socket. */
+                   the magic string ID failed). So we can probably just
+                   close the socket and ignore this connection. */
+                CLOSE_THE_SOCKET(sd);
             }
             else {
                 /* Otherwise, it probably *was* an OMPI peer process on
@@ -1081,8 +1065,6 @@ static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user)
             opal_event_del(&btl_endpoint->endpoint_send_event);
         }
         break;
-    case MCA_BTL_TCP_FAILED:
-        break;
     default:
         BTL_ERROR(("invalid connection state (%d)", btl_endpoint->endpoint_state));
         MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, true, "event_del(send) [endpoint_send_handler:error]");
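
The endpoint_close hunk above keeps the loop that, once an endpoint is marked MCA_BTL_TCP_FAILED, takes the in-flight send fragment plus every fragment still queued on the endpoint and fires each fragment's des_cbfunc with OPAL_ERR_UNREACH before the state is reset to MCA_BTL_TCP_CLOSED. The following is a minimal, self-contained sketch of that drain-and-report pattern, not the actual Open MPI code: frag_t, endpoint_t, dequeue, report and ERR_UNREACH are illustrative stand-ins for the real structures and constants.

/* Illustrative sketch only: simplified stand-ins for the Open MPI fragment
 * queue and endpoint close path shown in the hunk above. */
#include <stdio.h>

#define ERR_UNREACH (-12)                 /* stand-in for OPAL_ERR_UNREACH */

typedef enum { EP_CONNECTED, EP_FAILED, EP_CLOSED } ep_state_t;

typedef struct frag {
    struct frag *next;
    int          id;
    void       (*cbfunc)(struct frag *frag, int status);  /* completion callback */
} frag_t;

typedef struct endpoint {
    frag_t    *send_frag;                 /* fragment currently being sent */
    frag_t    *pending;                   /* queue of not-yet-sent fragments */
    ep_state_t state;
} endpoint_t;

static frag_t *dequeue(frag_t **head)
{
    frag_t *f = *head;
    if (NULL != f) {
        *head = f->next;
    }
    return f;
}

/* Mirrors the close path kept by the diff: when the endpoint has failed,
 * every pending fragment's callback is invoked with an "unreachable" error,
 * then the endpoint is unconditionally marked closed. */
static void endpoint_close(endpoint_t *ep)
{
    if (EP_FAILED == ep->state) {
        frag_t *frag = ep->send_frag;
        if (NULL == frag) {
            frag = dequeue(&ep->pending);
        }
        while (NULL != frag) {
            frag->cbfunc(frag, ERR_UNREACH);
            frag = dequeue(&ep->pending);
        }
    }
    ep->state = EP_CLOSED;
}

static void report(frag_t *frag, int status)
{
    printf("fragment %d completed with status %d\n", frag->id, status);
}

int main(void)
{
    frag_t f2 = { NULL, 2, report };
    frag_t f1 = { &f2, 1, report };
    endpoint_t ep = { NULL, &f1, EP_FAILED };

    endpoint_close(&ep);                  /* reports ERR_UNREACH for both fragments */
    return 0;
}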