30
30
#include <unistd.h>
31
31
#endif
32
32
#include <fcntl.h>
33
+ #include <sys/socket.h>
34
+
33
35
#ifdef HAVE_SYS_UIO_H
34
36
#include <sys/uio.h>
35
37
#endif
76
78
#include "orte/mca/oob/tcp/oob_tcp_peer.h"
77
79
#include "orte/mca/oob/tcp/oob_tcp_common.h"
78
80
#include "orte/mca/oob/tcp/oob_tcp_connection.h"
81
+ #include "oob_tcp_peer.h"
82
+ #include "oob_tcp_common.h"
83
+ #include "oob_tcp_connection.h"
79
84
80
85
static void tcp_peer_event_init (mca_oob_tcp_peer_t * peer );
81
86
static int tcp_peer_send_connect_ack (mca_oob_tcp_peer_t * peer );
@@ -85,7 +90,7 @@ static bool tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd,
85
90
void * data , size_t size );
86
91
static void tcp_peer_connected (mca_oob_tcp_peer_t * peer );
87
92
88
- static int tcp_peer_create_socket (mca_oob_tcp_peer_t * peer )
93
+ static int tcp_peer_create_socket (mca_oob_tcp_peer_t * peer , sa_family_t family )
89
94
{
90
95
int flags ;
91
96
@@ -97,8 +102,7 @@ static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer)
97
102
"%s oob:tcp:peer creating socket to %s" ,
98
103
ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
99
104
ORTE_NAME_PRINT (& (peer -> name ))));
100
-
101
- peer -> sd = socket (AF_INET , SOCK_STREAM , 0 );
105
+ peer -> sd = socket (family , SOCK_STREAM , 0 );
102
106
if (peer -> sd < 0 ) {
103
107
opal_output (0 , "%s-%s tcp_peer_create_socket: socket() failed: %s (%d)\n" ,
104
108
ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
@@ -154,6 +158,7 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
154
158
{
155
159
mca_oob_tcp_conn_op_t * op = (mca_oob_tcp_conn_op_t * )cbdata ;
156
160
mca_oob_tcp_peer_t * peer ;
161
+ int current_socket_family = 0 ;
157
162
int rc ;
158
163
opal_socklen_t addrlen = 0 ;
159
164
mca_oob_tcp_addr_t * addr ;
@@ -170,30 +175,12 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
170
175
ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
171
176
ORTE_NAME_PRINT (& (peer -> name )));
172
177
173
- rc = tcp_peer_create_socket (peer );
174
- if (ORTE_SUCCESS != rc ) {
175
- /* FIXME: we cannot create a TCP socket - this spans
176
- * all interfaces, so all we can do is report
177
- * back to the component that this peer is
178
- * unreachable so it can remove the peer
179
- * from its list and report back to the base
180
- * NOTE: this could be a reconnect attempt,
181
- * so we also need to mark any queued messages
182
- * and return them as "unreachable"
183
- */
184
- opal_output (0 , "%s CANNOT CREATE SOCKET" , ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ));
185
- ORTE_FORCED_TERMINATE (1 );
186
- OBJ_RELEASE (op );
187
- return ;
188
- }
189
-
190
178
opal_output_verbose (OOB_TCP_DEBUG_CONNECT , orte_oob_base_framework .framework_output ,
191
179
"%s orte_tcp_peer_try_connect: "
192
180
"attempting to connect to proc %s on socket %d" ,
193
181
ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
194
182
ORTE_NAME_PRINT (& (peer -> name )), peer -> sd );
195
183
196
- addrlen = sizeof (struct sockaddr_in );
197
184
peer -> active_addr = NULL ;
198
185
OPAL_LIST_FOREACH (addr , & peer -> addrs , mca_oob_tcp_addr_t ) {
199
186
opal_output_verbose (OOB_TCP_DEBUG_CONNECT , orte_oob_base_framework .framework_output ,
@@ -221,9 +208,36 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
221
208
continue ;
222
209
}
223
210
peer -> active_addr = addr ; // record the one we are using
211
+ addrlen = addr -> addr .ss_family == AF_INET6 ? sizeof (struct sockaddr_in6 )
212
+ : sizeof (struct sockaddr_in );
213
+ if (addr -> addr .ss_family != current_socket_family ) {
214
+ if (peer -> sd >= 0 ) {
215
+ CLOSE_THE_SOCKET (peer -> sd );
216
+ peer -> sd = -1 ;
217
+ }
218
+ rc = tcp_peer_create_socket (peer , addr -> addr .ss_family );
219
+ current_socket_family = addr -> addr .ss_family ;
220
+
221
+ if (ORTE_SUCCESS != rc ) {
222
+ /* FIXME: we cannot create a TCP socket - this spans
223
+ * all interfaces, so all we can do is report
224
+ * back to the component that this peer is
225
+ * unreachable so it can remove the peer
226
+ * from its list and report back to the base
227
+ * NOTE: this could be a reconnect attempt,
228
+ * so we also need to mark any queued messages
229
+ * and return them as "unreachable"
230
+ */
231
+ opal_output (0 , "%s CANNOT CREATE SOCKET" , ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ));
232
+ ORTE_FORCED_TERMINATE (1 );
233
+ goto cleanup ;
234
+ }
235
+ }
224
236
retry_connect :
225
237
addr -> retries ++ ;
226
- if (connect (peer -> sd , (struct sockaddr * )& addr -> addr , addrlen ) < 0 ) {
238
+
239
+ rc = connect (peer -> sd , (struct sockaddr * ) & addr -> addr , addrlen );
240
+ if (rc < 0 ) {
227
241
/* non-blocking so wait for completion */
228
242
if (opal_socket_errno == EINPROGRESS || opal_socket_errno == EWOULDBLOCK ) {
229
243
opal_output_verbose (OOB_TCP_DEBUG_CONNECT , orte_oob_base_framework .framework_output ,
0 commit comments