30
30
#include <unistd.h>
31
31
#endif
32
32
#include <fcntl.h>
33
+ #include <sys/socket.h>
34
+
33
35
#ifdef HAVE_SYS_UIO_H
34
36
#include <sys/uio.h>
35
37
#endif
77
79
#include "orte/mca/oob/tcp/oob_tcp_peer.h"
78
80
#include "orte/mca/oob/tcp/oob_tcp_common.h"
79
81
#include "orte/mca/oob/tcp/oob_tcp_connection.h"
82
+ #include "oob_tcp_peer.h"
83
+ #include "oob_tcp_common.h"
84
+ #include "oob_tcp_connection.h"
80
85
81
86
static void tcp_peer_event_init (mca_oob_tcp_peer_t * peer );
82
87
static int tcp_peer_send_connect_ack (mca_oob_tcp_peer_t * peer );
@@ -86,7 +91,7 @@ static bool tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd,
86
91
void * data , size_t size );
87
92
static void tcp_peer_connected (mca_oob_tcp_peer_t * peer );
88
93
89
- static int tcp_peer_create_socket (mca_oob_tcp_peer_t * peer )
94
+ static int tcp_peer_create_socket (mca_oob_tcp_peer_t * peer , sa_family_t family )
90
95
{
91
96
int flags ;
92
97
@@ -98,8 +103,7 @@ static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer)
98
103
"%s oob:tcp:peer creating socket to %s" ,
99
104
ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
100
105
ORTE_NAME_PRINT (& (peer -> name ))));
101
-
102
- peer -> sd = socket (AF_INET , SOCK_STREAM , 0 );
106
+ peer -> sd = socket (family , SOCK_STREAM , 0 );
103
107
if (peer -> sd < 0 ) {
104
108
opal_output (0 , "%s-%s tcp_peer_create_socket: socket() failed: %s (%d)\n" ,
105
109
ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
@@ -155,6 +159,7 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
155
159
{
156
160
mca_oob_tcp_conn_op_t * op = (mca_oob_tcp_conn_op_t * )cbdata ;
157
161
mca_oob_tcp_peer_t * peer ;
162
+ int current_socket_family = 0 ;
158
163
int rc ;
159
164
opal_socklen_t addrlen = 0 ;
160
165
mca_oob_tcp_addr_t * addr ;
@@ -171,30 +176,12 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
171
176
ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
172
177
ORTE_NAME_PRINT (& (peer -> name )));
173
178
174
- rc = tcp_peer_create_socket (peer );
175
- if (ORTE_SUCCESS != rc ) {
176
- /* FIXME: we cannot create a TCP socket - this spans
177
- * all interfaces, so all we can do is report
178
- * back to the component that this peer is
179
- * unreachable so it can remove the peer
180
- * from its list and report back to the base
181
- * NOTE: this could be a reconnect attempt,
182
- * so we also need to mark any queued messages
183
- * and return them as "unreachable"
184
- */
185
- opal_output (0 , "%s CANNOT CREATE SOCKET" , ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ));
186
- ORTE_FORCED_TERMINATE (1 );
187
- OBJ_RELEASE (op );
188
- return ;
189
- }
190
-
191
179
opal_output_verbose (OOB_TCP_DEBUG_CONNECT , orte_oob_base_framework .framework_output ,
192
180
"%s orte_tcp_peer_try_connect: "
193
181
"attempting to connect to proc %s on socket %d" ,
194
182
ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
195
183
ORTE_NAME_PRINT (& (peer -> name )), peer -> sd );
196
184
197
- addrlen = sizeof (struct sockaddr_in );
198
185
peer -> active_addr = NULL ;
199
186
OPAL_LIST_FOREACH (addr , & peer -> addrs , mca_oob_tcp_addr_t ) {
200
187
opal_output_verbose (OOB_TCP_DEBUG_CONNECT , orte_oob_base_framework .framework_output ,
@@ -222,9 +209,36 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
222
209
continue ;
223
210
}
224
211
peer -> active_addr = addr ; // record the one we are using
212
+ addrlen = addr -> addr .ss_family == AF_INET6 ? sizeof (struct sockaddr_in6 )
213
+ : sizeof (struct sockaddr_in );
214
+ if (addr -> addr .ss_family != current_socket_family ) {
215
+ if (peer -> sd >= 0 ) {
216
+ CLOSE_THE_SOCKET (peer -> sd );
217
+ peer -> sd = -1 ;
218
+ }
219
+ rc = tcp_peer_create_socket (peer , addr -> addr .ss_family );
220
+ current_socket_family = addr -> addr .ss_family ;
221
+
222
+ if (ORTE_SUCCESS != rc ) {
223
+ /* FIXME: we cannot create a TCP socket - this spans
224
+ * all interfaces, so all we can do is report
225
+ * back to the component that this peer is
226
+ * unreachable so it can remove the peer
227
+ * from its list and report back to the base
228
+ * NOTE: this could be a reconnect attempt,
229
+ * so we also need to mark any queued messages
230
+ * and return them as "unreachable"
231
+ */
232
+ opal_output (0 , "%s CANNOT CREATE SOCKET" , ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ));
233
+ ORTE_FORCED_TERMINATE (1 );
234
+ goto cleanup ;
235
+ }
236
+ }
225
237
retry_connect :
226
238
addr -> retries ++ ;
227
- if (connect (peer -> sd , (struct sockaddr * )& addr -> addr , addrlen ) < 0 ) {
239
+
240
+ rc = connect (peer -> sd , (struct sockaddr * ) & addr -> addr , addrlen );
241
+ if (rc < 0 ) {
228
242
/* non-blocking so wait for completion */
229
243
if (opal_socket_errno == EINPROGRESS || opal_socket_errno == EWOULDBLOCK ) {
230
244
opal_output_verbose (OOB_TCP_DEBUG_CONNECT , orte_oob_base_framework .framework_output ,
0 commit comments