@@ -490,11 +490,59 @@ static int mca_btl_tcp_component_close(void)
490
490
* Create a btl instance and add to modules list.
491
491
*/
492
492
493
- static int mca_btl_tcp_create (int if_kindex , const char * if_name )
493
+ static int mca_btl_tcp_create (const int if_kindex , const char * if_name )
494
494
{
495
495
struct mca_btl_tcp_module_t * btl ;
496
496
char param [256 ];
497
497
int i ;
498
+ struct sockaddr_storage addr ;
499
+ bool found = false;
500
+
501
+ /*
502
+ * Look for an address on the given device (ie, kindex) which
503
+ * isn't disabled by the disable_family option. If there isn't
504
+ * one, skip creating the modules for this interface. We store
505
+ * the address on the module both to publish in the modex and to
506
+ * use as the source address of all packets sent by this module.
507
+ *
508
+ * This still isn't quite right. Better would be to pull apart
509
+ * split_and_resolve and pass the address used to select the
510
+ * device into mca_btl_tcp_create(). This is a cleanup of the
511
+ * logic that's been in use for years, but the case it doesn't
512
+ * cover is (say) only specifing mca_btl_if_include 10.0.0.0/16
513
+ * when the interface has addresses of both 10.0.0.1 and 10.1.0.1;
514
+ * there's absolutely nothing that keeps this code from picking
515
+ * 10.1.0.1 as the one that is published in the modex and used for
516
+ * connection.
517
+ */
518
+ for (i = opal_ifbegin () ; i >= 0 ; i = opal_ifnext (i )) {
519
+ int ret ;
520
+
521
+ if (if_kindex != opal_ifindextokindex (i )) {
522
+ continue ;
523
+ }
524
+
525
+ ret = opal_ifindextoaddr (i , (struct sockaddr * )& addr ,
526
+ sizeof (struct sockaddr_storage ));
527
+ if (OPAL_SUCCESS != ret ) {
528
+ return ret ;
529
+ }
530
+
531
+ if (addr .ss_family == AF_INET &&
532
+ 4 != mca_btl_tcp_component .tcp_disable_family ) {
533
+ found = true;
534
+ break ;
535
+ } else if (addr .ss_family == AF_INET6 &&
536
+ 6 != mca_btl_tcp_component .tcp_disable_family ) {
537
+ found = true;
538
+ break ;
539
+ }
540
+ }
541
+ /* if we didn't find an address that works for us on this
542
+ interface, just move on. */
543
+ if (!found ) {
544
+ return OPAL_SUCCESS ;
545
+ }
498
546
499
547
for ( i = 0 ; i < (int )mca_btl_tcp_component .tcp_num_links ; i ++ ) {
500
548
btl = (struct mca_btl_tcp_module_t * )malloc (sizeof (mca_btl_tcp_module_t ));
@@ -513,17 +561,8 @@ static int mca_btl_tcp_create(int if_kindex, const char* if_name)
513
561
btl -> tcp_send_handler = 0 ;
514
562
#endif
515
563
516
- struct sockaddr_storage addr ;
517
- opal_ifkindextoaddr (if_kindex , (struct sockaddr * ) & addr ,
518
- sizeof (struct sockaddr_storage ));
519
- #if OPAL_ENABLE_IPV6
520
- if (addr .ss_family == AF_INET6 ) {
521
- btl -> tcp_ifaddr_6 = addr ;
522
- }
523
- #endif
524
- if (addr .ss_family == AF_INET ) {
525
- btl -> tcp_ifaddr = addr ;
526
- }
564
+ memcpy (& btl -> tcp_ifaddr , & addr , sizeof (struct sockaddr_storage ));
565
+
527
566
/* allow user to specify interface bandwidth */
528
567
sprintf (param , "bandwidth_%s" , if_name );
529
568
mca_btl_tcp_param_register_uint (param , NULL , btl -> super .btl_bandwidth , OPAL_INFO_LVL_5 , & btl -> super .btl_bandwidth );
@@ -567,8 +606,8 @@ static int mca_btl_tcp_create(int if_kindex, const char* if_name)
567
606
opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
568
607
"btl:tcp: %p: if %s kidx %d cnt %i addr %s %s bw %d lt %d\n" ,
569
608
(void * )btl , if_name , (int ) btl -> tcp_ifkindex , i ,
570
- opal_net_get_hostname ((struct sockaddr * )& addr ),
571
- (addr .ss_family == AF_INET ) ? "IPv4" : "IPv6" ,
609
+ opal_net_get_hostname ((struct sockaddr * )& btl -> tcp_ifaddr ),
610
+ (btl -> tcp_ifaddr .ss_family == AF_INET ) ? "IPv4" : "IPv6" ,
572
611
btl -> super .btl_bandwidth , btl -> super .btl_latency );
573
612
}
574
613
return OPAL_SUCCESS ;
@@ -721,6 +760,7 @@ static int mca_btl_tcp_component_create_instances(void)
721
760
*/
722
761
for (if_index = opal_ifbegin (); if_index >= 0 ; if_index = opal_ifnext (if_index )){
723
762
int index = opal_ifindextokindex (if_index );
763
+
724
764
if (index > 0 ) {
725
765
bool want_this_if = true;
726
766
@@ -1121,97 +1161,64 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family)
1121
1161
1122
1162
static int mca_btl_tcp_component_exchange (void )
1123
1163
{
1124
- int rc = 0 , index ;
1125
- size_t i = 0 ;
1126
- size_t size = mca_btl_tcp_component .tcp_addr_count *
1127
- mca_btl_tcp_component .tcp_num_links * sizeof (mca_btl_tcp_addr_t );
1128
- /* adi@2007-04-12:
1129
- *
1130
- * We'll need to explain things a bit here:
1131
- * 1. We normally have as many BTLs as physical NICs.
1132
- * 2. With num_links, we now have num_btl = num_links * #NICs
1133
- * 3. we might have more than one address per NIC
1134
- */
1135
- size_t xfer_size = 0 ; /* real size to transfer (may differ from 'size') */
1136
- size_t current_addr = 0 ;
1137
-
1138
- if (mca_btl_tcp_component .tcp_num_btls != 0 ) {
1139
- char ifn [32 ];
1140
- mca_btl_tcp_addr_t * addrs = (mca_btl_tcp_addr_t * )malloc (size );
1141
- memset (addrs , 0 , size );
1142
-
1143
- /* here we start populating our addresses */
1144
- for ( i = 0 ; i < mca_btl_tcp_component .tcp_num_btls ; i ++ ) {
1145
- for (index = opal_ifbegin (); index >= 0 ;
1146
- index = opal_ifnext (index )) {
1147
- struct sockaddr_storage my_ss ;
1148
-
1149
- /* Look if the module's address belongs to this
1150
- * kernel IP interface. If not, go to next address.
1151
- */
1152
- if (opal_ifindextokindex (index ) !=
1153
- mca_btl_tcp_component .tcp_btls [i ]-> tcp_ifkindex ) {
1154
- continue ;
1155
- }
1156
-
1157
- opal_ifindextoname (index , ifn , sizeof (ifn ));
1158
- opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
1159
- "btl: tcp: component_exchange: examining interface %s" ,
1160
- ifn );
1161
- if (OPAL_SUCCESS !=
1162
- opal_ifindextoaddr (index , (struct sockaddr * ) & my_ss ,
1163
- sizeof (my_ss ))) {
1164
- opal_output (0 ,
1165
- "btl_tcp_component: problems getting address for index %i (kernel index %i)\n" ,
1166
- index , opal_ifindextokindex (index ));
1167
- continue ;
1168
- }
1164
+ int rc ;
1165
+ size_t i ;
1166
+ size_t num_btls = mca_btl_tcp_component .tcp_num_btls ;
1167
+ size_t size = num_btls * sizeof (mca_btl_tcp_modex_addr_t );
1168
+ mca_btl_tcp_modex_addr_t * addrs ;
1169
+
1170
+ if (num_btls <= 0 ) {
1171
+ return 0 ;
1172
+ }
1173
+
1174
+ addrs = (mca_btl_tcp_modex_addr_t * )malloc (size );
1175
+ if (NULL == addrs ) {
1176
+ return OPAL_ERR_OUT_OF_RESOURCE ;
1177
+ }
1178
+ memset (addrs , 0 , size );
1179
+
1180
+ for (i = 0 ; i < num_btls ; i ++ ) {
1181
+ struct mca_btl_tcp_module_t * btl = mca_btl_tcp_component .tcp_btls [i ];
1182
+ struct sockaddr * addr = (struct sockaddr * )& (btl -> tcp_ifaddr );
1169
1183
1170
1184
#if OPAL_ENABLE_IPV6
1171
- if ((AF_INET6 == my_ss .ss_family ) &&
1172
- (6 != mca_btl_tcp_component .tcp_disable_family )) {
1173
- memcpy (& addrs [current_addr ].addr_inet ,
1174
- & ((struct sockaddr_in6 * )& my_ss )-> sin6_addr ,
1175
- sizeof (addrs [0 ].addr_inet ));
1176
- addrs [current_addr ].addr_port =
1177
- mca_btl_tcp_component .tcp6_listen_port ;
1178
- addrs [current_addr ].addr_family = MCA_BTL_TCP_AF_INET6 ;
1179
- xfer_size += sizeof (mca_btl_tcp_addr_t );
1180
- addrs [current_addr ].addr_inuse = 0 ;
1181
- addrs [current_addr ].addr_ifkindex =
1182
- opal_ifindextokindex (index );
1183
- current_addr ++ ;
1184
- opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
1185
- "btl: tcp: component_exchange: "
1186
- "%s IPv6 %s" , ifn ,
1187
- opal_net_get_hostname ((struct sockaddr * )& my_ss ));
1188
- } else
1185
+ if (AF_INET6 == addr -> sa_family ) {
1186
+ struct sockaddr_in6 * inaddr6 = (struct sockaddr_in6 * )addr ;
1187
+
1188
+ memcpy (& addrs [i ].addr , & (inaddr6 -> sin6_addr ),
1189
+ sizeof (struct in6_addr ));
1190
+ addrs [i ].addr_port = mca_btl_tcp_component .tcp6_listen_port ;
1191
+ addrs [i ].addr_ifkindex = btl -> tcp_ifkindex ;
1192
+ addrs [i ].addr_family = MCA_BTL_TCP_AF_INET6 ;
1193
+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
1194
+ "btl: tcp: exchange: %d %d IPv6 %s" ,
1195
+ (int )i , btl -> tcp_ifkindex ,
1196
+ opal_net_get_hostname (addr ));
1197
+ } else
1189
1198
#endif
1190
- if ((AF_INET == my_ss .ss_family ) &&
1191
- (4 != mca_btl_tcp_component .tcp_disable_family )) {
1192
- memcpy (& addrs [current_addr ].addr_inet ,
1193
- & ((struct sockaddr_in * )& my_ss )-> sin_addr ,
1194
- sizeof (struct in_addr ));
1195
- addrs [current_addr ].addr_port =
1196
- mca_btl_tcp_component .tcp_listen_port ;
1197
- addrs [current_addr ].addr_family = MCA_BTL_TCP_AF_INET ;
1198
- xfer_size += sizeof (mca_btl_tcp_addr_t );
1199
- addrs [current_addr ].addr_inuse = 0 ;
1200
- addrs [current_addr ].addr_ifkindex =
1201
- opal_ifindextokindex (index );
1202
- current_addr ++ ;
1203
- opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
1204
- "btl: tcp: component_exchange: "
1205
- "%s IPv4 %s" , ifn ,
1206
- opal_net_get_hostname ((struct sockaddr * )& my_ss ));
1207
- }
1208
- } /* end of for opal_ifbegin() */
1209
- } /* end of for tcp_num_btls */
1210
- OPAL_MODEX_SEND (rc , OPAL_PMIX_GLOBAL ,
1211
- & mca_btl_tcp_component .super .btl_version ,
1212
- addrs , xfer_size );
1213
- free (addrs );
1214
- } /* end if */
1199
+ if (AF_INET == addr -> sa_family ) {
1200
+ struct sockaddr_in * inaddr = (struct sockaddr_in * )addr ;
1201
+
1202
+ memcpy (& addrs [i ].addr , & (inaddr -> sin_addr ),
1203
+ sizeof (struct in_addr ));
1204
+ addrs [i ].addr_port = mca_btl_tcp_component .tcp_listen_port ;
1205
+ addrs [i ].addr_ifkindex = btl -> tcp_ifkindex ;
1206
+ addrs [i ].addr_family = MCA_BTL_TCP_AF_INET ;
1207
+ opal_output_verbose (5 , opal_btl_base_framework .framework_output ,
1208
+ "btl: tcp: exchange: %d %d IPv4 %s" ,
1209
+ (int )i , btl -> tcp_ifkindex ,
1210
+ opal_net_get_hostname (addr ));
1211
+ } else {
1212
+ BTL_ERROR (("Unexpected address family: %d" , addr -> sa_family ));
1213
+ return OPAL_ERR_BAD_PARAM ;
1214
+ }
1215
+ }
1216
+
1217
+ OPAL_MODEX_SEND (rc , OPAL_PMIX_GLOBAL ,
1218
+ & mca_btl_tcp_component .super .btl_version ,
1219
+ addrs , size );
1220
+ free (addrs );
1221
+
1215
1222
return rc ;
1216
1223
}
1217
1224
0 commit comments