Skip to content

Commit 867e835

Browse files
committed
ofi: prevent issues with multi-plane fabrics
The OFI provider fabric_attr->name field in fi_info provides a description of the fabric. Only units whose fabric name matches can be expected to be able to communicate. Added some potentially paranoid checks of the fi_bus_attr struct for NULL and type == FI_BUS_PCI before OpenMPI proceeds to use it to compare PCI NUMA locality. The fi_bus_attr structure is defined as a union and may eventually support additional bus_type values with different attribute structures in the union. Signed-off-by: Goldman, Adam <adam.goldman@intel.com> Signed-off-by: Rimmer, Todd <todd.rimmer@intel.com>
1 parent c0e8cf0 commit 867e835

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

opal/mca/common/ofi/common_ofi.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,7 @@ static int check_provider_attr(struct fi_info *provider_info, struct fi_info *pr
426426
{
427427
/* make sure both info are the same provider and provide the same attributes */
428428
if (0 == strcmp(provider_info->fabric_attr->prov_name, provider->fabric_attr->prov_name)
429+
&& 0 == strcmp(provider_info->fabric_attr->name, provider->fabric_attr->name)
429430
&& !check_tx_attr(provider_info->tx_attr, provider->tx_attr)
430431
&& !check_rx_attr(provider_info->rx_attr, provider->rx_attr)
431432
&& !check_ep_attr(provider_info->ep_attr, provider->ep_attr)
@@ -631,7 +632,9 @@ struct fi_info *opal_mca_common_ofi_select_provider(struct fi_info *provider_lis
631632
if (!check_provider_attr(provider, current_provider)) {
632633
cpusets_match = false;
633634
#if OPAL_OFI_PCI_DATA_AVAILABLE
634-
if (NULL != current_provider->nic) {
635+
if (NULL != current_provider->nic
636+
&& NULL != current_provider->nic->bus_attr
637+
&& current_provider->nic->bus_attr->bus_type == FI_BUS_PCI) {
635638
pci = current_provider->nic->bus_attr->attr.pci;
636639
cpusets_match = compare_cpusets(opal_hwloc_topology, pci);
637640
}
@@ -666,7 +669,9 @@ struct fi_info *opal_mca_common_ofi_select_provider(struct fi_info *provider_lis
666669
}
667670

668671
#if OPAL_OFI_PCI_DATA_AVAILABLE
669-
if (NULL != provider->nic) {
672+
if (NULL != provider->nic
673+
&& NULL != current_provider->nic->bus_attr
674+
&& current_provider->nic->bus_attr->bus_type == FI_BUS_PCI) {
670675
pci = provider->nic->bus_attr->attr.pci;
671676
cpusets_match = compare_cpusets(opal_hwloc_topology, pci);
672677
}

0 commit comments

Comments
 (0)