Skip to content

Commit 0a21a58

Browse files
authored
Merge pull request #7771 from dancejic/multi
common/ofi: Fixing compilation issue with ofi versions that do not support fi_info.nic
2 parents c074a23 + ae2a447 commit 0a21a58

File tree

2 files changed

+65
-26
lines changed

2 files changed

+65
-26
lines changed

config/opal_check_ofi.m4

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,11 @@ AC_DEFUN([_OPAL_CHECK_OFI],[
8989
OPAL_CHECK_WITHDIR([ofi-libdir], [$with_ofi_libdir],
9090
[libfabric.*])
9191

92-
OPAL_VAR_SCOPE_PUSH([opal_check_ofi_save_CPPFLAGS opal_check_ofi_save_LDFLAGS opal_check_ofi_save_LIBS])
92+
OPAL_VAR_SCOPE_PUSH([opal_check_ofi_save_CPPFLAGS opal_check_ofi_save_LDFLAGS opal_check_ofi_save_LIBS opal_check_fi_info_pci])
9393
opal_check_ofi_save_CPPFLAGS=$CPPFLAGS
9494
opal_check_ofi_save_LDFLAGS=$LDFLAGS
9595
opal_check_ofi_save_LIBS=$LIBS
96+
opal_check_fi_info_pci=0
9697

9798
opal_ofi_happy=yes
9899
AS_IF([test "$with_ofi" = "no"],
@@ -120,6 +121,16 @@ AC_DEFUN([_OPAL_CHECK_OFI],[
120121
[],
121122
[opal_ofi_happy=no])])
122123

124+
AS_IF([test $opal_ofi_happy = yes],
125+
[AC_CHECK_MEMBER([struct fi_info.nic],
126+
[opal_check_fi_info_pci=1],
127+
[opal_check_fi_info_pci=0],
128+
[[#include "$with_ofi/include/rdma/fabric.h"]])])
129+
130+
AC_DEFINE_UNQUOTED([OPAL_OFI_PCI_DATA_AVAILABLE],
131+
[$opal_check_fi_info_pci],
132+
[check if pci data is available in ofi])
133+
123134
CPPFLAGS=$opal_check_ofi_save_CPPFLAGS
124135
LDFLAGS=$opal_check_ofi_save_LDFLAGS
125136
LIBS=$opal_check_ofi_save_LIBS

opal/mca/common/ofi/common_ofi.c

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ check_provider_attr(struct fi_info *provider_info,
178178
}
179179
}
180180

181+
#if OPAL_OFI_PCI_DATA_AVAILABLE
181182
/* Check if a process and a pci device share the same cpuset
182183
* @param (IN) pci struct fi_pci_attr pci device attributes,
183184
* used to find hwloc object for device.
@@ -236,6 +237,7 @@ compare_cpusets(hwloc_topology_t topology, struct fi_pci_attr pci)
236237
hwloc_bitmap_free(proc_cpuset);
237238
return result;
238239
}
240+
#endif
239241

240242
/* Count providers returns the number of providers present in an fi_info list
241243
* @param (IN) provider_list struct fi_info* list of providers available
@@ -258,41 +260,56 @@ count_providers(struct fi_info* provider_list)
258260
return num_provider;
259261
}
260262

261-
/* Selects a NIC based on hardware locality to process cpuset and device BDF.
263+
/* Selects a NIC based on hardware locality between process cpuset and device BDF.
264+
*
265+
* Initializes opal_hwloc_topology to access hardware topology if not previously
266+
* initialized
267+
*
268+
* There are 3 main cases that this covers:
269+
*
270+
* 1. If the first provider passed into this function is the only valid
271+
* provider, this provider is returned.
272+
*
273+
* 2. If there is more than 1 provider that matches the type of the first
274+
* provider in the list, and the BDF data
275+
* is available, then a provider is selected based on locality of device
276+
* cpuset and process cpuset, while trying to ensure that processes are distributed
277+
* evenly across NICs. This has two separate cases:
278+
*
279+
* i. There are one or more providers local to the process:
280+
*
281+
* (local rank % number of providers of the same type that share the process cpuset)
282+
* is used to select one of these providers.
283+
*
284+
* ii. There is no provider that is local to the process:
285+
*
286+
* (local rank % number of providers of the same type)
287+
* is used to select one of these providers
288+
*
289+
* 3. If there is more than 1 provider of the same type in the list, and the BDF data
290+
* is not available (the ofi version does not support fi_info.nic or the
291+
* provider does not support BDF) then (local rank % number of providers of the same type)
292+
* is used to select one of these providers
262293
*
263-
* @param provider_list (IN) struct fi_info* An initially selected
294+
* @param provider_list (IN) struct fi_info* An initially selected
264295
* provider NIC. The provider name and
265296
* attributes are used to restrict NIC
266297
* selection. This provider is returned if the
267298
* NIC selection fails.
268299
*
269-
* @param local_index (IN) int The local rank of the process. Used to
300+
* @param local_index (IN) int The local rank of the process. Used to
270301
* select one valid NIC if there is a case
271302
* where more than one can be selected. This
272303
* could occur when more than one provider
273304
* shares the same cpuset as the process.
274305
*
275-
* @param provider (OUT) struct fi_info* object with the selected
306+
* @param provider (OUT) struct fi_info* object with the selected
276307
* provider if the selection succeeds
277308
* if the selection fails, returns the fi_info
278309
* object that was initially provided.
279310
*
280-
* If there is more than one provider that shares the same cpuset, we use
281-
* (local rank % number of valid providers that share the process cpuset)
282-
* to select one of the local providers.
283-
*
284-
* Likewise, If no providers share the same cpuset as the process, we use
285-
* (local rank % number of valid providers that share the process cpuset)
286-
* to select one of the valid providers.
287-
*
288-
* Initializes opal_hwloc_topology to access hardware topology if not previously
289-
* initialized
290-
*
291-
* If a provider does not provide a BDF, the locality can't be determined and it
292-
* is treated as though it does not share the same cpuset as the process.
293-
*
294311
* All errors should be recoverable and will return the initially provided
295-
* provider. However, if an error occurs this will no longer guarantee
312+
* provider. However, if an error occurs we can no longer guarantee
296313
* that the provider returned is local to the process or that the processes will
297314
* balance across available NICs.
298315
*/
@@ -301,7 +318,9 @@ opal_mca_common_ofi_select_provider(struct fi_info *provider_list, int local_ind
301318
{
302319
struct fi_info *provider = provider_list, *current_provider = provider_list;
303320
struct fi_info **provider_table;
321+
#if OPAL_OFI_PCI_DATA_AVAILABLE
304322
struct fi_pci_attr pci;
323+
#endif
305324
int ret;
306325
unsigned int num_provider = 0, provider_limit = 0;
307326
bool provider_found = false, cpusets_match = false;
@@ -310,15 +329,19 @@ opal_mca_common_ofi_select_provider(struct fi_info *provider_list, int local_ind
310329
ret = opal_hwloc_base_get_topology();
311330
if (0 > ret) {
312331
/* Provider selection can continue but there is no guarantee of locality */
313-
opal_output(1, "%s:%d:Failed to initialize topology\n", __FILE__, __LINE__);
332+
opal_output_verbose(1, opal_common_ofi.output,
333+
"%s:%d:Failed to initialize topology\n",
334+
__FILE__, __LINE__);
314335
}
315336

316337
provider_limit = count_providers(provider_list);
317338

318339
/* Allocate memory for provider table */
319340
provider_table = calloc(provider_limit, sizeof(struct fi_info*));
320341
if (NULL == provider_table) {
321-
opal_output(1, "%s:%d:Failed to allocate memory for provider table\n", __FILE__, __LINE__);
342+
opal_output_verbose(1, opal_common_ofi.output,
343+
"%s:%d:Failed to allocate memory for provider table\n",
344+
__FILE__, __LINE__);
322345
return provider_list;
323346
}
324347

@@ -328,10 +351,12 @@ opal_mca_common_ofi_select_provider(struct fi_info *provider_list, int local_ind
328351
while (NULL != current_provider) {
329352
if (!check_provider_attr(provider, current_provider)) {
330353
cpusets_match = false;
354+
#if OPAL_OFI_PCI_DATA_AVAILABLE
331355
if (NULL != current_provider->nic) {
332356
pci = current_provider->nic->bus_attr->attr.pci;
333357
cpusets_match = compare_cpusets(opal_hwloc_topology, pci);
334358
}
359+
#endif
335360

336361
/* Reset the list if the cpusets match and no other provider was
337362
* found on the same cpuset as the process.
@@ -357,17 +382,20 @@ opal_mca_common_ofi_select_provider(struct fi_info *provider_list, int local_ind
357382
provider = provider_table[local_index % num_provider];
358383
}
359384

360-
#if OPAL_DEBUG_ENABLE
385+
#if OPAL_OFI_PCI_DATA_AVAILABLE
361386
if (NULL != provider->nic) {
362387
pci = provider->nic->bus_attr->attr.pci;
363388
cpusets_match = compare_cpusets(opal_hwloc_topology, pci);
364389
}
390+
#endif
365391

366-
opal_output(10, "local rank: %d device: %s cpusets match: %s\n",
367-
local_index, provider->domain_attr->name, cpusets_match ? "true" : "false");
392+
#if OPAL_DEBUG_ENABLE
393+
opal_output_verbose(1, opal_common_ofi.output,
394+
"local rank: %d device: %s cpusets match: %s\n",
395+
local_index, provider->domain_attr->name,
396+
cpusets_match ? "true" : "false");
368397
#endif
369398

370-
err_free_table:
371399
free(provider_table);
372400
return provider;
373401
}

0 commit comments

Comments
 (0)