@@ -178,6 +178,7 @@ check_provider_attr(struct fi_info *provider_info,
178
178
}
179
179
}
180
180
181
+ #if OPAL_OFI_PCI_DATA_AVAILABLE
181
182
/* Check if a process and a pci device share the same cpuset
182
183
* @param (IN) pci struct fi_pci_attr pci device attributes,
183
184
* used to find hwloc object for device.
@@ -236,6 +237,7 @@ compare_cpusets(hwloc_topology_t topology, struct fi_pci_attr pci)
236
237
hwloc_bitmap_free (proc_cpuset );
237
238
return result ;
238
239
}
240
+ #endif
239
241
240
242
/* Count providers returns the number of providers present in an fi_info list
241
243
* @param (IN) provider_list struct fi_info* list of providers available
@@ -258,41 +260,56 @@ count_providers(struct fi_info* provider_list)
258
260
return num_provider ;
259
261
}
260
262
261
- /* Selects a NIC based on hardware locality to process cpuset and device BDF.
263
+ /* Selects a NIC based on hardware locality between process cpuset and device BDF.
264
+ *
265
+ * Initializes opal_hwloc_topology to access hardware topology if not previously
266
+ * initialized
267
+ *
268
+ * There are 3 main cases that this covers:
269
+ *
270
+ * 1. If the first provider passed into this function is the only valid
271
+ * provider, this provider is returned.
272
+ *
273
+ * 2. If there is more than 1 provider that matches the type of the first
274
+ * provider in the list, and the BDF data
275
+ * is available then a provider is selected based on locality of device
276
+ * cpuset and process cpuset, while trying to ensure that processes are distributed
277
+ * evenly across NICs. This has two separate cases:
278
+ *
279
+ * i. There are one or more providers local to the process:
280
+ *
281
+ * (local rank % number of providers of the same type that share the process cpuset)
282
+ * is used to select one of these providers.
283
+ *
284
+ * ii. There is no provider that is local to the process:
285
+ *
286
+ * (local rank % number of providers of the same type)
287
+ * is used to select one of these providers
288
+ *
289
+ * 3. If there is more than 1 provider of the same type in the list, and the BDF data
290
+ * is not available (the ofi version does not support fi_info.nic or the
291
+ * provider does not support BDF) then (local rank % number of providers of the same type)
292
+ * is used to select one of these providers.
262
293
*
263
- * @param provider_list (IN) struct fi_info* An initially selected
294
+ * @param provider_list (IN) struct fi_info* An initially selected
264
295
* provider NIC. The provider name and
265
296
* attributes are used to restrict NIC
266
297
* selection. This provider is returned if the
267
298
* NIC selection fails.
268
299
*
269
- * @param local_index (IN) int The local rank of the process. Used to
300
+ * @param local_index (IN) int The local rank of the process. Used to
270
301
* select one valid NIC if there is a case
271
302
* where more than one can be selected. This
272
303
* could occur when more than one provider
273
304
* shares the same cpuset as the process.
274
305
*
275
- * @param provider (OUT) struct fi_info* object with the selected
306
+ * @param provider (OUT) struct fi_info* object with the selected
276
307
* provider if the selection succeeds
277
308
* if the selection fails, returns the fi_info
278
309
* object that was initially provided.
279
310
*
280
- * If there is more than one provider that shares the same cpuset, we use
281
- * (local rank % number of valid providers that share the process cpuset)
282
- * to select one of the local providers.
283
- *
284
- * Likewise, If no providers share the same cpuset as the process, we use
285
- * (local rank % number of valid providers that share the process cpuset)
286
- * to select one of the valid providers.
287
- *
288
- * Initializes opal_hwloc_topology to access hardware topology if not previously
289
- * initialized
290
- *
291
- * If a provider does not provide a BDF, the locality can't be determined and it
292
- * is treated as though it does not share the same cpuset as the process.
293
- *
294
311
* All errors should be recoverable and will return the initially provided
295
- * provider. However, if an error occurs this will no longer guarantee
312
+ * provider. However, if an error occurs we can no longer guarantee
296
313
* that the provider returned is local to the process or that the processes will
297
314
* balance across available NICs.
298
315
*/
@@ -301,7 +318,9 @@ opal_mca_common_ofi_select_provider(struct fi_info *provider_list, int local_ind
301
318
{
302
319
struct fi_info * provider = provider_list , * current_provider = provider_list ;
303
320
struct fi_info * * provider_table ;
321
+ #if OPAL_OFI_PCI_DATA_AVAILABLE
304
322
struct fi_pci_attr pci ;
323
+ #endif
305
324
int ret ;
306
325
unsigned int num_provider = 0 , provider_limit = 0 ;
307
326
bool provider_found = false, cpusets_match = false;
@@ -310,15 +329,19 @@ opal_mca_common_ofi_select_provider(struct fi_info *provider_list, int local_ind
310
329
ret = opal_hwloc_base_get_topology ();
311
330
if (0 > ret ) {
312
331
/* Provider selection can continue but there is no guarantee of locality */
313
- opal_output (1 , "%s:%d:Failed to initialize topology\n" , __FILE__ , __LINE__ );
332
+ opal_output_verbose (1 , opal_common_ofi .output ,
333
+ "%s:%d:Failed to initialize topology\n" ,
334
+ __FILE__ , __LINE__ );
314
335
}
315
336
316
337
provider_limit = count_providers (provider_list );
317
338
318
339
/* Allocate memory for provider table */
319
340
provider_table = calloc (provider_limit , sizeof (struct fi_info * ));
320
341
if (NULL == provider_table ) {
321
- opal_output (1 , "%s:%d:Failed to allocate memory for provider table\n" , __FILE__ , __LINE__ );
342
+ opal_output_verbose (1 , opal_common_ofi .output ,
343
+ "%s:%d:Failed to allocate memory for provider table\n" ,
344
+ __FILE__ , __LINE__ );
322
345
return provider_list ;
323
346
}
324
347
@@ -328,10 +351,12 @@ opal_mca_common_ofi_select_provider(struct fi_info *provider_list, int local_ind
328
351
while (NULL != current_provider ) {
329
352
if (!check_provider_attr (provider , current_provider )) {
330
353
cpusets_match = false;
354
+ #if OPAL_OFI_PCI_DATA_AVAILABLE
331
355
if (NULL != current_provider -> nic ) {
332
356
pci = current_provider -> nic -> bus_attr -> attr .pci ;
333
357
cpusets_match = compare_cpusets (opal_hwloc_topology , pci );
334
358
}
359
+ #endif
335
360
336
361
/* Reset the list if the cpusets match and no other provider was
337
362
* found on the same cpuset as the process.
@@ -357,17 +382,20 @@ opal_mca_common_ofi_select_provider(struct fi_info *provider_list, int local_ind
357
382
provider = provider_table [local_index % num_provider ];
358
383
}
359
384
360
- #if OPAL_DEBUG_ENABLE
385
+ #if OPAL_OFI_PCI_DATA_AVAILABLE
361
386
if (NULL != provider -> nic ) {
362
387
pci = provider -> nic -> bus_attr -> attr .pci ;
363
388
cpusets_match = compare_cpusets (opal_hwloc_topology , pci );
364
389
}
390
+ #endif
365
391
366
- opal_output (10 , "local rank: %d device: %s cpusets match: %s\n" ,
367
- local_index , provider -> domain_attr -> name , cpusets_match ? "true" : "false" );
392
+ #if OPAL_DEBUG_ENABLE
393
+ opal_output_verbose (1 , opal_common_ofi .output ,
394
+ "local rank: %d device: %s cpusets match: %s\n" ,
395
+ local_index , provider -> domain_attr -> name ,
396
+ cpusets_match ? "true" : "false" );
368
397
#endif
369
398
370
- err_free_table :
371
399
free (provider_table );
372
400
return provider ;
373
401
}
0 commit comments