@@ -47,24 +47,32 @@ static int opal_common_ofi_init_ref_cnt = 0;
47
47
48
48
#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
49
49
50
+ /*
51
+ * These no-op functions are necessary since libfabric does not allow null
52
+ * function pointers here.
53
+ */
50
54
/**
 * No-op "start" operation for a struct fid_mem_monitor.
 *
 * Intentionally does nothing: libfabric does not allow a null function
 * pointer for this operation, so a stub has to be provided.
 *
 * @param monitor  Memory monitor the operation is invoked on; never
 *                 dereferenced.
 *
 * @return Always 0.
 */
static int opal_common_ofi_monitor_start(struct fid_mem_monitor *monitor)
{
    (void) monitor; /* deliberately unused */
    return 0;
}
58
+
54
59
/**
 * No-op "stop" operation for a struct fid_mem_monitor.
 *
 * Intentionally does nothing: libfabric does not allow a null function
 * pointer for this operation, so a stub has to be provided.
 *
 * @param monitor  Memory monitor the operation is invoked on; never
 *                 dereferenced.
 */
static void opal_common_ofi_monitor_stop(struct fid_mem_monitor *monitor)
{
    (void) monitor; /* deliberately unused */
}
63
+
58
64
/**
 * No-op "subscribe" operation for a struct fid_mem_monitor.
 *
 * Intentionally does nothing: libfabric does not allow a null function
 * pointer for this operation, so a stub has to be provided.
 *
 * @param monitor  Memory monitor the operation is invoked on; never
 *                 dereferenced.
 * @param addr     Start of the address range; ignored.
 * @param len      Length of the address range; ignored.
 *
 * @return Always 0.
 */
static int opal_common_ofi_monitor_subscribe(struct fid_mem_monitor *monitor,
                                             const void *addr, size_t len)
{
    /* All arguments are deliberately unused. */
    (void) monitor;
    (void) addr;
    (void) len;
    return 0;
}
69
+
63
70
/**
 * No-op "unsubscribe" operation for a struct fid_mem_monitor.
 *
 * Intentionally does nothing: libfabric does not allow a null function
 * pointer for this operation, so a stub has to be provided.
 *
 * @param monitor  Memory monitor the operation is invoked on; never
 *                 dereferenced.
 * @param addr     Start of the address range; ignored.
 * @param len      Length of the address range; ignored.
 */
static void opal_common_ofi_monitor_unsubscribe(struct fid_mem_monitor *monitor,
                                                const void *addr, size_t len)
{
    /* All arguments are deliberately unused. */
    (void) monitor;
    (void) addr;
    (void) len;
}
75
+
68
76
static bool opal_common_ofi_monitor_valid (struct fid_mem_monitor * monitor ,
69
77
const void * addr , size_t len )
70
78
{
@@ -88,6 +96,7 @@ static void opal_common_ofi_mem_release_cb(void *buf, size_t length,
88
96
opal_common_ofi_monitor -> import_ops -> notify (opal_common_ofi_monitor ,
89
97
buf , length );
90
98
}
99
+
91
100
#endif /* HAVE_STRUCT_FI_OPS_MEM_MONITOR */
92
101
93
102
int opal_common_ofi_init (void )
@@ -107,6 +116,12 @@ int opal_common_ofi_init(void)
107
116
return OPAL_SUCCESS ;
108
117
}
109
118
119
+ /*
120
+ * This cache object doesn't do much, but is necessary for the API to work.
121
+ * It is required to call the fi_import_fid API. This API was introduced in
122
+ * libfabric version 1.13.0 and "mr_cache" is a "well known" name (documented
123
+ * in libfabric) to indicate the type of object that we are trying to open.
124
+ */
110
125
ret = fi_open (FI_VERSION (1 ,13 ), "mr_cache" , NULL , 0 , 0 , & opal_common_ofi_cache_fid , NULL );
111
126
if (ret ) {
112
127
goto err ;
@@ -119,6 +134,13 @@ int opal_common_ofi_init(void)
119
134
120
135
opal_common_ofi_monitor -> fid .fclass = FI_CLASS_MEM_MONITOR ;
121
136
opal_common_ofi_monitor -> export_ops = & opal_common_ofi_export_ops ;
137
+ /*
138
+ * This import_fid call must occur before the libfabric provider creates
139
+ * its memory registration cache. This will typically occur during domain
140
+ * open as it is a domain level object. We put it early in initialization
141
+ * to guarantee this and share the import monitor between the ofi btl
142
+ * and ofi mtl.
143
+ */
122
144
ret = fi_import_fid (opal_common_ofi_cache_fid , & opal_common_ofi_monitor -> fid , 0 );
123
145
if (ret ) {
124
146
goto err ;
@@ -491,61 +513,6 @@ static uint32_t get_package_rank(opal_process_info_t *process_info)
491
513
return (uint32_t ) package_ranks [process_info -> my_local_rank ];
492
514
}
493
515
494
- /* Selects a NIC based on hardware locality between process cpuset and device BDF.
495
- *
496
- * Initializes opal_hwloc_topology to access hardware topology if not previously
497
- * initialized
498
- *
499
- * There are 3 main cases that this covers:
500
- *
501
- * 1. If the first provider passed into this function is the only valid
502
- * provider, this provider is returned.
503
- *
504
- * 2. If there is more than 1 provider that matches the type of the first
505
- * provider in the list, and the BDF data
506
- * is available then a provider is selected based on locality of device
507
- * cpuset and process cpuset and tries to ensure that processes are distributed
508
- * evenly across NICs. This has two separate cases:
509
- *
510
- * i. There are one or more providers local to the process:
511
- *
512
- * (local rank % number of providers of the same type that share the process cpuset)
513
- * is used to select one of these providers.
514
- *
515
- * ii. There is no provider that is local to the process:
516
- *
517
- * (local rank % number of providers of the same type)
518
- * is used to select one of these providers
519
- *
520
- * 3. If there is more than 1 provider of the same type in the list, and the BDF data
521
- * is not available (the ofi version does not support fi_info.nic or the
522
- * provider does not support BDF) then (local rank % number of providers of the same type)
523
- * is used to select one of these providers
524
- *
525
- * @param provider_list (IN) struct fi_info* An initially selected
526
- * provider NIC. The provider name and
527
- * attributes are used to restrict NIC
528
- * selection. This provider is returned if the
529
- * NIC selection fails.
530
- *
531
- * @param package_rank (IN) uint32_t The rank of the process. Used to
532
- * select one valid NIC if there is a case
533
- * where more than one can be selected. This
534
- * could occur when more than one provider
535
- * shares the same cpuset as the process.
536
- * This could either be a package_rank if one is
537
- * successfully calculated, or the process id.
538
- *
539
- * @param provider (OUT) struct fi_info* object with the selected
540
- * provider if the selection succeeds
541
- * if the selection fails, returns the fi_info
542
- * object that was initially provided.
543
- *
544
- * All errors should be recoverable and will return the initially provided
545
- * provider. However, if an error occurs we can no longer guarantee
546
- * that the provider returned is local to the process or that the processes will
547
- * balance across available NICs.
548
- */
549
516
struct fi_info * opal_mca_common_ofi_select_provider (struct fi_info * provider_list ,
550
517
opal_process_info_t * process_info )
551
518
{
0 commit comments