@@ -41,12 +41,21 @@ opal_common_ofi_module_t opal_common_ofi = {.prov_include = NULL,
41
41
static const char default_prov_exclude_list [] = "shm,sockets,tcp,udp,rstream,usnic" ;
42
42
static opal_mutex_t opal_common_ofi_mutex = OPAL_MUTEX_STATIC_INIT ;
43
43
static int opal_common_ofi_init_ref_cnt = 0 ;
44
+ static bool opal_common_ofi_installed_memory_monitor = false;
44
45
45
46
#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
46
47
47
48
/*
48
- * These no-op functions are necessary since libfabric does not allow null
49
- * function pointers here.
49
+ * Monitor object to export into Libfabric to provide memory release
50
+ * notifications using our own memory hooks framework. Monitors may
51
+ * use the subscribe/unsubscribe notifications to reduce unnecessary
52
+ * notifications, but are not required to do so. Because patcher
53
+ * notifies about all releases, it is cheaper for us to not filter and
54
+ * this monitor can safely ignore subscribe/unsubscribe notifications.
55
+ *
56
+ * Libfabric requires the object to be fully defined. Unlike most of
57
+ * Open MPI, it does not have NULL function pointer checks in calling
58
+ * code.
50
59
*/
51
60
static int opal_common_ofi_monitor_start (struct fid_mem_monitor * monitor )
52
61
{
@@ -76,8 +85,8 @@ static bool opal_common_ofi_monitor_valid(struct fid_mem_monitor *monitor,
76
85
return true;
77
86
}
78
87
79
- static struct fid_mem_monitor * opal_common_ofi_monitor ;
80
- static struct fid * opal_common_ofi_cache_fid ;
88
+ static struct fid_mem_monitor * opal_common_ofi_monitor = NULL ;
89
+ static struct fid * opal_common_ofi_cache_fid = NULL ;
81
90
static struct fi_ops_mem_monitor opal_common_ofi_export_ops = {
82
91
.size = sizeof (struct fi_ops_mem_monitor ),
83
92
.start = opal_common_ofi_monitor_start ,
@@ -87,6 +96,12 @@ static struct fi_ops_mem_monitor opal_common_ofi_export_ops = {
87
96
.valid = opal_common_ofi_monitor_valid ,
88
97
};
89
98
99
+ /**
100
+ * Callback function from Open MPI memory monitor
101
+ *
102
+ * Translates the callback from Open MPI's memory notifier into the
103
+ * form expected by the Libfabric memory monitor.
104
+ */
90
105
static void opal_common_ofi_mem_release_cb (void * buf , size_t length ,
91
106
void * cbdata , bool from_alloc )
92
107
{
@@ -96,68 +111,110 @@ static void opal_common_ofi_mem_release_cb(void *buf, size_t length,
96
111
97
112
#endif /* HAVE_STRUCT_FI_OPS_MEM_MONITOR */
98
113
99
- int opal_common_ofi_open (void )
114
+ int opal_common_ofi_export_memory_monitor (void )
100
115
{
101
- int ret ;
116
+ int ret = - FI_ENOSYS ;
102
117
103
- if ((opal_common_ofi_init_ref_cnt ++ ) > 0 ) {
104
- return OPAL_SUCCESS ;
105
- }
106
118
#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
119
+ OPAL_THREAD_LOCK (& opal_common_ofi_mutex );
120
+
121
+ if (NULL != opal_common_ofi_cache_fid ) {
122
+ return 0 ;
123
+ }
124
+
125
+ /*
126
+ * While the memory import functionality was introduced in 1.13,
127
+ * some deadlock bugs exist in the 1.13 series. Require version
128
+ * 1.14 before this code is activated. Not activating the code
129
+ * should not break any functionality directly, but may lead to
130
+ * sub-optimal memory monitors being used in Libfabric, as Open
131
+ * MPI will almost certainly install a patcher first.
132
+ */
133
+ if (FI_VERSION_LT (fi_version (), FI_VERSION (1 , 14 ))) {
134
+ ret = - FI_ENOSYS ;
135
+ goto err ;
136
+ }
107
137
108
- mca_base_framework_open (& opal_memory_base_framework , 0 );
138
+ ret = mca_base_framework_open (& opal_memory_base_framework , 0 );
139
+ if (OPAL_SUCCESS != ret ) {
140
+ ret = - FI_ENOSYS ;
141
+ goto err ;
142
+ }
109
143
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT )
110
144
!= (((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT ))
111
145
& opal_mem_hooks_support_level ())) {
112
- return OPAL_SUCCESS ;
146
+ ret = - FI_ENOSYS ;
147
+ goto err ;
113
148
}
114
149
115
150
/*
116
- * This cache object doesn't do much, but is necessary for the API to work.
117
- * It is required to call the fi_import_fid API. This API was introduced in
118
- * libfabric version 1.13.0 and "mr_cache" is a "well known" name (documented
119
- * in libfabric) to indicate the type of object that we are trying to open.
151
+ * The monitor import object has the well known name "mr_cache"
152
+ * and was introduced in Libfabric 1.13
120
153
*/
121
- ret = fi_open (FI_VERSION (1 ,13 ), "mr_cache" , NULL , 0 , 0 , & opal_common_ofi_cache_fid , NULL );
122
- if (ret ) {
154
+ ret = fi_open (FI_VERSION (1 ,13 ), "mr_cache" , NULL , 0 , 0 ,
155
+ & opal_common_ofi_cache_fid , NULL );
156
+ if (0 != ret ) {
123
157
goto err ;
124
158
}
125
159
126
160
opal_common_ofi_monitor = calloc (1 , sizeof (* opal_common_ofi_monitor ));
127
- if (!opal_common_ofi_monitor ) {
161
+ if (NULL == opal_common_ofi_monitor ) {
162
+ ret = - FI_ENOMEM ;
128
163
goto err ;
129
164
}
130
165
131
166
opal_common_ofi_monitor -> fid .fclass = FI_CLASS_MEM_MONITOR ;
132
167
opal_common_ofi_monitor -> export_ops = & opal_common_ofi_export_ops ;
133
- /*
134
- * This import_fid call must occur before the libfabric provider creates
135
- * its memory registration cache. This will typically occur during domain
136
- * open as it is a domain level object. We put it early in initialization
137
- * to guarantee this and share the import monitor between the ofi btl
138
- * and ofi mtl.
139
- */
140
- ret = fi_import_fid (opal_common_ofi_cache_fid , & opal_common_ofi_monitor -> fid , 0 );
141
- if (ret ) {
168
+ ret = fi_import_fid (opal_common_ofi_cache_fid ,
169
+ & opal_common_ofi_monitor -> fid , 0 );
170
+ if (0 != ret ) {
142
171
goto err ;
143
172
}
144
173
opal_mem_hooks_register_release (opal_common_ofi_mem_release_cb , NULL );
174
+ opal_common_ofi_installed_memory_monitor = true;
175
+
176
+ ret = 0 ;
145
177
146
- return OPAL_SUCCESS ;
147
178
err :
148
- if (opal_common_ofi_cache_fid ) {
149
- fi_close (opal_common_ofi_cache_fid );
179
+ if (0 != ret ) {
180
+ if (NULL != opal_common_ofi_cache_fid ) {
181
+ fi_close (opal_common_ofi_cache_fid );
182
+ }
183
+ if (NULL != opal_common_ofi_monitor ) {
184
+ free (opal_common_ofi_monitor );
185
+ }
150
186
}
151
- if (opal_common_ofi_monitor ) {
187
+
188
+ opal_common_ofi_installed_memory_monitor = false;
189
+
190
+ OPAL_THREAD_UNLOCK (& opal_common_ofi_mutex );
191
+ #endif
192
+
193
+ return ret ;
194
+ }
195
+
196
+ static int opal_common_ofi_remove_memory_monitor (void )
197
+ {
198
+ #ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
199
+ if (opal_common_ofi_installed_memory_monitor ) {
200
+ opal_mem_hooks_unregister_release (opal_common_ofi_mem_release_cb );
201
+ fi_close (opal_common_ofi_cache_fid );
202
+ fi_close (& opal_common_ofi_monitor -> fid );
152
203
free (opal_common_ofi_monitor );
204
+ opal_common_ofi_installed_memory_monitor = false;
153
205
}
206
+ #endif
154
207
155
- opal_common_ofi_init_ref_cnt -- ;
208
+ return OPAL_SUCCESS ;
209
+ }
210
+
211
+ int opal_common_ofi_open (void )
212
+ {
213
+ if ((opal_common_ofi_init_ref_cnt ++ ) > 0 ) {
214
+ return OPAL_SUCCESS ;
215
+ }
156
216
157
- return OPAL_ERROR ;
158
- #else
159
217
return OPAL_SUCCESS ;
160
- #endif
161
218
}
162
219
163
220
int opal_common_ofi_close (void )
@@ -168,14 +225,12 @@ int opal_common_ofi_close(void)
168
225
return OPAL_SUCCESS ;
169
226
}
170
227
171
- #ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
172
- opal_mem_hooks_unregister_release (opal_common_ofi_mem_release_cb );
173
- fi_close (opal_common_ofi_cache_fid );
174
- fi_close (& opal_common_ofi_monitor -> fid );
175
- free (opal_common_ofi_monitor );
176
- #endif
228
+ ret = opal_common_ofi_remove_memory_monitor ();
229
+ if (OPAL_SUCCESS != ret ) {
230
+ return ret ;
231
+ }
177
232
178
- if (opal_common_ofi . output != -1 ) {
233
+ if (-1 != opal_common_ofi . output ) {
179
234
opal_output_close (opal_common_ofi .output );
180
235
opal_common_ofi .output = -1 ;
181
236
if (OPAL_SUCCESS != ret ) {
0 commit comments