Skip to content

Commit bf36d24

Browse files
committed
ofi: match common and MCA interfaces
Rename initialization calls to match the MCA terminology and restructure code to match the calling pattern of normal components (so during component_register, OFI components should call common_ofi_mca_register). Signed-off-by: Brian Barrett <bbarrett@amazon.com>
1 parent 348152a commit bf36d24

File tree

4 files changed

+62
-92
lines changed

4 files changed

+62
-92
lines changed

ompi/mca/mtl/ofi/mtl_ofi_component.c

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,7 @@ ompi_mtl_ofi_component_register(void)
254254
MCA_BASE_VAR_SCOPE_READONLY,
255255
&ompi_mtl_ofi.num_ofi_contexts);
256256

257-
opal_common_ofi_register_mca_variables(&mca_mtl_ofi_component.super.mtl_version);
258-
259-
return OMPI_SUCCESS;
257+
return opal_common_ofi_mca_register(&mca_mtl_ofi_component.super.mtl_version);
260258
}
261259

262260

@@ -285,7 +283,7 @@ ompi_mtl_ofi_component_open(void)
285283
"provider_exclude")) {
286284
return OMPI_ERR_NOT_AVAILABLE;
287285
}
288-
return opal_common_ofi_init();
286+
return opal_common_ofi_open();
289287
}
290288

291289
static int
@@ -302,9 +300,7 @@ ompi_mtl_ofi_component_close(void)
302300
#if OPAL_CUDA_SUPPORT
303301
mca_common_cuda_fini();
304302
#endif
305-
opal_common_ofi_mca_deregister();
306-
opal_common_ofi_fini();
307-
return OMPI_SUCCESS;
303+
return opal_common_ofi_close();
308304
}
309305

310306
int
@@ -582,8 +578,6 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
582578
int universe_size;
583579
char *univ_size_str;
584580

585-
opal_common_ofi_mca_register();
586-
587581
opal_output_verbose(1, opal_common_ofi.output,
588582
"%s:%d: mtl:ofi:provider_include = \"%s\"\n",
589583
__FILE__, __LINE__, *opal_common_ofi.prov_include);

opal/mca/btl/ofi/btl_ofi_component.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ static int validate_info(struct fi_info *info, uint64_t required_caps, char **in
123123
/* Register the MCA parameters */
124124
static int mca_btl_ofi_component_register(void)
125125
{
126+
int ret;
126127
char *msg;
127128
mca_btl_ofi_module_t *module = &mca_btl_ofi_module_template;
128129

@@ -191,27 +192,30 @@ static int mca_btl_ofi_component_register(void)
191192
/* for now we want this component to lose to the MTL. */
192193
module->super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 50;
193194

194-
opal_common_ofi_register_mca_variables(&mca_btl_ofi_component.super.btl_version);
195+
ret = opal_common_ofi_mca_register(&mca_btl_ofi_component.super.btl_version);
196+
if (OPAL_SUCCESS != ret) {
197+
return ret;
198+
}
195199

196200
return mca_btl_base_param_register(&mca_btl_ofi_component.super.btl_version, &module->super);
197201
}
198202

199203
static int mca_btl_ofi_component_open(void)
200204
{
201205
mca_btl_ofi_component.module_count = 0;
202-
return opal_common_ofi_init();
206+
return opal_common_ofi_open();
203207
}
204208

205209
/*
206210
* component cleanup - sanity checking of queue lengths
207211
*/
208212
static int mca_btl_ofi_component_close(void)
209213
{
210-
opal_common_ofi_mca_deregister();
211-
opal_common_ofi_fini();
214+
int ret;
215+
ret = opal_common_ofi_close();
212216
/* If we don't sleep, sockets provider freaks out. Ummm this is a scary comment */
213217
sleep(1);
214-
return OPAL_SUCCESS;
218+
return ret;
215219
}
216220

217221
void mca_btl_ofi_exit(void)
@@ -259,8 +263,6 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
259263
struct fi_domain_attr domain_attr = {0};
260264
uint64_t required_caps;
261265

262-
opal_common_ofi_mca_register();
263-
264266
switch (mca_btl_ofi_component.mode) {
265267

266268
case MCA_BTL_OFI_MODE_TWO_SIDED:

opal/mca/common/ofi/common_ofi.c

Lines changed: 32 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,9 @@
3737

3838
opal_common_ofi_module_t opal_common_ofi = {.prov_include = NULL,
3939
.prov_exclude = NULL,
40-
.registered = 0,
41-
.verbose = 0};
42-
40+
.output = -1};
4341
static const char default_prov_exclude_list[] = "shm,sockets,tcp,udp,rstream,usnic";
4442
static opal_mutex_t opal_common_ofi_mutex = OPAL_MUTEX_STATIC_INIT;
45-
static bool opal_common_ofi_initialized = false;
4643
static int opal_common_ofi_init_ref_cnt = 0;
4744

4845
#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
@@ -99,12 +96,11 @@ static void opal_common_ofi_mem_release_cb(void *buf, size_t length,
9996

10097
#endif /* HAVE_STRUCT_FI_OPS_MEM_MONITOR */
10198

102-
int opal_common_ofi_init(void)
99+
int opal_common_ofi_open(void)
103100
{
104101
int ret;
105102

106-
opal_common_ofi_init_ref_cnt++;
107-
if (opal_common_ofi_initialized) {
103+
if ((opal_common_ofi_init_ref_cnt++) > 0) {
108104
return OPAL_SUCCESS;
109105
}
110106
#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
@@ -146,7 +142,6 @@ int opal_common_ofi_init(void)
146142
goto err;
147143
}
148144
opal_mem_hooks_register_release(opal_common_ofi_mem_release_cb, NULL);
149-
opal_common_ofi_initialized = true;
150145

151146
return OPAL_SUCCESS;
152147
err:
@@ -157,23 +152,35 @@ int opal_common_ofi_init(void)
157152
free(opal_common_ofi_monitor);
158153
}
159154

155+
opal_common_ofi_init_ref_cnt--;
156+
160157
return OPAL_ERROR;
161158
#else
162-
opal_common_ofi_initialized = true;
163159
return OPAL_SUCCESS;
164160
#endif
165161
}
166162

167-
int opal_common_ofi_fini(void)
163+
int opal_common_ofi_close(void)
168164
{
169-
if (opal_common_ofi_initialized && !--opal_common_ofi_init_ref_cnt) {
170-
#if OPAL_OFI_IMPORT_MONITOR_SUPPORT
171-
opal_mem_hooks_unregister_release(opal_common_ofi_mem_release_cb);
172-
fi_close(opal_common_ofi_cache_fid);
173-
fi_close(&opal_common_ofi_monitor->fid);
174-
free(opal_common_ofi_monitor);
165+
int ret;
166+
167+
if ((--opal_common_ofi_init_ref_cnt) > 0) {
168+
return OPAL_SUCCESS;
169+
}
170+
171+
#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
172+
opal_mem_hooks_unregister_release(opal_common_ofi_mem_release_cb);
173+
fi_close(opal_common_ofi_cache_fid);
174+
fi_close(&opal_common_ofi_monitor->fid);
175+
free(opal_common_ofi_monitor);
175176
#endif
176-
opal_common_ofi_initialized = false;
177+
178+
if (opal_common_ofi.output != -1) {
179+
opal_output_close(opal_common_ofi.output);
180+
opal_common_ofi.output = -1;
181+
if (OPAL_SUCCESS != ret) {
182+
return ret;
183+
}
177184
}
178185

179186
return OPAL_SUCCESS;
@@ -198,11 +205,12 @@ int opal_common_ofi_is_in_list(char **list, char *item)
198205
return 0;
199206
}
200207

201-
int opal_common_ofi_register_mca_variables(const mca_base_component_t *component)
208+
int opal_common_ofi_mca_register(const mca_base_component_t *component)
202209
{
203210
static int include_index;
204211
static int exclude_index;
205212
static int verbose_index;
213+
int verbose;
206214
int param;
207215

208216
if (fi_version() < FI_VERSION(1, 0)) {
@@ -260,7 +268,7 @@ int opal_common_ofi_register_mca_variables(const mca_base_component_t *component
260268
MCA_BASE_VAR_TYPE_INT, NULL, 0,
261269
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
262270
MCA_BASE_VAR_SCOPE_LOCAL,
263-
&opal_common_ofi.verbose);
271+
&verbose);
264272
} else {
265273
verbose_index = param;
266274
}
@@ -277,32 +285,14 @@ int opal_common_ofi_register_mca_variables(const mca_base_component_t *component
277285
"verbose", 0);
278286
}
279287

280-
OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex);
281-
282-
return OPAL_SUCCESS;
283-
}
284-
285-
void opal_common_ofi_mca_register(void)
286-
{
287-
opal_common_ofi.registered++;
288-
if (opal_common_ofi.registered > 1) {
289-
opal_output_set_verbosity(opal_common_ofi.output, opal_common_ofi.verbose);
290-
return;
288+
if (opal_common_ofi.output == -1) {
289+
opal_common_ofi.output = opal_output_open(NULL);
290+
opal_output_set_verbosity(opal_common_ofi.output, verbose);
291291
}
292292

293-
opal_common_ofi.output = opal_output_open(NULL);
294-
opal_output_set_verbosity(opal_common_ofi.output, opal_common_ofi.verbose);
295-
}
293+
OPAL_THREAD_UNLOCK(&opal_common_ofi_mutex);
296294

297-
void opal_common_ofi_mca_deregister(void)
298-
{
299-
/* unregister only on last deregister */
300-
opal_common_ofi.registered--;
301-
assert(opal_common_ofi.registered >= 0);
302-
if (opal_common_ofi.registered) {
303-
return;
304-
}
305-
opal_output_close(opal_common_ofi.output);
295+
return OPAL_SUCCESS;
306296
}
307297

308298
/* check that the tx attributes match */

opal/mca/common/ofi/common_ofi.h

Lines changed: 18 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -26,46 +26,46 @@ BEGIN_C_DECLS
2626
typedef struct opal_common_ofi_module {
2727
char **prov_include;
2828
char **prov_exclude;
29-
int verbose;
30-
int registered;
3129
int output;
3230
} opal_common_ofi_module_t;
3331

3432
extern opal_common_ofi_module_t opal_common_ofi;
3533

36-
3734
/**
38-
* Register component-specialized MCA variables
35+
* Common MCA registration
3936
*
40-
* Register MCA variables common to all OFI components on behalf of
41-
* the calling component. Expected to be called during
42-
* component_register for all OFI-related components.
37+
* Common MCA registration handling. After calling this function,
38+
* \code opal_common_ofi.output will be properly initialized.
4339
*
4440
* @param component (IN) OFI component being initialized
4541
*
4642
* @returns OPAL_SUCCESS on success, OPAL error code on failure
4743
*/
48-
OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_component_t *component);
44+
OPAL_DECLSPEC int opal_common_ofi_mca_register(const mca_base_component_t *component);
4945

5046
/**
51-
* Common MCA registration
47+
* Initializes common objects for libfabric
5248
*
53-
* Common MCA registration handlinge. After calling this function,
54-
* \code opal_common_ofi.output will be properly initialized.
49+
* Initialize common libfabric interface. This should be called from
50+
* any other OFI component's component_open() call.
5551
*
56-
* @returns OPAL_SUCCESS on success, OPAL error code on failure
52+
* @note This function is not thread safe and must be called in a
53+
* serial portion of the code.
5754
*/
58-
OPAL_DECLSPEC void opal_common_ofi_mca_register(void);
55+
OPAL_DECLSPEC int opal_common_ofi_open(void);
5956

6057
/**
61-
* Common MCA cleanup
58+
* Cleans up common objects for libfabric
6259
*
63-
* Cleanup for any resources registered during \code
64-
* opal_common_ofi_mca_register().
60+
* Clean up common libfabric interface. This should be called from
61+
* any other OFI component's component_close() call. Resource cleanup
62+
* is reference counted, so any successful call to
63+
* opal_common_ofi_open() must be matched by a call to opal_common_ofi_close().
6564
*
66-
* @returns OPAL_SUCCESS on success, OPAL error code on failure
65+
* @note This function is not thread safe and must be called in a
66+
* serial portion of the code.
6767
*/
68-
OPAL_DECLSPEC void opal_common_ofi_mca_deregister(void);
68+
OPAL_DECLSPEC int opal_common_ofi_close(void);
6969

7070
/**
7171
* Search function for provider names
@@ -85,22 +85,6 @@ OPAL_DECLSPEC void opal_common_ofi_mca_deregister(void);
8585
*/
8686
OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item);
8787

88-
/**
89-
* Initializes common objects for libfabric
90-
*
91-
* @note This function is not thread safe and must be called in a
92-
* serial portion of the code.
93-
*/
94-
OPAL_DECLSPEC int opal_common_ofi_init(void);
95-
96-
/**
97-
* Cleans up common objects for libfabric
98-
*
99-
* @note This function is not thread safe and must be called in a
100-
* serial portion of the code.
101-
*/
102-
OPAL_DECLSPEC int opal_common_ofi_fini(void);
103-
10488
/**
10589
* Selects NIC (provider) based on hardware locality
10690
*

0 commit comments

Comments
 (0)