Skip to content

Commit 348152a

Browse files
committed
ofi: Update OFI common documentation
The documentation in the header file for the ofi common code had been neglected, so show some love. Also add some helpful comments in understanding the memory monitor injection code. Signed-off-by: Brian Barrett <bbarrett@amazon.com>
1 parent 947b3fe commit 348152a

File tree

2 files changed

+118
-62
lines changed

2 files changed

+118
-62
lines changed

opal/mca/common/ofi/common_ofi.c

Lines changed: 22 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -47,24 +47,32 @@ static int opal_common_ofi_init_ref_cnt = 0;
4747

4848
#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
4949

50+
/*
51+
* These no-op functions are necessary since libfabric does not allow null
52+
* function pointers here.
53+
*/
5054
static int opal_common_ofi_monitor_start(struct fid_mem_monitor *monitor)
5155
{
5256
return 0;
5357
}
58+
5459
static void opal_common_ofi_monitor_stop(struct fid_mem_monitor *monitor)
5560
{
5661
return;
5762
}
63+
5864
static int opal_common_ofi_monitor_subscribe(struct fid_mem_monitor *monitor,
5965
const void *addr, size_t len)
6066
{
6167
return 0;
6268
}
69+
6370
static void opal_common_ofi_monitor_unsubscribe(struct fid_mem_monitor *monitor,
6471
const void *addr, size_t len)
6572
{
6673
return;
6774
}
75+
6876
static bool opal_common_ofi_monitor_valid(struct fid_mem_monitor *monitor,
6977
const void *addr, size_t len)
7078
{
@@ -88,6 +96,7 @@ static void opal_common_ofi_mem_release_cb(void *buf, size_t length,
8896
opal_common_ofi_monitor->import_ops->notify(opal_common_ofi_monitor,
8997
buf, length);
9098
}
99+
91100
#endif /* HAVE_STRUCT_FI_OPS_MEM_MONITOR */
92101

93102
int opal_common_ofi_init(void)
@@ -107,6 +116,12 @@ int opal_common_ofi_init(void)
107116
return OPAL_SUCCESS;
108117
}
109118

119+
/*
120+
* This cache object doesn't do much, but is necessary for the API to work.
121+
* It is required to call the fi_import_fid API. This API was introduced in
122+
* libfabric version 1.13.0 and "mr_cache" is a "well known" name (documented
123+
* in libfabric) to indicate the type of object that we are trying to open.
124+
*/
110125
ret = fi_open(FI_VERSION(1,13), "mr_cache", NULL, 0, 0, &opal_common_ofi_cache_fid, NULL);
111126
if (ret) {
112127
goto err;
@@ -119,6 +134,13 @@ int opal_common_ofi_init(void)
119134

120135
opal_common_ofi_monitor->fid.fclass = FI_CLASS_MEM_MONITOR;
121136
opal_common_ofi_monitor->export_ops = &opal_common_ofi_export_ops;
137+
/*
138+
* This import_fid call must occur before the libfabric provider creates
139+
* its memory registration cache. This will typically occur during domain
140+
* open as it is a domain level object. We put it early in initialization
141+
* to guarantee this and share the import monitor between the ofi btl
142+
* and ofi mtl.
143+
*/
122144
ret = fi_import_fid(opal_common_ofi_cache_fid, &opal_common_ofi_monitor->fid, 0);
123145
if (ret) {
124146
goto err;
@@ -491,61 +513,6 @@ static uint32_t get_package_rank(opal_process_info_t *process_info)
491513
return (uint32_t) package_ranks[process_info->my_local_rank];
492514
}
493515

494-
/* Selects a NIC based on hardware locality between process cpuset and device BDF.
495-
*
496-
* Initializes opal_hwloc_topology to access hardware topology if not previously
497-
* initialized
498-
*
499-
* There are 3 main cases that this covers:
500-
*
501-
* 1. If the first provider passed into this function is the only valid
502-
* provider, this provider is returned.
503-
*
504-
* 2. If there is more than 1 provider that matches the type of the first
505-
* provider in the list, and the BDF data
506-
* is available then a provider is selected based on locality of device
507-
* cpuset and process cpuset and tries to ensure that processes are distributed
508-
* evenly across NICs. This has two separate cases:
509-
*
510-
* i. There is one or more provider local to the process:
511-
*
512-
* (local rank % number of providers of the same type that share the process cpuset)
513-
* is used to select one of these providers.
514-
*
515-
* ii. There is no provider that is local to the process:
516-
*
517-
* (local rank % number of providers of the same type)
518-
* is used to select one of these providers
519-
*
520-
* 3. If there is more than 1 providers of the same type in the list, and the BDF data
521-
* is not available (the ofi version does not support fi_info.nic or the
522-
* provider does not support BDF) then (local rank % number of providers of the same type)
523-
* is used to select one of these providers
524-
*
525-
* @param provider_list (IN) struct fi_info* An initially selected
526-
* provider NIC. The provider name and
527-
* attributes are used to restrict NIC
528-
* selection. This provider is returned if the
529-
* NIC selection fails.
530-
*
531-
* @param package_rank (IN) uint32_t The rank of the process. Used to
532-
* select one valid NIC if there is a case
533-
* where more than one can be selected. This
534-
* could occur when more than one provider
535-
* shares the same cpuset as the process.
536-
* This could either be a package_rank if one is
537-
* successfully calculated, or the process id.
538-
*
539-
* @param provider (OUT) struct fi_info* object with the selected
540-
* provider if the selection succeeds
541-
* if the selection fails, returns the fi_info
542-
* object that was initially provided.
543-
*
544-
* All errors should be recoverable and will return the initially provided
545-
* provider. However, if an error occurs we can no longer guarantee
546-
* that the provider returned is local to the process or that the processes will
547-
* balance across available NICs.
548-
*/
549516
struct fi_info *opal_mca_common_ofi_select_provider(struct fi_info *provider_list,
550517
opal_process_info_t *process_info)
551518
{

opal/mca/common/ofi/common_ofi.h

Lines changed: 96 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,47 @@ typedef struct opal_common_ofi_module {
3333

3434
extern opal_common_ofi_module_t opal_common_ofi;
3535

36+
37+
/**
38+
* Register component-specialized MCA variables
39+
*
40+
* Register MCA variables common to all OFI components on behalf of
41+
* the calling component. Expected to be called during
42+
* component_register for all OFI-related components.
43+
*
44+
* @param component (IN) OFI component being initialized
45+
*
46+
* @returns OPAL_SUCCESS on success, OPAL error code on failure
47+
*/
3648
OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_component_t *component);
49+
50+
/**
51+
* Common MCA registration
52+
*
53+
* Common MCA registration handling. After calling this function,
54+
* \c opal_common_ofi.output will be properly initialized.
55+
*
56+
* @note This function is declared void and does not return a status code.
57+
*/
3758
OPAL_DECLSPEC void opal_common_ofi_mca_register(void);
59+
60+
/**
61+
* Common MCA cleanup
62+
*
63+
* Cleanup for any resources registered during \c
64+
* opal_common_ofi_mca_register().
65+
*
66+
* @note This function is declared void and does not return a status code.
67+
*/
3868
OPAL_DECLSPEC void opal_common_ofi_mca_deregister(void);
3969

40-
/*
70+
/**
71+
* Search function for provider names
72+
*
73+
* This function will take a provider name string and a list of lower
74+
* provider name strings as inputs. It will return true if the lower
75+
* provider in the item string matches a lower provider in the list.
76+
*
4177
* @param list (IN) List of strings corresponding to lower providers.
4278
* @param item (IN) Single string corresponding to a provider.
4379
*
@@ -46,23 +82,76 @@ OPAL_DECLSPEC void opal_common_ofi_mca_deregister(void);
4682
* @return 1 The lower provider of the item string matches
4783
* a string in the item list.
4884
*
49-
* This function will take a provider name string and a list of lower
50-
* provider name strings as inputs. It will return true if the lower
51-
* provider in the item string matches a lower provider in the list.
52-
*
5385
*/
5486
OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item);
5587

56-
/*
88+
/**
5789
* Initializes common objects for libfabric
90+
*
91+
* @note This function is not thread safe and must be called in a
92+
* serial portion of the code.
5893
*/
5994
OPAL_DECLSPEC int opal_common_ofi_init(void);
6095

61-
/*
96+
/**
6297
* Cleans up common objects for libfabric
98+
*
99+
* @note This function is not thread safe and must be called in a
100+
* serial portion of the code.
63101
*/
64102
OPAL_DECLSPEC int opal_common_ofi_fini(void);
65103

104+
/**
105+
* Selects NIC (provider) based on hardware locality
106+
*
107+
* In multi-nic situations, use hardware topology to pick the "best"
108+
* of the selected NICs.
109+
* There are 3 main cases that this covers:
110+
*
111+
* 1. If the first provider passed into this function is the only valid
112+
* provider, this provider is returned.
113+
*
114+
* 2. If there is more than 1 provider that matches the type of the first
115+
* provider in the list, and the BDF data
116+
* is available then a provider is selected based on locality of device
117+
* cpuset and process cpuset and tries to ensure that processes
118+
* are distributed evenly across NICs. This has two separate
119+
* cases:
120+
*
121+
* i. There are one or more providers local to the process:
122+
*
123+
* (local rank % number of providers of the same type
124+
* that share the process cpuset) is used to select one
125+
* of these providers.
126+
*
127+
* ii. There is no provider that is local to the process:
128+
*
129+
* (local rank % number of providers of the same type)
130+
* is used to select one of these providers
131+
*
132+
* 3. If there is more than 1 provider of the same type in the
133+
* list, and the BDF data is not available (the ofi version does
134+
* not support fi_info.nic or the provider does not support BDF)
135+
* then (local rank % number of providers of the same type) is
136+
* used to select one of these providers
137+
*
138+
* @param provider_list (IN) struct fi_info* An initially selected
139+
* provider NIC. The provider name and
140+
* attributes are used to restrict NIC
141+
* selection. This provider is returned if the
142+
* NIC selection fails.
143+
*
144+
* @return struct fi_info* object with the selected
145+
* provider if the selection succeeds;
146+
* if the selection fails, returns the fi_info
147+
* object that was initially provided.
148+
*
149+
* All errors should be recoverable and will return the initially provided
150+
* provider. However, if an error occurs we can no longer guarantee
151+
* that the provider returned is local to the process or that the processes will
152+
* balance across available NICs.
153+
*
154+
*/
66155
OPAL_DECLSPEC struct fi_info *opal_mca_common_ofi_select_provider(struct fi_info *provider_list,
67156
opal_process_info_t *process_info);
68157

0 commit comments

Comments
 (0)