Skip to content

Commit 5ed3871

Browse files
committed
btl/uct: complete rework of discovery and initialization code
There is an issue with btl/uct which prevents the usage of the standard btl_uct_ MCA variables (eager limit, flags, etc.). Because of the way the btl was written, these values are all determined directly from UCT and cannot be changed using the MCA variable interface. To address this issue, this commit breaks apart the initialization code and separates out the pieces that are necessary for discovery only. The discovery pieces now use a new set of variables that include the memory domain name and directly control the behavior for BTLs on that memory domain, as well as enabling the usage of the btl_uct_ variables to control the defaults for these variables. For example, using memory domain irdma0 will create the variables btl_uct_irdma0_eager_limit, btl_uct_irdma0_max_send_size, etc. The defaults will be based on what is reported by UCT, and the user can set the values to a subset of what UCT reports. For example, if the max send size for the hardware is 8192B, then it can be set to anything up to and including that value. The same is true for feature flags: if the hardware supports only some btl atomics or operations, the user can specify a subset of them (others will be ignored). Signed-off-by: Nathan Hjelm <hjelmn@google.com>
1 parent 34ae4a8 commit 5ed3871

14 files changed

+1157
-798
lines changed

opal/mca/btl/uct/Makefile.am

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# Copyright (c) 2017 IBM Corporation. All rights reserved.
1414
# Copyright (c) 2017-2018 Los Alamos National Security, LLC. All rights
1515
# reserved.
16+
# Copyright (c) 2025 Google, LLC. All rights reserved.
1617
# $COPYRIGHT$
1718
#
1819
# Additional copyrights may follow
@@ -24,22 +25,30 @@ AM_CPPFLAGS = $(btl_uct_CPPFLAGS)
2425

2526
amca_paramdir = $(AMCA_PARAM_SETS_DIR)
2627

27-
sources = \
28+
headers = \
2829
btl_uct.h \
30+
btl_uct_rdma.h \
31+
btl_uct_endpoint.h \
32+
btl_uct_am.h \
33+
btl_uct_frag.h \
34+
btl_uct_types.h \
35+
btl_uct_device_context.h \
36+
btl_uct_discover.h \
37+
btl_uct_modex.h \
38+
btl_uct_include_list.h
39+
40+
sources = \
2941
btl_uct_module.c \
3042
btl_uct_component.c \
31-
btl_uct_rdma.h \
3243
btl_uct_rdma.c \
33-
btl_uct_endpoint.h \
3444
btl_uct_endpoint.c \
3545
btl_uct_amo.c \
36-
btl_uct_am.h \
3746
btl_uct_am.c \
38-
btl_uct_frag.h \
3947
btl_uct_frag.c \
4048
btl_uct_tl.c \
41-
btl_uct_types.h \
42-
btl_uct_device_context.h
49+
btl_uct_discover.c \
50+
btl_uct_modex.c \
51+
btl_uct_include_list.c
4352

4453
# Make the output library in this directory, and name it either
4554
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
@@ -50,20 +59,22 @@ lib =
5059
lib_sources =
5160
component = mca_btl_uct.la
5261
component_sources = $(sources)
62+
component_headers = $(headers)
5363
else
5464
lib = libmca_btl_uct.la
5565
lib_sources = $(sources)
66+
lib_headers = ${headers}
5667
component =
5768
component_sources =
5869
endif
5970

6071
mcacomponentdir = $(opallibdir)
6172
mcacomponent_LTLIBRARIES = $(component)
62-
mca_btl_uct_la_SOURCES = $(component_sources)
73+
mca_btl_uct_la_SOURCES = $(component_sources) $(component_headers)
6374
mca_btl_uct_la_LDFLAGS = -module -avoid-version $(btl_uct_LDFLAGS)
6475
mca_btl_uct_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_NAME@.la $(btl_uct_LIBS)
6576

6677
noinst_LTLIBRARIES = $(lib)
67-
libmca_btl_uct_la_SOURCES = $(lib_sources)
78+
libmca_btl_uct_la_SOURCES = $(lib_sources) $(lib_headers)
6879
libmca_btl_uct_la_LDFLAGS = -module -avoid-version $(btl_uct_LDFLAGS)
6980
libmca_btl_uct_la_LIBADD = $(btl_uct_LIBS)

opal/mca/btl/uct/btl_uct.h

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,6 @@ struct mca_btl_uct_module_t {
8585
/** transport for RDMA/AMOs */
8686
mca_btl_uct_tl_t *rdma_tl;
8787

88-
/** transport for forming connections (if needed) */
89-
mca_btl_uct_tl_t *conn_tl;
90-
91-
/** array containing the am_tl and rdma_tl */
92-
mca_btl_uct_tl_t *comm_tls[2];
93-
9488
/** registration cache */
9589
mca_rcache_base_module_t *rcache;
9690

@@ -111,6 +105,10 @@ struct mca_btl_uct_module_t {
111105

112106
/** frags that were waiting on connections that are now ready to send */
113107
opal_list_t pending_frags;
108+
109+
/** allowed transports */
110+
char *allowed_transports;
111+
mca_btl_uct_include_list_t allowed_transport_list;
114112
};
115113
typedef struct mca_btl_uct_module_t mca_btl_uct_module_t;
116114

@@ -123,6 +121,9 @@ struct mca_btl_uct_component_t {
123121
/** base BTL component */
124122
mca_btl_base_component_3_0_0_t super;
125123

124+
/** whether the component is initialized. controls cleanup. */
125+
bool initialized;
126+
126127
/** number of TL modules */
127128
int module_count;
128129

@@ -135,7 +136,6 @@ struct mca_btl_uct_component_t {
135136

136137
/** allowed transports */
137138
char *allowed_transports;
138-
mca_btl_uct_include_list_t allowed_transport_list;
139139

140140
/** transports to consider for forming connections */
141141
char *connection_domains;
@@ -155,14 +155,16 @@ struct mca_btl_uct_component_t {
155155
/** connection retry timeout */
156156
unsigned int connection_retry_timeout;
157157

158-
/** alternate connection-only module that can be used if no suitable
159-
* connection tl is found. this is usually a tcp tl. */
160-
mca_btl_uct_module_t *conn_module;
161-
162158
#if UCT_API >= UCT_VERSION(1, 7)
163159
uct_component_h *uct_components;
164160
unsigned num_uct_components;
165161
#endif
162+
163+
/** list of memory domains (btl_uct_md_t) */
164+
opal_list_t md_list;
165+
166+
/** connection transport (if needed). reference is owned by conn_md */
167+
mca_btl_uct_tl_t *conn_tl;
166168
};
167169
typedef struct mca_btl_uct_component_t mca_btl_uct_component_t;
168170

@@ -298,12 +300,16 @@ ucs_status_t mca_btl_uct_am_handler(void *arg, void *data, size_t length, unsign
298300
struct mca_btl_base_endpoint_t *mca_btl_uct_get_ep(struct mca_btl_base_module_t *module,
299301
opal_proc_t *proc);
300302

301-
int mca_btl_uct_query_tls(mca_btl_uct_module_t *module, mca_btl_uct_md_t *md,
302-
uct_tl_resource_desc_t *tl_descs, unsigned tl_count,
303-
bool evaluate_for_conn_only);
303+
int mca_btl_uct_populate_tls(mca_btl_uct_md_t *md, uct_tl_resource_desc_t *tl_descs, unsigned tl_count);
304304
int mca_btl_uct_process_connection_request(mca_btl_uct_module_t *module,
305305
mca_btl_uct_conn_req_t *req);
306306

307+
mca_btl_uct_module_t *mca_btl_uct_alloc_module(mca_btl_uct_md_t *md,
308+
size_t registration_size);
309+
310+
int mca_btl_uct_evaluate_tl(mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl);
311+
int mca_btl_uct_enable_tl_conn(mca_btl_uct_tl_t *tl);
312+
307313
/**
308314
* @brief Checks if a tl is suitable for using for RDMA
309315
*
@@ -344,18 +350,12 @@ static inline bool mca_btl_uct_tl_supports_conn(mca_btl_uct_tl_t *tl)
344350
*/
345351
static inline bool mca_btl_uct_tl_requires_connection_tl(mca_btl_uct_tl_t *tl)
346352
{
353+
if (NULL == tl) {
354+
return false;
355+
}
356+
347357
return !(tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE);
348358
}
349359

350-
/**
351-
* @brief Find the rank of `name` in the include list `list`.
352-
*
353-
* @param[in] name name to find
354-
* @param[in] list list to search
355-
*
356-
* A negative result means the name is not present or the list is negated.
357-
*/
358-
int mca_btl_uct_include_list_rank (const char *name, const mca_btl_uct_include_list_t *list);
359-
360360
END_C_DECLS
361361
#endif

opal/mca/btl/uct/btl_uct_am.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ int mca_btl_uct_send_frag(mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_t
233233
}
234234

235235
OPAL_THREAD_LOCK(&uct_btl->lock);
236-
mca_btl_uct_append_pending_frag(uct_btl, frag, context, true);
236+
mca_btl_uct_append_pending_frag(uct_btl, frag, context, /*ready=*/true);
237237
OPAL_THREAD_UNLOCK(&uct_btl->lock);
238238

239239
return OPAL_SUCCESS;
@@ -260,14 +260,14 @@ int mca_btl_uct_send(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
260260
OPAL_THREAD_LOCK(&uct_btl->lock);
261261
/* check one more time in case another thread is completing the connection now */
262262
if (OPAL_SUCCESS != mca_btl_uct_endpoint_test_am(uct_btl, endpoint, context, &ep_handle)) {
263-
mca_btl_uct_append_pending_frag(uct_btl, frag, context, false);
263+
mca_btl_uct_append_pending_frag(uct_btl, frag, context, /*ready=*/false);
264264
OPAL_THREAD_UNLOCK(&uct_btl->lock);
265265
return OPAL_SUCCESS;
266266
}
267267
OPAL_THREAD_UNLOCK(&uct_btl->lock);
268268
}
269269

270-
return mca_btl_uct_send_frag(uct_btl, frag, true);
270+
return mca_btl_uct_send_frag(uct_btl, frag, /*append=*/true);
271271
}
272272

273273
struct mca_btl_uct_sendi_pack_args_t {

0 commit comments

Comments
 (0)