Skip to content

Commit 0475fe9

Browse files
committed
btl/am-rdma: Add external interface
Add a mode for the am-rdma interface where the existing BTL is not updated, but the wrapper functions are exported directly to the caller. This interface allows for specifying required flags (although REMOTE_COMPLETION is the only one supported today) and disabling memory registration requirements for the returned interface. Signed-off-by: Brian Barrett <bbarrett@amazon.com>
1 parent adcb2a1 commit 0475fe9

File tree

2 files changed

+162
-57
lines changed

2 files changed

+162
-57
lines changed

opal/mca/btl/base/btl_base_am_rdma.c

Lines changed: 47 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -64,57 +64,8 @@ static OBJ_CLASS_INSTANCE(am_rdma_component_t, opal_object_t,
6464
am_rdma_component_init, am_rdma_component_fini);
6565

6666

67-
struct mca_btl_base_am_rdma_module_t;
68-
69-
typedef int (*mca_btl_base_am_rdma_module_put_fn_t)(
70-
struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint,
71-
void *local_address, uint64_t remote_address,
72-
struct mca_btl_base_registration_handle_t *local_handle,
73-
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order,
74-
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
75-
76-
typedef int (*mca_btl_base_am_rdma_module_get_fn_t)(
77-
struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint,
78-
void *local_address, uint64_t remote_address,
79-
struct mca_btl_base_registration_handle_t *local_handle,
80-
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order,
81-
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
82-
83-
typedef int (*mca_btl_base_am_rdma_module_atomic_fop64_fn_t)(
84-
struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint,
85-
void *local_address, uint64_t remote_address,
86-
struct mca_btl_base_registration_handle_t *local_handle,
87-
struct mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
88-
uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
89-
void *cbcontext, void *cbdata);
90-
91-
typedef int (*mca_btl_base_am_rdma_module_atomic_cswap64_fn_t)(
92-
struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint,
93-
void *local_address, uint64_t remote_address,
94-
struct mca_btl_base_registration_handle_t *local_handle,
95-
struct mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value,
96-
int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
97-
98-
struct mca_btl_base_am_rdma_module_t {
99-
opal_object_t super;
100-
mca_btl_base_module_t *btl;
101-
bool use_rdma_put;
102-
bool use_rdma_get;
103-
104-
size_t am_btl_put_limit;
105-
size_t am_btl_put_alignment;
106-
size_t am_btl_get_limit;
107-
size_t am_btl_get_alignment;
108-
109-
mca_btl_base_am_rdma_module_put_fn_t am_btl_put;
110-
mca_btl_base_am_rdma_module_get_fn_t am_btl_get;
111-
mca_btl_base_am_rdma_module_atomic_fop64_fn_t am_btl_atomic_fop;
112-
mca_btl_base_am_rdma_module_atomic_cswap64_fn_t am_btl_atomic_cswap;
113-
};
114-
typedef struct mca_btl_base_am_rdma_module_t mca_btl_base_am_rdma_module_t;
115-
116-
static OBJ_CLASS_INSTANCE(mca_btl_base_am_rdma_module_t, opal_object_t,
117-
NULL, NULL);
67+
OBJ_CLASS_INSTANCE(mca_btl_base_am_rdma_module_t, opal_object_t,
68+
NULL, NULL);
11869

11970

12071
/**
@@ -1256,6 +1207,8 @@ static void am_rdma_register_callbacks(void)
12561207

12571208

12581209
static int am_rdma_internal_init(mca_btl_base_module_t *btl,
1210+
uint32_t flags_requested,
1211+
bool no_memory_registration,
12591212
mca_btl_base_am_rdma_module_t **new_module)
12601213
{
12611214
static bool initialized = false;
@@ -1282,6 +1235,21 @@ static int am_rdma_internal_init(mca_btl_base_module_t *btl,
12821235
module->use_rdma_put = !!(btl->btl_flags & MCA_BTL_FLAGS_PUT);
12831236
module->use_rdma_get = !!(btl->btl_flags & MCA_BTL_FLAGS_GET);
12841237

1238+
/* if the requester asked for remote completion and the btl does
1239+
* not provide remote completion, we can not use put.
1240+
*/
1241+
if (!(btl->btl_flags & MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION)) {
1242+
module->use_rdma_put = false;
1243+
}
1244+
1245+
/* if the requester does not want to do memory registration and
1246+
* the BTL requires memory registration, disable the use of RDMA.
1247+
*/
1248+
if (no_memory_registration && NULL != btl->btl_register_mem) {
1249+
module->use_rdma_put = false;
1250+
module->use_rdma_get = false;
1251+
}
1252+
12851253
if (module->use_rdma_get) {
12861254
/* implement operations over get. */
12871255
max_operation_size = btl->btl_get_limit;
@@ -1345,7 +1313,7 @@ int mca_btl_base_am_rdma_init(mca_btl_base_module_t *btl)
13451313
* (even default_component global data) until internal_init returns
13461314
* successfully.
13471315
*/
1348-
ret = am_rdma_internal_init(btl, &am_module);
1316+
ret = am_rdma_internal_init(btl, 0, false, &am_module);
13491317
if (OPAL_SUCCESS != ret) {
13501318
BTL_VERBOSE(("am_rdma_init: btl %p internal_init failure %d",
13511319
(void *)btl, ret));
@@ -1410,3 +1378,30 @@ int mca_btl_base_am_rdma_init(mca_btl_base_module_t *btl)
14101378

14111379
return OPAL_SUCCESS;
14121380
}
1381+
1382+
1383+
int opal_btl_base_am_rdma_create(mca_btl_base_module_t *btl,
1384+
uint32_t flags_requested,
1385+
bool no_memory_registration,
1386+
mca_btl_base_am_rdma_module_t **am_module)
1387+
{
1388+
int ret;
1389+
1390+
BTL_VERBOSE(("am_rdma_create: called for btl %s (%p)",
1391+
btl->btl_component->btl_version.mca_component_name, (void *)btl));
1392+
1393+
ret = am_rdma_internal_init(btl, flags_requested, no_memory_registration, am_module);
1394+
if (OPAL_SUCCESS != ret) {
1395+
BTL_VERBOSE(("am_rdma_create: btl %p internal_init failure %d",
1396+
(void *)btl, ret));
1397+
return ret;
1398+
}
1399+
1400+
return OPAL_SUCCESS;
1401+
}
1402+
1403+
1404+
int opal_btl_base_am_rdma_destroy(mca_btl_base_am_rdma_module_t *am_module)
1405+
{
1406+
return am_rdma_internal_fini(am_module);
1407+
}

opal/mca/btl/base/btl_base_am_rdma.h

Lines changed: 115 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
* Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
44
* reserved.
55
* Copyright (c) 2020 Google, LLC. All rights reserved.
6+
* Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights
7+
* reserved.
68
* $COPYRIGHT$
79
*
810
* Additional copyrights may follow
@@ -13,10 +15,46 @@
1315
/**
1416
* This file provides support for active-message (send/recv) based RDMA.
1517
* It can be used with any btl that provides a minimum of send support but
16-
* can also be used with partial-RDMA BTLs (put only, get only, etc). It
17-
* will provide support for any RDMA or atomic operation not currently
18-
* supported by the supplied BTL. For more info see the description of
19-
* mca_btl_base_am_rdma_init.
18+
* can also be used with partial-RDMA BTLs (put only, get only, etc)
19+
* to provide a complete RDMA interface.
20+
*
21+
* There are two modes of using this interface, depending on your
22+
* requirements:
23+
*
24+
* First, this interface can be used to provide a complete
25+
* put/get/atomic interface for BTLs which do not natively provide
26+
* such an interface. In this mode, active message rdma functions are
27+
* only used if the underlying implementation does not already provide
28+
* the required functionality. For example, if a BTL natively
29+
* supports put but not get, the interface would provide an emulated
30+
* get. The registration, completion and atomicity semantics of the
31+
* BTL remain the native interface's capabilities. That is, if the
32+
* native interface does not provide remote completion or atomics that
33+
* are atomic with processor atomics, neither will the interface after
34+
* initializing the am rdma interface for that BTL. This mode will
35+
* likely give better performance than the second mode for transfers
36+
* that fit within the BTL's native semantics. In this mode, the BTL
37+
* interface is updated so that the btl_{put, get, atomic_fop,
38+
* atomic_cswap} function pointers are usable. However, the btl
39+
* capability flags will not be updated to indicate native support of
40+
* the emulated functionality (for example, if btl_get() is emulated,
41+
* MCA_BTL_FLAGS_GET will not be set). Instead, the emulated flags
42+
* will be set (MCA_BTL_FLAGS_PUT_AM, MCA_BTL_FLAGS_GET_AM,
43+
* MCA_BTL_FLAGS_ATOMIC_AM_FOP, etc.).
44+
*
45+
* Second, this interface can be used to provide different
46+
* semantics than a BTL natively provides. This mode is not
47+
* transparent to the caller (unlike the first mode). Instead, the
48+
* caller must manage calling the active message put/get/atomic
49+
* interface directly (rather than through the BTL function pointers).
50+
* For interfaces which require strict remote completion or require
51+
* implicit memory registration, this can greatly simplify the code,
52+
* in return for marginally more management complexity and lower
53+
* performance.
54+
*
55+
* While the calling convention and initialization are different, the
56+
* communication routines used by the active message rdma
57+
* implementation are identical in both modes of operation.
2058
*/
2159

2260
#include "opal_config.h"
@@ -28,14 +66,86 @@
2866
/**
2967
* @brief initialize active-message RDMA/atomic support
3068
*
31-
* @inout btl btl module to augment
69+
* @param[in,out] btl btl module to augment
70+
*
71+
* @retval OPAL_SUCCESS btl successfully updated, btl already
72+
* updated, or btl has all available
73+
* functionality natively.
74+
* @retval OPAL_ERR_TEMP_OUT_OF_RESOURCE Allocating BTL-level data
75+
* structure failed.
3276
*
3377
* This function adds functionality to the btl for any missing RDMA/atomic
3478
* operation. Atomic operations are entirely emulated using send/recv and
3579
* work best with a btl that also has async-progress enabled. Put/get
3680
* support will use either send/recv or get (for put)/put (for get) (if
3781
* available).
82+
*
83+
* Note that calling this function will change the BTL interface.
84+
* Care must be taken to not call this function outside of early
85+
* initialization routines.
3886
*/
3987
int mca_btl_base_am_rdma_init(mca_btl_base_module_t *btl);
4088

89+
struct mca_btl_base_am_rdma_module_t;
90+
91+
typedef int (*mca_btl_base_am_rdma_module_put_fn_t)(
92+
struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint,
93+
void *local_address, uint64_t remote_address,
94+
struct mca_btl_base_registration_handle_t *local_handle,
95+
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order,
96+
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
97+
98+
typedef int (*mca_btl_base_am_rdma_module_get_fn_t)(
99+
struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint,
100+
void *local_address, uint64_t remote_address,
101+
struct mca_btl_base_registration_handle_t *local_handle,
102+
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order,
103+
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
104+
105+
typedef int (*mca_btl_base_am_rdma_module_atomic_fop64_fn_t)(
106+
struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint,
107+
void *local_address, uint64_t remote_address,
108+
struct mca_btl_base_registration_handle_t *local_handle,
109+
struct mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
110+
uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
111+
void *cbcontext, void *cbdata);
112+
113+
typedef int (*mca_btl_base_am_rdma_module_atomic_cswap64_fn_t)(
114+
struct mca_btl_base_am_rdma_module_t *am_btl, struct mca_btl_base_endpoint_t *endpoint,
115+
void *local_address, uint64_t remote_address,
116+
struct mca_btl_base_registration_handle_t *local_handle,
117+
struct mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value,
118+
int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
119+
120+
struct mca_btl_base_am_rdma_module_t {
121+
opal_object_t super;
122+
mca_btl_base_module_t *btl;
123+
bool use_rdma_put;
124+
bool use_rdma_get;
125+
126+
size_t am_btl_put_limit;
127+
size_t am_btl_put_alignment;
128+
size_t am_btl_get_limit;
129+
size_t am_btl_get_alignment;
130+
131+
mca_btl_base_am_rdma_module_put_fn_t am_btl_put;
132+
mca_btl_base_am_rdma_module_get_fn_t am_btl_get;
133+
mca_btl_base_am_rdma_module_atomic_fop64_fn_t am_btl_atomic_fop;
134+
mca_btl_base_am_rdma_module_atomic_cswap64_fn_t am_btl_atomic_cswap;
135+
};
136+
typedef struct mca_btl_base_am_rdma_module_t mca_btl_base_am_rdma_module_t;
137+
138+
OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_base_am_rdma_module_t);
139+
140+
141+
/**
142+
* @brief create active-message RDMA/atomics functions
143+
*/
144+
int opal_btl_base_am_rdma_create(mca_btl_base_module_t *btl,
145+
uint32_t flags_requested,
146+
bool no_memory_registration,
147+
mca_btl_base_am_rdma_module_t **am_module);
148+
149+
int opal_btl_base_am_rdma_destroy(mca_btl_base_am_rdma_module_t *am_module);
150+
41151
#endif /* OPAL_MCA_BTL_BASE_AM_RDMA_H */

0 commit comments

Comments
 (0)