Skip to content

Commit 121b492

Browse files
authored
Merge pull request #5837 from hjelmn/uct_update
btl/uct: bug fixes and general improvements
2 parents bb13941 + 39be6ec commit 121b492

File tree

8 files changed

+152
-93
lines changed

8 files changed

+152
-93
lines changed

opal/mca/btl/uct/btl_uct.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,6 @@ struct mca_btl_uct_module_t {
106106
/** large registered frags for packing non-contiguous data */
107107
opal_free_list_t max_frags;
108108

109-
/** RDMA completions */
110-
opal_free_list_t rdma_completions;
111-
112109
/** frags that were waiting on connections that are now ready to send */
113110
opal_list_t pending_frags;
114111
};

opal/mca/btl/uct/btl_uct_amo.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,10 @@ int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
104104
rc = OPAL_SUCCESS;
105105
} else if (UCS_OK == ucs_status) {
106106
rc = 1;
107+
mca_btl_uct_uct_completion_release (comp);
107108
} else {
108109
rc = OPAL_ERR_OUT_OF_RESOURCE;
110+
mca_btl_uct_uct_completion_release (comp);
109111
}
110112

111113
uct_rkey_release (&rkey);
@@ -176,8 +178,10 @@ int mca_btl_uct_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e
176178
rc = OPAL_SUCCESS;
177179
} else if (UCS_OK == ucs_status) {
178180
rc = 1;
181+
mca_btl_uct_uct_completion_release (comp);
179182
} else {
180183
rc = OPAL_ERR_OUT_OF_RESOURCE;
184+
mca_btl_uct_uct_completion_release (comp);
181185
}
182186

183187
uct_rkey_release (&rkey);

opal/mca/btl/uct/btl_uct_component.c

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929
#include "opal/mca/btl/base/base.h"
3030
#include "opal/mca/hwloc/base/base.h"
3131
#include "opal/util/argv.h"
32+
#include "opal/memoryhooks/memory.h"
33+
#include "opal/mca/memory/base/base.h"
34+
#include <ucm/api/ucm.h>
35+
3236
#include "opal/util/printf.h"
3337

3438
#include <string.h>
@@ -49,13 +53,13 @@ static int mca_btl_uct_component_register(void)
4953
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
5054
&mca_btl_uct_component.memory_domains);
5155

52-
mca_btl_uct_component.allowed_transports = "any";
56+
mca_btl_uct_component.allowed_transports = "dc_mlx5,rc_mlx5,ud,any";
5357
(void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version,
54-
"transports", "Comma-delimited list of transports of the form to use."
55-
" The list of transports available can be queried using ucx_info. Special"
56-
"values: any (any available) (default: any)", MCA_BASE_VAR_TYPE_STRING,
57-
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
58-
&mca_btl_uct_component.allowed_transports);
58+
"transports", "Comma-delimited list of transports to use sorted by increasing "
59+
"priority. The list of transports available can be queried using ucx_info. Special"
60+
"values: any (any available) (default: dc_mlx5,rc_mlx5,ud,any)",
61+
MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
62+
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_uct_component.allowed_transports);
5963

6064
mca_btl_uct_component.num_contexts_per_module = 0;
6165
(void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version,
@@ -95,6 +99,11 @@ static int mca_btl_uct_component_register(void)
9599
&module->super);
96100
}
97101

102+
static void mca_btl_uct_mem_release_cb(void *buf, size_t length, void *cbdata, bool from_alloc)
103+
{
104+
ucm_vm_munmap(buf, length);
105+
}
106+
98107
static int mca_btl_uct_component_open(void)
99108
{
100109
if (0 == mca_btl_uct_component.num_contexts_per_module) {
@@ -114,6 +123,15 @@ static int mca_btl_uct_component_open(void)
114123
}
115124
}
116125

126+
if (mca_btl_uct_component.num_contexts_per_module > MCA_BTL_UCT_MAX_WORKERS) {
127+
mca_btl_uct_component.num_contexts_per_module = MCA_BTL_UCT_MAX_WORKERS;
128+
}
129+
130+
if (mca_btl_uct_component.disable_ucx_memory_hooks) {
131+
ucm_set_external_event(UCM_EVENT_VM_UNMAPPED);
132+
opal_mem_hooks_register_release(mca_btl_uct_mem_release_cb, NULL);
133+
}
134+
117135
return OPAL_SUCCESS;
118136
}
119137

@@ -123,6 +141,10 @@ static int mca_btl_uct_component_open(void)
123141
*/
124142
static int mca_btl_uct_component_close(void)
125143
{
144+
if (mca_btl_uct_component.disable_ucx_memory_hooks) {
145+
opal_mem_hooks_unregister_release (mca_btl_uct_mem_release_cb);
146+
}
147+
126148
return OPAL_SUCCESS;
127149
}
128150

@@ -249,7 +271,6 @@ static mca_btl_uct_module_t *mca_btl_uct_alloc_module (const char *md_name, mca_
249271
OBJ_CONSTRUCT(&module->short_frags, opal_free_list_t);
250272
OBJ_CONSTRUCT(&module->eager_frags, opal_free_list_t);
251273
OBJ_CONSTRUCT(&module->max_frags, opal_free_list_t);
252-
OBJ_CONSTRUCT(&module->rdma_completions, opal_free_list_t);
253274
OBJ_CONSTRUCT(&module->pending_frags, opal_list_t);
254275
OBJ_CONSTRUCT(&module->lock, opal_mutex_t);
255276

opal/mca/btl/uct/btl_uct_device_context.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
* @param[in] tl btl uct tl pointer
2424
* @param[in] context_id identifier for this context (0..MCA_BTL_UCT_MAX_WORKERS-1)
2525
*/
26-
mca_btl_uct_device_context_t *mca_btl_uct_context_create (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl, int context_id);
26+
mca_btl_uct_device_context_t *mca_btl_uct_context_create (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl, int context_id, bool enable_progress);
2727

2828
/**
2929
* @brief Destroy a device context and release all resources
@@ -91,8 +91,9 @@ mca_btl_uct_module_get_tl_context_specific (mca_btl_uct_module_t *module, mca_bt
9191
if (OPAL_UNLIKELY(NULL == context)) {
9292
mca_btl_uct_device_context_t *new_context;
9393

94-
new_context = mca_btl_uct_context_create (module, tl, context_id);
95-
if (!opal_atomic_compare_exchange_strong_ptr ((opal_atomic_intptr_t *) &tl->uct_dev_contexts[context_id], &context, new_context)) {
94+
new_context = mca_btl_uct_context_create (module, tl, context_id, true);
95+
if (!opal_atomic_compare_exchange_strong_ptr ((opal_atomic_intptr_t *) &tl->uct_dev_contexts[context_id],
96+
(intptr_t *) &context, (intptr_t) new_context)) {
9697
mca_btl_uct_context_destroy (new_context);
9798
} else {
9899
context = new_context;

opal/mca/btl/uct/btl_uct_module.c

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,6 @@
3131
#include "btl_uct_endpoint.h"
3232
#include "btl_uct_am.h"
3333

34-
#include "opal/memoryhooks/memory.h"
35-
#include "opal/mca/memory/base/base.h"
36-
#include <ucm/api/ucm.h>
37-
38-
static void mca_btl_uct_mem_release_cb(void *buf, size_t length, void *cbdata, bool from_alloc)
39-
{
40-
ucm_vm_munmap(buf, length);
41-
}
42-
4334
struct mca_btl_base_endpoint_t *mca_btl_uct_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc)
4435
{
4536
mca_btl_uct_module_t *uct_module = (mca_btl_uct_module_t *) module;
@@ -111,18 +102,6 @@ static int mca_btl_uct_add_procs (mca_btl_base_module_t *btl,
111102
NULL, 0, uct_module->rcache, NULL, NULL);
112103
}
113104

114-
if (rdma_tl) {
115-
rc = opal_free_list_init (&uct_module->rdma_completions, sizeof (mca_btl_uct_uct_completion_t),
116-
opal_cache_line_size, OBJ_CLASS(mca_btl_uct_uct_completion_t),
117-
0, opal_cache_line_size, 0, 4096, 128, NULL, 0, NULL, NULL,
118-
NULL);
119-
}
120-
121-
if (mca_btl_uct_component.disable_ucx_memory_hooks) {
122-
ucm_set_external_event(UCM_EVENT_VM_UNMAPPED);
123-
opal_mem_hooks_register_release(mca_btl_uct_mem_release_cb, NULL);
124-
}
125-
126105
uct_module->initialized = true;
127106
}
128107

@@ -288,10 +267,6 @@ int mca_btl_uct_finalize (mca_btl_base_module_t* btl)
288267
mca_btl_uct_endpoint_t *endpoint;
289268
uint64_t key;
290269

291-
if (mca_btl_uct_component.disable_ucx_memory_hooks) {
292-
opal_mem_hooks_unregister_release (mca_btl_uct_mem_release_cb);
293-
}
294-
295270
/* clean up any leftover endpoints */
296271
OPAL_HASH_TABLE_FOREACH(key, uint64, endpoint, &uct_module->id_to_endpoint) {
297272
OBJ_RELEASE(endpoint);
@@ -300,7 +275,6 @@ int mca_btl_uct_finalize (mca_btl_base_module_t* btl)
300275
OBJ_DESTRUCT(&uct_module->short_frags);
301276
OBJ_DESTRUCT(&uct_module->eager_frags);
302277
OBJ_DESTRUCT(&uct_module->max_frags);
303-
OBJ_DESTRUCT(&uct_module->rdma_completions);
304278
OBJ_DESTRUCT(&uct_module->pending_frags);
305279
OBJ_DESTRUCT(&uct_module->lock);
306280

opal/mca/btl/uct/btl_uct_rdma.c

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,14 @@ static void mca_btl_uct_uct_completion_construct (mca_btl_uct_uct_completion_t *
3030

3131
OBJ_CLASS_INSTANCE(mca_btl_uct_uct_completion_t, opal_free_list_item_t, mca_btl_uct_uct_completion_construct, NULL);
3232

33+
3334
mca_btl_uct_uct_completion_t *
3435
mca_btl_uct_uct_completion_alloc (mca_btl_uct_module_t *uct_btl, mca_btl_base_endpoint_t *endpoint,
3536
void *local_address, mca_btl_base_registration_handle_t *local_handle,
3637
mca_btl_uct_device_context_t *dev_context, mca_btl_base_rdma_completion_fn_t cbfunc,
3738
void *cbcontext, void *cbdata)
3839
{
39-
mca_btl_uct_uct_completion_t *comp = (mca_btl_uct_uct_completion_t *) opal_free_list_get (&uct_btl->rdma_completions);
40+
mca_btl_uct_uct_completion_t *comp = (mca_btl_uct_uct_completion_t *) opal_free_list_get (&dev_context->rdma_completions);
4041
if (OPAL_LIKELY(NULL != comp)) {
4142
comp->uct_comp.count = 1;
4243
comp->btl = &uct_btl->super;
@@ -55,8 +56,7 @@ mca_btl_uct_uct_completion_alloc (mca_btl_uct_module_t *uct_btl, mca_btl_base_en
5556
void mca_btl_uct_uct_completion_release (mca_btl_uct_uct_completion_t *comp)
5657
{
5758
if (comp) {
58-
mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) comp->btl;
59-
opal_free_list_return (&uct_btl->rdma_completions, &comp->super);
59+
opal_free_list_return (&comp->dev_context->rdma_completions, &comp->super);
6060
}
6161
}
6262

@@ -122,6 +122,8 @@ int mca_btl_uct_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
122122
mca_btl_uct_uct_completion_release (comp);
123123
} else if (UCS_INPROGRESS == ucs_status) {
124124
ucs_status = UCS_OK;
125+
} else {
126+
mca_btl_uct_uct_completion_release (comp);
125127
}
126128

127129
BTL_VERBOSE(("get issued. status = %d", ucs_status));
@@ -157,6 +159,8 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
157159
ucs_status_t ucs_status;
158160
uct_rkey_bundle_t rkey;
159161
uct_ep_h ep_handle;
162+
bool use_short = false;
163+
bool use_bcopy = false;
160164
int rc;
161165

162166
BTL_VERBOSE(("performing put operation. local address: %p, length: %lu", local_address, (unsigned long) size));
@@ -177,12 +181,19 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
177181

178182
mca_btl_uct_context_lock (context);
179183

184+
/* determine what UCT protocol should be used */
185+
if (size <= uct_btl->super.btl_put_local_registration_threshold) {
186+
use_short = size <= uct_btl->rdma_tl->uct_iface_attr.cap.put.max_short;
187+
use_bcopy = !use_short;
188+
}
189+
180190
do {
181-
if (size <= uct_btl->rdma_tl->uct_iface_attr.cap.put.max_short) {
191+
if (use_short) {
182192
ucs_status = uct_ep_put_short (ep_handle, local_address, size, remote_address, rkey.rkey);
183-
} else if (size <= uct_btl->super.btl_put_local_registration_threshold) {
193+
} else if (use_bcopy) {
184194
ssize_t tmp = uct_ep_put_bcopy (ep_handle, mca_btl_uct_put_pack,
185-
&(mca_btl_uct_put_pack_args_t) {.local_address = local_address, .size = size},
195+
&(mca_btl_uct_put_pack_args_t) {.local_address = local_address,
196+
.size = size},
186197
remote_address, rkey.rkey);
187198
ucs_status = (tmp == (ssize_t) size) ? UCS_OK : UCS_ERR_NO_RESOURCE;
188199
} else {

0 commit comments

Comments
 (0)