Skip to content

Commit af33612

Browse files
committed
btl/ofi: Add FI_HMEM capability and memory registration
Signed-off-by: William Zhang <wilzhang@amazon.com>
1 parent 4d32d2d commit af33612

File tree

2 files changed, with 65 additions and 7 deletions.

opal/mca/btl/ofi/btl_ofi_component.c

Lines changed: 32 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ static int validate_info(struct fi_info *info, uint64_t required_caps, char **in
106106
mr_mode = info->domain_attr->mr_mode;
107107

108108
if (!(mr_mode == FI_MR_BASIC || mr_mode == FI_MR_SCALABLE
109-
|| (mr_mode & ~(FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_ENDPOINT)) == 0)) {
109+
|| (mr_mode & ~(FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_ENDPOINT | FI_MR_HMEM)) == 0)) {
110110
BTL_VERBOSE(("unsupported MR mode"));
111111
return OPAL_ERROR;
112112
}
@@ -256,8 +256,8 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
256256
libfabric_api = fi_version();
257257

258258
/* bail if OFI version is less than 1.9. */
259-
if (libfabric_api < FI_VERSION(1, 5)) {
260-
BTL_VERBOSE(("ofi btl disqualified because OFI version < 1.5."));
259+
if (libfabric_api < FI_VERSION(1, 9)) {
260+
BTL_VERBOSE(("ofi btl disqualified because OFI version < 1.9."));
261261
return NULL;
262262
}
263263

@@ -339,16 +339,43 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
339339

340340
mca_btl_ofi_component.module_count = 0;
341341

342-
/* do the query. */
343-
rc = fi_getinfo(FI_VERSION(1, 5), NULL, NULL, 0, &hints, &info_list);
342+
/* Request device transfer capabilities, separate from required_caps */
343+
hints.caps |= FI_HMEM;
344+
hints.domain_attr->mr_mode |= FI_MR_HMEM;
345+
no_hmem:
346+
347+
/* Do the query. The earliest version that supports FI_HMEM hints is 1.9 */
348+
rc = fi_getinfo(FI_VERSION(1, 9), NULL, NULL, 0, &hints, &info_list);
344349
if (0 != rc) {
350+
if (hints.caps & FI_HMEM) {
351+
/* Try again without FI_HMEM hints */
352+
hints.caps &= ~FI_HMEM;
353+
hints.domain_attr->mr_mode &= ~FI_MR_HMEM;
354+
goto no_hmem;
355+
}
345356
BTL_VERBOSE(("fi_getinfo failed with code %d: %s", rc, fi_strerror(-rc)));
346357
if (NULL != include_list) {
347358
opal_argv_free(include_list);
348359
}
349360
return NULL;
350361
}
351362

363+
/* If we get to this point with FI_HMEM hint set, we want it to be a
364+
* required capability
365+
*/
366+
if (hints.caps & FI_HMEM) {
367+
/* The EFA provider has a bug where it incorrectly advertises FI_HMEM +
368+
* FI_ATOMIC capability without being able to provide that support in
369+
* versions before libfabric 1.18.0
370+
*/
371+
if (libfabric_api < FI_VERSION(1, 18) && !strncasecmp(info_list->fabric_attr->prov_name, "efa", 3)) {
372+
hints.caps &= ~FI_HMEM;
373+
hints.domain_attr->mr_mode &= ~FI_MR_HMEM;
374+
goto no_hmem;
375+
}
376+
required_caps |= FI_HMEM;
377+
}
378+
352379
/* count the number of resources */
353380
info = info_list;
354381
while (info) {

opal/mca/btl/ofi/btl_ofi_module.c

Lines changed: 33 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,8 @@
2828
#include "opal_config.h"
2929
#include "opal/class/opal_bitmap.h"
3030
#include "opal/datatype/opal_convertor.h"
31+
#include "opal/mca/accelerator/accelerator.h"
32+
#include "opal/mca/accelerator/base/base.h"
3133
#include "opal/mca/btl/btl.h"
3234
#include "opal/mca/mpool/base/base.h"
3335
#include "opal/mca/mpool/mpool.h"
@@ -234,14 +236,43 @@ static int mca_btl_ofi_deregister_mem(mca_btl_base_module_t *btl,
234236
int mca_btl_ofi_reg_mem(void *reg_data, void *base, size_t size,
235237
mca_rcache_base_registration_t *reg)
236238
{
237-
int rc;
239+
int rc, dev_id;
240+
uint64_t flags;
238241
static uint64_t access_flags = FI_REMOTE_WRITE | FI_REMOTE_READ | FI_READ | FI_WRITE;
242+
struct fi_mr_attr attr = {0};
243+
struct iovec iov = {0};
239244

240245
mca_btl_ofi_module_t *btl = (mca_btl_ofi_module_t *) reg_data;
241246
mca_btl_ofi_reg_t *ur = (mca_btl_ofi_reg_t *) reg;
242247

243-
rc = fi_mr_reg(btl->domain, base, size, access_flags, 0, (uint64_t) reg, 0, &ur->ur_mr, NULL);
248+
iov.iov_base = base;
249+
iov.iov_len = size;
250+
attr.mr_iov = &iov;
251+
attr.iov_count = 1;
252+
attr.access = access_flags;
253+
attr.offset = 0;
254+
attr.context = NULL;
255+
256+
if (OPAL_LIKELY(NULL != base)) {
257+
rc = opal_accelerator.check_addr(base, &dev_id, &flags);
258+
if (rc < 0) {
259+
return rc;
260+
} else if (rc > 0 ) {
261+
if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "cuda")) {
262+
attr.iface = FI_HMEM_CUDA;
263+
opal_accelerator.get_device(&attr.device.cuda);
264+
} else if (0 == strcmp(opal_accelerator_base_selected_component.base_version.mca_component_name, "rocm")) {
265+
attr.iface = FI_HMEM_ROCR;
266+
opal_accelerator.get_device(&attr.device.cuda);
267+
} else {
268+
return OPAL_ERROR;
269+
}
270+
}
271+
}
272+
273+
rc = fi_mr_regattr(btl->domain, &attr, 0, &ur->ur_mr);
244274
if (0 != rc) {
275+
ur->ur_mr = NULL;
245276
return OPAL_ERR_OUT_OF_RESOURCE;
246277
}
247278

0 commit comments

Comments
 (0)