Skip to content

Commit e3c55a7

Browse files
authored
Merge pull request #11929 from hppritcha/ofi_mca_disable_hmem_param
ofi - add MCA parameters to not use FI_HMEM
2 parents d01cad6 + baf882a commit e3c55a7

File tree

4 files changed

+43
-10
lines changed

4 files changed

+43
-10
lines changed

ompi/mca/mtl/ofi/mtl_ofi_component.c

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,15 @@ ompi_mtl_ofi_component_register(void)
251251
MCA_BASE_VAR_SCOPE_READONLY,
252252
&ompi_mtl_ofi.num_ofi_contexts);
253253

254+
ompi_mtl_ofi.disable_hmem = false;
255+
mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
256+
"disable_hmem",
257+
"Disable HMEM usage",
258+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
259+
OPAL_INFO_LVL_3,
260+
MCA_BASE_VAR_SCOPE_READONLY,
261+
&ompi_mtl_ofi.disable_hmem);
262+
254263
return opal_common_ofi_mca_register(&mca_mtl_ofi_component.super.mtl_version);
255264
}
256265

@@ -626,8 +635,10 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
626635

627636
/* Request device transfer capabilities */
628637
#if defined(FI_HMEM)
629-
hints->caps |= FI_HMEM;
630-
hints->domain_attr->mr_mode |= FI_MR_HMEM | FI_MR_ALLOCATED;
638+
if (false == ompi_mtl_ofi.disable_hmem) {
639+
hints->caps |= FI_HMEM;
640+
hints->domain_attr->mr_mode |= FI_MR_HMEM | FI_MR_ALLOCATED;
641+
}
631642
#endif
632643

633644
no_hmem:
@@ -791,10 +802,17 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
791802

792803
*accelerator_support = false;
793804
#if defined(FI_HMEM)
794-
if (!(prov->caps & FI_HMEM)) {
795-
opal_output_verbose(50, opal_common_ofi.output,
796-
"%s:%d: Libfabric provider does not support device buffers. Continuing with device to host copies.\n",
797-
__FILE__, __LINE__);
805+
if (!(prov->caps & FI_HMEM) || (true == ompi_mtl_ofi.disable_hmem)) {
806+
if (!(prov->caps & FI_HMEM) && (false == ompi_mtl_ofi.disable_hmem)) {
807+
opal_output_verbose(50, opal_common_ofi.output,
808+
"%s:%d: Libfabric provider does not support device buffers. Continuing with device to host copies.\n",
809+
__FILE__, __LINE__);
810+
}
811+
if (true == ompi_mtl_ofi.disable_hmem) {
812+
opal_output_verbose(50, opal_common_ofi.output,
813+
"%s:%d: Support for device buffers disabled by MCA parameter. Continuing with device to host copies.\n",
814+
__FILE__, __LINE__);
815+
}
798816
} else {
799817
*accelerator_support = true;
800818
ompi_mtl_ofi.hmem_needs_reg = true;

ompi/mca/mtl/ofi/mtl_ofi_types.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved
33
*
44
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
5-
* Copyright (c) 2022 Triad National Security, LLC. All rights
6-
* reserved.
5+
* Copyright (c) 2022-2023 Triad National Security, LLC. All rights
6+
* reserved.
77
* $COPYRIGHT$
88
*
99
* Additional copyrights may follow
@@ -59,6 +59,7 @@ typedef struct mca_mtl_ofi_module_t {
5959
int enable_sep; /* MCA to enable/disable SEP feature */
6060
int thread_grouping; /* MCA for thread grouping feature */
6161
int num_ofi_contexts; /* MCA for number of contexts to use */
62+
bool disable_hmem; /* MCA to disable requesting FI_HMEM support from the provider (true = do not request) */
6263

6364
/** Endpoint name length */
6465
size_t epnamelen;

opal/mca/btl/ofi/btl_ofi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,8 @@ struct mca_btl_ofi_component_t {
169169
size_t max_inject_size;
170170
bool disable_inject;
171171

172+
bool disable_hmem;
173+
172174
/** All BTL OFI modules (1 per tl) */
173175
mca_btl_ofi_module_t *modules[MCA_BTL_OFI_MAX_MODULES];
174176
};

opal/mca/btl/ofi/btl_ofi_component.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,16 @@ static int mca_btl_ofi_component_register(void)
200200
MCA_BASE_VAR_SCOPE_READONLY,
201201
&mca_btl_ofi_component.disable_inject);
202202

203+
mca_btl_ofi_component.disable_hmem = false;
204+
mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
205+
"disable_hmem",
206+
"Disable HMEM usage",
207+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
208+
OPAL_INFO_LVL_5,
209+
MCA_BASE_VAR_SCOPE_READONLY,
210+
&mca_btl_ofi_component.disable_hmem);
211+
212+
203213
/* for now we want this component to lose to the MTL. */
204214
module->super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 50;
205215

@@ -345,8 +355,10 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init(int *num_btl_modules,
345355

346356
#if defined(FI_HMEM)
347357
/* Request device transfer capabilities, separate from required_caps */
348-
hints.caps |= FI_HMEM;
349-
hints.domain_attr->mr_mode |= FI_MR_HMEM;
358+
if (false == mca_btl_ofi_component.disable_hmem) {
359+
hints.caps |= FI_HMEM;
360+
hints.domain_attr->mr_mode |= FI_MR_HMEM;
361+
}
350362
no_hmem:
351363
#endif
352364

0 commit comments

Comments
 (0)