Skip to content

Commit da5f891

Browse files
authored
Merge pull request #9584 from rwespetal/mtl-ofi-call-p2p-setopt
mtl/ofi: call fi_setopt to state MPI p2p requirements for CUDA
2 parents 8dc4f35 + 6e56ce0 commit da5f891

File tree

2 files changed

+35
-0
lines changed

2 files changed

+35
-0
lines changed

config/opal_check_ofi.m4

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,10 @@ AC_DEFUN([_OPAL_CHECK_OFI],[
142142
[],
143143
[#include <pmix.h>])
144144

145+
AC_CHECK_DECLS([FI_OPT_FI_HMEM_P2P],
146+
[], [],
147+
[#include <rdma/fi_endpoint.h>])
148+
145149
AC_CHECK_TYPES([struct fi_ops_mem_monitor], [], [],
146150
[#ifdef HAVE_RDMA_FI_EXT_H
147151
#include <rdma/fi_ext.h>

ompi/mca/mtl/ofi/mtl_ofi_component.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,37 @@ static int ompi_mtl_ofi_init_regular_ep(struct fi_info * prov, int universe_size
514514
return ret;
515515
}
516516

517+
#if OPAL_CUDA_SUPPORT && HAVE_DECL_FI_OPT_FI_HMEM_P2P
518+
/*
519+
* Set the FI_HMEM peer to peer option to ENABLED. This notifies Libfabric
520+
* that the provider can decide whether to use device peer to peer support
521+
* for network transfers, and allows copies if p2p is not supported.
522+
*
523+
* Note that this option may not be supported by the provider, so continue
524+
* if FI_HMEM is supported by the provider but it does not support this
525+
* setopt option. This setopt parameter was introduced in Libfabric 1.14.
526+
*
527+
* The version check is needed as one of the Libfabric setopt handlers
528+
* incorrectly assumed all option values are size_t, which was also fixed
529+
* in 1.14.
530+
*/
531+
int setopt_val = FI_HMEM_P2P_ENABLED;
532+
533+
if (FI_VERSION_GE(fi_version(), FI_VERSION(1, 14))) {
534+
ret = fi_setopt(&ompi_mtl_ofi.sep->fid,
535+
FI_OPT_ENDPOINT, FI_OPT_FI_HMEM_P2P,
536+
&setopt_val, sizeof(setopt_val));
537+
538+
if (!(0 == ret || -FI_ENOPROTOOPT == ret)) {
539+
opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
540+
"fi_setopt",
541+
ompi_process_info.nodename, __FILE__, __LINE__,
542+
fi_strerror(-ret), -ret);
543+
return ret;
544+
}
545+
}
546+
#endif /* OPAL_CUDA_SUPPORT && HAVE_DECL_FI_OPT_FI_HMEM_P2P */
547+
517548
/**
518549
* Create the objects that will be bound to the endpoint.
519550
* The objects include:

0 commit comments

Comments
 (0)