Skip to content

Commit 6e56ce0

Browse files
committed
mtl/ofi: call fi_setopt to state MPI p2p requirements for CUDA
Call fi_setopt after endpoint initialization to tell Libfabric that device peer-to-peer support should be used for network transfers when possible, but that it is not required and copies may be used instead. An MCA parameter should be added in the future so that users can toggle this option. Signed-off-by: Robert Wespetal <wesper@amazon.com>
1 parent 4f6127e commit 6e56ce0

File tree

2 files changed

+35
-0
lines changed

2 files changed

+35
-0
lines changed

config/opal_check_ofi.m4

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,10 @@ AC_DEFUN([_OPAL_CHECK_OFI],[
142142
[],
143143
[#include <pmix.h>])
144144

145+
AC_CHECK_DECLS([FI_OPT_FI_HMEM_P2P],
146+
[], [],
147+
[#include <rdma/fi_endpoint.h>])
148+
145149
AC_CHECK_TYPES([struct fi_ops_mem_monitor], [], [],
146150
[#ifdef HAVE_RDMA_FI_EXT_H
147151
#include <rdma/fi_ext.h>

ompi/mca/mtl/ofi/mtl_ofi_component.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,37 @@ static int ompi_mtl_ofi_init_regular_ep(struct fi_info * prov, int universe_size
514514
return ret;
515515
}
516516

517+
#if OPAL_CUDA_SUPPORT && HAVE_DECL_FI_OPT_FI_HMEM_P2P
518+
/*
519+
* Set the FI_HMEM peer to peer option to ENABLED. This notifies Libfabric
520+
* that the provider can decide whether to use device peer to peer support
521+
* for network transfers, and allows copies if p2p is not supported.
522+
*
523+
* Note that this option may not be supported by the provider, so continue
524+
* if FI_HMEM is supported by the provider but it does not support this
525+
* setopt option. This setopt parameter was introduced in Libfabric 1.14.
526+
*
527+
* The version check is needed as one of the Libfabric setopt handlers
528+
* incorrectly assumed all option values are size_t, which was also fixed
529+
* in 1.14.
530+
*/
531+
int setopt_val = FI_HMEM_P2P_ENABLED;
532+
533+
if (FI_VERSION_GE(fi_version(), FI_VERSION(1, 14))) {
534+
ret = fi_setopt(&ompi_mtl_ofi.sep->fid,
535+
FI_OPT_ENDPOINT, FI_OPT_FI_HMEM_P2P,
536+
&setopt_val, sizeof(setopt_val));
537+
538+
if (!(0 == ret || -FI_ENOPROTOOPT == ret)) {
539+
opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
540+
"fi_setopt",
541+
ompi_process_info.nodename, __FILE__, __LINE__,
542+
fi_strerror(-ret), -ret);
543+
return ret;
544+
}
545+
}
546+
#endif /* OPAL_CUDA_SUPPORT && HAVE_DECL_FI_OPT_FI_HMEM_P2P */
547+
517548
/**
518549
* Create the objects that will be bound to the endpoint.
519550
* The objects include:

0 commit comments

Comments
 (0)