
Commit 7a593ce

Merge pull request #6361 from aravindksg/fix_tg_segfault
mtl/ofi: Fix segfault when not using Thread-Grouping feature
2 parents: c570966 + 6edcc47

2 files changed: +47 -29 lines changed


ompi/mca/mtl/ofi/README

Lines changed: 26 additions & 16 deletions
@@ -72,7 +72,7 @@ by reducing the bits available for the communicator ID field in the OFI tag.
 
 SCALABLE ENDPOINTS:
 -------------------
-OFI MTL supports OFI Scalable Endpoints feature as a means to improve
+OFI MTL supports OFI Scalable Endpoints (SEP) feature as a means to improve
 multi-threaded application throughput and message rate. Currently the feature
 is designed to utilize multiple TX/RX contexts exposed by the OFI provider in
 conjunction with a multi-communicator MPI application model. Therefore, new OFI
@@ -81,12 +81,13 @@ instead of creating them all at once during init time and this approach also
 favours only creating as many contexts as needed.
 
 1. Multi-communicator model:
-   With this approach, the application first duplicates the communicators it
-   wants to use with MPI operations (ideally creating as many communicators as
-   the number of threads it wants to use to call into MPI). The duplicated
-   communicators are then used by the corresponding threads to perform MPI
-   operations. A possible usage scenario could be in an MPI + OMP
-   application as follows (example limited to 2 ranks):
+   With this approach, the MPI application is required to first duplicate
+   the communicators it wants to use with MPI operations (ideally creating
+   as many communicators as the number of threads it wants to use to call
+   into MPI). The duplicated communicators are then used by the
+   corresponding threads to perform MPI operations. A possible usage
+   scenario could be in an MPI + OMP application as follows
+   (example limited to 2 ranks):
 
    MPI_Comm dup_comm[n];
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
@@ -112,21 +113,30 @@ favours only creating as many contexts as needed.
    }
 
 2. MCA variables:
-To utilize the feature, the following MCA variable needs to be set:
+To utilize the feature, the following MCA variables need to be set:
 mtl_ofi_enable_sep:
-    This MCA variable needs to be set to enable the use of Scalable Endpoints
+    This MCA variable needs to be set to enable the use of Scalable Endpoints (SEP)
     feature in the OFI MTL. The underlying provider is also checked to ensure the
     feature is supported. If the provider chosen does not support it, user needs
-    to either set this variable to 0 or select different provider which supports
+    to either set this variable to 0 or select a different provider which supports
     the feature.
+    For single-threaded applications one OFI context is sufficient, so OFI SEPs
+    may not add benefit.
+    Note that mtl_ofi_thread_grouping (see below) needs to be enabled to use the
+    different OFI SEP contexts. Otherwise, only one context (ctxt 0) will be used.
 
     Default: 0
 
     Command-line syntax:
     "-mca mtl_ofi_enable_sep 1"
 
 mtl_ofi_thread_grouping:
-    This MCA variable needs to be set to switch Thread Grouping feature on.
+    Turn Thread Grouping feature on. This is needed to use the Multi-communicator
+    model explained above. This means that the OFI MTL will use the communicator
+    ID to decide the SEP contexts to be used by the thread. In this way, each
+    thread will have direct access to different OFI resources. If disabled,
+    only context 0 will be used.
+    Requires mtl_ofi_enable_sep to be set to 1.
 
     Default: 0
 
@@ -139,11 +149,11 @@ To utilize the feature, the following MCA variable needs to be set:
     "-mca mtl_ofi_thread_grouping 1"
 
 mtl_ofi_num_ctxts:
-    MCA variable allows user to set the number of OFI contexts the applications
-    expects to use. For multi-threaded applications using Thread Grouping
-    feature, this number should be set to the number of user threads that will
-    call into MPI. For single-threaded applications one OFI context is
-    sufficient.
+    This MCA variable allows user to set the number of OFI SEP contexts the
+    application expects to use. For multi-threaded applications using Thread
+    Grouping feature, this number should be set to the number of user threads
+    that will call into MPI. This variable will only have effect if
+    mtl_ofi_enable_sep is set to 1.
 
     Default: 1
 
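The README hunks above show only the first lines of the MPI + OMP example (the diff context cuts the rest off). A minimal sketch of how the complete multi-communicator pattern might look is given below; NUM_THREADS, the integer payload and the tag are illustrative assumptions, not taken from the commit.

    /* Hedged sketch of the multi-communicator MPI + OMP pattern described in
     * the README.  Run with exactly 2 ranks, e.g. "mpirun -np 2 ./a.out",
     * and compile with OpenMP enabled. */
    #include <mpi.h>
    #include <stdio.h>

    #define NUM_THREADS 2   /* assumed: one duplicated communicator per thread */

    int main(int argc, char **argv)
    {
        int provided, rank, i;
        MPI_Comm dup_comm[NUM_THREADS];

        MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
        if (provided < MPI_THREAD_MULTIPLE) {
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        /* One duplicated communicator per thread, so each thread can drive its
         * own OFI SEP context when mtl_ofi_thread_grouping is enabled. */
        for (i = 0; i < NUM_THREADS; i++) {
            MPI_Comm_dup(MPI_COMM_WORLD, &dup_comm[i]);
        }

        if (0 == rank) {
            #pragma omp parallel for num_threads(NUM_THREADS)
            for (i = 0; i < NUM_THREADS; i++) {
                int msg = i;
                /* Each thread sends on its own duplicated communicator. */
                MPI_Send(&msg, 1, MPI_INT, 1, 0, dup_comm[i]);
            }
        } else if (1 == rank) {
            #pragma omp parallel for num_threads(NUM_THREADS)
            for (i = 0; i < NUM_THREADS; i++) {
                int msg;
                MPI_Recv(&msg, 1, MPI_INT, 0, 0, dup_comm[i], MPI_STATUS_IGNORE);
                printf("thread %d received %d\n", i, msg);
            }
        }

        for (i = 0; i < NUM_THREADS; i++) {
            MPI_Comm_free(&dup_comm[i]);
        }
        MPI_Finalize();
        return 0;
    }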
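Combining the MCA variables documented above, one possible launch line for a 2-rank run with 2 threads per rank could look like the following; the cm PML / ofi MTL selection and the application name are assumptions about a typical setup, not part of this commit:

    mpirun -np 2 --mca pml cm --mca mtl ofi \
           --mca mtl_ofi_enable_sep 1 \
           --mca mtl_ofi_thread_grouping 1 \
           --mca mtl_ofi_num_ctxts 2 ./mpi_omp_app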

ompi/mca/mtl/ofi/mtl_ofi.h

Lines changed: 21 additions & 13 deletions
@@ -325,10 +325,18 @@ ompi_mtl_ofi_isend_callback(struct fi_cq_tagged_entry *wc,
     return OMPI_SUCCESS;
 }
 
-#define MTL_OFI_MAP_COMM_TO_CONTEXT(comm_id, ctxt_id) \
-    do { \
-        ctxt_id = ompi_mtl_ofi.comm_to_context[comm_id]; \
-    } while (0);
+/* Return OFI context ID associated with the specific communicator */
+__opal_attribute_always_inline__ static inline int
+ompi_mtl_ofi_map_comm_to_ctxt(uint32_t comm_id)
+{
+    /* For non-thread-grouping use case, only one context is used which is
+     * associated to MPI_COMM_WORLD, so use that. */
+    if (0 == ompi_mtl_ofi.thread_grouping) {
+        comm_id = 0;
+    }
+
+    return ompi_mtl_ofi.comm_to_context[comm_id];
+}
 
 __opal_attribute_always_inline__ static inline int
 ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req,
@@ -342,7 +350,7 @@ ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req,
     ssize_t ret = OMPI_SUCCESS;
     int ctxt_id = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ack_req = malloc(sizeof(ompi_mtl_ofi_request_t));
@@ -397,7 +405,7 @@ ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl,
     fi_addr_t src_addr = 0;
     fi_addr_t sep_peer_fiaddr = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     /**
@@ -532,7 +540,7 @@ ompi_mtl_ofi_isend_generic(struct mca_mtl_base_module_t *mtl,
     ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */
     fi_addr_t sep_peer_fiaddr = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ofi_req->event_callback = ompi_mtl_ofi_isend_callback;
@@ -617,7 +625,7 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
     ompi_status_public_t *status = NULL;
     struct fi_msg_tagged tagged_msg;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(ofi_req->comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_contextid);
 
     assert(ofi_req->super.ompi_req);
     status = &ofi_req->super.ompi_req->req_status;
@@ -758,7 +766,7 @@ ompi_mtl_ofi_irecv_generic(struct mca_mtl_base_module_t *mtl,
     size_t length;
     bool free_after;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     if (ofi_cq_data) {
@@ -884,7 +892,7 @@ ompi_mtl_ofi_imrecv(struct mca_mtl_base_module_t *mtl,
     uint64_t msgflags = FI_CLAIM | FI_COMPLETION;
     struct ompi_communicator_t *comm = (*message)->comm;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ompi_ret = ompi_mtl_datatype_recv_buf(convertor,
@@ -977,7 +985,7 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
     uint64_t msgflags = FI_PEEK | FI_COMPLETION;
     int ctxt_id = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     if (ofi_cq_data) {
@@ -1066,7 +1074,7 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
     uint64_t msgflags = FI_PEEK | FI_CLAIM | FI_COMPLETION;
     int ctxt_id = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ofi_req = malloc(sizeof *ofi_req);
@@ -1168,7 +1176,7 @@ ompi_mtl_ofi_cancel(struct mca_mtl_base_module_t *mtl,
     int ret, ctxt_id = 0;
     ompi_mtl_ofi_request_t *ofi_req = (ompi_mtl_ofi_request_t*) mtl_request;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(ofi_req->comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_contextid);
 
     switch (ofi_req->type) {
     case OMPI_MTL_OFI_SEND:
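Why the old macro could crash: with Thread Grouping disabled, the OFI MTL only sets up the context associated with MPI_COMM_WORLD, yet MTL_OFI_MAP_COMM_TO_CONTEXT still indexed comm_to_context[] with whatever communicator ID it was handed, so the ID of a duplicated communicator could reach past the populated mapping. The new ompi_mtl_ofi_map_comm_to_ctxt() clamps the index to 0 in that case. The stand-alone sketch below models that behaviour with a plain array; it is a simplified illustration, not the actual ompi_mtl_ofi structures.

    /* Simplified model of the fix (not the real ompi_mtl_ofi data structures):
     * with Thread Grouping off, only one context mapping (slot 0) exists, so
     * indexing by an arbitrary communicator ID would read out of bounds. */
    #include <stdio.h>

    #define NUM_CONTEXTS 1              /* thread grouping off: one context */

    static int comm_to_context[NUM_CONTEXTS] = { 0 };
    static int thread_grouping = 0;     /* mtl_ofi_thread_grouping disabled */

    /* Mirrors the new inline helper: fall back to context 0 when the
     * Thread Grouping feature is not enabled. */
    static int map_comm_to_ctxt(unsigned int comm_id)
    {
        if (0 == thread_grouping) {
            comm_id = 0;                /* the old macro had no such clamp */
        }
        return comm_to_context[comm_id];
    }

    int main(void)
    {
        unsigned int dup_comm_id = 7;   /* ID of some duplicated communicator */

        /* Old behaviour: comm_to_context[7] is out of bounds -> possible segfault.
         * New behaviour: the clamp returns the MPI_COMM_WORLD context instead. */
        printf("ctxt_id = %d\n", map_comm_to_ctxt(dup_comm_id));
        return 0;
    }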
