Skip to content

Commit dbc5675

Browse files
authored
Merge pull request #7802 from badgerious/mtl_ofi_cqread_break
mtl/ofi: break from progress loop when events are read
2 parents a7ed13d + 35dbc18 commit dbc5675

File tree

1 file changed

+47
-53
lines changed

1 file changed

+47
-53
lines changed

ompi/mca/mtl/ofi/mtl_ofi.h

Lines changed: 47 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -109,64 +109,58 @@ ompi_mtl_ofi_context_progress(int ctxt_id)
109109
* From the completion's op_context, we get the associated OFI request.
110110
* Call the request's callback.
111111
*/
112-
while (true) {
113-
ret = fi_cq_read(ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq, (void *)&wc,
114-
ompi_mtl_ofi.ofi_progress_event_count);
115-
if (ret > 0) {
116-
count+= ret;
117-
events_read = ret;
118-
for (i = 0; i < events_read; i++) {
119-
if (NULL != wc[i].op_context) {
120-
ofi_req = TO_OFI_REQ(wc[i].op_context);
121-
assert(ofi_req);
122-
ret = ofi_req->event_callback(&wc[i], ofi_req);
123-
if (OMPI_SUCCESS != ret) {
124-
opal_output(0, "%s:%d: Error returned by request event callback: %zd.\n"
125-
"*** The Open MPI OFI MTL is aborting the MPI job (via exit(3)).\n",
126-
__FILE__, __LINE__, ret);
127-
fflush(stderr);
128-
exit(1);
129-
}
130-
}
131-
}
132-
} else if (OPAL_UNLIKELY(ret == -FI_EAVAIL)) {
133-
/**
134-
* An error occurred and is being reported via the CQ.
135-
* Read the error and forward it to the upper layer.
136-
*/
137-
ret = fi_cq_readerr(ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq,
138-
&error,
139-
0);
140-
if (0 > ret) {
141-
opal_output(0, "%s:%d: Error returned from fi_cq_readerr: %s(%zd).\n"
142-
"*** The Open MPI OFI MTL is aborting the MPI job (via exit(3)).\n",
143-
__FILE__, __LINE__, fi_strerror(-ret), ret);
144-
fflush(stderr);
145-
exit(1);
146-
}
147-
148-
assert(error.op_context);
149-
ofi_req = TO_OFI_REQ(error.op_context);
150-
assert(ofi_req);
151-
ret = ofi_req->error_callback(&error, ofi_req);
152-
if (OMPI_SUCCESS != ret) {
153-
opal_output(0, "%s:%d: Error returned by request error callback: %zd.\n"
112+
ret = fi_cq_read(ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq, (void *)&wc,
113+
ompi_mtl_ofi.ofi_progress_event_count);
114+
if (ret > 0) {
115+
count+= ret;
116+
events_read = ret;
117+
for (i = 0; i < events_read; i++) {
118+
if (NULL != wc[i].op_context) {
119+
ofi_req = TO_OFI_REQ(wc[i].op_context);
120+
assert(ofi_req);
121+
ret = ofi_req->event_callback(&wc[i], ofi_req);
122+
if (OMPI_SUCCESS != ret) {
123+
opal_output(0, "%s:%d: Error returned by request event callback: %zd.\n"
154124
"*** The Open MPI OFI MTL is aborting the MPI job (via exit(3)).\n",
155125
__FILE__, __LINE__, ret);
156-
fflush(stderr);
157-
exit(1);
126+
fflush(stderr);
127+
exit(1);
128+
}
158129
}
159-
} else {
160-
if (ret == -FI_EAGAIN || ret == -EINTR) {
161-
break;
162-
} else {
163-
opal_output(0, "%s:%d: Error returned from fi_cq_read: %s(%zd).\n"
130+
}
131+
} else if (OPAL_UNLIKELY(ret == -FI_EAVAIL)) {
132+
/**
133+
* An error occurred and is being reported via the CQ.
134+
* Read the error and forward it to the upper layer.
135+
*/
136+
ret = fi_cq_readerr(ompi_mtl_ofi.ofi_ctxt[ctxt_id].cq,
137+
&error,
138+
0);
139+
if (0 > ret) {
140+
opal_output(0, "%s:%d: Error returned from fi_cq_readerr: %s(%zd).\n"
141+
"*** The Open MPI OFI MTL is aborting the MPI job (via exit(3)).\n",
142+
__FILE__, __LINE__, fi_strerror(-ret), ret);
143+
fflush(stderr);
144+
exit(1);
145+
}
146+
147+
assert(error.op_context);
148+
ofi_req = TO_OFI_REQ(error.op_context);
149+
assert(ofi_req);
150+
ret = ofi_req->error_callback(&error, ofi_req);
151+
if (OMPI_SUCCESS != ret) {
152+
opal_output(0, "%s:%d: Error returned by request error callback: %zd.\n"
164153
"*** The Open MPI OFI MTL is aborting the MPI job (via exit(3)).\n",
165-
__FILE__, __LINE__, fi_strerror(-ret), ret);
166-
fflush(stderr);
167-
exit(1);
168-
}
154+
__FILE__, __LINE__, ret);
155+
fflush(stderr);
156+
exit(1);
169157
}
158+
} else if (ret != -FI_EAGAIN && ret != -EINTR) {
159+
opal_output(0, "%s:%d: Error returned from fi_cq_read: %s(%zd).\n"
160+
"*** The Open MPI OFI MTL is aborting the MPI job (via exit(3)).\n",
161+
__FILE__, __LINE__, fi_strerror(-ret), ret);
162+
fflush(stderr);
163+
exit(1);
170164
}
171165

172166
return count;

0 commit comments

Comments
 (0)