Skip to content

Commit c4d3685

Browse files
authored
Merge pull request #7228 from devreal/progress-returns
Harmonize return values of progress callbacks
2 parents 806b351 + 72501f8 commit c4d3685

File tree

11 files changed

+46
-29
lines changed

11 files changed

+46
-29
lines changed

ompi/communicator/comm_request.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ static int ompi_comm_request_progress (void)
100100
{
101101
ompi_comm_request_t *request, *next;
102102
static opal_atomic_int32_t progressing = 0;
103+
int completed = 0;
103104

104105
/* don't allow re-entry */
105106
if (opal_atomic_swap_32 (&progressing, 1)) {
@@ -126,6 +127,7 @@ static int ompi_comm_request_progress (void)
126127
}
127128
ompi_request_free (&subreq);
128129
request_item->subreq_count--;
130+
completed++;
129131
} else {
130132
item_complete = false;
131133
break;
@@ -163,7 +165,7 @@ static int ompi_comm_request_progress (void)
163165
opal_mutex_unlock (&ompi_comm_request_mutex);
164166
progressing = 0;
165167

166-
return 1;
168+
return completed;
167169
}
168170

169171
void ompi_comm_request_start (ompi_comm_request_t *request)

ompi/mca/coll/libnbc/coll_libnbc_component.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,7 @@ ompi_coll_libnbc_progress(void)
427427
{
428428
ompi_coll_libnbc_request_t* request, *next;
429429
int res;
430+
int completed = 0;
430431

431432
if (0 == opal_list_get_size (&mca_coll_libnbc_component.active_requests)) {
432433
/* no requests -- nothing to do. do not grab a lock */
@@ -464,14 +465,15 @@ ompi_coll_libnbc_progress(void)
464465
if(!request->super.super.req_persistent || !REQUEST_COMPLETE(&request->super.super)) {
465466
ompi_request_complete(&request->super.super, true);
466467
}
468+
completed++;
467469
}
468470
OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock);
469471
}
470472
libnbc_in_progress = false;
471473
}
472474
OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock);
473475

474-
return 0;
476+
return completed;
475477
}
476478

477479

ompi/mca/mtl/psm2/mtl_psm2.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ int ompi_mtl_psm2_progress( void ) {
403403
mca_mtl_psm2_request_t* mtl_psm2_request;
404404
psm2_mq_status2_t psm2_status;
405405
psm2_mq_req_t req;
406-
int completed = 1;
406+
int completed = 0;
407407

408408
do {
409409
OPAL_THREAD_LOCK(&mtl_psm2_mq_mutex);
@@ -469,5 +469,5 @@ int ompi_mtl_psm2_progress( void ) {
469469
opal_show_help("help-mtl-psm2.txt",
470470
"error polling network", true,
471471
psm2_error_get_string(err));
472-
return 1;
472+
return OMPI_ERROR;
473473
}

ompi/mca/osc/pt2pt/osc_pt2pt_component.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ static int component_register (void)
153153

154154
static int component_progress (void)
155155
{
156+
int completed = 0;
156157
int pending_count = opal_list_get_size (&mca_osc_pt2pt_component.pending_operations);
157158
int recv_count = opal_list_get_size (&mca_osc_pt2pt_component.pending_receives);
158159
ompi_osc_pt2pt_pending_t *pending, *next;
@@ -167,6 +168,7 @@ static int component_progress (void)
167168
}
168169

169170
(void) ompi_osc_pt2pt_process_receive (recv);
171+
completed++;
170172
}
171173
}
172174

@@ -194,12 +196,13 @@ static int component_progress (void)
194196
if (OMPI_SUCCESS == ret) {
195197
opal_list_remove_item (&mca_osc_pt2pt_component.pending_operations, &pending->super);
196198
OBJ_RELEASE(pending);
199+
completed++;
197200
}
198201
}
199202
OPAL_THREAD_UNLOCK(&mca_osc_pt2pt_component.pending_operations_lock);
200203
}
201204

202-
return 1;
205+
return completed;
203206
}
204207

205208
static int

ompi/mca/pml/ucx/pml_ucx.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -515,8 +515,7 @@ int mca_pml_ucx_enable(bool enable)
515515

516516
int mca_pml_ucx_progress(void)
517517
{
518-
ucp_worker_progress(ompi_pml_ucx.ucp_worker);
519-
return OMPI_SUCCESS;
518+
return ucp_worker_progress(ompi_pml_ucx.ucp_worker);
520519
}
521520

522521
int mca_pml_ucx_add_comm(struct ompi_communicator_t* comm)

ompi/request/grequest.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -202,14 +202,13 @@ int ompi_grequest_invoke_query(ompi_request_t *request,
202202
int rc = OMPI_SUCCESS;
203203
ompi_grequest_t *g = (ompi_grequest_t*) request;
204204

205-
/* MPI-2:8.2 does not say what to do with the return value from
206-
the query function (i.e., the int return value from the C
207-
function or the ierr argument from the Fortran function).
208-
Making the command decision here to ignore it. If the handler
209-
wants to pass an error back, it should set it in the MPI_ERROR
210-
field in the status (which is always kept, regardless if the
211-
top-level function was invoked with MPI_STATUS[ES]_IGNORE or
212-
not). */
205+
/* MPI-3 mandates that the return value from the query function
206+
* (i.e., the int return value from the C function or the ierr
207+
* argument from the Fortran function) must be returned to the
208+
* user. Thus, if the return of the query function is not MPI_SUCCESS
209+
* we will update the MPI_ERROR field. Otherwise, the MPI_ERROR
210+
* field is untouched (or left to the discretion of the query function).
211+
*/
213212
if (NULL != g->greq_query.c_query) {
214213
if (g->greq_funcs_are_c) {
215214
rc = g->greq_query.c_query(g->greq_state, status);
@@ -221,7 +220,9 @@ int ompi_grequest_invoke_query(ompi_request_t *request,
221220
rc = OMPI_FINT_2_INT(ierr);
222221
}
223222
}
224-
223+
if( MPI_SUCCESS != rc ) {
224+
status->MPI_ERROR = rc;
225+
}
225226
return rc;
226227
}
227228

ompi/request/grequestx.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ static opal_mutex_t lock;
3434

3535
static int grequestx_progress(void) {
3636
ompi_grequest_t *request, *next;
37+
int completed = 0;
3738

3839
OPAL_THREAD_LOCK(&lock);
3940
if (!in_progress) {
@@ -43,18 +44,17 @@ static int grequestx_progress(void) {
4344
MPI_Status status;
4445
OPAL_THREAD_UNLOCK(&lock);
4546
request->greq_poll.c_poll(request->greq_state, &status);
47+
OPAL_THREAD_LOCK(&lock);
4648
if (REQUEST_COMPLETE(&request->greq_base)) {
47-
OPAL_THREAD_LOCK(&lock);
4849
opal_list_remove_item(&requests, &request->greq_base.super.super);
49-
OPAL_THREAD_UNLOCK(&lock);
50+
completed++;
5051
}
51-
OPAL_THREAD_LOCK(&lock);
5252
}
5353
in_progress = false;
5454
}
5555
OPAL_THREAD_UNLOCK(&lock);
5656

57-
return OMPI_SUCCESS;
57+
return completed;
5858
}
5959

6060
int ompi_grequestx_start(

opal/mca/btl/uct/btl_uct_component.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,7 @@ static int mca_btl_uct_tl_progress (mca_btl_uct_tl_t *tl, int starting_index)
564564
static int mca_btl_uct_component_progress_pending (mca_btl_uct_module_t *uct_btl)
565565
{
566566
mca_btl_uct_base_frag_t *frag, *next;
567+
int completed = 0;
567568
size_t count;
568569

569570
if (0 == (count = opal_list_get_size (&uct_btl->pending_frags))) {
@@ -580,11 +581,13 @@ static int mca_btl_uct_component_progress_pending (mca_btl_uct_module_t *uct_btl
580581

581582
if (OPAL_SUCCESS > mca_btl_uct_send_frag (uct_btl, frag, false)) {
582583
opal_list_prepend (&uct_btl->pending_frags, (opal_list_item_t *) frag);
584+
} else {
585+
completed++;
583586
}
584587
}
585588
OPAL_THREAD_UNLOCK(&uct_btl->lock);
586589

587-
return OPAL_SUCCESS;
590+
return completed;
588591
}
589592

590593
/**

opal/mca/common/ucx/common_ucx_wpool.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,10 +279,11 @@ void opal_common_ucx_wpool_finalize(opal_common_ucx_wpool_t *wpool)
279279
return;
280280
}
281281

282-
OPAL_DECLSPEC void
282+
OPAL_DECLSPEC int
283283
opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool)
284284
{
285285
_winfo_list_item_t *item = NULL, *next = NULL;
286+
int completed = 0, progressed = 0;
286287

287288
/* Go over all active workers and progress them
288289
* TODO: may want to have some partitioning to progress only part of
@@ -297,14 +298,19 @@ opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool)
297298
opal_list_remove_item(&wpool->active_workers, &item->super);
298299
_winfo_reset(winfo);
299300
opal_list_append(&wpool->idle_workers, &item->super);
301+
completed++;
300302
} else {
301303
/* Progress worker until there are existing events */
302-
while(ucp_worker_progress(winfo->worker));
304+
do {
305+
progressed = ucp_worker_progress(winfo->worker);
306+
completed += progressed;
307+
} while (progressed);
303308
}
304309
opal_mutex_unlock(&winfo->mutex);
305310
}
306311
opal_mutex_unlock(&wpool->mutex);
307312
}
313+
return completed;
308314
}
309315

310316
static int

opal/mca/common/ucx/common_ucx_wpool.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ OPAL_DECLSPEC void opal_common_ucx_wpool_free(opal_common_ucx_wpool_t *wpool);
165165
OPAL_DECLSPEC int opal_common_ucx_wpool_init(opal_common_ucx_wpool_t *wpool,
166166
int proc_world_size, bool enable_mt);
167167
OPAL_DECLSPEC void opal_common_ucx_wpool_finalize(opal_common_ucx_wpool_t *wpool);
168-
OPAL_DECLSPEC void opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool);
168+
OPAL_DECLSPEC int opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool);
169169

170170
/* Manage Communication context */
171171
OPAL_DECLSPEC int opal_common_ucx_wpctx_create(opal_common_ucx_wpool_t *wpool, int comm_size,

0 commit comments

Comments
 (0)