Skip to content

Commit d2c7d13

Browse files
committed
Correctly retain the datatypes for topological communicators.
I do not recall which issue this is related to, and I was not able to find it on GitHub either, but this patch addressed the number of datatypes to release/retain for collective communications on topological communicators. Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
1 parent e1b66cf commit d2c7d13

File tree

6 files changed

+47
-36
lines changed

6 files changed

+47
-36
lines changed

ompi/mca/coll/base/coll_base_util.c

Lines changed: 33 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2021 The University of Tennessee and The University
5+
* Copyright (c) 2004-2022 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -244,25 +244,21 @@ int ompi_coll_base_retain_datatypes( ompi_request_t *req, ompi_datatype_t *stype
244244
return OMPI_SUCCESS;
245245
}
246246

247-
static void release_vecs_callback(ompi_coll_base_nbc_request_t *request) {
248-
ompi_communicator_t *comm = request->super.req_mpi_object.comm;
249-
int scount, rcount;
250-
if (OMPI_COMM_IS_TOPO(comm)) {
251-
(void)mca_topo_base_neighbor_count (comm, &rcount, &scount);
252-
} else {
253-
scount = rcount = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm);
254-
}
247+
static void release_vecs_callback(ompi_coll_base_nbc_request_t *request)
248+
{
255249
if (NULL != request->data.refcounted.vecs.stypes) {
256-
for (int i=0; i<scount; i++) {
257-
if (NULL != request->data.refcounted.vecs.stypes[i]) {
250+
for (int i = 0; i < request->data.refcounted.vecs.scount; i++) {
251+
if (NULL != request->data.refcounted.vecs.stypes[i] &&
252+
!ompi_datatype_is_predefined(request->data.refcounted.vecs.stypes[i])) {
258253
OMPI_DATATYPE_RELEASE_NO_NULLIFY(request->data.refcounted.vecs.stypes[i]);
259254
}
260255
}
261256
request->data.refcounted.vecs.stypes = NULL;
262257
}
263258
if (NULL != request->data.refcounted.vecs.rtypes) {
264-
for (int i=0; i<rcount; i++) {
265-
if (NULL != request->data.refcounted.vecs.rtypes[i]) {
259+
for (int i = 0; i < request->data.refcounted.vecs.rcount; i++) {
260+
if (NULL != request->data.refcounted.vecs.rtypes[i] &&
261+
!ompi_datatype_is_predefined(request->data.refcounted.vecs.rtypes[i])) {
266262
OMPI_DATATYPE_RELEASE_NO_NULLIFY(request->data.refcounted.vecs.rtypes[i]);
267263
}
268264
}
@@ -292,35 +288,47 @@ static int free_vecs_callback(struct ompi_request_t **rptr) {
292288
}
293289

294290
int ompi_coll_base_retain_datatypes_w( ompi_request_t *req,
295-
ompi_datatype_t * const stypes[], ompi_datatype_t * const rtypes[]) {
291+
ompi_datatype_t * const stypes[],
292+
ompi_datatype_t * const rtypes[],
293+
bool use_topo)
294+
{
296295
ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req;
297-
bool retain = false;
298296
ompi_communicator_t *comm = request->super.req_mpi_object.comm;
299297
int scount, rcount;
298+
300299
if (REQUEST_COMPLETE(req)) {
301300
return OMPI_SUCCESS;
302301
}
303-
if (OMPI_COMM_IS_TOPO(comm)) {
302+
303+
if (use_topo && OMPI_COMM_IS_TOPO(comm)) {
304304
(void)mca_topo_base_neighbor_count (comm, &rcount, &scount);
305305
} else {
306306
scount = rcount = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm);
307307
}
308308

309-
for (int i=0; i<scount; i++) {
310-
if (NULL != stypes && NULL != stypes[i] && !ompi_datatype_is_predefined(stypes[i])) {
311-
OBJ_RETAIN(stypes[i]);
312-
retain = true;
309+
request->data.refcounted.vecs.scount = 0; /* default value */
310+
if (NULL != stypes) {
311+
for (int i = 0; i < scount; i++) {
312+
if (NULL != stypes[i] && !ompi_datatype_is_predefined(stypes[i])) {
313+
OBJ_RETAIN(stypes[i]);
314+
request->data.refcounted.vecs.scount = i; /* last valid type */
315+
}
313316
}
314317
}
315-
for (int i=0; i<rcount; i++) {
316-
if (NULL != rtypes && NULL != rtypes[i] && !ompi_datatype_is_predefined(rtypes[i])) {
317-
OBJ_RETAIN(rtypes[i]);
318-
retain = true;
318+
request->data.refcounted.vecs.rcount = 0; /* default value */
319+
if (NULL != rtypes) {
320+
for (int i = 0; i < rcount; i++) {
321+
if (NULL != rtypes[i] && !ompi_datatype_is_predefined(rtypes[i])) {
322+
OBJ_RETAIN(rtypes[i]);
323+
request->data.refcounted.vecs.rcount = i; /* last valid type */
324+
}
319325
}
320326
}
321-
if (OPAL_UNLIKELY(retain)) {
327+
if (OPAL_LIKELY(request->data.refcounted.vecs.scount | request->data.refcounted.vecs.rcount) ) {
322328
request->data.refcounted.vecs.stypes = (ompi_datatype_t **) stypes;
323329
request->data.refcounted.vecs.rtypes = (ompi_datatype_t **) rtypes;
330+
request->data.refcounted.vecs.scount = scount;
331+
request->data.refcounted.vecs.rcount = rcount;
324332
if (req->req_persistent) {
325333
request->cb.req_free = req->req_free;
326334
req->req_free = free_vecs_callback;

ompi/mca/coll/base/coll_base_util.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2021 The University of Tennessee and The University
5+
* Copyright (c) 2004-2022 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
@@ -68,6 +68,8 @@ struct ompi_coll_base_nbc_request_t {
6868
struct {
6969
ompi_datatype_t * const *stypes;
7070
ompi_datatype_t * const *rtypes;
71+
int scount;
72+
int rcount;
7173
} vecs;
7274
} refcounted;
7375
void* release_arrays[OMPI_REQ_NB_RELEASE_ARRAYS];
@@ -175,7 +177,7 @@ int ompi_coll_base_retain_op( ompi_request_t *request,
175177
* (will be cast internally).
176178
*/
177179
int ompi_coll_base_retain_datatypes( ompi_request_t *request,
178-
ompi_datatype_t *stype,
180+
ompi_datatype_t *stype,
179181
ompi_datatype_t *rtype);
180182

181183
/**
@@ -185,7 +187,8 @@ int ompi_coll_base_retain_datatypes( ompi_request_t *request,
185187
*/
186188
int ompi_coll_base_retain_datatypes_w( ompi_request_t *request,
187189
ompi_datatype_t * const stypes[],
188-
ompi_datatype_t * const rtypes[]);
190+
ompi_datatype_t * const rtypes[],
191+
bool use_topo);
189192

190193
/* File reading function */
191194
int ompi_coll_base_file_getnext_long(FILE *fptr, int *fileline, long* val);

ompi/mpi/c/alltoallw_init.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2020 The University of Tennessee and The University
6+
* Copyright (c) 2004-2022 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
@@ -127,7 +127,7 @@ int MPI_Alltoallw_init(const void *sendbuf, const int sendcounts[], const int sd
127127
rdispls, recvtypes, comm, info, request,
128128
comm->c_coll->coll_alltoallw_init_module);
129129
if (OPAL_LIKELY(OMPI_SUCCESS == err)) {
130-
ompi_coll_base_retain_datatypes_w(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtypes, recvtypes);
130+
ompi_coll_base_retain_datatypes_w(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtypes, recvtypes, false);
131131
}
132132
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
133133
}

ompi/mpi/c/ialltoallw.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2020 The University of Tennessee and The University
6+
* Copyright (c) 2004-2022 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
@@ -121,7 +121,7 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl
121121
rdispls, recvtypes, comm, request,
122122
comm->c_coll->coll_ialltoallw_module);
123123
if (OPAL_LIKELY(OMPI_SUCCESS == err)) {
124-
ompi_coll_base_retain_datatypes_w(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtypes, recvtypes);
124+
ompi_coll_base_retain_datatypes_w(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtypes, recvtypes, false);
125125
}
126126
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
127127
}

ompi/mpi/c/ineighbor_alltoallw.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2020 The University of Tennessee and The University
6+
* Copyright (c) 2004-2022 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
@@ -146,7 +146,7 @@ int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const M
146146
recvbuf, recvcounts, rdispls, recvtypes, comm, request,
147147
comm->c_coll->coll_ineighbor_alltoallw_module);
148148
if (OPAL_LIKELY(OMPI_SUCCESS == err)) {
149-
ompi_coll_base_retain_datatypes_w(*request, sendtypes, recvtypes);
149+
ompi_coll_base_retain_datatypes_w(*request, sendtypes, recvtypes, true);
150150
}
151151
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
152152
}

ompi/mpi/c/neighbor_alltoallw_init.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2020 The University of Tennessee and The University
6+
* Copyright (c) 2004-2022 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
@@ -148,7 +148,7 @@ int MPI_Neighbor_alltoallw_init(const void *sendbuf, const int sendcounts[], con
148148
info, request,
149149
comm->c_coll->coll_neighbor_alltoallw_init_module);
150150
if (OPAL_LIKELY(OMPI_SUCCESS == err)) {
151-
ompi_coll_base_retain_datatypes_w(*request, sendtypes, recvtypes);
151+
ompi_coll_base_retain_datatypes_w(*request, sendtypes, recvtypes, true);
152152
}
153153
OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
154154
}

0 commit comments

Comments
 (0)