Skip to content

Commit 7629295

Browse files
ggouaillardet authored and hjelmn committed
coll/libnbc: fix integer overflow
Use internal pack/unpack subroutines that operate on MPI_Aint instead of int and hence solve some integer overflows.

Thanks Clyde Stanfield for reporting this issue.

Refs #5383

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
1 parent 22fa5a8 commit 7629295

File tree

2 files changed

+37
-80
lines changed

2 files changed

+37
-80
lines changed

ompi/mca/coll/libnbc/nbc_ialltoall.c

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
99
* reserved.
1010
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
11-
* Copyright (c) 2014-2017 Research Organization for Information Science
12-
* and Technology (RIST). All rights reserved.
11+
* Copyright (c) 2014-2018 Research Organization for Information Science
12+
* and Technology (RIST). All rights reserved.
1313
* Copyright (c) 2017 IBM Corporation. All rights reserved.
1414
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
1515
* $COPYRIGHT$
@@ -58,7 +58,8 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se
5858
MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,
5959
struct mca_coll_base_module_2_3_0_t *module, bool persistent)
6060
{
61-
int rank, p, res, datasize;
61+
int rank, p, res;
62+
MPI_Aint datasize;
6263
size_t a2asize, sndsize;
6364
NBC_Schedule *schedule;
6465
MPI_Aint rcvext, sndext;
@@ -118,16 +119,15 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se
118119
return OMPI_ERR_OUT_OF_RESOURCE;
119120
}
120121
} else if (alg == NBC_A2A_DISS) {
121-
/* persistent operation is not supported currently for this algorithm;
122-
* we need to replace PMPI_Pack, PMPI_Unpack, and mempcy */
122+
/* persistent operation is not supported currently for this algorithm */
123123
assert(! persistent);
124124

125125
if(NBC_Type_intrinsic(sendtype)) {
126126
datasize = sndext * sendcount;
127127
} else {
128-
res = PMPI_Pack_size (sendcount, sendtype, comm, &datasize);
128+
res = ompi_datatype_pack_external_size("external32", sendcount, sendtype, &datasize);
129129
if (MPI_SUCCESS != res) {
130-
NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res);
130+
NBC_Error("MPI Error in ompi_datatype_pack_external_size() (%i)", res);
131131
return res;
132132
}
133133
}
@@ -156,23 +156,23 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se
156156
memcpy ((char *) tmpbuf + datasize * (p - rank), sendbuf, datasize * rank);
157157
}
158158
} else {
159-
int pos=0;
159+
MPI_Aint pos=0;
160160

161161
/* non-contiguous - pack */
162-
res = PMPI_Pack ((char *) sendbuf + rank * sendcount * sndext, (p - rank) * sendcount, sendtype, tmpbuf,
163-
(p - rank) * datasize, &pos, comm);
162+
res = ompi_datatype_pack_external ("external32", (char *) sendbuf + (intptr_t)rank * (intptr_t)sendcount * sndext, (intptr_t)(p - rank) * (intptr_t)sendcount, sendtype, tmpbuf,
163+
(intptr_t)(p - rank) * datasize, &pos);
164164
if (OPAL_UNLIKELY(MPI_SUCCESS != res)) {
165-
NBC_Error("MPI Error in PMPI_Pack() (%i)", res);
165+
NBC_Error("MPI Error in ompi_datatype_pack_external() (%i)", res);
166166
free(tmpbuf);
167167
return res;
168168
}
169169

170170
if (rank != 0) {
171171
pos = 0;
172-
res = PMPI_Pack(sendbuf, rank * sendcount, sendtype, (char *) tmpbuf + datasize * (p - rank),
173-
rank * datasize, &pos, comm);
172+
res = ompi_datatype_pack_external("external32", sendbuf, (intptr_t)rank * (intptr_t)sendcount, sendtype, (char *) tmpbuf + datasize * (intptr_t)(p - rank),
173+
rank * datasize, &pos);
174174
if (OPAL_UNLIKELY(MPI_SUCCESS != res)) {
175-
NBC_Error("MPI Error in PMPI_Pack() (%i)", res);
175+
NBC_Error("MPI Error in ompi_datatype_pack_external() (%i)", res);
176176
free(tmpbuf);
177177
return res;
178178
}
@@ -200,8 +200,8 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se
200200

201201
if (!inplace) {
202202
/* copy my data to receive buffer */
203-
rbuf = (char *) recvbuf + rank * recvcount * rcvext;
204-
sbuf = (char *) sendbuf + rank * sendcount * sndext;
203+
rbuf = (char *) recvbuf + (MPI_Aint)rank * (MPI_Aint)recvcount * rcvext;
204+
sbuf = (char *) sendbuf + (MPI_Aint)rank * (MPI_Aint)sendcount * sndext;
205205
res = NBC_Sched_copy (sbuf, false, sendcount, sendtype,
206206
rbuf, false, recvcount, recvtype, schedule, false);
207207
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
@@ -424,13 +424,13 @@ static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rc
424424
continue;
425425
}
426426

427-
char *rbuf = (char *) recvbuf + r * recvcount * rcvext;
427+
char *rbuf = (char *) recvbuf + (intptr_t)r * (intptr_t)recvcount * rcvext;
428428
res = NBC_Sched_recv (rbuf, false, recvcount, recvtype, r, schedule, false);
429429
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
430430
return res;
431431
}
432432

433-
char *sbuf = (char *) sendbuf + r * sendcount * sndext;
433+
char *sbuf = (char *) sendbuf + (intptr_t)r * (intptr_t)sendcount * sndext;
434434
res = NBC_Sched_send (sbuf, false, sendcount, sendtype, r, schedule, false);
435435
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
436436
return res;
@@ -443,7 +443,8 @@ static inline int a2a_sched_linear(int rank, int p, MPI_Aint sndext, MPI_Aint rc
443443
static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule,
444444
const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
445445
MPI_Datatype recvtype, MPI_Comm comm, void* tmpbuf) {
446-
int res, speer, rpeer, datasize, offset, virtp;
446+
int res, speer, rpeer, virtp;
447+
MPI_Aint datasize, offset;
447448
char *rbuf, *rtmpbuf, *stmpbuf;
448449

449450
if (p < 2) {
@@ -453,9 +454,9 @@ static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcve
453454
if(NBC_Type_intrinsic(sendtype)) {
454455
datasize = sndext*sendcount;
455456
} else {
456-
res = PMPI_Pack_size(sendcount, sendtype, comm, &datasize);
457+
res = ompi_datatype_pack_external_size("external32", sendcount, sendtype, &datasize);
457458
if (MPI_SUCCESS != res) {
458-
NBC_Error("MPI Error in PMPI_Pack_size() (%i)", res);
459+
NBC_Error("MPI Error in ompi_datatype_pack_external_size() (%i)", res);
459460
return res;
460461
}
461462
}
@@ -540,8 +541,8 @@ static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, voi
540541
for (int i = 1 ; i < (p+1)/2 ; i++) {
541542
int speer = (rank + i) % p;
542543
int rpeer = (rank + p - i) % p;
543-
char *sbuf = (char *) buf + speer * count * ext;
544-
char *rbuf = (char *) buf + rpeer * count * ext;
544+
char *sbuf = (char *) buf + (intptr_t)speer * (intptr_t)count * ext;
545+
char *rbuf = (char *) buf + (intptr_t)rpeer * (intptr_t)count * ext;
545546

546547
res = NBC_Sched_copy (rbuf, false, count, type,
547548
(void *)(-gap), true, count, type,
@@ -570,7 +571,7 @@ static inline int a2a_sched_inplace(int rank, int p, NBC_Schedule* schedule, voi
570571
if (0 == (p%2)) {
571572
int peer = (rank + p/2) % p;
572573

573-
char *tbuf = (char *) buf + peer * count * ext;
574+
char *tbuf = (char *) buf + (intptr_t)peer * (intptr_t)count * ext;
574575
res = NBC_Sched_copy (tbuf, false, count, type,
575576
(void *)(-gap), true, count, type,
576577
schedule, true);

ompi/mca/coll/libnbc/nbc_internal.h

Lines changed: 12 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
*
1111
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
1212
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
13-
* Copyright (c) 2015-2017 Research Organization for Information Science
14-
* and Technology (RIST). All rights reserved.
13+
* Copyright (c) 2015-2018 Research Organization for Information Science
14+
* and Technology (RIST). All rights reserved.
1515
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
1616
* reserved.
1717
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
@@ -500,60 +500,20 @@ static inline int NBC_Type_intrinsic(MPI_Datatype type) {
500500

501501
/* let's give a try to inline functions */
502502
static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) {
503-
int size, pos, res;
504-
void *packbuf;
503+
int res;
505504

506-
#if OPAL_CUDA_SUPPORT
507-
if((srctype == tgttype) && NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
508-
#else
509-
if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) {
510-
#endif /* OPAL_CUDA_SUPPORT */
511-
/* if we have the same types and they are contiguous (intrinsic
512-
* types are contiguous), we can just use a single memcpy */
513-
ptrdiff_t gap, span;
514-
span = opal_datatype_span(&srctype->super, srccount, &gap);
515-
516-
memcpy(tgt, src, span);
517-
} else {
518-
/* we have to pack and unpack */
519-
res = PMPI_Pack_size(srccount, srctype, comm, &size);
520-
if (MPI_SUCCESS != res) {
521-
NBC_Error ("MPI Error in PMPI_Pack_size() (%i:%i)", res, size);
522-
return res;
523-
}
524-
525-
if (0 == size) {
526-
return OMPI_SUCCESS;
527-
}
528-
packbuf = malloc(size);
529-
if (NULL == packbuf) {
530-
NBC_Error("Error in malloc()");
531-
return res;
532-
}
533-
534-
pos=0;
535-
res = PMPI_Pack(src, srccount, srctype, packbuf, size, &pos, comm);
536-
537-
if (MPI_SUCCESS != res) {
538-
NBC_Error ("MPI Error in PMPI_Pack() (%i)", res);
539-
free (packbuf);
540-
return res;
541-
}
542-
543-
pos=0;
544-
res = PMPI_Unpack(packbuf, size, &pos, tgt, tgtcount, tgttype, comm);
545-
free(packbuf);
546-
if (MPI_SUCCESS != res) {
547-
NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res);
548-
return res;
549-
}
505+
res = ompi_datatype_sndrcv(src, srccount, srctype, tgt, tgtcount, tgttype);
506+
if (OMPI_SUCCESS != res) {
507+
NBC_Error ("MPI Error in ompi_datatype_sndrcv() (%i)", res);
508+
return res;
550509
}
551510

552511
return OMPI_SUCCESS;
553512
}
554513

555514
static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void *tgt, MPI_Comm comm) {
556-
int size, pos, res;
515+
MPI_Aint size, pos;
516+
int res;
557517
ptrdiff_t ext, lb;
558518

559519
#if OPAL_CUDA_SUPPORT
@@ -563,6 +523,7 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void
563523
#endif /* OPAL_CUDA_SUPPORT */
564524
/* if we have the same types and they are contiguous (intrinsic
565525
* types are contiguous), we can just use a single memcpy */
526+
res = ompi_datatype_pack_external_size("external32", srccount, srctype, &size);
566527
res = ompi_datatype_get_extent (srctype, &lb, &ext);
567528
if (OMPI_SUCCESS != res) {
568529
NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res);
@@ -573,15 +534,10 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void
573534

574535
} else {
575536
/* we have to unpack */
576-
res = PMPI_Pack_size(srccount, srctype, comm, &size);
577-
if (MPI_SUCCESS != res) {
578-
NBC_Error ("MPI Error in PMPI_Pack_size() (%i)", res);
579-
return res;
580-
}
581537
pos = 0;
582-
res = PMPI_Unpack(src, size, &pos, tgt, srccount, srctype, comm);
538+
res = ompi_datatype_unpack_external("external32", src, size, &pos, tgt, srccount, srctype);
583539
if (MPI_SUCCESS != res) {
584-
NBC_Error ("MPI Error in PMPI_Unpack() (%i)", res);
540+
NBC_Error ("MPI Error in ompi_datatype_unpack_external() (%i)", res);
585541
return res;
586542
}
587543
}

0 commit comments

Comments
 (0)