Skip to content

Commit 3c844c5

Browse files
authored
Merge pull request #8007 from markalle/mpich_vector
Improve predefined pack/unpack performance using mpich methods.
2 parents 7948278 + 3063916 commit 3c844c5

File tree

9 files changed

+864
-56
lines changed

9 files changed

+864
-56
lines changed

LICENSE

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ Copyright (c) 2017-2020 Amazon.com, Inc. or its affiliates. All Rights
5858
Copyright (c) 2018 DataDirect Networks. All rights reserved.
5959
Copyright (c) 2018-2020 Triad National Security, LLC. All rights reserved.
6060
Copyright (c) 2020 Google, LLC. All rights reserved.
61+
Copyright (c) 2002 University of Chicago
62+
Copyright (c) 2001 Argonne National Laboratory
6163

6264
$COPYRIGHT$
6365

@@ -99,3 +101,42 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
99101
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
100102
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
101103
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
104+
105+
----------------[Copyright from inclusion of MPICH code]----------------
106+
107+
The following is a notice of limited availability of the code, and disclaimer
108+
which must be included in the prologue of the code and in all source listings
109+
of the code.
110+
111+
Copyright Notice
112+
+ 2002 University of Chicago
113+
114+
Permission is hereby granted to use, reproduce, prepare derivative works, and
115+
to redistribute to others. This software was authored by:
116+
117+
Mathematics and Computer Science Division
118+
Argonne National Laboratory, Argonne IL 60439
119+
120+
(and)
121+
122+
Department of Computer Science
123+
University of Illinois at Urbana-Champaign
124+
125+
126+
GOVERNMENT LICENSE
127+
128+
Portions of this material resulted from work developed under a U.S.
129+
Government Contract and are subject to the following license: the Government
130+
is granted for itself and others acting on its behalf a paid-up, nonexclusive,
131+
irrevocable worldwide license in this computer software to reproduce, prepare
132+
derivative works, and perform publicly and display publicly.
133+
134+
DISCLAIMER
135+
136+
This computer code material was prepared, in part, as an account of work
137+
sponsored by an agency of the United States Government. Neither the United
138+
States, nor the University of Chicago, nor any of their employees, makes any
139+
warranty express or implied, or assumes any legal liability or responsibility
140+
for the accuracy, completeness, or usefulness of any information, apparatus,
141+
product, or process disclosed, or represents that its use would not infringe
142+
privately owned rights.

ompi/datatype/ompi_datatype.h

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
* Copyright (c) 2015-2020 Research Organization for Information Science
1111
* and Technology (RIST). All rights reserved.
1212
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
13+
* Copyright (c) 2021 IBM Corporation. All rights reserved.
1314
* $COPYRIGHT$
1415
*
1516
* Additional copyrights may follow
@@ -418,5 +419,94 @@ OMPI_DECLSPEC int ompi_datatype_pack_external_size( const char datarep[], int in
418419
} \
419420
}
420421

422+
/*
423+
* Sometimes it's faster to operate on a (count,datatype) pair if it's
424+
* converted to (1,larger_datatype). This comes up in pack/unpack if
425+
* the datatype is [int4b,empty4b] for example. With that datatype the
426+
* (count,datatype) path has to loop over the count processing each
427+
* occurrance of the datatype, but a larger type created via
428+
* MPI_Type_contiguous(count,datatype,) will have a single description
429+
* entry describing the whole vector and go through pack/unpack much
430+
* faster.
431+
*
432+
* These functions convert an incoming (count,dt) if the performance
433+
* is potentially better.
434+
*
435+
* Note this function is only likely to be useful if the (count,datatype)
436+
* describes a simple evenly spaced vector that will boil down to a
437+
* single description element, but I don't think it's cheap to traverse
438+
* the incoming datatype to check if that will be the case. Eg I'm not
439+
* sure it would be cheap enough to check that
440+
* [int,int,space,int,int,space] is going to convert nicely, vs
441+
* [int,int,space,int,space] which isn't.
442+
* So the only checks performed are that the (count,datatype) isn't
443+
* contiguous, and that the count is large enough to justify the
444+
* overhead of making a new datatype.
445+
*/
446+
typedef struct {
447+
MPI_Datatype dt;
448+
MPI_Count count;
449+
int new_type_was_created;
450+
} ompi_datatype_consolidate_t;
451+
452+
static inline int
453+
ompi_datatype_consolidate_create(
454+
MPI_Count count, MPI_Datatype dtype, ompi_datatype_consolidate_t *dtmod,
455+
int threshold)
456+
{
457+
int rc;
458+
size_t dtsize;
459+
MPI_Aint lb, extent;
460+
461+
/* default (do nothing) unless we decide otherwise below */
462+
dtmod->dt = dtype;
463+
dtmod->count = count;
464+
dtmod->new_type_was_created = 0;
465+
466+
if (count >= threshold) {
467+
opal_datatype_type_size ( &dtype->super, &dtsize);
468+
rc = ompi_datatype_get_extent( dtype, &lb, &extent );
469+
if (rc != OMPI_SUCCESS) { return rc; }
470+
if ((dtype->super.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) &&
471+
(MPI_Aint)dtsize == extent)
472+
{
473+
/* contig, no performance advantage to making a new type */
474+
} else {
475+
rc = ompi_datatype_create_contiguous( count, dtype, &dtmod->dt );
476+
if (rc != OMPI_SUCCESS) { return rc; }
477+
ompi_datatype_commit(&dtmod->dt);
478+
dtmod->count = 1;
479+
dtmod->new_type_was_created = 1;
480+
}
481+
}
482+
return OMPI_SUCCESS;
483+
}
484+
static inline int
485+
ompi_datatype_consolidate_free(ompi_datatype_consolidate_t *dtmod)
486+
{
487+
int rc = OMPI_SUCCESS;
488+
if (dtmod->new_type_was_created) {
489+
rc = ompi_datatype_destroy( &dtmod->dt );
490+
/* caller isn't supposed to free twice, but safety valve if they do: */
491+
dtmod->new_type_was_created = 0;
492+
}
493+
return rc;
494+
}
495+
/*
496+
* The magic number below just came from empirical testing on a couple
497+
* local PPC machines using [int,space] as the datatype. There's some
498+
* overhead in constructing a new datatype, so just walking a sequence of
499+
* description elements is better for a short list of elements vs
500+
* creating a potentially shorter list and hoping the vector-walking
501+
* of the new elements is faster. This could maybe be tuned dynamically
502+
* but it doesn't really seem worth it.
503+
*
504+
* I only tested on two machines, the crossover point for pack and unpack
505+
* were 80 and 62 on one machine, and 250 and 220 on the other. So I lean
506+
* toward using 250 for both and assuming that's likely to not waste too
507+
* much overhead on the datatype creation for most cases.
508+
*/
509+
#define OMPI_DATATYPE_CONSOLIDATE_THRESHOLD 250
510+
421511
END_C_DECLS
422512
#endif /* OMPI_DATATYPE_H_HAS_BEEN_INCLUDED */

ompi/mpi/c/pack.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* reserved.
1616
* Copyright (c) 2015-2018 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18+
* Copyright (c) 2021 IBM Corporation. All rights reserved.
1819
* $COPYRIGHT$
1920
*
2021
* Additional copyrights may follow
@@ -77,10 +78,25 @@ int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype,
7778

7879
OPAL_CR_ENTER_LIBRARY();
7980

81+
/*
82+
* If a datatype's description contains a single element that describes
83+
* a large vector that path is reasonably optimized in pack/unpack. On
84+
* the other hand if the count and datatype combined describe the same
85+
* vector, that gets processed one element at a time.
86+
*
87+
* So at the top level we morph the call if the count and datatype look
88+
* like a good vector.
89+
*/
90+
ompi_datatype_consolidate_t dtmod;
91+
rc = ompi_datatype_consolidate_create(incount, datatype, &dtmod,
92+
OMPI_DATATYPE_CONSOLIDATE_THRESHOLD);
93+
OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME);
94+
8095
OBJ_CONSTRUCT( &local_convertor, opal_convertor_t );
8196
/* the resulting convertor will be set to the position ZERO */
82-
opal_convertor_copy_and_prepare_for_send( ompi_mpi_local_convertor, &(datatype->super),
83-
incount, (void *) inbuf, 0, &local_convertor );
97+
opal_convertor_copy_and_prepare_for_send( ompi_mpi_local_convertor,
98+
&(dtmod.dt->super), dtmod.count,
99+
(void *) inbuf, 0, &local_convertor );
84100

85101
/* Check for truncation */
86102
opal_convertor_get_packed_size( &local_convertor, &size );
@@ -100,6 +116,9 @@ int MPI_Pack(const void *inbuf, int incount, MPI_Datatype datatype,
100116
*position += size;
101117
OBJ_DESTRUCT( &local_convertor );
102118

119+
rc = ompi_datatype_consolidate_free(&dtmod);
120+
OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME);
121+
103122
OPAL_CR_EXIT_LIBRARY();
104123

105124
/* All done. Note that the convertor returns 1 upon success, not

ompi/mpi/c/unpack.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2015-2018 Research Organization for Information Science
1414
* and Technology (RIST). All rights reserved.
15+
* Copyright (c) 2021 IBM Corporation. All rights reserved.
1516
* $COPYRIGHT$
1617
*
1718
* Additional copyrights may follow
@@ -79,12 +80,27 @@ int MPI_Unpack(const void *inbuf, int insize, int *position,
7980

8081
OPAL_CR_ENTER_LIBRARY();
8182

83+
/*
84+
* If a datatype's description contains a single element that describes
85+
* a large vector that path is reasonably optimized in pack/unpack. On
86+
* the other hand if the count and datatype combined describe the same
87+
* vector that is processed one element at a time.
88+
*
89+
* So at the top level we morph the call if the count and datatype look
90+
* like a good vector.
91+
*/
92+
ompi_datatype_consolidate_t dtmod;
93+
rc = ompi_datatype_consolidate_create(outcount, datatype, &dtmod,
94+
OMPI_DATATYPE_CONSOLIDATE_THRESHOLD);
95+
OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME);
96+
8297
if( insize > 0 ) {
8398
int ret;
8499
OBJ_CONSTRUCT( &local_convertor, opal_convertor_t );
85100
/* the resulting convertor will be set the the position ZERO */
86-
opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor, &(datatype->super),
87-
outcount, outbuf, 0, &local_convertor );
101+
opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor,
102+
&(dtmod.dt->super), dtmod.count,
103+
outbuf, 0, &local_convertor );
88104

89105
/* Check for truncation */
90106
opal_convertor_get_packed_size( &local_convertor, &size );
@@ -110,6 +126,9 @@ int MPI_Unpack(const void *inbuf, int insize, int *position,
110126
}
111127
}
112128

129+
rc = ompi_datatype_consolidate_free(&dtmod);
130+
OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME);
131+
113132
OPAL_CR_EXIT_LIBRARY();
114133

115134
OMPI_ERRHANDLER_RETURN(rc, comm, MPI_ERR_UNKNOWN, FUNC_NAME);

opal/datatype/Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved.
1818
# Copyright (c) 2018 Research Organization for Information Science
1919
# and Technology (RIST). All rights reserved.
20+
# Copyright (c) 2021 IBM Corporation. All rights reserved.
2021
# $COPYRIGHT$
2122
#
2223
# Additional copyrights may follow
@@ -32,6 +33,7 @@ headers = \
3233
opal_datatype_internal.h \
3334
opal_datatype_copy.h \
3435
opal_datatype_memcpy.h \
36+
opal_datatype_pack_unpack_predefined.h \
3537
opal_datatype_pack.h \
3638
opal_datatype_prototypes.h \
3739
opal_datatype_unpack.h

0 commit comments

Comments
 (0)