Skip to content

Commit 182a2db

Browse files
authored
Merge pull request #6029 from ggouaillardet/topic/large_datatypes
opal/datatype: correctly handle large datatypes
2 parents 922fcca + 1d8ad92 commit 182a2db

17 files changed

+413
-240
lines changed

opal/datatype/opal_convertor.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
* All rights reserved.
1313
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
1414
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
15-
* Copyright (c) 2013-2017 Research Organization for Information Science
16-
* and Technology (RIST). All rights reserved.
15+
* Copyright (c) 2013-2018 Research Organization for Information Science
16+
* and Technology (RIST). All rights reserved.
1717
* Copyright (c) 2017 Intel, Inc. All rights reserved
1818
* $COPYRIGHT$
1919
*
@@ -330,7 +330,7 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t*
330330
dt_stack_t* pStack; /* pointer to the position on the stack */
331331
const opal_datatype_t* pData = pConvertor->pDesc;
332332
dt_elem_desc_t* pElems;
333-
uint32_t count;
333+
size_t count;
334334
ptrdiff_t extent;
335335

336336
pStack = pConvertor->pStack;
@@ -340,7 +340,7 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t*
340340
*/
341341
pElems = pConvertor->use_desc->desc;
342342

343-
count = (uint32_t)(starting_point / pData->size);
343+
count = starting_point / pData->size;
344344
extent = pData->ub - pData->lb;
345345

346346
pStack[0].type = OPAL_DATATYPE_LOOP; /* the first one is always the loop */
@@ -349,7 +349,7 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t*
349349
pStack[0].disp = count * extent;
350350

351351
/* now compute the number of pending bytes */
352-
count = (uint32_t)(starting_point - count * pData->size);
352+
count = starting_point - count * pData->size;
353353
/**
354354
* We save the current displacement starting from the beginning
355355
* of this data.
@@ -563,7 +563,7 @@ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor )
563563

564564
int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
565565
const struct opal_datatype_t* datatype,
566-
int32_t count,
566+
size_t count,
567567
const void* pUserBuf )
568568
{
569569
/* Here I should check that the data is not overlapping */
@@ -605,7 +605,7 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
605605

606606
int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
607607
const struct opal_datatype_t* datatype,
608-
int32_t count,
608+
size_t count,
609609
const void* pUserBuf )
610610
{
611611
convertor->flags |= CONVERTOR_SEND;
@@ -699,11 +699,11 @@ int opal_convertor_clone( const opal_convertor_t* source,
699699

700700
void opal_convertor_dump( opal_convertor_t* convertor )
701701
{
702-
opal_output( 0, "Convertor %p count %d stack position %d bConverted %ld\n"
703-
"\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n"
702+
opal_output( 0, "Convertor %p count %" PRIsize_t" stack position %d bConverted %" PRIsize_t "\n"
703+
"\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %" PRIsize_t "\n"
704704
"\tremote_arch %u local_arch %u\n",
705705
(void*)convertor,
706-
convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted,
706+
convertor->count, convertor->stack_pos, convertor->bConverted,
707707
(unsigned long)convertor->local_size, (unsigned long)convertor->remote_size,
708708
convertor->flags, convertor->stack_size, convertor->partial_length,
709709
convertor->remoteArch, opal_local_arch );
@@ -734,8 +734,8 @@ void opal_datatype_dump_stack( const dt_stack_t* pStack, int stack_pos,
734734
{
735735
opal_output( 0, "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name );
736736
for( ; stack_pos >= 0; stack_pos-- ) {
737-
opal_output( 0, "%d: pos %d count %d disp %ld ", stack_pos, pStack[stack_pos].index,
738-
(int)pStack[stack_pos].count, (long)pStack[stack_pos].disp );
737+
opal_output( 0, "%d: pos %d count %" PRIsize_t " disp %ld ", stack_pos, pStack[stack_pos].index,
738+
pStack[stack_pos].count, pStack[stack_pos].disp );
739739
if( pStack->index != -1 )
740740
opal_output( 0, "\t[desc count %lu disp %ld extent %ld]\n",
741741
(unsigned long)pDesc[pStack[stack_pos].index].elem.count,

opal/datatype/opal_convertor.h

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
* All rights reserved.
1313
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
1414
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
15-
* Copyright (c) 2017 Research Organization for Information Science
16-
* and Technology (RIST). All rights reserved.
15+
* Copyright (c) 2017-2018 Research Organization for Information Science
16+
* and Technology (RIST). All rights reserved.
1717
* Copyright (c) 2017 Intel, Inc. All rights reserved
1818
* $COPYRIGHT$
1919
*
@@ -74,6 +74,7 @@ struct opal_convertor_master_t;
7474
struct dt_stack_t {
7575
int32_t index; /**< index in the element description */
7676
int16_t type; /**< the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1) */
77+
int16_t padding;
7778
size_t count; /**< number of times we still have to do it */
7879
ptrdiff_t disp; /**< actual displacement depending on the count field */
7980
};
@@ -93,30 +94,33 @@ struct opal_convertor_t {
9394
const opal_datatype_t* pDesc; /**< the datatype description associated with the convertor */
9495
const dt_type_desc_t* use_desc; /**< the version used by the convertor (normal or optimized) */
9596
opal_datatype_count_t count; /**< the total number of full datatype elements */
97+
98+
/* --- cacheline boundary (64 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */
9699
uint32_t stack_size; /**< size of the allocated stack */
97-
/* --- cacheline 1 boundary (64 bytes) --- */
98100
unsigned char* pBaseBuf; /**< initial buffer as supplied by the user */
99101
dt_stack_t* pStack; /**< the local stack for the actual conversion */
100102
convertor_advance_fct_t fAdvance; /**< pointer to the pack/unpack functions */
103+
104+
/* --- cacheline boundary (96 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */
101105
struct opal_convertor_master_t* master; /**< the master convertor */
102106

103107
/* All others fields get modified for every call to pack/unpack functions */
104108
uint32_t stack_pos; /**< the actual position on the stack */
105-
uint32_t partial_length; /**< amount of data left over from the last unpack */
109+
size_t partial_length; /**< amount of data left over from the last unpack */
106110
size_t bConverted; /**< # of bytes already converted */
111+
112+
/* --- cacheline boundary (128 bytes - if 64bits arch and !OPAL_ENABLE_DEBUG) --- */
107113
uint32_t checksum; /**< checksum computed by pack/unpack operation */
108114
uint32_t csum_ui1; /**< partial checksum computed by pack/unpack operation */
109115
size_t csum_ui2; /**< partial checksum computed by pack/unpack operation */
110-
/* --- cacheline 2 boundary (128 bytes) --- */
116+
117+
/* --- fields are no longer aligned on cacheline boundaries --- */
111118
dt_stack_t static_stack[DT_STATIC_STACK_SIZE]; /**< local stack for small datatypes */
112-
/* --- cacheline 3 boundary (192 bytes) was 56 bytes ago --- */
113119

114120
#if OPAL_CUDA_SUPPORT
115121
memcpy_fct_t cbmemcpy; /**< memcpy or cuMemcpy */
116122
void * stream; /**< CUstream for async copy */
117123
#endif
118-
/* size: 248, cachelines: 4, members: 20 */
119-
/* last cacheline: 56 bytes */
120124
};
121125
OPAL_DECLSPEC OBJ_CLASS_DECLARATION( opal_convertor_t );
122126

@@ -251,12 +255,12 @@ static inline void opal_convertor_get_offset_pointer( const opal_convertor_t* pC
251255
*/
252256
OPAL_DECLSPEC int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
253257
const struct opal_datatype_t* datatype,
254-
int32_t count,
258+
size_t count,
255259
const void* pUserBuf);
256260

257261
static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_convertor_t* pSrcConv,
258262
const struct opal_datatype_t* datatype,
259-
int32_t count,
263+
size_t count,
260264
const void* pUserBuf,
261265
int32_t flags,
262266
opal_convertor_t* convertor )
@@ -273,11 +277,11 @@ static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_conve
273277
*/
274278
OPAL_DECLSPEC int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
275279
const struct opal_datatype_t* datatype,
276-
int32_t count,
280+
size_t count,
277281
const void* pUserBuf );
278282
static inline int32_t opal_convertor_copy_and_prepare_for_recv( const opal_convertor_t* pSrcConv,
279283
const struct opal_datatype_t* datatype,
280-
int32_t count,
284+
size_t count,
281285
const void* pUserBuf,
282286
int32_t flags,
283287
opal_convertor_t* convertor )

opal/datatype/opal_convertor_raw.c

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
* reserved.
66
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
77
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
8-
* Copyright (c) 2017 Research Organization for Information Science
9-
* and Technology (RIST). All rights reserved.
8+
* Copyright (c) 2017-2018 Research Organization for Information Science
9+
* and Technology (RIST). All rights reserved.
1010
* $COPYRIGHT$
1111
*
1212
* Additional copyrights may follow
@@ -43,11 +43,11 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
4343
const opal_datatype_t *pData = pConvertor->pDesc;
4444
dt_stack_t* pStack; /* pointer to the position on the stack */
4545
uint32_t pos_desc; /* actual position in the description of the derived datatype */
46-
uint32_t count_desc; /* the number of items already done in the actual pos_desc */
46+
size_t count_desc; /* the number of items already done in the actual pos_desc */
4747
dt_elem_desc_t* description, *pElem;
4848
unsigned char *source_base; /* origin of the data */
4949
size_t raw_data = 0; /* sum of raw data lengths in the iov_len fields */
50-
uint32_t index = 0, i; /* the iov index and a simple counter */
50+
uint32_t index = 0; /* the iov index and a simple counter */
5151

5252
assert( (*iov_count) > 0 );
5353
if( OPAL_LIKELY(pConvertor->flags & CONVERTOR_COMPLETED) ) {
@@ -83,15 +83,15 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
8383
pStack = pConvertor->pStack + pConvertor->stack_pos;
8484
pos_desc = pStack->index;
8585
source_base = pConvertor->pBaseBuf + pStack->disp;
86-
count_desc = (uint32_t)pStack->count;
86+
count_desc = pStack->count;
8787
pStack--;
8888
pConvertor->stack_pos--;
8989
pElem = &(description[pos_desc]);
9090
source_base += pStack->disp;
91-
DO_DEBUG( opal_output( 0, "raw start pos_desc %d count_desc %d disp %ld\n"
92-
"stack_pos %d pos_desc %d count_desc %d disp %ld\n",
91+
DO_DEBUG( opal_output( 0, "raw start pos_desc %d count_desc %" PRIsize_t " disp %ld\n"
92+
"stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n",
9393
pos_desc, count_desc, (long)(source_base - pConvertor->pBaseBuf),
94-
pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); );
94+
pConvertor->stack_pos, pStack->index, pStack->count, (long)pStack->disp ); );
9595
while( 1 ) {
9696
while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
9797
size_t blength = opal_datatype_basicDatatypes[pElem->elem.common.type]->size;
@@ -112,7 +112,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
112112
count_desc = 0;
113113
}
114114
} else {
115-
for( i = count_desc; (i > 0) && (index < *iov_count); i--, index++ ) {
115+
for(size_t i = count_desc; (i > 0) && (index < *iov_count); i--, index++ ) {
116116
OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf,
117117
pConvertor->pDesc, pConvertor->count );
118118
DO_DEBUG( opal_output( 0, "raw 2. iov[%d] = {base %p, length %lu}\n",
@@ -134,9 +134,9 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
134134
goto complete_loop;
135135
}
136136
if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
137-
DO_DEBUG( opal_output( 0, "raw end_loop count %d stack_pos %d"
137+
DO_DEBUG( opal_output( 0, "raw end_loop count %" PRIsize_t " stack_pos %d"
138138
" pos_desc %d disp %ld space %lu\n",
139-
(int)pStack->count, pConvertor->stack_pos,
139+
pStack->count, pConvertor->stack_pos,
140140
pos_desc, (long)pStack->disp, (unsigned long)raw_data ); );
141141
if( --(pStack->count) == 0 ) { /* end of loop */
142142
if( pConvertor->stack_pos == 0 ) {
@@ -160,19 +160,18 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
160160
}
161161
source_base = pConvertor->pBaseBuf + pStack->disp;
162162
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
163-
DO_DEBUG( opal_output( 0, "raw new_loop count %d stack_pos %d "
163+
DO_DEBUG( opal_output( 0, "raw new_loop count %" PRIsize_t " stack_pos %d "
164164
"pos_desc %d disp %ld space %lu\n",
165-
(int)pStack->count, pConvertor->stack_pos,
165+
pStack->count, pConvertor->stack_pos,
166166
pos_desc, (long)pStack->disp, (unsigned long)raw_data ); );
167167
}
168168
if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) {
169169
ptrdiff_t local_disp = (ptrdiff_t)source_base;
170170
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)(pElem + pElem->loop.items);
171171

172172
if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
173-
uint32_t i;
174173
source_base += end_loop->first_elem_disp;
175-
for( i = count_desc; (i > 0) && (index < *iov_count); i--, index++ ) {
174+
for(size_t i = count_desc; (i > 0) && (index < *iov_count); i--, index++ ) {
176175
OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, end_loop->size, pConvertor->pBaseBuf,
177176
pConvertor->pDesc, pConvertor->count );
178177
iov[index].iov_base = (IOVBASE_TYPE *) source_base;
@@ -209,7 +208,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor,
209208
/* I complete an element, next step I should go to the next one */
210209
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc,
211210
source_base - pStack->disp - pConvertor->pBaseBuf );
212-
DO_DEBUG( opal_output( 0, "raw save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n",
213-
pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); );
211+
DO_DEBUG( opal_output( 0, "raw save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n",
212+
pConvertor->stack_pos, pStack->index, pStack->count, (long)pStack->disp ); );
214213
return 0;
215214
}

0 commit comments

Comments
 (0)