@@ -59,7 +59,7 @@ static int total_errors = 0;
59
59
_a < _b ? _a : _b; })
60
60
61
61
static void print_status (char * op , char * type , int type_size ,
62
- int count , double duration ,
62
+ int count , int max_shift , double * duration , int repeats ,
63
63
int correct )
64
64
{
65
65
if (correct ) {
@@ -68,7 +68,15 @@ static void print_status(char* op, char* type, int type_size,
68
68
printf ("%-10s %s [\033[1;31mfail\033[0m]" , op , type );
69
69
total_errors ++ ;
70
70
}
71
- printf (" count %-10d time %.6f seconds\n" , count , duration );
71
+ if ( 1 == max_shift ) {
72
+ printf (" count %-10d time (seconds) %.8f seconds\n" , count , duration [0 ] / repeats );
73
+ } else {
74
+ printf (" count %-10d time (seconds / shifts) " , count );
75
+ for ( int i = 0 ; i < max_shift ; i ++ ) {
76
+ printf ("%.8f " , duration [i ] / repeats );
77
+ }
78
+ printf ("\n" );
79
+ }
72
80
}
73
81
74
82
static int do_ops_built = 0 ;
@@ -115,19 +123,23 @@ do { \
115
123
const TYPE *_p1 = ((TYPE*)(INBUF)), *_p3 = ((TYPE*)(CHECK_BUF)); \
116
124
TYPE *_p2 = ((TYPE*)(INOUT_BUF)); \
117
125
skip_op_type = 0; \
118
- for(int _k = 0; _k < min((COUNT), 4); +_k++ ) { \
119
- memcpy(_p2, _p3, sizeof(TYPE) * (COUNT)); \
120
- tstart = MPI_Wtime(); \
121
- MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT)-_k, (MPITYPE), (MPIOP)); \
122
- tend = MPI_Wtime(); \
123
- if( check ) { \
124
- for( i = 0; i < (COUNT)-_k; i++ ) { \
125
- if(((_p2+_k)[i]) == (((_p1+_k)[i]) OPNAME ((_p3+_k)[i]))) \
126
- continue; \
127
- printf("First error at alignment %d position %d (%" TYPE_PREFIX " %s %" TYPE_PREFIX " != %" TYPE_PREFIX ")\n", \
128
- _k, i, (_p1+_k)[i], (#OPNAME), (_p3+_k)[i], (_p2+_k)[i]); \
129
- correctness = 0; \
130
- break; \
126
+ for(int _k = 0; _k < min((COUNT), max_shift); +_k++ ) { \
127
+ duration[_k] = 0.0; \
128
+ for(int _r = repeats; _r > 0; _r--) { \
129
+ memcpy(_p2, _p3, sizeof(TYPE) * (COUNT)); \
130
+ tstart = MPI_Wtime(); \
131
+ MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT)-_k, (MPITYPE), (MPIOP)); \
132
+ tend = MPI_Wtime(); \
133
+ duration[_k] += (tend - tstart); \
134
+ if( check ) { \
135
+ for( i = 0; i < (COUNT)-_k; i++ ) { \
136
+ if(((_p2+_k)[i]) == (((_p1+_k)[i]) OPNAME ((_p3+_k)[i]))) \
137
+ continue; \
138
+ printf("First error at alignment %d position %d (%" TYPE_PREFIX " %s %" TYPE_PREFIX " != %" TYPE_PREFIX ")\n", \
139
+ _k, i, (_p1+_k)[i], (#OPNAME), (_p3+_k)[i], (_p2+_k)[i]); \
140
+ correctness = 0; \
141
+ break; \
142
+ } \
131
143
} \
132
144
} \
133
145
} \
@@ -139,20 +151,24 @@ do { \
139
151
const TYPE *_p1 = ((TYPE*)(INBUF)), *_p3 = ((TYPE*)(CHECK_BUF)); \
140
152
TYPE *_p2 = ((TYPE*)(INOUT_BUF)); \
141
153
skip_op_type = 0; \
142
- for(int _k = 0; _k < min((COUNT), 4); +_k++ ) { \
143
- memcpy(_p2, _p3, sizeof(TYPE) * (COUNT)); \
144
- tstart = MPI_Wtime(); \
145
- MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT), (MPITYPE), (MPIOP)); \
146
- tend = MPI_Wtime(); \
147
- if( check ) { \
148
- for( i = 0; i < (COUNT); i++ ) { \
149
- TYPE _v1 = *(_p1+_k), _v2 = *(_p2+_k), _v3 = *(_p3+_k); \
150
- if(_v2 == OPNAME(_v1, _v3)) \
151
- continue; \
152
- printf("First error at alignment %d position %d (%" TYPE_PREFIX " != %s(%" TYPE_PREFIX ", %" TYPE_PREFIX ")\n", \
153
- _k, i, _v1, (#OPNAME), _v3, _v2); \
154
- correctness = 0; \
155
- break; \
154
+ for(int _k = 0; _k < min((COUNT), max_shift); +_k++ ) { \
155
+ duration[_k] = 0.0; \
156
+ for(int _r = repeats; _r > 0; _r--) { \
157
+ memcpy(_p2, _p3, sizeof(TYPE) * (COUNT)); \
158
+ tstart = MPI_Wtime(); \
159
+ MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT), (MPITYPE), (MPIOP)); \
160
+ tend = MPI_Wtime(); \
161
+ duration[_k] += (tend - tstart); \
162
+ if( check ) { \
163
+ for( i = 0; i < (COUNT); i++ ) { \
164
+ TYPE _v1 = *(_p1+_k), _v2 = *(_p2+_k), _v3 = *(_p3+_k); \
165
+ if(_v2 == OPNAME(_v1, _v3)) \
166
+ continue; \
167
+ printf("First error at alignment %d position %d (%" TYPE_PREFIX " != %s(%" TYPE_PREFIX ", %" TYPE_PREFIX ")\n", \
168
+ _k, i, _v1, (#OPNAME), _v3, _v2); \
169
+ correctness = 0; \
170
+ break; \
171
+ } \
156
172
} \
157
173
} \
158
174
} \
@@ -163,24 +179,36 @@ int main(int argc, char **argv)
163
179
{
164
180
static void * in_buf = NULL , * inout_buf = NULL , * inout_check_buf = NULL ;
165
181
int count , type_size = 8 , rank , size , provided , correctness = 1 ;
166
- int repeats = 1 , i , c ;
167
- double tstart , tend ;
182
+ int repeats = 1 , i , c , op1_alignment = 0 , res_alignment = 0 ;
183
+ int max_shift = 4 ;
184
+ double * duration , tstart , tend ;
168
185
bool check = true;
169
186
char type [5 ] = "uifd" , * op = "sum" , * mpi_type ;
170
187
int lower = 1 , upper = 1000000 , skip_op_type ;
171
188
MPI_Op mpi_op ;
172
189
173
- while ( -1 != (c = getopt (argc , argv , "l:u:t:o:s:n:vfh" )) ) {
190
+ while ( -1 != (c = getopt (argc , argv , "l:u:r:t:o:i:s:n:1:2:vfh" )) ) { /* no blanks in the option string: a blank would itself become an option and strip the argument from -2 */
174
191
switch (c ) {
175
192
case 'l' :
176
193
lower = atoi (optarg );
177
194
if ( lower <= 0 ) {
178
- fprintf (stderr , "The number of elements must be positive\n" );
195
+ fprintf (stderr , "The lower number of elements must be positive\n" );
179
196
exit (-1 );
180
197
}
181
198
break ;
182
199
case 'u' :
183
200
upper = atoi (optarg );
201
+ if ( upper <= 0 ) { /* validate the value just parsed for -u, not lower */
202
+ fprintf (stderr , "The upper number of elements must be positive\n" );
203
+ exit (-1 );
204
+ }
205
+ break ;
206
+ case 'i' :
207
+ max_shift = atoi (optarg );
208
+ if ( max_shift <= 0 ) {
209
+ fprintf (stderr , "The max shift must be positive\n" );
210
+ exit (-1 );
211
+ }
184
212
break ;
185
213
case 'f' :
186
214
check = false;
@@ -216,14 +244,32 @@ int main(int argc, char **argv)
216
244
exit (-1 );
217
245
}
218
246
break ;
247
+ case '1' :
248
+ op1_alignment = atoi (optarg );
249
+ if ( op1_alignment < 0 ) {
250
+ fprintf (stderr , "alignment for the first operand must be non-negative\n" );
251
+ exit (-1 );
252
+ }
253
+ break ;
254
+ case '2' :
255
+ res_alignment = atoi (optarg );
256
+ if ( res_alignment < 0 ) {
257
+ fprintf (stderr , "alignment for the result must be non-negative\n" );
258
+ exit (-1 );
259
+ }
260
+ break ;
219
261
case 'h' :
220
262
fprintf (stdout , "%s options are:\n"
221
263
" -l <number> : lower number of elements\n"
222
264
" -u <number> : upper number of elements\n"
223
265
" -s <type_size> : 8, 16, 32 or 64 bits elements\n"
224
266
" -t [i,u,f,d] : type of the elements to apply the operations on\n"
267
+ " -r <number> : number of repetitions for each test\n"
225
268
" -o <op> : comma separated list of operations to execute among\n"
226
269
" sum, min, max, prod, bor, bxor, band\n"
270
+ " -i <number> : shift on all buffers to check alignment\n"
271
+ " -1 <number> : (mis)alignment in elements for the first op\n"
272
+ " -2 <number> : (mis)alignment in elements for the result\n"
227
273
" -v: increase the verbosity level\n"
228
274
" -h: this help message\n" , argv [0 ]);
229
275
exit (0 );
@@ -233,9 +279,10 @@ int main(int argc, char **argv)
233
279
if ( !do_ops_built ) { /* not yet done, take the default */
234
280
build_do_ops ( "all" , do_ops );
235
281
}
236
- in_buf = malloc (upper * sizeof (double ));
237
- inout_buf = malloc (upper * sizeof (double ));
238
- inout_check_buf = malloc (upper * sizeof (double ));
282
+ posix_memalign ( & in_buf , 64 , (upper + op1_alignment ) * sizeof (double ));
283
+ posix_memalign ( & inout_buf , 64 , (upper + res_alignment ) * sizeof (double ));
284
+ posix_memalign ( & inout_check_buf , 64 , upper * sizeof (double ));
285
+ duration = (double * )malloc (max_shift * sizeof (double ));
239
286
240
287
ompi_mpi_init (argc , argv , MPI_THREAD_SERIALIZED , & provided , false);
241
288
@@ -253,8 +300,8 @@ int main(int argc, char **argv)
253
300
correctness = 1 ;
254
301
if ('i' == type [type_idx ]) {
255
302
if ( 8 == type_size ) {
256
- int8_t * in_int8 = (int8_t * )in_buf ,
257
- * inout_int8 = (int8_t * )inout_buf ,
303
+ int8_t * in_int8 = (int8_t * )(( char * ) in_buf + op1_alignment * sizeof ( int8_t )) ,
304
+ * inout_int8 = (int8_t * )(( char * ) inout_buf + res_alignment * sizeof ( int8_t )) ,
258
305
* inout_int8_for_check = (int8_t * )inout_check_buf ;
259
306
for ( i = 0 ; i < count ; i ++ ) {
260
307
in_int8 [i ] = 5 ;
@@ -299,8 +346,8 @@ int main(int argc, char **argv)
299
346
}
300
347
}
301
348
if ( 16 == type_size ) {
302
- int16_t * in_int16 = (int16_t * )in_buf ,
303
- * inout_int16 = (int16_t * )inout_buf ,
349
+ int16_t * in_int16 = (int16_t * )(( char * ) in_buf + op1_alignment * sizeof ( int16_t )) ,
350
+ * inout_int16 = (int16_t * )(( char * ) inout_buf + res_alignment * sizeof ( int16_t )) ,
304
351
* inout_int16_for_check = (int16_t * )inout_check_buf ;
305
352
for ( i = 0 ; i < count ; i ++ ) {
306
353
in_int16 [i ] = 5 ;
@@ -345,8 +392,8 @@ int main(int argc, char **argv)
345
392
}
346
393
}
347
394
if ( 32 == type_size ) {
348
- int32_t * in_int32 = (int32_t * )in_buf ,
349
- * inout_int32 = (int32_t * )inout_buf ,
395
+ int32_t * in_int32 = (int32_t * )(( char * ) in_buf + op1_alignment * sizeof ( int32_t )) ,
396
+ * inout_int32 = (int32_t * )(( char * ) inout_buf + res_alignment * sizeof ( int32_t )) ,
350
397
* inout_int32_for_check = (int32_t * )inout_check_buf ;
351
398
for ( i = 0 ; i < count ; i ++ ) {
352
399
in_int32 [i ] = 5 ;
@@ -391,8 +438,8 @@ int main(int argc, char **argv)
391
438
}
392
439
}
393
440
if ( 64 == type_size ) {
394
- int64_t * in_int64 = (int64_t * )in_buf ,
395
- * inout_int64 = (int64_t * )inout_buf ,
441
+ int64_t * in_int64 = (int64_t * )(( char * ) in_buf + op1_alignment * sizeof ( int64_t )) ,
442
+ * inout_int64 = (int64_t * )(( char * ) inout_buf + res_alignment * sizeof ( int64_t )) ,
396
443
* inout_int64_for_check = (int64_t * )inout_check_buf ;
397
444
for ( i = 0 ; i < count ; i ++ ) {
398
445
in_int64 [i ] = 5 ;
@@ -440,8 +487,8 @@ int main(int argc, char **argv)
440
487
441
488
if ( 'u' == type [type_idx ] ) {
442
489
if ( 8 == type_size ) {
443
- uint8_t * in_uint8 = (uint8_t * )in_buf ,
444
- * inout_uint8 = (uint8_t * )inout_buf ,
490
+ uint8_t * in_uint8 = (uint8_t * )(( char * ) in_buf + op1_alignment * sizeof ( uint8_t )) ,
491
+ * inout_uint8 = (uint8_t * )(( char * ) inout_buf + res_alignment * sizeof ( uint8_t )) ,
445
492
* inout_uint8_for_check = (uint8_t * )inout_check_buf ;
446
493
for ( i = 0 ; i < count ; i ++ ) {
447
494
in_uint8 [i ] = 5 ;
@@ -486,8 +533,8 @@ int main(int argc, char **argv)
486
533
}
487
534
}
488
535
if ( 16 == type_size ) {
489
- uint16_t * in_uint16 = (uint16_t * )in_buf ,
490
- * inout_uint16 = (uint16_t * )inout_buf ,
536
+ uint16_t * in_uint16 = (uint16_t * )(( char * ) in_buf + op1_alignment * sizeof ( uint16_t )) ,
537
+ * inout_uint16 = (uint16_t * )(( char * ) inout_buf + res_alignment * sizeof ( uint16_t )) ,
491
538
* inout_uint16_for_check = (uint16_t * )inout_check_buf ;
492
539
for ( i = 0 ; i < count ; i ++ ) {
493
540
in_uint16 [i ] = 5 ;
@@ -532,8 +579,8 @@ int main(int argc, char **argv)
532
579
}
533
580
}
534
581
if ( 32 == type_size ) {
535
- uint32_t * in_uint32 = (uint32_t * )in_buf ,
536
- * inout_uint32 = (uint32_t * )inout_buf ,
582
+ uint32_t * in_uint32 = (uint32_t * )(( char * ) in_buf + op1_alignment * sizeof ( uint32_t )) ,
583
+ * inout_uint32 = (uint32_t * )(( char * ) inout_buf + res_alignment * sizeof ( uint32_t )) ,
537
584
* inout_uint32_for_check = (uint32_t * )inout_check_buf ;
538
585
for ( i = 0 ; i < count ; i ++ ) {
539
586
in_uint32 [i ] = 5 ;
@@ -578,8 +625,8 @@ int main(int argc, char **argv)
578
625
}
579
626
}
580
627
if ( 64 == type_size ) {
581
- uint64_t * in_uint64 = (uint64_t * )in_buf ,
582
- * inout_uint64 = (uint64_t * )inout_buf ,
628
+ uint64_t * in_uint64 = (uint64_t * )(( char * ) in_buf + op1_alignment * sizeof ( uint64_t )) ,
629
+ * inout_uint64 = (uint64_t * )(( char * ) inout_buf + res_alignment * sizeof ( uint64_t )) ,
583
630
* inout_uint64_for_check = (uint64_t * )inout_check_buf ;
584
631
for ( i = 0 ; i < count ; i ++ ) {
585
632
in_uint64 [i ] = 5 ;
@@ -626,8 +673,8 @@ int main(int argc, char **argv)
626
673
}
627
674
628
675
if ( 'f' == type [type_idx ] ) {
629
- float * in_float = (float * )in_buf ,
630
- * inout_float = (float * )inout_buf ,
676
+ float * in_float = (float * )(( char * ) in_buf + op1_alignment * sizeof ( float )) ,
677
+ * inout_float = (float * )(( char * ) inout_buf + res_alignment * sizeof ( float )) ,
631
678
* inout_float_for_check = (float * )inout_check_buf ;
632
679
for ( i = 0 ; i < count ; i ++ ) {
633
680
in_float [i ] = 1000.0 + 1 ;
@@ -658,8 +705,8 @@ int main(int argc, char **argv)
658
705
}
659
706
660
707
if ( 'd' == type [type_idx ] ) {
661
- double * in_double = (double * )in_buf ,
662
- * inout_double = (double * )inout_buf ,
708
+ double * in_double = (double * )(( char * ) in_buf + op1_alignment * sizeof ( double )) ,
709
+ * inout_double = (double * )(( char * ) inout_buf + res_alignment * sizeof ( double )) ,
663
710
* inout_double_for_check = (double * )inout_check_buf ;
664
711
for ( i = 0 ; i < count ; i ++ ) {
665
712
in_double [i ] = 10.0 + 1 ;
@@ -691,7 +738,7 @@ int main(int argc, char **argv)
691
738
check_and_continue :
692
739
if ( !skip_op_type )
693
740
print_status (array_of_ops [do_ops [op_idx ]].mpi_op_name ,
694
- mpi_type , type_size , count , tend - tstart , correctness );
741
+ mpi_type , type_size , count , max_shift , duration , repeats , correctness );
695
742
}
696
743
if ( !skip_op_type )
697
744
printf ("\n" );
0 commit comments