15
15
16
16
#include " ../Vectorisation/VecX/dr3.h"
17
17
18
- // pick an instruction set for intrinsics by selecting a name space
19
-
20
- // using namespace DRC::VecDb;
21
- // using namespace DRC::VecD2D; //sse2 double
22
- // using namespace DRC::VecD4D; //avx2 double
23
- // using namespace DRC::VecF8F; // avx2 float
24
- // using namespace DRC::VecD8D; //avx512 double
25
- // using namespace DRC::VecF16F; //avx512 float
18
+
26
19
const double billion = 1000000000.0 ;
27
20
28
21
@@ -34,6 +27,7 @@ struct RunResults
34
27
{
35
28
Mapped_Performance_Results m_raw_results;
36
29
Calc_Values m_calc_results;
30
+ double time;
37
31
};
38
32
39
33
class TimerGuard
@@ -55,12 +49,7 @@ class TimerGuard
55
49
56
50
auto getRandomShuffledVectorxxx (int SZ, int instance_number = 0 )
57
51
{
58
- // using FloatType = double; // typename InstructionTraits<VecXX::INS>::FloatType;
59
-
60
-
61
52
static std::map<int , std::vector<double > > vectors;
62
-
63
-
64
53
int key = 10 * SZ + instance_number;
65
54
// vectors are cached under key = 10 * size + instance number (0-9), so each size can hold several distinct random vectors
66
55
@@ -109,157 +98,96 @@ auto runFunctionOverDifferentSize = [](int testRepeats, int vec_start_size, int
109
98
110
99
void doAVXMax512Dance ()
111
100
{
112
- /*
113
- const long TEST_LOOP_SZ = 1000;
114
- const int repeatRuns = 20;
115
- const int vectorStepSize = 200;
116
- const int maxVectorSize = 20000;
117
- const int minVectorSize = 400;
118
- */
119
-
120
- // const double sleepTime = 10000.;/// 10 seconds
121
-
122
101
123
102
const int maxVectorSize = 4400 ;
124
103
const int minVectorSize = 3800 ;
125
104
const long TEST_LOOP_SZ = 100000 ;
126
105
const int vectorStepSize = 8 ;
127
106
const int repeatRuns = 10 ;
128
107
129
- // auto zero = 0.0;// InstructionTraits<VecXX::INS>::nullValue;
130
-
131
108
getRandomShuffledVectorxxx (-1 ); // reset random input vectors
132
109
133
- /*
134
- auto accumulate_run = [&](int VEC_SZ, long TEST_LOOP_SZ)
135
- {
136
- double time = 0.;
137
- volatile double res = 0.;
138
- //auto v = getRandomShuffledVector(SZ); // std stl vector double or float
139
- auto v1 = getRandomShuffledVector(VEC_SZ, 0);
140
110
141
- {
142
- //warm up
143
- for (long l = 0; l < 100; l++)
144
- {
145
- res = *std::max_element(v1.begin(), v1.end());
146
- }
147
-
148
- TimerGuard timer(time);
149
- {
150
- for (long l = 0; l < TEST_LOOP_SZ; l++)
151
- {
152
- res = *std::max_element(v1.begin(), v1.end());
153
- }
154
- }
155
- }
156
- return std::make_pair(res, numOps(TEST_LOOP_SZ, VEC_SZ) / time);
157
- };
158
-
159
- */
160
-
161
- auto DR3_accumulate = [&](int SZ, long TEST_LOOP_SZ)
111
+ // avx512 lambda: runs reduce(vec, mxDbl) TEST_LOOP_SZ times over a random vector of SZ elements, using the DRC::VecD8D (avx512 double) types; returns the pair (last result, time)
112
+ auto DR3_avx512 = [&](int SZ, long TEST_LOOP_SZ)
162
113
{
163
114
using namespace DRC ::VecD8D;
164
115
165
116
double time = 0 .; // NOTE(review): nothing in the new version of this lambda writes to time (the TimerGuard was removed), so it stays 0
166
117
volatile double res = 0 .; // presumably volatile so the repeated stores below are not optimised away - confirm
167
118
168
- // generic lambda for max either calling a max instruction or doing a selection with iff
169
- // auto mxDbl = [](auto lhs, auto rhs) { return max(lhs, rhs); };
170
- auto mxDbl = [](auto lhs, auto rhs) { return iff (lhs > rhs, lhs, rhs); }; // using iff fastest
119
+ auto mxDbl = [](auto lhs, auto rhs) { return iff (lhs > rhs, lhs, rhs); }; // binary max expressed as a selection with iff: lhs when lhs > rhs, otherwise rhs
171
120
172
- auto v1 = getRandomShuffledVectorxxx (SZ, 0 ); // std stl vector double or float
121
+ auto v1 = getRandomShuffledVectorxxx (SZ, 0 ); // input vector for this size, instance 0
173
122
VecXX vec (v1);
174
- {
123
+
175
124
176
- // warm up
177
- for (long l = 0 ; l < 100 ; l++)
178
- {
179
- res = reduce (vec, mxDbl);
180
- }
181
-
182
- TimerGuard timer (time);
183
- {
184
- for (long l = 0 ; l < TEST_LOOP_SZ; l++)
185
- {
186
- res = reduce (vec, mxDbl);
187
- }
188
- }
125
+ for (long l = 0 ; l < TEST_LOOP_SZ; l++) // the workload: no warm-up pass and no timer inside the lambda any more
126
+ {
127
+ res = reduce (vec, mxDbl);
189
128
}
190
- // return std::make_pair(res, numOps(TEST_LOOP_SZ, SZ) / time);
191
-
129
+
130
+ return std::make_pair (res, time); // second element is the untouched local time (0), not a measured duration
192
131
};
193
132
194
133
195
- auto DR3_accumulate2 = [&](int SZ, long TEST_LOOP_SZ)
134
+ auto DR3_avx2 = [&](int SZ, long TEST_LOOP_SZ) // avx2 lambda: runs reduce(vec, mxDbl) TEST_LOOP_SZ times over a random vector of SZ elements, using the DRC::VecD4D (avx2 double) types; returns the pair (last result, time)
196
135
{
197
136
using namespace DRC ::VecD4D;
198
137
199
138
double time = 0 .; // NOTE(review): nothing in the new version of this lambda writes to time (the TimerGuard was removed), so it stays 0
200
139
volatile double res = 0 .; // presumably volatile so the repeated stores below are not optimised away - confirm
201
140
202
- // generic lambda for max either calling a max instruction or doing a selection with iff
203
- // auto mxDbl = [](auto lhs, auto rhs) { return max(lhs, rhs); };
204
- auto mxDbl = [](auto lhs, auto rhs) { return iff (lhs > rhs, lhs, rhs); }; // using iff fastest
141
+ auto mxDbl = [](auto lhs, auto rhs) { return iff (lhs > rhs, lhs, rhs); }; // binary max expressed as a selection with iff: lhs when lhs > rhs, otherwise rhs
205
142
206
- auto v1 = getRandomShuffledVectorxxx (SZ, 0 ); // std stl vector double or float
143
+ auto v1 = getRandomShuffledVectorxxx (SZ, 0 ); // input vector for this size, instance 0
207
144
VecXX vec (v1);
145
+
146
+
147
+ for (long l = 0 ; l < TEST_LOOP_SZ; l++) // the workload: no warm-up pass and no timer inside the lambda any more
208
148
{
209
-
210
- // warm up
211
- for (long l = 0 ; l < 100 ; l++)
212
- {
213
- res = reduce (vec, mxDbl);
214
- }
215
-
216
- TimerGuard timer (time);
217
- {
218
- for (long l = 0 ; l < TEST_LOOP_SZ; l++)
219
- {
220
- res = reduce (vec, mxDbl);
221
- }
222
- }
149
+ res = reduce (vec, mxDbl);
223
150
}
224
- // return std::make_pair(res, numOps(TEST_LOOP_SZ, SZ) / time);
151
+
152
+
153
+ return std::make_pair (res, time); // second element is the untouched local time (0), not a measured duration
225
154
226
155
};
227
156
228
157
158
+ using namespace std ::chrono_literals;
229
159
230
160
for (;;) // never exits: each cycle runs four AVX-512 passes, an extra pause, then four AVX2 passes
231
161
{
232
162
233
163
double time = 0.0 ; // handed to TimerGuard for each pass; presumably TimerGuard stores the elapsed time here when it goes out of scope - confirm against TimerGuard
164
+
165
+ // AVX512: four passes over the vector-size sweep, each one timed, reported and followed by a 15 s sleep
166
+ for (int K = 0 ; K < 4 ; K++)
234
167
{
235
-
236
- for ( int K = 0 ; K < 4 ; K++)
168
+ time = 0 .;
169
+ std::cout << " AVX 512 " << K + 1 << " of 4 " << std::endl;
237
170
{ // inner scope bounds the TimerGuard lifetime to the sweep below
238
- time = 0 .;
239
- std::cout << " AVX 512 " << K + 1 << " of 4" << std::endl;
240
- auto dr3_raw_results = runFunctionOverDifferentSize (repeatRuns, minVectorSize, vectorStepSize, maxVectorSize, DR3_accumulate, TEST_LOOP_SZ);
241
- std::cout << " AVX 512 " << K + 1 << " of 4" << time << " seconds now sleep" << std::endl;
242
-
243
- using namespace std ::chrono_literals;
244
- std::this_thread::sleep_for (15000ms);
245
-
171
+ TimerGuard timer (time);
172
+ auto dr3_raw_results = runFunctionOverDifferentSize (repeatRuns, minVectorSize, vectorStepSize, maxVectorSize, DR3_avx512, TEST_LOOP_SZ); // NOTE(review): dr3_raw_results is not read afterwards
246
173
}
174
+ std::cout << " AVX 512 " << K + 1 << " of 4 " << time << " seconds now sleep" << std::endl;
175
+ std::this_thread::sleep_for (15000ms);
247
176
}
248
- using namespace std ::chrono_literals ;
177
+
249
178
std::this_thread::sleep_for (15000ms); // additional 15 s pause between the AVX-512 and AVX2 phases, on top of the per-pass sleep
250
-
251
- {
252
-
253
- for (int K = 0 ; K < 4 ; K++)
254
- {
255
- time = 0 .;
256
- std::cout << " AVX 2 " << K + 1 << " of 4" << std::endl;
257
- auto dr3_raw_results = runFunctionOverDifferentSize (repeatRuns, minVectorSize, vectorStepSize, maxVectorSize, DR3_accumulate2, TEST_LOOP_SZ);
258
- std::cout << " AVX 2 " << K + 1 << " of 4" << time << " seconds now sleep" << std::endl;
259
- using namespace std ::chrono_literals;
260
- std::this_thread::sleep_for (15000ms);
261
-
179
+
180
+ // AVX2: four passes over the vector-size sweep, each one timed, reported and followed by a 15 s sleep
181
+ for (int K = 0 ; K < 4 ; K++)
182
+ {
183
+ time = 0 .;
184
+ std::cout << " AVX 2 " << K + 1 << " of 4 " << std::endl;
185
+ { TimerGuard timer (time); // inner scope bounds the TimerGuard lifetime to the sweep below
186
+ auto dr3_raw_results = runFunctionOverDifferentSize (repeatRuns, minVectorSize, vectorStepSize, maxVectorSize, DR3_avx2, TEST_LOOP_SZ); // NOTE(review): dr3_raw_results is not read afterwards
262
187
}
188
+ std::cout << " AVX 2 " << K + 1 << " of 4 " << time << " seconds now sleep" << std::endl;
189
+ std::this_thread::sleep_for (15000ms);
190
+
263
191
}
264
192
265
193
}
0 commit comments