Skip to content

Commit 9011d00

Browse files
committed
fix view write bug and improve tests and accumulate example
1 parent 53032d7 commit 9011d00

File tree

3 files changed

+102
-131
lines changed

3 files changed

+102
-131
lines changed

VectorTest/TestFilterTransform.cpp

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#include "pch.h"
2+
3+
//#include "../Vectorisation/VecX/instruction_traits.h"
4+
5+
6+
#include "../Vectorisation/VecX/vec.h"
7+
#include "../Vectorisation/VecX/operations.h"
8+
#include "../Vectorisation/VecX/vec_bool_d.h"
9+
#include "../Vectorisation/VecX/vec_double.h"
10+
#include "../Vectorisation/VecX/alloc_policy.h"
11+
#include "../Vectorisation/VecX/vec_d.h"
12+
#include "../Vectorisation/VecX/vec_bool.h"
13+
#include "../Vectorisation/VecX/vec_view.h"
14+
15+
#include "../Vectorisation/VecX/target_name_space.h"
16+
17+
18+
#include "../Vectorisation/VecX/dr3.h"
19+
20+
21+
22+
23+
#include <numeric>
24+
25+
//using namespace DRC::VecDb;
26+
//using namespace DRC::VecD2D;
27+
using namespace DRC::VecD4D;
28+
//using namespace DRC::VecD8D;
29+
//using namespace DRC::VecF16F;
30+
//using namespace DRC::VecF8F;
31+
32+
33+
34+
35+
void testFilterTransform(int SZ )
36+
{
37+
38+
std::vector<double> input(SZ, 0.0);
39+
std::iota(begin(input), end(input), 0.0);
40+
41+
VecXX testVec(input);
42+
auto trueLambdaS = [&](auto x) { return x; };
43+
auto falseLambdaS = [&](auto x) { return -x; };
44+
45+
46+
for (int j = 0; j < SZ; ++j)
47+
{
48+
auto onlyJlambda = [=](auto x) { return (j > (x - 0.0001) && (j < x + 0.00001)); };
49+
VecXX res = filterTransform(onlyJlambda, testVec, trueLambdaS, falseLambdaS);
50+
51+
for (int k = 0; k < SZ; k++)
52+
{
53+
if( k==j)
54+
{
55+
EXPECT_DOUBLE_EQ(res[k], k);
56+
}
57+
else
58+
{
59+
EXPECT_DOUBLE_EQ(res[k], -1.0 * k);
60+
}
61+
}
62+
}
63+
64+
}
65+
66+
67+
68+
69+
// Runs testFilterTransform for every size from 3 up to and including 32, and
// then for the additional sizes 34, 65, 63 and 64 (in that order).
// NOTE(review): the extra sizes presumably straddle vector-width / unroll
// boundaries of the implementation -- confirm against the VecX sources.
TEST(TestFilterTransform, testTransformEachPoint)
{
    int SZ = 3;
    while (SZ < 33)
    {
        testFilterTransform(SZ);
        ++SZ;
    }

    const int extraSizes[] = { 34, 65, 63, 64 };
    for (int extra : extraSizes)
    {
        testFilterTransform(extra);
    }
}

Vectorisation/VecX/vec_view.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ class VecView
418418
for (; i < SZ - (int)(width * unrollFactor); i += width * unrollFactor)
419419
{
420420
idx0.load(pIdx + i);
421-
r0.load_a(m_pData);
421+
r0.load_a(m_pData+i);
422422
scatter(idx0, limit, r0, pRes);
423423

424424
idx1.load(pIdx + i + width);
@@ -471,7 +471,7 @@ class VecView
471471
for (; i < SZ - (int)(width * unrollFactor); i += width * unrollFactor)
472472
{
473473
idx0.load(pIdx +i);
474-
r0.load_a(m_pData);
474+
r0.load_a(m_pData + i);
475475
scatter(idx0, limit, r0, pRes);
476476

477477
idx1.load(pIdx + i + width);
@@ -512,7 +512,7 @@ Vec<INS_VEC> merge(std::tuple<VecView<INS_VEC>, VecView<INS_VEC> >& src)
512512
template< typename INS_VEC>
513513
VecView<INS_VEC> mergeToViews(VecView<INS_VEC>& lhs, VecView<INS_VEC>& rhs)
514514
{
515-
VecView<INS_VEC> ret(size_t(lhs.srxSize()));/// fillSize())); //srxSize()?
515+
VecView<INS_VEC> ret(size_t(lhs.srxSize()));
516516
lhs.write(ret);
517517
rhs.write(ret);
518518
return ret;

accumulateExample/accumulate_example.cpp

Lines changed: 14 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -90,17 +90,19 @@ bool vectorsEqual(const std::vector<T>& C1, const std::vector<T>& C2, const std:
9090

9191

9292
template<typename T>
93-
bool vectorsEqualD(const std::vector<T>& C1, const std::vector<T>& C2, const std::vector<T>& C3, const std::vector<T>& input)
93+
bool vectorsEqualD(const std::vector<T>& C1, const std::vector<T>& C2, const std::vector<T>& C3, const std::vector<T>& input, double ERR = 1e-13)
9494
{
9595
bool testOK = true;
96-
const double ERR = 1e-13;
96+
//const double ERR = 1e-13;
9797
if (C1.size() != C2.size())
9898
{
99+
std::cout << "wrong size C1,C2"<< C1.size() << ", " << C2.size() <<std::endl;
99100
return false;
100101
}
101102

102103
if (C3.size() != C2.size())
103104
{
105+
std::cout << "wrong size C2,C3" << C2.size() << ", " << C3.size() << std::endl;
104106
return false;
105107
}
106108

@@ -114,6 +116,7 @@ bool vectorsEqualD(const std::vector<T>& C1, const std::vector<T>& C2, const std
114116
testOK = false;
115117
std::cout << "\n err diff@ " << i << " err1 =" << err1 << ", err2 = " << err2 << "\n";
116118
std::cout << "\n val @ " << i << " C1[i] =" << C1[i] << ", C3[i] = " << C3[i] << "input val=" << input[i] <<"\n";
119+
std::cout << std::endl;
117120
break;
118121
}
119122
}
@@ -331,15 +334,16 @@ int main()
331334
//binarySelectionBetweenConst();
332335

333336
//return 0;
334-
std::cout << "\n \n \n \n binarySelectionBetweenLinearFunction() \n" << std::endl;
335-
binarySelectionBetweenLinearFunction();
337+
//std::cout << "\n \n \n \n binarySelectionBetweenLinearFunction() \n" << std::endl;
338+
//binarySelectionBetweenLinearFunction();
339+
336340
// std::cout << "\n \n \n \n binarySelectionBetweenMiddleWeightFunction() \n" << std::endl;
337-
// binarySelectionBetweenMiddleWeightFunction();
341+
// binarySelectionBetweenMiddleWeightFunction();
338342

339343
// return 0;
340344

341-
// std::cout << "\n \n \n \n binarySelectionBetweenHeavyWeightFunction() \n" << std::endl;
342-
// binarySelectionBetweenHeavyWeightFunction();
345+
std::cout << "\n \n \n \n binarySelectionBetweenHeavyWeightFunction() \n" << std::endl;
346+
binarySelectionBetweenHeavyWeightFunction();
343347
// return 0;
344348
// doSum();
345349
return 0;
@@ -1251,124 +1255,6 @@ void binarySelectionBetweenLinearFunction()
12511255

12521256
}
12531257

1254-
/*
1255-
//selecting between middle weight functions
1256-
void testBinarySelection2()
1257-
{
1258-
using FloatType = typename InstructionTraits<VecXX::INS>::FloatType;
1259-
const int TEST_LOOP_SZ = 1000;
1260-
1261-
for (long SZ = 200; SZ < 20000; SZ += 200)
1262-
{
1263-
1264-
auto v1 = getRandomShuffledVector(SZ); // std stl vector double or float
1265-
VecXX testVec(v1);
1266-
1267-
auto C = v1; //copy of STL vector to write values to
1268-
auto C1 = v1;
1269-
auto C2 = v1;
1270-
1271-
double time = 0.;
1272-
auto runName = "";
1273-
const auto one = VecXX::scalar(.0);
1274-
const auto two = VecXX::scalar(2.0);
1275-
const auto half = VecXX::scalar(0.5);
1276-
1277-
auto MyOddLmbda = [&](auto x) { return (x - two * floor(x * half)) >= one; };
1278-
1279-
/// from acklams inverse cdf normal
1280-
static FloatType a[] = { 0.0, -3.969683028665376e+01, 2.209460984245205e+02,-2.759285104469687e+02, 1.383577518672690e+02, -3.066479806614716e+01, 2.506628277459239e+00 };
1281-
static FloatType b[] = { 0.0, -5.447609879822406e+01, 1.615858368580409e+02, -1.556989798598866e+02, 6.680131188771972e+01, -1.328068155288572e+01 };
1282-
static FloatType c[] = { 0.0,-7.784894002430293e-03,-3.223964580411365e-01, -2.400758277161838e+00, -2.549732539343734e+00, 4.374664141464968e+00, 2.938163982698783e+00 };
1283-
static FloatType d[] = { 0.0, 7.784695709041462e-03, 3.224671290700398e-01, 2.445134137142996e+00, 3.754408661907416e+00 };
1284-
1285-
1286-
auto trueLambda = [&](auto q)
1287-
{
1288-
auto X = (((((c[1] * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) * q + c[6]) /
1289-
((((d[1] * q + d[2]) * q + d[3]) * q + d[4]) * q + VecXX::scalar(1.0));
1290-
return X;
1291-
};
1292-
1293-
1294-
auto falseLambda = [&](auto q)
1295-
{
1296-
auto X = (((((a[1] * q + a[2]) * q + a[3]) * q + a[4]) * q + a[5]) * q + a[6]) /
1297-
((((b[1] * q + b[2]) * q + b[3]) * q + b[4]) * q + VecXX::scalar(1.0));
1298-
return X;
1299-
};
1300-
1301-
auto writeResults = [&]() { std::cout << "size" << SZ << "," << runName << ", " << " # calcs, " << numOps(TEST_LOOP_SZ , SZ) << ", run time =, " << time << ", rate =, " << numOps(TEST_LOOP_SZ, SZ) / time << ", , "; };
1302-
1303-
1304-
{ runName = "for loop";
1305-
{
1306-
TimerGuard timer(time);
1307-
{
1308-
for (long l = 0; l < TEST_LOOP_SZ; l++)
1309-
{
1310-
for (int k = 0; k < SZ; k++)
1311-
{
1312-
auto x = v1[k];
1313-
C[k] = MyOddLmbda(x) ? trueLambda(x) : falseLambda(x);
1314-
}
1315-
}
1316-
}
1317-
}
1318-
writeResults();
1319-
}
1320-
1321-
//calculate both true and false lambdas for all values and select/blend values together
1322-
{ runName = "DR3 selectTransform ";
1323-
VecXX res;
1324-
{
1325-
TimerGuard timer(time);
1326-
{
1327-
for (long l = 0; l < TEST_LOOP_SZ; l++)
1328-
{
1329-
res = selectTransform(MyOddLmbda, testVec, trueLambda, falseLambda);
1330-
}
1331-
}
1332-
}
1333-
writeResults();
1334-
C1 = res;
1335-
}
1336-
1337-
1338-
1339-
//binary filter into concrete views of true and false elements according to MyOddLambda. Then apply true and false lambdas and finally merge values together
1340-
{ runName = "DR3 filterTransform";
1341-
VecXX res;
1342-
{
1343-
TimerGuard timer(time);
1344-
{
1345-
for (long l = 0; l < TEST_LOOP_SZ; l++)
1346-
{
1347-
res = filterTransform(MyOddLmbda, testVec, trueLambda, falseLambda);
1348-
}
1349-
}
1350-
}
1351-
writeResults();
1352-
C2 = res;
1353-
}
1354-
1355-
//compare resulyts of calcs
1356-
bool testOK = vectorsEqual(C1, C2, C);
1357-
1358-
if (testOK)
1359-
{
1360-
std::cout << "Matching results";
1361-
}
1362-
else
1363-
{
1364-
std::cout << " FAIL results dont match";
1365-
}
1366-
std::cout << "\n";
1367-
}
1368-
1369-
}
1370-
*/
1371-
13721258

13731259
void binarySelectionBetweenMiddleWeightFunction()
13741260
{
@@ -1882,14 +1768,14 @@ void binarySelectionBetweenHeavyWeightFunction()
18821768
//warm up
18831769
for (long l = 0; l < WARM_UP_LOOP; l++)
18841770
{
1885-
auto res = filterTransform(MyOddLmbda, testVec, trueLambdaS, falseLambdaS);
1771+
res = filterTransform(MyOddLmbda, testVec, trueLambdaS, falseLambdaS);
18861772
}
18871773

18881774
TimerGuard timer(time);
18891775
{
18901776
for (long l = 0; l < TEST_LOOP_SZ; l++)
18911777
{
1892-
auto res = filterTransform(MyOddLmbda, testVec, trueLambdaS, falseLambdaS);
1778+
res = filterTransform(MyOddLmbda, testVec, trueLambdaS, falseLambdaS);
18931779
}
18941780
}
18951781
}
@@ -1919,7 +1805,7 @@ void binarySelectionBetweenHeavyWeightFunction()
19191805
auto valStl = run_res_for_loop.m_calc_results[elem.first];
19201806
auto valDr3_filterTransform = dr3_raw_resultsFilter.m_calc_results[elem.first];
19211807

1922-
bool VecsOK = vectorsEqualD(valDr3_select, valStl, valDr3_filterTransform, valDr3_filterTransform);
1808+
bool VecsOK = vectorsEqualD(valDr3_select, valStl,valDr3_filterTransform, valDr3_filterTransform,3e-11);
19231809
auto strMatch = VecsOK ? "calcs match" : "cal difference";
19241810

19251811
std::cout << "for loop binarySelectionBetweenSimpleAndHeavyFunctions , size " << elem.first << " , " << elem.second.first << ", +- ," << elem.second.second << "\t \t DR3 filter_transform heavy weight , size " << elem.first << " , " << stats_DR3_filter[elem.first].first << ", +- ," << stats_DR3_filter[elem.first].second << "\t \t DR3 binarySelection heavy Weight , size " << elem.first << " , " << stats_DR3[elem.first].first << " , +- , " << stats_DR3[elem.first].second << ", numerical check: " << strMatch << "\n";

0 commit comments

Comments
 (0)