Skip to content

Commit ad29fae

Browse files
committed
Add a benchmark for parallel loops
1 parent 9faa101 commit ad29fae

File tree

1 file changed

+112
-0
lines changed

1 file changed

+112
-0
lines changed

benchmarks/parallel_for.cc

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#include <iostream>
2+
#include <vpp/boxNd.hh>
3+
#include <vpp/vpp.hh>
4+
5+
#include "get_time.hh"
6+
7+
using namespace vpp;
8+
9+
template <typename I, typename F>
10+
void fast_pixel_wise(I img, F f)
11+
{
12+
int nr = img.nrows();
13+
#pragma omp parallel for
14+
for (int r = 0; r < nr; r++)
15+
{
16+
int* cur = &img(vint2(r, 0));
17+
// vint2 cur_(r,0);
18+
int* end = cur + img.nrows();
19+
int c = 0;
20+
while (cur != end)
21+
{
22+
f(cur, r, c);
23+
//f(cur, cur_[0], cur_[1]);
24+
++cur;
25+
// cur_[1]++;
26+
c++;
27+
}
28+
}
29+
30+
}
31+
32+
int main()
33+
{
34+
35+
image2d<int> img(2000,2000);
36+
37+
int K = 400;
38+
double time;
39+
40+
41+
// check
42+
vpp::pixel_wise(img) << [] (auto&& p)
43+
{
44+
p = 42;
45+
};
46+
47+
for (auto p : img.domain())
48+
{
49+
if (img(p) != 42)
50+
std::cout << "error at " << p.transpose() << " -> " << img(p) << std::endl;
51+
}
52+
53+
// Cache warm up.
54+
for (int k = 0; k < K; k++)
55+
fast_pixel_wise(img, [] (int* p, int r, int c)
56+
{
57+
*p = r + c;
58+
});
59+
60+
time = get_time_in_seconds();
61+
for (int k = 0; k < K; k++)
62+
{
63+
int nr = img.nrows();
64+
#pragma omp parallel for
65+
for (int r = 0; r < nr; r++)
66+
{
67+
int* cur = &img(vint2(r, 0));
68+
int* end = cur + img.nrows();
69+
int c = 0;
70+
while (cur != end)
71+
{
72+
*cur = r + c;
73+
++cur;
74+
c++;
75+
}
76+
77+
}
78+
}
79+
double ref_time = get_time_in_seconds() - time;
80+
81+
82+
time = get_time_in_seconds();
83+
for (int k = 0; k < K; k++)
84+
fast_pixel_wise(img, [] (int* p, int r, int c)
85+
{
86+
*p = r + c;
87+
});
88+
double ref2_time = get_time_in_seconds() - time;
89+
90+
time = get_time_in_seconds();
91+
for (int k = 0; k < K; k++)
92+
for (auto& p : img)
93+
p = p.coord()[0] + p.coord()[1];
94+
double ii_time = get_time_in_seconds() - time;
95+
96+
time = get_time_in_seconds();
97+
for (int k = 0; k < K; k++)
98+
vpp::pixel_wise(img, img.domain()) << [] (auto& p, auto& p2)
99+
{
100+
p = p2[0] + p2[1];
101+
};
102+
double pw_time = get_time_in_seconds() - time;
103+
104+
std::cout << "image iterator: " << std::endl;
105+
std::cout << "ref_time (ms): " << 1000. * ref_time / K << std::endl;
106+
std::cout << "ref2_time (ms): " << 1000. * ref2_time / K << std::endl;
107+
std::cout << "ii_time (ms): " << 1000. * ii_time / K << std::endl;
108+
std::cout << "image iterator overhead: " << 100. * ii_time / ref_time - 100. << "%" << std::endl;
109+
std::cout << "pw_time (ms): " << 1000. * pw_time / K << std::endl;
110+
//std::cout << "domain iteration overhead: " << 100. * id_time / ref_time - 100. << "%" << std::endl;
111+
112+
}

0 commit comments

Comments
 (0)