Skip to content

Commit 68e047e

Browse files
committed
Returned library/cpp
1 parent c4bc83e commit 68e047e

File tree

3,095 files changed

+471600
-27
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

3,095 files changed

+471600
-27
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#include "accurate_accumulate.h"
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
#pragma once
2+
3+
#include <util/ysaveload.h>
4+
#include <util/generic/vector.h>
5+
#include <util/system/yassert.h>
6+
7+
//! See more details here http://en.wikipedia.org/wiki/Kahan_summation_algorithm
8+
template <typename TAccumulateType>
9+
class TKahanAccumulator {
10+
public:
11+
using TValueType = TAccumulateType;
12+
13+
template <typename TFloatType>
14+
explicit TKahanAccumulator(const TFloatType x)
15+
: Sum_(x)
16+
, Compensation_()
17+
{
18+
}
19+
20+
TKahanAccumulator()
21+
: Sum_()
22+
, Compensation_()
23+
{
24+
}
25+
26+
template <typename TFloatType>
27+
TKahanAccumulator& operator=(const TFloatType& rhs) {
28+
Sum_ = TValueType(rhs);
29+
Compensation_ = TValueType();
30+
return *this;
31+
}
32+
33+
TValueType Get() const {
34+
return Sum_ + Compensation_;
35+
}
36+
37+
template <typename TFloatType>
38+
inline operator TFloatType() const {
39+
return Get();
40+
}
41+
42+
template <typename TFloatType>
43+
inline bool operator<(const TKahanAccumulator<TFloatType>& other) const {
44+
return Get() < other.Get();
45+
}
46+
47+
template <typename TFloatType>
48+
inline bool operator<=(const TKahanAccumulator<TFloatType>& other) const {
49+
return !(other < *this);
50+
}
51+
52+
template <typename TFloatType>
53+
inline bool operator>(const TKahanAccumulator<TFloatType>& other) const {
54+
return other < *this;
55+
}
56+
57+
template <typename TFloatType>
58+
inline bool operator>=(const TKahanAccumulator<TFloatType>& other) const {
59+
return !(*this < other);
60+
}
61+
62+
template <typename TFloatType>
63+
inline TKahanAccumulator& operator+=(const TFloatType x) {
64+
const TValueType y = TValueType(x) - Compensation_;
65+
const TValueType t = Sum_ + y;
66+
Compensation_ = (t - Sum_) - y;
67+
Sum_ = t;
68+
return *this;
69+
}
70+
71+
template <typename TFloatType>
72+
inline TKahanAccumulator& operator-=(const TFloatType x) {
73+
return *this += -TValueType(x);
74+
}
75+
76+
template <typename TFloatType>
77+
inline TKahanAccumulator& operator*=(const TFloatType x) {
78+
return *this = TValueType(*this) * TValueType(x);
79+
}
80+
81+
template <typename TFloatType>
82+
inline TKahanAccumulator& operator/=(const TFloatType x) {
83+
return *this = TValueType(*this) / TValueType(x);
84+
}
85+
86+
Y_SAVELOAD_DEFINE(Sum_, Compensation_);
87+
88+
private:
89+
TValueType Sum_;
90+
TValueType Compensation_;
91+
};
92+
93+
template <typename TAccumulateType, typename TFloatType>
94+
inline const TKahanAccumulator<TAccumulateType>
95+
operator+(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
96+
return lhs += rhs;
97+
}
98+
99+
template <typename TAccumulateType, typename TFloatType>
100+
inline const TKahanAccumulator<TAccumulateType>
101+
operator-(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
102+
return lhs -= rhs;
103+
}
104+
105+
template <typename TAccumulateType, typename TFloatType>
106+
inline const TKahanAccumulator<TAccumulateType>
107+
operator*(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
108+
return lhs *= rhs;
109+
}
110+
111+
template <typename TAccumulateType, typename TFloatType>
112+
inline const TKahanAccumulator<TAccumulateType>
113+
operator/(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
114+
return lhs /= rhs;
115+
}
116+
117+
//! Sums the elements of [begin, end) into a value-initialized TAccumulatorType.
//!
//! The range is consumed in chunks of 16 elements. Each chunk is first added up
//! left to right in the arithmetic of the element type, and only that partial
//! sum is passed to `accumulator +=`. The remaining (fewer than 16) elements are
//! added one by one.
//!
//! It has to support `end - begin`, `begin + k` and `begin += k`.
template <typename TAccumulatorType, typename It>
static inline TAccumulatorType TypedFastAccumulate(It begin, It end) {
    TAccumulatorType accumulator = TAccumulatorType();

    // The remaining length is compared instead of evaluating `begin + 15 < end`:
    // forming an iterator beyond `end` is undefined behaviour (and is rejected
    // by checked iterators) whenever fewer than 15 elements are left.
    for (; end - begin >= 16; begin += 16) {
        accumulator += *(begin + 0) +
                       *(begin + 1) +
                       *(begin + 2) +
                       *(begin + 3) +
                       *(begin + 4) +
                       *(begin + 5) +
                       *(begin + 6) +
                       *(begin + 7) +
                       *(begin + 8) +
                       *(begin + 9) +
                       *(begin + 10) +
                       *(begin + 11) +
                       *(begin + 12) +
                       *(begin + 13) +
                       *(begin + 14) +
                       *(begin + 15);
    }
    // Tail: at most 15 elements.
    for (; begin != end; ++begin) {
        accumulator += *begin;
    }

    return accumulator;
}
145+
146+
//! Accumulates op(a[i], b[i]) over two parallel ranges into a value-initialized
//! TAccumulatorType, where `op` is a value-initialized TOperation.
//!
//! The first range is [begin1, end1); the second one starts at begin2 and is
//! read for exactly as many elements as the first one contains, so the caller
//! has to make sure it is at least that long.
//!
//! Like TypedFastAccumulate, the ranges are consumed in chunks of 16: the 16
//! results of `op` are added up left to right in their own arithmetic and only
//! the partial sum is passed to `accumulator +=`. The remaining (fewer than 16)
//! pairs are handled one by one.
//!
//! It1 has to support `end1 - begin1`; both iterator types have to support
//! `it + k` and `it += k`.
template <class TOperation, typename TAccumulatorType, typename It1, typename It2>
static inline TAccumulatorType TypedFastInnerOperation(It1 begin1, It1 end1, It2 begin2) {
    TAccumulatorType accumulator = TAccumulatorType();

    // Value-initialized: a const object of a functor type without a
    // user-provided default constructor may not be left default-initialized.
    const TOperation op{};
    // The remaining length is compared instead of evaluating `begin1 + 15 < end1`:
    // forming an iterator beyond `end1` is undefined behaviour (and is rejected
    // by checked iterators) whenever fewer than 15 elements are left.
    for (; end1 - begin1 >= 16; begin1 += 16, begin2 += 16) {
        accumulator += op(*(begin1 + 0), *(begin2 + 0)) +
                       op(*(begin1 + 1), *(begin2 + 1)) +
                       op(*(begin1 + 2), *(begin2 + 2)) +
                       op(*(begin1 + 3), *(begin2 + 3)) +
                       op(*(begin1 + 4), *(begin2 + 4)) +
                       op(*(begin1 + 5), *(begin2 + 5)) +
                       op(*(begin1 + 6), *(begin2 + 6)) +
                       op(*(begin1 + 7), *(begin2 + 7)) +
                       op(*(begin1 + 8), *(begin2 + 8)) +
                       op(*(begin1 + 9), *(begin2 + 9)) +
                       op(*(begin1 + 10), *(begin2 + 10)) +
                       op(*(begin1 + 11), *(begin2 + 11)) +
                       op(*(begin1 + 12), *(begin2 + 12)) +
                       op(*(begin1 + 13), *(begin2 + 13)) +
                       op(*(begin1 + 14), *(begin2 + 14)) +
                       op(*(begin1 + 15), *(begin2 + 15));
    }
    // Tail: at most 15 pairs.
    for (; begin1 != end1; ++begin1, ++begin2) {
        accumulator += op(*begin1, *begin2);
    }

    return accumulator;
}
175+
176+
template <typename TAccumulatorType, typename It1, typename It2>
177+
static inline TAccumulatorType TypedFastInnerProduct(It1 begin1, It1 end1, It2 begin2) {
178+
return TypedFastInnerOperation<std::multiplies<>, TAccumulatorType>(begin1, end1, begin2);
179+
}
180+
181+
template <typename It>
182+
static inline double FastAccumulate(It begin, It end) {
183+
return TypedFastAccumulate<double>(begin, end);
184+
}
185+
186+
template <typename T>
187+
static inline double FastAccumulate(const TVector<T>& sequence) {
188+
return FastAccumulate(sequence.begin(), sequence.end());
189+
}
190+
191+
template <typename It>
192+
static inline double FastKahanAccumulate(It begin, It end) {
193+
return TypedFastAccumulate<TKahanAccumulator<double>>(begin, end);
194+
}
195+
196+
template <typename T>
197+
static inline double FastKahanAccumulate(const TVector<T>& sequence) {
198+
return FastKahanAccumulate(sequence.begin(), sequence.end());
199+
}
200+
201+
template <typename It1, typename It2>
202+
static inline double FastInnerProduct(It1 begin1, It1 end1, It2 begin2) {
203+
return TypedFastInnerProduct<double>(begin1, end1, begin2);
204+
}
205+
206+
template <typename T>
207+
static inline double FastInnerProduct(const TVector<T>& lhs, const TVector<T>& rhs) {
208+
Y_ASSERT(lhs.size() == rhs.size());
209+
return FastInnerProduct(lhs.begin(), lhs.end(), rhs.begin());
210+
}
211+
212+
template <typename It1, typename It2>
213+
static inline double FastKahanInnerProduct(It1 begin1, It1 end1, It2 begin2) {
214+
return TypedFastInnerProduct<TKahanAccumulator<double>>(begin1, end1, begin2);
215+
}
216+
217+
template <typename T>
218+
static inline double FastKahanInnerProduct(const TVector<T>& lhs, const TVector<T>& rhs) {
219+
Y_ASSERT(lhs.size() == rhs.size());
220+
return FastKahanInnerProduct(lhs.begin(), lhs.end(), rhs.begin());
221+
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#include <library/cpp/accurate_accumulate/accurate_accumulate.h>
2+
#include <library/cpp/testing/benchmark/bench.h>
3+
4+
#include <util/generic/algorithm.h>
5+
#include <util/generic/singleton.h>
6+
#include <util/generic/vector.h>
7+
#include <util/generic/xrange.h>
8+
#include <util/random/fast.h>
9+
10+
namespace {
11+
template <typename T, size_t N>
12+
struct TNormalizedExamplesHolder {
13+
TVector<T> Examples;
14+
TNormalizedExamplesHolder()
15+
: Examples(N)
16+
{
17+
TFastRng<ui64> prng{sizeof(T) * N * 42u};
18+
for (auto& x : Examples) {
19+
x = prng.GenRandReal4();
20+
}
21+
}
22+
};
23+
24+
template <typename T, size_t N>
25+
struct TExamplesHolder {
26+
TVector<T> Examples;
27+
TExamplesHolder()
28+
: Examples(N)
29+
{
30+
TFastRng<ui64> prng{sizeof(T) * N * 42u + 100500u};
31+
for (auto& x : Examples) {
32+
// operations with non-normalized floating point numbers are rumored to work slower
33+
x = prng.GenRandReal4() + prng.Uniform(1024u);
34+
}
35+
}
36+
};
37+
}
38+
39+
// Defines a single benchmark named <prefix>_<type>_<count>. Every iteration runs
// Accumulate() over the examples of the default holder<type, count> instance,
// starting from a value-initialized `accumulator`, and converts the result to
// `type` before handing it to Y_DO_NOT_OPTIMIZE_AWAY.
#define DEFINE_ACCUMULATE_BENCHMARK(prefix, holder, type, count, accumulator)                      \
    Y_CPU_BENCHMARK(prefix##_##type##_##count, iface) {                                            \
        const auto& examples = Default<holder<type, count>>().Examples;                            \
        for (const auto i : xrange(iface.Iterations())) {                                          \
            Y_UNUSED(i);                                                                           \
            Y_DO_NOT_OPTIMIZE_AWAY(                                                                \
                static_cast<type>(                                                                 \
                    Accumulate(std::cbegin(examples), std::cend(examples), accumulator{})));       \
        }                                                                                          \
    }

// For one (type, count) pair defines four benchmarks: plain and Kahan
// accumulation over TNormalizedExamplesHolder data (SimpleNorm_*, KahanNorm_*)
// and over TExamplesHolder data (Simple_*, Kahan_*).
#define DEFINE_BENCHMARK(type, count)                                                                       \
    DEFINE_ACCUMULATE_BENCHMARK(SimpleNorm, TNormalizedExamplesHolder, type, count, type)                   \
                                                                                                            \
    DEFINE_ACCUMULATE_BENCHMARK(KahanNorm, TNormalizedExamplesHolder, type, count, TKahanAccumulator<type>) \
                                                                                                            \
    DEFINE_ACCUMULATE_BENCHMARK(Simple, TExamplesHolder, type, count, type)                                 \
                                                                                                            \
    DEFINE_ACCUMULATE_BENCHMARK(Kahan, TExamplesHolder, type, count, TKahanAccumulator<type>)

// float inputs, sizes 2 .. 1024
DEFINE_BENCHMARK(float, 2)
DEFINE_BENCHMARK(float, 4)
DEFINE_BENCHMARK(float, 8)
DEFINE_BENCHMARK(float, 16)
DEFINE_BENCHMARK(float, 32)
DEFINE_BENCHMARK(float, 64)
DEFINE_BENCHMARK(float, 128)
DEFINE_BENCHMARK(float, 256)
DEFINE_BENCHMARK(float, 512)
DEFINE_BENCHMARK(float, 1024)

// double inputs, sizes 2 .. 1024
DEFINE_BENCHMARK(double, 2)
DEFINE_BENCHMARK(double, 4)
DEFINE_BENCHMARK(double, 8)
DEFINE_BENCHMARK(double, 16)
DEFINE_BENCHMARK(double, 32)
DEFINE_BENCHMARK(double, 64)
DEFINE_BENCHMARK(double, 128)
DEFINE_BENCHMARK(double, 256)
DEFINE_BENCHMARK(double, 512)
DEFINE_BENCHMARK(double, 1024)

#undef DEFINE_BENCHMARK
#undef DEFINE_ACCUMULATE_BENCHMARK
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import yatest.common as yc
2+
3+
4+
def test_export_metrics(metrics):
    """Run the accumulate benchmark binary and hand its result to ``metrics``.

    The benchmark is executed through ``yc.execute_benchmark`` with
    ``threads=8`` and whatever it returns is passed to
    ``metrics.set_benchmark``.
    """
    benchmark_path = 'library/cpp/accurate_accumulate/benchmark/benchmark'
    benchmark_result = yc.execute_benchmark(benchmark_path, threads=8)
    metrics.set_benchmark(benchmark_result)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
PY2TEST()
2+
3+
SIZE(LARGE)
4+
5+
TAG(
6+
ya:force_sandbox
7+
sb:intel_e5_2660v1
8+
ya:fat
9+
)
10+
11+
TEST_SRCS(main.py)
12+
13+
DEPENDS(library/cpp/accurate_accumulate/benchmark)
14+
15+
END()
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
Y_BENCHMARK()
2+
3+
BENCHMARK_OPTS(--budget=10)
4+
5+
SRCS(
6+
main.cpp
7+
)
8+
9+
PEERDIR(
10+
library/cpp/accurate_accumulate
11+
)
12+
13+
END()
14+
15+
RECURSE(
16+
metrics
17+
)
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
LIBRARY()
2+
3+
SRCS(
4+
accurate_accumulate.h
5+
accurate_accumulate.cpp
6+
)
7+
8+
END()
9+
10+
RECURSE(
11+
benchmark
12+
)

0 commit comments

Comments
 (0)