File tree 1 file changed +9
-11
lines changed
1 file changed +9
-11
lines changed Original file line number Diff line number Diff line change @@ -20,31 +20,29 @@ namespace cp_algo {
20
20
#endif
21
21
}
22
22
23
- i64x4 lround (dx4 a) {
24
- #ifdef __AVX2__
25
- return __builtin_convertvector (_mm256_round_pd (a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC), i64x4);
26
- #else
27
- return __builtin_convertvector (a < 0 ? a - 0.5 : a + 0.5 , i64x4);
28
- #endif
23
+ i64x4 lround (dx4 x) {
24
+ // https://stackoverflow.com/a/77376595
25
+ static constexpr dx4 magic = dx4 () + double (3ULL << 51 );
26
+ return i64x4 (x + magic) - i64x4 (magic);
29
27
}
30
28
31
29
dx4 round (dx4 a) {
32
30
#ifdef __AVX2__
33
31
return _mm256_round_pd (a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
34
32
#else
35
- return __builtin_convertvector (lround (a), Simd );
33
+ return __builtin_convertvector (lround (a), dx4 );
36
34
#endif
37
35
}
38
36
39
37
u64x4 montgomery_reduce (u64x4 x, u64x4 mod, u64x4 imod) {
40
- #ifdef __AVX2__
38
+ #ifndef __AVX2__
41
39
auto x_ninv = _mm256_mul_epu32 (__m256i (x), __m256i (imod));
42
40
auto x_res = _mm256_add_epi64 (__m256i (x), _mm256_mul_epu32 (x_ninv, __m256i (mod)));
43
41
return u64x4 (_mm256_bsrli_epi128 (x_res, 4 ));
44
42
#else
45
- auto x_ninv = x * imod;
46
- auto x_res = x + ((x_ninv << 32 ) >> 32 ) * mod ;
47
- return u64x4 (x_res >> 32 ) ;
43
+
44
+ auto x_ninv = u64x4 ( u32x8 (x) * u32x8 (imod)) ;
45
+ return (x + x_ninv * mod) >> 32 ;
48
46
#endif
49
47
}
50
48
You can’t perform that action at this time.
0 commit comments