Skip to content

Commit 08ec0aa

Browse files
committed
intrinsics module with fast sums
1 parent 35e7146 commit 08ec0aa

File tree

1 file changed

+159
-0
lines changed

1 file changed

+159
-0
lines changed

src/stdlib_intrinsics.fypp

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
#:include "common.fypp"
2+
#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
3+
#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
4+
#:set RANKS = range(2, 2)
5+
module stdlib_intrinsics
6+
use stdlib_kinds
7+
implicit none
8+
private
9+
10+
interface fsum
11+
#:for rk, rt in REAL_KINDS_TYPES
12+
module procedure :: fsum_1d_${rk}$
13+
module procedure :: fsum_1d_${rk}$_mask
14+
#:endfor
15+
end interface
16+
public :: fsum
17+
18+
interface fsum_kahan
19+
#:for rk, rt in REAL_KINDS_TYPES
20+
module procedure :: fsum_kahan_1d_${rk}$
21+
module procedure :: fsum_kahan_1d_${rk}$_mask
22+
#:endfor
23+
end interface
24+
public :: fsum_kahan
25+
26+
interface vkahan
27+
#:for rk, rt in REAL_KINDS_TYPES
28+
module procedure :: vkahan_${rk}$
29+
module procedure :: vkahan_m_${rk}$
30+
#:endfor
31+
end interface
32+
33+
#:for k1, t1, s1 in (R_KINDS_TYPES)
34+
${t1}$, parameter :: zero_${s1}$ = 0._${k1}$
35+
#:endfor
36+
#:for k1, t1, s1 in (C_KINDS_TYPES)
37+
${t1}$, parameter :: zero_${s1}$ = (0._${k1}$,0._${k1}$)
38+
#:endfor
39+
integer, parameter :: chunk = 64
40+
41+
contains
42+
43+
#:for k1, t1, s1 in R_KINDS_TYPES
44+
pure function fsum_1d_${s1}$(a) result(s)
45+
${t1}$, intent(in) :: a(:)
46+
${t1}$ :: s
47+
${t1}$ :: abatch(chunk)
48+
integer :: i, dr, rr
49+
! -----------------------------
50+
dr = size(a)/chunk
51+
rr = size(a) - dr*chunk
52+
53+
abatch = zero_${s1}$
54+
do i = 1, dr
55+
abatch(1:chunk) = abatch(1:chunk) + a(chunk*i-chunk+1:chunk*i)
56+
end do
57+
abatch(1:rr) = abatch(1:rr) + a(size(a)-rr+1:size(a))
58+
59+
s = zero_${s1}$
60+
do i = 1, chunk/2
61+
s = s + abatch(i)+abatch(chunk/2+i)
62+
end do
63+
end function
64+
65+
pure function fsum_1d_${s1}$_mask(a,mask) result(s)
66+
${t1}$, intent(in) :: a(:)
67+
logical, intent(in) :: mask(:)
68+
${t1}$ :: s
69+
${t1}$ :: abatch(chunk)
70+
integer :: i, dr, rr
71+
! -----------------------------
72+
dr = size(a)/chunk
73+
rr = size(a) - dr*chunk
74+
75+
abatch = zero_${s1}$
76+
do i = 1, dr
77+
abatch(1:chunk) = abatch(1:chunk) + merge( zero_${s1}$ , a(chunk*i-chunk+1:chunk*i) , mask(chunk*i-chunk+1:chunk*i) )
78+
end do
79+
abatch(1:rr) = abatch(1:rr) + merge( zero_${s1}$ , a(size(a)-rr+1:size(a)) , mask(size(a)-rr+1:size(a)) )
80+
81+
s = zero_${s1}$
82+
do i = 1, chunk/2
83+
s = s + abatch(i)+abatch(chunk/2+i)
84+
end do
85+
end function
86+
#:endfor
87+
88+
#:for k1, t1, s1 in R_KINDS_TYPES
89+
pure function fsum_kahan_1d_${s1}$(a) result(s)
90+
${t1}$, intent(in) :: a(:)
91+
${t1}$ :: s
92+
${t1}$ :: sbatch(chunk)
93+
${t1}$ :: cbatch(chunk)
94+
integer :: i, dr, rr
95+
! -----------------------------
96+
dr = size(a)/(chunk)
97+
rr = size(a) - dr*chunk
98+
sbatch = zero_${s1}$
99+
cbatch = zero_${s1}$
100+
do i = 1, dr
101+
call vkahan( a(chunk*i-chunk+1:chunk*i) , sbatch(1:chunk) , cbatch(1:chunk) )
102+
end do
103+
call vkahan( a(size(a)-rr+1:size(a)) , sbatch(1:rr) , cbatch(1:rr) )
104+
105+
s = zero_${s1}$
106+
do i = 1,chunk
107+
call vkahan( sbatch(i) , s , cbatch(i) )
108+
end do
109+
end function
110+
111+
pure function fsum_kahan_1d_${s1}$_mask(a,mask) result(s)
112+
${t1}$, intent(in) :: a(:)
113+
logical, intent(in) :: mask(:)
114+
${t1}$ :: s
115+
${t1}$ :: sbatch(chunk)
116+
${t1}$ :: cbatch(chunk)
117+
integer :: i, dr, rr
118+
! -----------------------------
119+
dr = size(a)/(chunk)
120+
rr = size(a) - dr*chunk
121+
sbatch = zero_${s1}$
122+
cbatch = zero_${s1}$
123+
do i = 1, dr
124+
call vkahan( a(chunk*i-chunk+1:chunk*i) , sbatch(1:chunk) , cbatch(1:chunk) , mask(chunk*i-chunk+1:chunk*i) )
125+
end do
126+
call vkahan( a(size(a)-rr+1:size(a)) , sbatch(1:rr) , cbatch(1:rr) , mask(size(a)-rr+1:size(a)) )
127+
128+
s = zero_${s1}$
129+
do i = 1,chunk
130+
call vkahan( sbatch(i) , s , cbatch(i) )
131+
end do
132+
end function
133+
#:endfor
134+
135+
#:for k1, t1, s1 in R_KINDS_TYPES
136+
elemental subroutine vkahan_${s1}$(a,s,c)
137+
${t1}$, intent(in) :: a
138+
${t1}$, intent(inout) :: s
139+
${t1}$, intent(inout) :: c
140+
${t1}$ :: t, y
141+
y = a - c
142+
t = s + y
143+
c = (t - s) - y
144+
s = t
145+
end subroutine
146+
elemental subroutine vkahan_m_${s1}$(a,s,c,m)
147+
${t1}$, intent(in) :: a
148+
${t1}$, intent(inout) :: s
149+
${t1}$, intent(inout) :: c
150+
logical, intent(in) :: m
151+
${t1}$ :: t, y
152+
y = a - c
153+
t = s + y
154+
c = (t - s) - y
155+
s = merge( s , t , m )
156+
end subroutine
157+
#:endfor
158+
159+
end module stdlib_intrinsics

0 commit comments

Comments
 (0)