Skip to content

Commit 9c5b2e0

Browse files
committed
Refactor stdlib module functions to unify handling of integer, real, and complex types
1 parent a4370c2 commit 9c5b2e0

File tree

5 files changed

+261
-207
lines changed

5 files changed

+261
-207
lines changed

src/stdlib_constants.fypp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
#:include "common.fypp"
22
#:set KINDS = REAL_KINDS
3+
#:set I_KINDS_TYPES = list(zip(INT_KINDS, INT_TYPES, INT_KINDS))
34
#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
45
#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
56

67
module stdlib_constants
78
!! Constants
89
!! ([Specification](../page/specs/stdlib_constants.html))
9-
use stdlib_kinds, only: #{for k in KINDS[:-1]}#${k}$, #{endfor}#${KINDS[-1]}$
10+
use stdlib_kinds
1011
use stdlib_codata, only: SPEED_OF_LIGHT_IN_VACUUM, &
1112
VACUUM_ELECTRIC_PERMITTIVITY, &
1213
VACUUM_MAG_PERMEABILITY, &
@@ -63,6 +64,10 @@ module stdlib_constants
6364
real(dp), parameter, public :: u = ATOMIC_MASS_CONSTANT%value !! Atomic mass constant
6465

6566
! Additional constants if needed
67+
#:for k, t, s in I_KINDS_TYPES
68+
${t}$, parameter, public :: zero_${s}$ = 0_${k}$
69+
${t}$, parameter, public :: one_${s}$ = 1_${k}$
70+
#:endfor
6671
#:for k, t, s in R_KINDS_TYPES
6772
${t}$, parameter, public :: zero_${s}$ = 0._${k}$
6873
${t}$, parameter, public :: one_${s}$ = 1._${k}$

src/stdlib_intrinsics.fypp

Lines changed: 51 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#:include "common.fypp"
2+
#:set I_KINDS_TYPES = list(zip(INT_KINDS, INT_TYPES, INT_KINDS))
23
#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
34
#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
4-
#:set RC_KINDS_TYPES = R_KINDS_TYPES + C_KINDS_TYPES
55
#:set RANKS = range(2, MAXRANK + 1)
66

77
module stdlib_intrinsics
@@ -25,27 +25,27 @@ module stdlib_intrinsics
2525
!! The `N-D` interfaces calls upon the `(N-1)-D` implementation.
2626
!! Supported data types include `real` and `complex`.
2727
!!
28-
#:for rk, rt, rs in RC_KINDS_TYPES
29-
pure module function stdlib_sum_1d_${rs}$(a) result(s)
30-
${rt}$, intent(in) :: a(:)
31-
${rt}$ :: s
28+
#:for k, t, s in I_KINDS_TYPES + R_KINDS_TYPES + C_KINDS_TYPES
29+
pure module function stdlib_sum_1d_${s}$(a) result(s)
30+
${t}$, intent(in) :: a(:)
31+
${t}$ :: s
3232
end function
33-
pure module function stdlib_sum_1d_${rs}$_mask(a,mask) result(s)
34-
${rt}$, intent(in) :: a(:)
33+
pure module function stdlib_sum_1d_${s}$_mask(a,mask) result(s)
34+
${t}$, intent(in) :: a(:)
3535
logical, intent(in) :: mask(:)
36-
${rt}$ :: s
36+
${t}$ :: s
3737
end function
3838
#:for rank in RANKS
39-
pure module function stdlib_sum_${rank}$d_${rs}$( x, mask ) result( s )
40-
${rt}$, intent(in) :: x${ranksuffix(rank)}$
39+
pure module function stdlib_sum_${rank}$d_${s}$( x, mask ) result( s )
40+
${t}$, intent(in) :: x${ranksuffix(rank)}$
4141
logical, intent(in), optional :: mask${ranksuffix(rank)}$
42-
${rt}$ :: s
42+
${t}$ :: s
4343
end function
44-
pure module function stdlib_sum_${rank}$d_dim_${rs}$( x , dim, mask ) result( s )
45-
${rt}$, intent(in) :: x${ranksuffix(rank)}$
44+
pure module function stdlib_sum_${rank}$d_dim_${s}$( x , dim, mask ) result( s )
45+
${t}$, intent(in) :: x${ranksuffix(rank)}$
4646
integer, intent(in):: dim
4747
logical, intent(in), optional :: mask${ranksuffix(rank)}$
48-
${rt}$ :: s${reduced_shape('x', rank, 'dim')}$
48+
${t}$ :: s${reduced_shape('x', rank, 'dim')}$
4949
end function
5050
#:endfor
5151
#:endfor
@@ -66,27 +66,27 @@ module stdlib_intrinsics
6666
!! The `N-D` interfaces calls upon the `(N-1)-D` implementation.
6767
!! Supported data types include `real` and `complex`.
6868
!!
69-
#:for rk, rt, rs in RC_KINDS_TYPES
70-
pure module function stdlib_sum_kahan_1d_${rs}$(a) result(s)
71-
${rt}$, intent(in) :: a(:)
72-
${rt}$ :: s
69+
#:for k, t, s in R_KINDS_TYPES + C_KINDS_TYPES
70+
pure module function stdlib_sum_kahan_1d_${s}$(a) result(s)
71+
${t}$, intent(in) :: a(:)
72+
${t}$ :: s
7373
end function
74-
pure module function stdlib_sum_kahan_1d_${rs}$_mask(a,mask) result(s)
75-
${rt}$, intent(in) :: a(:)
74+
pure module function stdlib_sum_kahan_1d_${s}$_mask(a,mask) result(s)
75+
${t}$, intent(in) :: a(:)
7676
logical, intent(in) :: mask(:)
77-
${rt}$ :: s
77+
${t}$ :: s
7878
end function
7979
#:for rank in RANKS
80-
pure module function stdlib_sum_kahan_${rank}$d_${rs}$( x, mask ) result( s )
81-
${rt}$, intent(in) :: x${ranksuffix(rank)}$
80+
pure module function stdlib_sum_kahan_${rank}$d_${s}$( x, mask ) result( s )
81+
${t}$, intent(in) :: x${ranksuffix(rank)}$
8282
logical, intent(in), optional :: mask${ranksuffix(rank)}$
83-
${rt}$ :: s
83+
${t}$ :: s
8484
end function
85-
pure module function stdlib_sum_kahan_${rank}$d_dim_${rs}$( x , dim, mask ) result( s )
86-
${rt}$, intent(in) :: x${ranksuffix(rank)}$
85+
pure module function stdlib_sum_kahan_${rank}$d_dim_${s}$( x , dim, mask ) result( s )
86+
${t}$, intent(in) :: x${ranksuffix(rank)}$
8787
integer, intent(in):: dim
8888
logical, intent(in), optional :: mask${ranksuffix(rank)}$
89-
${rt}$ :: s${reduced_shape('x', rank, 'dim')}$
89+
${t}$ :: s${reduced_shape('x', rank, 'dim')}$
9090
end function
9191
#:endfor
9292
#:endfor
@@ -106,11 +106,11 @@ module stdlib_intrinsics
106106
!! The 1-D base implementation follows a chunked approach for optimizing performance and increasing accuracy.
107107
!! Supported data types include `real` and `complex`.
108108
!!
109-
#:for rk, rt, rs in RC_KINDS_TYPES
110-
pure module function stdlib_dot_product_${rs}$(a,b) result(p)
111-
${rt}$, intent(in) :: a(:)
112-
${rt}$, intent(in) :: b(:)
113-
${rt}$ :: p
109+
#:for k, t, s in I_KINDS_TYPES + R_KINDS_TYPES + C_KINDS_TYPES
110+
pure module function stdlib_dot_product_${s}$(a,b) result(p)
111+
${t}$, intent(in) :: a(:)
112+
${t}$, intent(in) :: b(:)
113+
${t}$ :: p
114114
end function
115115
#:endfor
116116
end interface
@@ -129,43 +129,43 @@ module stdlib_intrinsics
129129
!! The implementation follows a chunked approach combined with a kahan kernel for optimizing performance and increasing accuracy.
130130
!! Supported data types include `real` and `complex`.
131131
!!
132-
#:for rk, rt, rs in RC_KINDS_TYPES
133-
pure module function stdlib_dot_product_kahan_${rs}$(a,b) result(p)
134-
${rt}$, intent(in) :: a(:)
135-
${rt}$, intent(in) :: b(:)
136-
${rt}$ :: p
132+
#:for k, t, s in R_KINDS_TYPES + C_KINDS_TYPES
133+
pure module function stdlib_dot_product_kahan_${s}$(a,b) result(p)
134+
${t}$, intent(in) :: a(:)
135+
${t}$, intent(in) :: b(:)
136+
${t}$ :: p
137137
end function
138138
#:endfor
139139
end interface
140140
public :: stdlib_dot_product_kahan
141141

142142
interface kahan_kernel
143-
#:for rk, rt, rs in RC_KINDS_TYPES
144-
module procedure :: kahan_kernel_${rs}$
145-
module procedure :: kahan_kernel_m_${rs}$
143+
#:for k, t, s in R_KINDS_TYPES + C_KINDS_TYPES
144+
module procedure :: kahan_kernel_${s}$
145+
module procedure :: kahan_kernel_m_${s}$
146146
#:endfor
147147
end interface
148148
public :: kahan_kernel
149149

150150
contains
151151

152-
#:for rk, rt, rs in RC_KINDS_TYPES
153-
elemental subroutine kahan_kernel_${rs}$(a,s,c)
154-
${rt}$, intent(in) :: a
155-
${rt}$, intent(inout) :: s
156-
${rt}$, intent(inout) :: c
157-
${rt}$ :: t, y
152+
#:for k, t, s in R_KINDS_TYPES + C_KINDS_TYPES
153+
elemental subroutine kahan_kernel_${s}$(a,s,c)
154+
${t}$, intent(in) :: a
155+
${t}$, intent(inout) :: s
156+
${t}$, intent(inout) :: c
157+
${t}$ :: t, y
158158
y = a - c
159159
t = s + y
160160
c = (t - s) - y
161161
s = t
162162
end subroutine
163-
elemental subroutine kahan_kernel_m_${rs}$(a,s,c,m)
164-
${rt}$, intent(in) :: a
165-
${rt}$, intent(inout) :: s
166-
${rt}$, intent(inout) :: c
163+
elemental subroutine kahan_kernel_m_${s}$(a,s,c,m)
164+
${t}$, intent(in) :: a
165+
${t}$, intent(inout) :: s
166+
${t}$, intent(inout) :: c
167167
logical, intent(in) :: m
168-
${rt}$ :: t, y
168+
${t}$ :: t, y
169169
y = a - c
170170
t = s + y
171171
c = (t - s) - y

src/stdlib_intrinsics_dot_product.fypp

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#:include "common.fypp"
2+
#:set I_KINDS_TYPES = list(zip(INT_KINDS, INT_TYPES, INT_KINDS))
23
#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
34
#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
4-
#:set RC_KINDS_TYPES = R_KINDS_TYPES + C_KINDS_TYPES
55

66
#:def cnjg(type,expression)
77
#:if 'complex' in type
@@ -18,55 +18,58 @@ submodule(stdlib_intrinsics) stdlib_intrinsics_dot_product
1818
use stdlib_constants
1919
implicit none
2020

21-
integer, parameter :: chunk = 64
21+
integer, parameter :: ilp = int64
2222

2323
contains
2424
! This implementation is based on https://github.com/jalvesz/fast_math
25-
#:for k1, t1, s1 in RC_KINDS_TYPES
26-
pure module function stdlib_dot_product_${s1}$(a,b) result(p)
27-
${t1}$, intent(in) :: a(:)
28-
${t1}$, intent(in) :: b(:)
29-
${t1}$ :: p
30-
${t1}$ :: abatch(chunk)
31-
integer :: i, dr, rr
25+
#:for k, t, s in I_KINDS_TYPES + R_KINDS_TYPES + C_KINDS_TYPES
26+
pure module function stdlib_dot_product_${s}$(a,b) result(p)
27+
integer(ilp), parameter :: chunk = 64
28+
${t}$, intent(in) :: a(:)
29+
${t}$, intent(in) :: b(:)
30+
${t}$ :: p
31+
${t}$ :: abatch(chunk)
32+
integer(ilp) :: i, n, r
3233
! -----------------------------
33-
dr = size(a)/chunk
34-
rr = size(a) - dr*chunk
34+
n = size(a,kind=ilp)
35+
r = mod(n,chunk)
3536

36-
abatch = zero_${s1}$
37-
do i = 1, dr
38-
abatch(1:chunk) = abatch(1:chunk) + a(chunk*i-chunk+1:chunk*i)*${cnjg(t1,'b(chunk*i-chunk+1:chunk*i)')}$
37+
abatch(1:r) = a(1:r)*${cnjg(t,'b(1:r)')}$
38+
abatch(r+1:chunk) = zero_${s}$
39+
do i = r+1, n-r, chunk
40+
abatch(1:chunk) = abatch(1:chunk) + a(i:i+chunk-1)*${cnjg(t,'b(i:i+chunk-1)')}$
3941
end do
40-
abatch(1:rr) = abatch(1:rr) + a(size(a)-rr+1:size(a))*${cnjg(t1,'b(size(a)-rr+1:size(a))')}$
4142

42-
p = zero_${s1}$
43+
p = zero_${s}$
4344
do i = 1, chunk/2
4445
p = p + abatch(i)+abatch(chunk/2+i)
4546
end do
4647
end function
4748
#:endfor
4849

49-
#:for k1, t1, s1 in RC_KINDS_TYPES
50-
pure module function stdlib_dot_product_kahan_${s1}$(a,b) result(p)
51-
${t1}$, intent(in) :: a(:)
52-
${t1}$, intent(in) :: b(:)
53-
${t1}$ :: p
54-
${t1}$ :: pbatch(chunk)
55-
${t1}$ :: cbatch(chunk)
56-
integer :: i, dr, rr
50+
#:for k, t, s in R_KINDS_TYPES + C_KINDS_TYPES
51+
pure module function stdlib_dot_product_kahan_${s}$(a,b) result(p)
52+
integer(ilp), parameter :: chunk = 64
53+
${t}$, intent(in) :: a(:)
54+
${t}$, intent(in) :: b(:)
55+
${t}$ :: p
56+
${t}$ :: abatch(chunk)
57+
${t}$ :: cbatch(chunk)
58+
integer(ilp) :: i, n, r
5759
! -----------------------------
58-
dr = size(a)/(chunk)
59-
rr = size(a) - dr*chunk
60-
pbatch = zero_${s1}$
61-
cbatch = zero_${s1}$
62-
do i = 1, dr
63-
call kahan_kernel( a(chunk*i-chunk+1:chunk*i)*${cnjg(t1,'b(chunk*i-chunk+1:chunk*i)')}$ , pbatch(1:chunk) , cbatch(1:chunk) )
64-
end do
65-
call kahan_kernel( a(size(a)-rr+1:size(a))*${cnjg(t1,'b(size(a)-rr+1:size(a))')}$ , pbatch(1:rr) , cbatch(1:rr) )
60+
n = size(a,kind=ilp)
61+
r = mod(n,chunk)
62+
63+
abatch(1:r) = a(1:r)*${cnjg(t,'b(1:r)')}$
64+
abatch(r+1:chunk) = zero_${s}$
65+
cbatch = zero_${s}$
66+
do i = r+1, n-r, chunk
67+
call kahan_kernel( a(i:i+chunk-1)*${cnjg(t,'b(i:i+chunk-1)')}$ , abatch(1:chunk) , cbatch(1:chunk) )
68+
end do
6669

67-
p = zero_${s1}$
68-
do i = 1,chunk
69-
call kahan_kernel( pbatch(i) , p , cbatch(i) )
70+
p = zero_${s}$
71+
do i = 1, chunk
72+
call kahan_kernel( abatch(i) , p , cbatch(i) )
7073
end do
7174
end function
7275
#:endfor

0 commit comments

Comments
 (0)