Skip to content

Commit 47396ac

Browse files
committed
update specs, change name of kahan kernel
1 parent 5a1fdcb commit 47396ac

File tree

4 files changed

+26
-22
lines changed

4 files changed

+26
-22
lines changed

doc/specs/stdlib_intrinsics.md

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@ The `stdlib_intrinsics` module provides replacements for some of the well known
1515

1616
#### Description
1717

18-
The `fsum` function can replace the intrinsic `sum` for 1D `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential as well as reducing the round-off error. This procedure is recommended when summing large arrays, for repetitive summation of smaller arrays consider the classical `sum`.
18+
The `fsum` function can replace the intrinsic `sum` for `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential as well as reducing the round-off error. This procedure is recommended when summing large arrays; for repetitive summation of smaller arrays, consider the classical `sum`.
1919

2020
#### Syntax
2121

2222
`res = ` [[stdlib_intrinsics(module):fsum(interface)]] ` (x [,mask] )`
2323

24+
`res = ` [[stdlib_intrinsics(module):fsum(interface)]] ` (x, dim [,mask] )`
25+
2426
#### Status
2527

2628
Experimental
@@ -31,13 +33,15 @@ Pure function.
3133

3234
#### Argument(s)
3335

34-
`x`: 1D array of either `real` or `complex` type. This argument is `intent(in)`.
36+
`x`: N-D array of either `real` or `complex` type. This argument is `intent(in)`.
3537

36-
`mask` (optional): 1D array of `logical` values. This argument is `intent(in)`.
38+
`dim` (optional): scalar of type `integer` with a value in the range from 1 to n, where n equals the rank of `x`. This argument is `intent(in)`.
39+
40+
`mask` (optional): N-D array of `logical` values, with the same shape as `x`. This argument is `intent(in)`.
3741

3842
#### Output value or Result value
3943

40-
The output is a scalar of `type` and `kind` same as to that of `x`.
44+
If `dim` is absent, the output is a scalar of the same `type` and `kind` as `x`. Otherwise, the output is an array of rank n-1, where n equals the rank of `x`, whose shape is that of `x` with dimension `dim` dropped.
4145

4246
<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
4347
### `fsum_kahan` function
@@ -47,7 +51,7 @@ The output is a scalar of `type` and `kind` same as to that of `x`.
4751
The `fsum_kahan` function can replace the intrinsic `sum` for 1D `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential, complemented by an `elemental` kernel based on the [Kahan summation](https://en.wikipedia.org/wiki/Kahan_summation_algorithm) strategy to reduce the round-off error:
4852

4953
```fortran
50-
elemental subroutine vkahan_<kind>(a,s,c)
54+
elemental subroutine kahan_kernel_<kind>(a,s,c)
5155
type(<kind>), intent(in) :: a
5256
type(<kind>), intent(inout) :: s
5357
type(<kind>), intent(inout) :: c

src/stdlib_intrinsics.fypp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
33
#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
44
#:set RC_KINDS_TYPES = R_KINDS_TYPES + C_KINDS_TYPES
5-
#:set RANKS = range(1, MAXRANK + 1)
5+
#:set RANKS = range(2, MAXRANK + 1)
66

77
! This module is based on https://github.com/jalvesz/fast_math
88
module stdlib_intrinsics
@@ -77,18 +77,18 @@ module stdlib_intrinsics
7777
end interface
7878
public :: fprod_kahan
7979

80-
interface vkahan
80+
interface kahan_kernel
8181
#:for rk, rt, rs in RC_KINDS_TYPES
82-
module procedure :: vkahan_${rs}$
83-
module procedure :: vkahan_m_${rs}$
82+
module procedure :: kahan_kernel_${rs}$
83+
module procedure :: kahan_kernel_m_${rs}$
8484
#:endfor
8585
end interface
86-
public :: vkahan
86+
public :: kahan_kernel
8787

8888
contains
8989

9090
#:for rk, rt, rs in RC_KINDS_TYPES
91-
elemental subroutine vkahan_${rs}$(a,s,c)
91+
elemental subroutine kahan_kernel_${rs}$(a,s,c)
9292
${rt}$, intent(in) :: a
9393
${rt}$, intent(inout) :: s
9494
${rt}$, intent(inout) :: c
@@ -98,7 +98,7 @@ elemental subroutine vkahan_${rs}$(a,s,c)
9898
c = (t - s) - y
9999
s = t
100100
end subroutine
101-
elemental subroutine vkahan_m_${rs}$(a,s,c,m)
101+
elemental subroutine kahan_kernel_m_${rs}$(a,s,c,m)
102102
${rt}$, intent(in) :: a
103103
${rt}$, intent(inout) :: s
104104
${rt}$, intent(inout) :: c

src/stdlib_intrinsics_dot_product.fypp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,13 @@ pure module function fprod_kahan_${s1}$(a,b) result(p)
6161
pbatch = zero_${s1}$
6262
cbatch = zero_${s1}$
6363
do i = 1, dr
64-
call vkahan( a(chunk*i-chunk+1:chunk*i)*${cnjg(t1,'b(chunk*i-chunk+1:chunk*i)')}$ , pbatch(1:chunk) , cbatch(1:chunk) )
64+
call kahan_kernel( a(chunk*i-chunk+1:chunk*i)*${cnjg(t1,'b(chunk*i-chunk+1:chunk*i)')}$ , pbatch(1:chunk) , cbatch(1:chunk) )
6565
end do
66-
call vkahan( a(size(a)-rr+1:size(a))*${cnjg(t1,'b(size(a)-rr+1:size(a))')}$ , pbatch(1:rr) , cbatch(1:rr) )
66+
call kahan_kernel( a(size(a)-rr+1:size(a))*${cnjg(t1,'b(size(a)-rr+1:size(a))')}$ , pbatch(1:rr) , cbatch(1:rr) )
6767

6868
p = zero_${s1}$
6969
do i = 1,chunk
70-
call vkahan( pbatch(i) , p , cbatch(i) )
70+
call kahan_kernel( pbatch(i) , p , cbatch(i) )
7171
end do
7272
end function
7373
#:endfor

src/stdlib_intrinsics_sum.fypp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
33
#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
44
#:set RC_KINDS_TYPES = R_KINDS_TYPES + C_KINDS_TYPES
5-
#:set RANKS = range(1, MAXRANK + 1)
5+
#:set RANKS = range(2, MAXRANK + 1)
66

77
! This module is based on https://github.com/jalvesz/fast_math
88
submodule(stdlib_intrinsics) stdlib_intrinsics_sum
@@ -146,13 +146,13 @@ pure module function fsum_kahan_1d_${rs}$(a) result(s)
146146
sbatch = zero_${rs}$
147147
cbatch = zero_${rs}$
148148
do i = 1, dr
149-
call vkahan( a(chunk*i-chunk+1:chunk*i) , sbatch(1:chunk) , cbatch(1:chunk) )
149+
call kahan_kernel( a(chunk*i-chunk+1:chunk*i) , sbatch(1:chunk) , cbatch(1:chunk) )
150150
end do
151-
call vkahan( a(size(a)-rr+1:size(a)) , sbatch(1:rr) , cbatch(1:rr) )
151+
call kahan_kernel( a(size(a)-rr+1:size(a)) , sbatch(1:rr) , cbatch(1:rr) )
152152

153153
s = zero_${rs}$
154154
do i = 1,chunk
155-
call vkahan( sbatch(i) , s , cbatch(i) )
155+
call kahan_kernel( sbatch(i) , s , cbatch(i) )
156156
end do
157157
end function
158158

@@ -169,13 +169,13 @@ pure module function fsum_kahan_1d_${rs}$_mask(a,mask) result(s)
169169
sbatch = zero_${rs}$
170170
cbatch = zero_${rs}$
171171
do i = 1, dr
172-
call vkahan( a(chunk*i-chunk+1:chunk*i) , sbatch(1:chunk) , cbatch(1:chunk) , mask(chunk*i-chunk+1:chunk*i) )
172+
call kahan_kernel( a(chunk*i-chunk+1:chunk*i) , sbatch(1:chunk) , cbatch(1:chunk) , mask(chunk*i-chunk+1:chunk*i) )
173173
end do
174-
call vkahan( a(size(a)-rr+1:size(a)) , sbatch(1:rr) , cbatch(1:rr) , mask(size(a)-rr+1:size(a)) )
174+
call kahan_kernel( a(size(a)-rr+1:size(a)) , sbatch(1:rr) , cbatch(1:rr) , mask(size(a)-rr+1:size(a)) )
175175

176176
s = zero_${rs}$
177177
do i = 1,chunk
178-
call vkahan( sbatch(i) , s , cbatch(i) )
178+
call kahan_kernel( sbatch(i) , s , cbatch(i) )
179179
end do
180180
end function
181181
#:endfor

0 commit comments

Comments
 (0)