update specs, change name of kahan kernel

jalvesz · jalvesz · commit 47396acf367f · 2025-01-05T21:56:50.000+01:00
diff --git a/doc/specs/stdlib_intrinsics.md b/doc/specs/stdlib_intrinsics.md
@@ -15,12 +15,14 @@ The `stdlib_intrinsics` module provides replacements for some of the well known
 
 #### Description
 
-The `fsum` function can replace the intrinsic `sum` for 1D `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential as well as reducing the round-off error. This procedure is recommended when summing large arrays, for repetitive summation of smaller arrays consider the classical `sum`.
+The `fsum` function can replace the intrinsic `sum` for `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential and reduces the round-off error. This procedure is recommended when summing large arrays; for repetitive summation of smaller arrays, consider the classical `sum`.
 
 #### Syntax
 
 `res = ` [[stdlib_intrinsics(module):fsum(interface)]] ` (x [,mask] )`
 
+`res = ` [[stdlib_intrinsics(module):fsum(interface)]] ` (x, dim [,mask] )`
+
 #### Status
 
 Experimental
@@ -31,13 +33,15 @@ Pure function.
 
 #### Argument(s)
 
-`x`: 1D array of either `real` or `complex` type. This argument is `intent(in)`.
+`x`: N-D array of either `real` or `complex` type. This argument is `intent(in)`.
 
-`mask` (optional): 1D array of `logical` values. This argument is `intent(in)`.
+`dim` (optional): scalar of type `integer` with a value in the range from 1 to n, where n equals the rank of `x`. This argument is `intent(in)`.
+
+`mask` (optional): N-D array of `logical` values, with the same shape as `x`. This argument is `intent(in)`.
 
 #### Output value or Result value
 
-The output is a scalar of `type` and `kind` same as to that of `x`.
+If `dim` is absent, the output is a scalar of the same `type` and `kind` as `x`. Otherwise, the output is an array of rank n-1, where n equals the rank of `x`, whose shape is that of `x` with dimension `dim` dropped.
 
 <!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
 ### `fsum_kahan` function
@@ -47,7 +51,7 @@ The output is a scalar of `type` and `kind` same as to that of `x`.
 The `fsum_kahan` function can replace the intrinsic `sum` for 1D `real` or `complex` arrays. It follows a chunked implementation which maximizes vectorization potential, complemented by an `elemental` kernel based on the [kahan summation](https://en.wikipedia.org/wiki/Kahan_summation_algorithm) strategy to reduce the round-off error:
 
 ```fortran
-elemental subroutine vkahan_<kind>(a,s,c)
+elemental subroutine kahan_kernel_<kind>(a,s,c)
     type(<kind>), intent(in) :: a
     type(<kind>), intent(inout) :: s
     type(<kind>), intent(inout) :: c
diff --git a/src/stdlib_intrinsics.fypp b/src/stdlib_intrinsics.fypp
@@ -2,7 +2,7 @@
 #:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
 #:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
 #:set RC_KINDS_TYPES = R_KINDS_TYPES + C_KINDS_TYPES
-#:set RANKS = range(1, MAXRANK + 1)
+#:set RANKS = range(2, MAXRANK + 1)
 
 ! This module is based on https://github.com/jalvesz/fast_math
 module stdlib_intrinsics
@@ -77,18 +77,18 @@ module stdlib_intrinsics
     end interface
     public :: fprod_kahan
 
-    interface vkahan 
+    interface kahan_kernel 
         #:for rk, rt, rs in RC_KINDS_TYPES
-        module procedure :: vkahan_${rs}$
-        module procedure :: vkahan_m_${rs}$
+        module procedure :: kahan_kernel_${rs}$
+        module procedure :: kahan_kernel_m_${rs}$
         #:endfor
     end interface
-    public :: vkahan
+    public :: kahan_kernel
     
 contains
 
 #:for rk, rt, rs in RC_KINDS_TYPES
-elemental subroutine vkahan_${rs}$(a,s,c)
+elemental subroutine kahan_kernel_${rs}$(a,s,c)
     ${rt}$, intent(in) :: a
     ${rt}$, intent(inout) :: s
     ${rt}$, intent(inout) :: c
@@ -98,7 +98,7 @@ elemental subroutine vkahan_${rs}$(a,s,c)
     c = (t - s) - y
     s = t
 end subroutine  
-elemental subroutine vkahan_m_${rs}$(a,s,c,m)
+elemental subroutine kahan_kernel_m_${rs}$(a,s,c,m)
     ${rt}$, intent(in) :: a
     ${rt}$, intent(inout) :: s
     ${rt}$, intent(inout) :: c
diff --git a/src/stdlib_intrinsics_dot_product.fypp b/src/stdlib_intrinsics_dot_product.fypp
@@ -61,13 +61,13 @@ pure module function fprod_kahan_${s1}$(a,b) result(p)
     pbatch = zero_${s1}$
     cbatch = zero_${s1}$
     do i = 1, dr
-        call vkahan( a(chunk*i-chunk+1:chunk*i)*${cnjg(t1,'b(chunk*i-chunk+1:chunk*i)')}$ , pbatch(1:chunk) , cbatch(1:chunk) )
+        call kahan_kernel( a(chunk*i-chunk+1:chunk*i)*${cnjg(t1,'b(chunk*i-chunk+1:chunk*i)')}$ , pbatch(1:chunk) , cbatch(1:chunk) )
     end do
-    call vkahan( a(size(a)-rr+1:size(a))*${cnjg(t1,'b(size(a)-rr+1:size(a))')}$ , pbatch(1:rr) , cbatch(1:rr) )      
+    call kahan_kernel( a(size(a)-rr+1:size(a))*${cnjg(t1,'b(size(a)-rr+1:size(a))')}$ , pbatch(1:rr) , cbatch(1:rr) )      
 
     p = zero_${s1}$
     do i = 1,chunk
-        call vkahan( pbatch(i) , p , cbatch(i) )
+        call kahan_kernel( pbatch(i) , p , cbatch(i) )
     end do      
 end function
 #:endfor
diff --git a/src/stdlib_intrinsics_sum.fypp b/src/stdlib_intrinsics_sum.fypp
@@ -2,7 +2,7 @@
 #:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
 #:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
 #:set RC_KINDS_TYPES = R_KINDS_TYPES + C_KINDS_TYPES
-#:set RANKS = range(1, MAXRANK + 1)
+#:set RANKS = range(2, MAXRANK + 1)
 
 ! This module is based on https://github.com/jalvesz/fast_math
 submodule(stdlib_intrinsics) stdlib_intrinsics_sum
@@ -146,13 +146,13 @@ pure module function fsum_kahan_1d_${rs}$(a) result(s)
     sbatch = zero_${rs}$
     cbatch = zero_${rs}$
     do i = 1, dr
-        call vkahan( a(chunk*i-chunk+1:chunk*i) , sbatch(1:chunk) , cbatch(1:chunk) )
+        call kahan_kernel( a(chunk*i-chunk+1:chunk*i) , sbatch(1:chunk) , cbatch(1:chunk) )
     end do
-    call vkahan( a(size(a)-rr+1:size(a)) , sbatch(1:rr) , cbatch(1:rr) )      
+    call kahan_kernel( a(size(a)-rr+1:size(a)) , sbatch(1:rr) , cbatch(1:rr) )      
 
     s = zero_${rs}$
     do i = 1,chunk
-        call vkahan( sbatch(i) , s , cbatch(i) )
+        call kahan_kernel( sbatch(i) , s , cbatch(i) )
     end do
 end function
 
@@ -169,13 +169,13 @@ pure module function fsum_kahan_1d_${rs}$_mask(a,mask) result(s)
     sbatch = zero_${rs}$
     cbatch = zero_${rs}$
     do i = 1, dr
-        call vkahan( a(chunk*i-chunk+1:chunk*i) , sbatch(1:chunk) , cbatch(1:chunk) , mask(chunk*i-chunk+1:chunk*i) )
+        call kahan_kernel( a(chunk*i-chunk+1:chunk*i) , sbatch(1:chunk) , cbatch(1:chunk) , mask(chunk*i-chunk+1:chunk*i) )
     end do
-    call vkahan( a(size(a)-rr+1:size(a)) , sbatch(1:rr) , cbatch(1:rr) , mask(size(a)-rr+1:size(a)) )
+    call kahan_kernel( a(size(a)-rr+1:size(a)) , sbatch(1:rr) , cbatch(1:rr) , mask(size(a)-rr+1:size(a)) )
 
     s = zero_${rs}$
     do i = 1,chunk
-        call vkahan( sbatch(i) , s , cbatch(i) )
+        call kahan_kernel( sbatch(i) , s , cbatch(i) )
     end do
 end function
 #:endfor