fortran-lang
diff --git a/‎src/stdlib_constants.fypp
Lines changed: 6 additions & 1 deletion b/‎src/stdlib_constants.fypp
Lines changed: 6 additions & 1 deletion
diff --git a/‎src/stdlib_intrinsics.fypp
Lines changed: 51 additions & 51 deletions b/‎src/stdlib_intrinsics.fypp
Lines changed: 51 additions & 51 deletions
diff --git a/‎src/stdlib_intrinsics_dot_product.fypp
Lines changed: 38 additions & 35 deletions b/‎src/stdlib_intrinsics_dot_product.fypp
Lines changed: 38 additions & 35 deletions
@@ -1,12 +1,13 @@
 #:include "common.fypp"
 #:set KINDS = REAL_KINDS
+#:set I_KINDS_TYPES = list(zip(INT_KINDS, INT_TYPES, INT_KINDS))
 #:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
 #:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
 
 module stdlib_constants
     !! Constants
     !! ([Specification](../page/specs/stdlib_constants.html))
-    use stdlib_kinds, only: #{for k in KINDS[:-1]}#${k}$, #{endfor}#${KINDS[-1]}$
+    use stdlib_kinds
     use stdlib_codata, only: SPEED_OF_LIGHT_IN_VACUUM, &
                              VACUUM_ELECTRIC_PERMITTIVITY, &
                              VACUUM_MAG_PERMEABILITY, &
@@ -63,6 +64,10 @@ module stdlib_constants
     real(dp), parameter, public :: u = ATOMIC_MASS_CONSTANT%value !! Atomic mass constant
 
     ! Additional constants if needed
+    #:for k, t, s in I_KINDS_TYPES
+    ${t}$, parameter, public :: zero_${s}$ = 0_${k}$ !! Integer zero, one constant per entry of I_KINDS_TYPES
+    ${t}$, parameter, public :: one_${s}$  = 1_${k}$ !! Integer one, one constant per entry of I_KINDS_TYPES
+    #:endfor
     #:for k, t, s in R_KINDS_TYPES
     ${t}$, parameter, public :: zero_${s}$ = 0._${k}$
     ${t}$, parameter, public :: one_${s}$  = 1._${k}$
 
@@ -1,7 +1,7 @@
 #:include "common.fypp"
+#:set I_KINDS_TYPES = list(zip(INT_KINDS, INT_TYPES, INT_KINDS))
 #:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
 #:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
-#:set RC_KINDS_TYPES = R_KINDS_TYPES + C_KINDS_TYPES
 #:set RANKS = range(2, MAXRANK + 1)
 
 module stdlib_intrinsics
@@ -25,27 +25,27 @@ module stdlib_intrinsics
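-        !! The `N-D` interfaces calls upon the `(N-1)-D` implementation. 
-        !! Supported data types include `real` and `complex`.
+        !! The `N-D` interfaces call upon the `(N-1)-D` implementation.
+        !! Supported data types include `integer`, `real` and `complex`.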
         !!
-        #:for rk, rt, rs in RC_KINDS_TYPES
-        pure module function stdlib_sum_1d_${rs}$(a) result(s)
-            ${rt}$, intent(in) :: a(:)
-            ${rt}$ :: s
+        #:for k, t, s in I_KINDS_TYPES + R_KINDS_TYPES + C_KINDS_TYPES
+        pure module function stdlib_sum_1d_${s}$(a) result(s)
+            ${t}$, intent(in) :: a(:)
+            ${t}$ :: s
         end function
-        pure module function stdlib_sum_1d_${rs}$_mask(a,mask) result(s)
-            ${rt}$, intent(in) :: a(:)
+        pure module function stdlib_sum_1d_${s}$_mask(a,mask) result(s)
+            ${t}$, intent(in) :: a(:)
             logical, intent(in) :: mask(:)
-            ${rt}$ :: s
+            ${t}$ :: s
         end function
         #:for rank in RANKS
-        pure module function stdlib_sum_${rank}$d_${rs}$( x, mask ) result( s )
-            ${rt}$, intent(in) :: x${ranksuffix(rank)}$
+        pure module function stdlib_sum_${rank}$d_${s}$( x, mask ) result( s )
+            ${t}$, intent(in) :: x${ranksuffix(rank)}$
             logical, intent(in), optional :: mask${ranksuffix(rank)}$
-            ${rt}$ :: s
+            ${t}$ :: s
         end function
-        pure module function stdlib_sum_${rank}$d_dim_${rs}$( x , dim, mask ) result( s )
-            ${rt}$, intent(in) :: x${ranksuffix(rank)}$
+        pure module function stdlib_sum_${rank}$d_dim_${s}$( x , dim, mask ) result( s )
+            ${t}$, intent(in) :: x${ranksuffix(rank)}$
             integer, intent(in):: dim
             logical, intent(in), optional :: mask${ranksuffix(rank)}$
-            ${rt}$ :: s${reduced_shape('x', rank, 'dim')}$
+            ${t}$ :: s${reduced_shape('x', rank, 'dim')}$
         end function
         #:endfor
         #:endfor
@@ -66,27 +66,27 @@ module stdlib_intrinsics
-        !! The `N-D` interfaces calls upon the `(N-1)-D` implementation. 
+        !! The `N-D` interfaces call upon the `(N-1)-D` implementation.
         !! Supported data types include `real` and `complex`.
         !!
-        #:for rk, rt, rs in RC_KINDS_TYPES
-        pure module function stdlib_sum_kahan_1d_${rs}$(a) result(s)
-            ${rt}$, intent(in) :: a(:)
-            ${rt}$ :: s
+        #:for k, t, s in R_KINDS_TYPES + C_KINDS_TYPES
+        pure module function stdlib_sum_kahan_1d_${s}$(a) result(s)
+            ${t}$, intent(in) :: a(:)
+            ${t}$ :: s
         end function
-        pure module function stdlib_sum_kahan_1d_${rs}$_mask(a,mask) result(s)
-            ${rt}$, intent(in) :: a(:)
+        pure module function stdlib_sum_kahan_1d_${s}$_mask(a,mask) result(s)
+            ${t}$, intent(in) :: a(:)
             logical, intent(in) :: mask(:)
-            ${rt}$ :: s
+            ${t}$ :: s
         end function
         #:for rank in RANKS
-        pure module function stdlib_sum_kahan_${rank}$d_${rs}$( x, mask ) result( s )
-            ${rt}$, intent(in) :: x${ranksuffix(rank)}$
+        pure module function stdlib_sum_kahan_${rank}$d_${s}$( x, mask ) result( s )
+            ${t}$, intent(in) :: x${ranksuffix(rank)}$
             logical, intent(in), optional :: mask${ranksuffix(rank)}$
-            ${rt}$ :: s
+            ${t}$ :: s
         end function
-        pure module function stdlib_sum_kahan_${rank}$d_dim_${rs}$( x , dim, mask ) result( s )
-            ${rt}$, intent(in) :: x${ranksuffix(rank)}$
+        pure module function stdlib_sum_kahan_${rank}$d_dim_${s}$( x , dim, mask ) result( s )
+            ${t}$, intent(in) :: x${ranksuffix(rank)}$
             integer, intent(in):: dim
             logical, intent(in), optional :: mask${ranksuffix(rank)}$
-            ${rt}$ :: s${reduced_shape('x', rank, 'dim')}$
+            ${t}$ :: s${reduced_shape('x', rank, 'dim')}$
         end function
         #:endfor
         #:endfor
@@ -106,11 +106,11 @@ module stdlib_intrinsics
         !! The 1-D base implementation follows a chunked approach for optimizing performance and increasing accuracy.
-        !! Supported data types include `real` and `complex`.
+        !! Supported data types include `integer`, `real` and `complex`.
         !!
-        #:for rk, rt, rs in RC_KINDS_TYPES
-        pure module function stdlib_dot_product_${rs}$(a,b) result(p)
-            ${rt}$, intent(in) :: a(:)
-            ${rt}$, intent(in) :: b(:)
-            ${rt}$ :: p
+        #:for k, t, s in I_KINDS_TYPES + R_KINDS_TYPES + C_KINDS_TYPES
+        pure module function stdlib_dot_product_${s}$(a,b) result(p)
+            ${t}$, intent(in) :: a(:)
+            ${t}$, intent(in) :: b(:)
+            ${t}$ :: p
         end function
         #:endfor
     end interface
@@ -129,43 +129,43 @@ module stdlib_intrinsics
         !! The implementation follows a chunked approach combined with a kahan kernel for optimizing performance and increasing accuracy.
         !! Supported data types include `real` and `complex`.
         !!
-        #:for rk, rt, rs in RC_KINDS_TYPES
-        pure module function stdlib_dot_product_kahan_${rs}$(a,b) result(p)
-            ${rt}$, intent(in) :: a(:)
-            ${rt}$, intent(in) :: b(:)
-            ${rt}$ :: p
+        #:for k, t, s in R_KINDS_TYPES + C_KINDS_TYPES
+        pure module function stdlib_dot_product_kahan_${s}$(a,b) result(p)
+            ${t}$, intent(in) :: a(:)
+            ${t}$, intent(in) :: b(:)
+            ${t}$ :: p
         end function
         #:endfor
     end interface
     public :: stdlib_dot_product_kahan
 
     interface kahan_kernel 
-        #:for rk, rt, rs in RC_KINDS_TYPES
-        module procedure :: kahan_kernel_${rs}$
-        module procedure :: kahan_kernel_m_${rs}$
+        #:for k, t, s in R_KINDS_TYPES + C_KINDS_TYPES
+        module procedure :: kahan_kernel_${s}$ ! three-argument form (a,s,c)
+        module procedure :: kahan_kernel_m_${s}$ ! four-argument form with a logical m (a,s,c,m)
         #:endfor
     end interface
     public :: kahan_kernel
 
 contains
 
-#:for rk, rt, rs in RC_KINDS_TYPES
-elemental subroutine kahan_kernel_${rs}$(a,s,c)
-    ${rt}$, intent(in) :: a
-    ${rt}$, intent(inout) :: s
-    ${rt}$, intent(inout) :: c
-    ${rt}$ :: t, y
+#:for k, t, s in R_KINDS_TYPES + C_KINDS_TYPES
+elemental subroutine kahan_kernel_${s}$(a,s,c) !! One compensated-summation step: folds a into s, carrying c
+    ${t}$, intent(in) :: a !! Term to add
+    ${t}$, intent(inout) :: s !! Running sum; replaced by s + (a - c)
+    ${t}$, intent(inout) :: c !! Carried correction; replaced by (t - s) - y, using s as it was on entry
+    ${t}$ :: t, y ! y: term with the carried correction removed; t: new sum before it is stored in s
     y = a - c
     t = s + y
     c = (t - s) - y
     s = t
 end subroutine  
-elemental subroutine kahan_kernel_m_${rs}$(a,s,c,m)
-    ${rt}$, intent(in) :: a
-    ${rt}$, intent(inout) :: s
-    ${rt}$, intent(inout) :: c
+elemental subroutine kahan_kernel_m_${s}$(a,s,c,m)
+    ${t}$, intent(in) :: a
+    ${t}$, intent(inout) :: s
+    ${t}$, intent(inout) :: c
     logical, intent(in) :: m
-    ${rt}$ :: t, y
+    ${t}$ :: t, y
     y = a - c
     t = s + y
     c = (t - s) - y
 
@@ -1,7 +1,7 @@
 #:include "common.fypp"
+#:set I_KINDS_TYPES = list(zip(INT_KINDS, INT_TYPES, INT_KINDS))
 #:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
 #:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
-#:set RC_KINDS_TYPES = R_KINDS_TYPES + C_KINDS_TYPES
 
 #:def cnjg(type,expression)
 #:if 'complex' in type
@@ -18,55 +18,58 @@ submodule(stdlib_intrinsics) stdlib_intrinsics_dot_product
     use stdlib_constants
     implicit none
 
-    integer, parameter :: chunk = 64
+    integer, parameter :: ilp = int64 ! integer kind used below for array sizes, chunk and loop indices
 
 contains
 ! This implementation is based on https://github.com/jalvesz/fast_math
-#:for k1, t1, s1 in RC_KINDS_TYPES
-pure module function stdlib_dot_product_${s1}$(a,b) result(p)
-    ${t1}$, intent(in) :: a(:)
-    ${t1}$, intent(in) :: b(:)
-    ${t1}$ :: p
-    ${t1}$ :: abatch(chunk)
-    integer :: i, dr, rr
+#:for k, t, s in I_KINDS_TYPES + R_KINDS_TYPES + C_KINDS_TYPES
+pure module function stdlib_dot_product_${s}$(a,b) result(p) !! Dot product accumulated in chunk-wide lanes
+    integer(ilp), parameter :: chunk = 64 ! number of partial sums kept in abatch
+    ${t}$, intent(in) :: a(:)
+    ${t}$, intent(in) :: b(:) ! NOTE(review): cnjg() wraps b; intrinsic dot_product conjugates vector_a -- confirm
+    ${t}$ :: p
+    ${t}$ :: abatch(chunk) ! per-lane partial sums
+    integer(ilp) :: i, n, r
     ! -----------------------------
-    dr = size(a)/chunk
-    rr = size(a) - dr*chunk
+    n = size(a,kind=ilp) ! length is taken from a only; size(b) is not checked here
+    r = mod(n,chunk) ! leading elements left over once n is cut into whole chunks
 
-    abatch = zero_${s1}$
-    do i = 1, dr
-        abatch(1:chunk) = abatch(1:chunk) + a(chunk*i-chunk+1:chunk*i)*${cnjg(t1,'b(chunk*i-chunk+1:chunk*i)')}$
+    abatch(1:r)       = a(1:r)*${cnjg(t,'b(1:r)')}$
+    abatch(r+1:chunk) = zero_${s}$ ! lanes not seeded by the leftover elements start from zero
+    do i = r+1, n-r, chunk ! one full chunk per pass, starting just after the r leftover elements
+        abatch(1:chunk) = abatch(1:chunk) + a(i:i+chunk-1)*${cnjg(t,'b(i:i+chunk-1)')}$
     end do
-    abatch(1:rr) = abatch(1:rr) + a(size(a)-rr+1:size(a))*${cnjg(t1,'b(size(a)-rr+1:size(a))')}$
 
-    p = zero_${s1}$
+    p = zero_${s}$ ! scalar result; the loop below adds lanes i and chunk/2+i together
     do i = 1, chunk/2
         p = p + abatch(i)+abatch(chunk/2+i)
     end do
 end function
 #:endfor
 
-#:for k1, t1, s1 in RC_KINDS_TYPES
-pure module function stdlib_dot_product_kahan_${s1}$(a,b) result(p)
-    ${t1}$, intent(in) :: a(:)
-    ${t1}$, intent(in) :: b(:)
-    ${t1}$ :: p
-    ${t1}$ :: pbatch(chunk)
-    ${t1}$ :: cbatch(chunk)
-    integer :: i, dr, rr
+#:for k, t, s in R_KINDS_TYPES + C_KINDS_TYPES
+pure module function stdlib_dot_product_kahan_${s}$(a,b) result(p) !! Chunked dot product using kahan_kernel
+    integer(ilp), parameter :: chunk = 64 ! number of lanes in abatch and cbatch
+    ${t}$, intent(in) :: a(:)
+    ${t}$, intent(in) :: b(:) ! NOTE(review): cnjg() wraps b; intrinsic dot_product conjugates vector_a -- confirm
+    ${t}$ :: p
+    ${t}$ :: abatch(chunk) ! per-lane running sums
+    ${t}$ :: cbatch(chunk) ! per-lane correction terms passed to kahan_kernel with abatch
+    integer(ilp) :: i, n, r
     ! -----------------------------
-    dr = size(a)/(chunk)
-    rr = size(a) - dr*chunk     
-    pbatch = zero_${s1}$
-    cbatch = zero_${s1}$
-    do i = 1, dr
-        call kahan_kernel( a(chunk*i-chunk+1:chunk*i)*${cnjg(t1,'b(chunk*i-chunk+1:chunk*i)')}$ , pbatch(1:chunk) , cbatch(1:chunk) )
-    end do
-    call kahan_kernel( a(size(a)-rr+1:size(a))*${cnjg(t1,'b(size(a)-rr+1:size(a))')}$ , pbatch(1:rr) , cbatch(1:rr) )      
+    n = size(a,kind=ilp) ! length is taken from a only; size(b) is not checked here
+    r = mod(n,chunk) ! leading elements left over once n is cut into whole chunks
+
+    abatch(1:r)       = a(1:r)*${cnjg(t,'b(1:r)')}$
+    abatch(r+1:chunk) = zero_${s}$ ! lanes not seeded by the leftover elements start from zero
+    cbatch = zero_${s}$ ! no correction is carried before the first kernel call
+    do i = r+1, n-r, chunk ! one full chunk of products per kernel call
+        call kahan_kernel( a(i:i+chunk-1)*${cnjg(t,'b(i:i+chunk-1)')}$ , abatch(1:chunk) , cbatch(1:chunk) )
+    end do     
 
-    p = zero_${s1}$
-    do i = 1,chunk
-        call kahan_kernel( pbatch(i) , p , cbatch(i) )
+    p = zero_${s}$ ! scalar result; lanes are folded in below with the same kernel
+    do i = 1, chunk
+        call kahan_kernel( abatch(i) , p , cbatch(i) ) ! cbatch(i) enters as the correction for lane i
     end do      
 end function
 #:endfor