|
| 1 | +!***************************************************************************************** |
| 2 | +!> |
| 3 | +! BLAS procedures, which can be use used if not linking with a BLAS library, |
| 4 | +! if one is not available, or if a real kind /= `real64` is required. |
| 5 | +! |
| 6 | +! The original code has been slightly modernized. |
| 7 | +! |
| 8 | +!### Notes |
| 9 | +!``` |
| 10 | +! reference blas level1 routines |
| 11 | +! reference blas is a software package provided by univ. of tennessee, |
| 12 | +! univ. of california berkeley, univ. of colorado denver and nag ltd.. |
| 13 | +!``` |
| 14 | +! |
| 15 | +!### See also |
| 16 | +! * [BLAS Sourcecode](https://github.com/Reference-LAPACK/lapack/tree/master/BLAS/SRC) |
| 17 | + |
| 18 | +module bspline_blas_module |
| 19 | + |
| 20 | +#ifndef HAS_BLAS |
| 21 | + |
| 22 | + use bspline_kinds_module, only: wp, ip |
| 23 | + |
| 24 | + implicit none |
| 25 | + private |
| 26 | + |
| 27 | + public :: daxpy,dcopy,dscal,dswap,ddot |
| 28 | + |
| 29 | +contains |
| 30 | + |
| 31 | + subroutine daxpy(n, da, dx, incx, dy, incy) |
| 32 | + !! DAXPY constant times a vector plus a vector. |
| 33 | + !! uses unrolled loops for increments equal to one. |
| 34 | + |
| 35 | + real(wp) :: da |
| 36 | + integer(ip) :: incx, incy, n |
| 37 | + real(wp) :: dx(*), dy(*) |
| 38 | + |
| 39 | + integer(ip) :: i, ix, iy, m, mp1 |
| 40 | + |
| 41 | + if (n <= 0_ip) return |
| 42 | + if (da == 0.0_wp) return |
| 43 | + if (incx == 1_ip .and. incy == 1_ip) then |
| 44 | +! |
| 45 | +! code for both increments equal to 1 |
| 46 | +! |
| 47 | +! |
| 48 | +! clean-up loop |
| 49 | +! |
| 50 | + m = mod(n, 4_ip) |
| 51 | + if (m /= 0_ip) then |
| 52 | + do i = 1_ip, m |
| 53 | + dy(i) = dy(i) + da*dx(i) |
| 54 | + end do |
| 55 | + end if |
| 56 | + if (n < 4_ip) return |
| 57 | + mp1 = m + 1_ip |
| 58 | + do i = mp1, n, 4_ip |
| 59 | + dy(i) = dy(i) + da*dx(i) |
| 60 | + dy(i + 1_ip) = dy(i + 1_ip) + da*dx(i + 1_ip) |
| 61 | + dy(i + 2_ip) = dy(i + 2_ip) + da*dx(i + 2_ip) |
| 62 | + dy(i + 3_ip) = dy(i + 3_ip) + da*dx(i + 3_ip) |
| 63 | + end do |
| 64 | + else |
| 65 | +! |
| 66 | +! code for unequal increments or equal increments |
| 67 | +! not equal to 1 |
| 68 | +! |
| 69 | + ix = 1_ip |
| 70 | + iy = 1_ip |
| 71 | + if (incx < 0_ip) ix = (-n + 1_ip)*incx + 1_ip |
| 72 | + if (incy < 0_ip) iy = (-n + 1_ip)*incy + 1_ip |
| 73 | + do i = 1_ip, n |
| 74 | + dy(iy) = dy(iy) + da*dx(ix) |
| 75 | + ix = ix + incx |
| 76 | + iy = iy + incy |
| 77 | + end do |
| 78 | + end if |
| 79 | + |
| 80 | + end subroutine daxpy |
| 81 | + |
| 82 | + subroutine dcopy(n, dx, incx, dy, incy) |
| 83 | + !! DCOPY copies a vector, x, to a vector, y. |
| 84 | + !! uses unrolled loops for increments equal to 1. |
| 85 | + |
| 86 | + integer(ip) :: incx, incy, n |
| 87 | + real(wp) :: dx(*), dy(*) |
| 88 | + |
| 89 | + integer(ip) :: i, ix, iy, m, mp1 |
| 90 | + |
| 91 | + if (n <= 0_ip) return |
| 92 | + if (incx == 1_ip .and. incy == 1_ip) then |
| 93 | +! |
| 94 | +! code for both increments equal to 1 |
| 95 | +! |
| 96 | +! |
| 97 | +! clean-up loop |
| 98 | +! |
| 99 | + m = mod(n, 7_ip) |
| 100 | + if (m /= 0_ip) then |
| 101 | + do i = 1_ip, m |
| 102 | + dy(i) = dx(i) |
| 103 | + end do |
| 104 | + if (n < 7_ip) return |
| 105 | + end if |
| 106 | + mp1 = m + 1_ip |
| 107 | + do i = mp1, n, 7_ip |
| 108 | + dy(i) = dx(i) |
| 109 | + dy(i + 1_ip) = dx(i + 1_ip) |
| 110 | + dy(i + 2_ip) = dx(i + 2_ip) |
| 111 | + dy(i + 3_ip) = dx(i + 3_ip) |
| 112 | + dy(i + 4_ip) = dx(i + 4_ip) |
| 113 | + dy(i + 5_ip) = dx(i + 5_ip) |
| 114 | + dy(i + 6_ip) = dx(i + 6_ip) |
| 115 | + end do |
| 116 | + else |
| 117 | +! |
| 118 | +! code for unequal increments or equal increments |
| 119 | +! not equal to 1 |
| 120 | +! |
| 121 | + ix = 1_ip |
| 122 | + iy = 1_ip |
| 123 | + if (incx < 0_ip) ix = (-n + 1_ip)*incx + 1_ip |
| 124 | + if (incy < 0_ip) iy = (-n + 1_ip)*incy + 1_ip |
| 125 | + do i = 1_ip, n |
| 126 | + dy(iy) = dx(ix) |
| 127 | + ix = ix + incx |
| 128 | + iy = iy + incy |
| 129 | + end do |
| 130 | + end if |
| 131 | + |
| 132 | + end subroutine dcopy |
| 133 | + |
| 134 | + subroutine dscal(n, da, dx, incx) |
| 135 | + !! DSCAL scales a vector by a constant. |
| 136 | + !! uses unrolled loops for increment equal to 1. |
| 137 | + |
| 138 | + real(wp) :: da |
| 139 | + integer(ip) :: incx, n |
| 140 | + real(wp) :: dx(*) |
| 141 | + |
| 142 | + integer i, m, mp1, nincx |
| 143 | + |
| 144 | + if (n <= 0_ip .or. incx <= 0_ip) return |
| 145 | + if (incx == 1_ip) then |
| 146 | +! |
| 147 | +! code for increment equal to 1 |
| 148 | +! |
| 149 | +! |
| 150 | +! clean-up loop |
| 151 | +! |
| 152 | + m = mod(n, 5_ip) |
| 153 | + if (m /= 0_ip) then |
| 154 | + do i = 1_ip, m |
| 155 | + dx(i) = da*dx(i) |
| 156 | + end do |
| 157 | + if (n < 5_ip) return |
| 158 | + end if |
| 159 | + mp1 = m + 1_ip |
| 160 | + do i = mp1, n, 5_ip |
| 161 | + dx(i) = da*dx(i) |
| 162 | + dx(i + 1_ip) = da*dx(i + 1_ip) |
| 163 | + dx(i + 2_ip) = da*dx(i + 2_ip) |
| 164 | + dx(i + 3_ip) = da*dx(i + 3_ip) |
| 165 | + dx(i + 4_ip) = da*dx(i + 4_ip) |
| 166 | + end do |
| 167 | + else |
| 168 | +! |
| 169 | +! code for increment not equal to 1 |
| 170 | +! |
| 171 | + nincx = n*incx |
| 172 | + do i = 1_ip, nincx, incx |
| 173 | + dx(i) = da*dx(i) |
| 174 | + end do |
| 175 | + end if |
| 176 | + |
| 177 | + end subroutine dscal |
| 178 | + |
| 179 | + subroutine dswap(n, dx, incx, dy, incy) |
| 180 | + !! DSWAP interchanges two vectors. |
| 181 | + !! uses unrolled loops for increments equal to 1. |
| 182 | + |
| 183 | + integer(ip) :: incx, incy, n |
| 184 | + real(wp) :: dx(*), dy(*) |
| 185 | + |
| 186 | + real(wp) :: dtemp |
| 187 | + integer(ip) :: i, ix, iy, m, mp1 |
| 188 | + |
| 189 | + if (n <= 0_ip) return |
| 190 | + if (incx == 1_ip .and. incy == 1_ip) then |
| 191 | +! |
| 192 | +! code for both increments equal to 1 |
| 193 | +! |
| 194 | +! |
| 195 | +! clean-up loop |
| 196 | +! |
| 197 | + m = mod(n, 3_ip) |
| 198 | + if (m /= 0_ip) then |
| 199 | + do i = 1_ip, m |
| 200 | + dtemp = dx(i) |
| 201 | + dx(i) = dy(i) |
| 202 | + dy(i) = dtemp |
| 203 | + end do |
| 204 | + if (n < 3_ip) return |
| 205 | + end if |
| 206 | + mp1 = m + 1_ip |
| 207 | + do i = mp1, n, 3_ip |
| 208 | + dtemp = dx(i) |
| 209 | + dx(i) = dy(i) |
| 210 | + dy(i) = dtemp |
| 211 | + dtemp = dx(i + 1_ip) |
| 212 | + dx(i + 1_ip) = dy(i + 1_ip) |
| 213 | + dy(i + 1_ip) = dtemp |
| 214 | + dtemp = dx(i + 2_ip) |
| 215 | + dx(i + 2_ip) = dy(i + 2_ip) |
| 216 | + dy(i + 2_ip) = dtemp |
| 217 | + end do |
| 218 | + else |
| 219 | +! |
| 220 | +! code for unequal increments or equal increments not equal |
| 221 | +! to 1 |
| 222 | +! |
| 223 | + ix = 1 |
| 224 | + iy = 1 |
| 225 | + if (incx < 0_ip) ix = (-n + 1_ip)*incx + 1_ip |
| 226 | + if (incy < 0_ip) iy = (-n + 1_ip)*incy + 1_ip |
| 227 | + do i = 1_ip, n |
| 228 | + dtemp = dx(ix) |
| 229 | + dx(ix) = dy(iy) |
| 230 | + dy(iy) = dtemp |
| 231 | + ix = ix + incx |
| 232 | + iy = iy + incy |
| 233 | + end do |
| 234 | + end if |
| 235 | + |
| 236 | + end subroutine dswap |
| 237 | + |
| 238 | + real(wp) function ddot(n, dx, incx, dy, incy) |
| 239 | + !! ddot forms the dot product of two vectors. |
| 240 | + !! uses unrolled loops for increments equal to one. |
| 241 | + |
| 242 | + integer(ip) :: incx, incy, n |
| 243 | + real(wp) :: dx(*), dy(*) |
| 244 | + |
| 245 | + real(wp) :: dtemp |
| 246 | + integer(ip) :: i, ix, iy, m, mp1 |
| 247 | + |
| 248 | + ddot = 0.0_wp |
| 249 | + dtemp = 0.0_wp |
| 250 | + if (n <= 0_ip) return |
| 251 | + if (incx == 1_ip .and. incy == 1_ip) then |
| 252 | + ! code for both increments equal to 1 |
| 253 | + ! clean-up loop |
| 254 | + m = mod(n, 5_ip) |
| 255 | + if (m /= 0_ip) then |
| 256 | + do i = 1_ip, m |
| 257 | + dtemp = dtemp + dx(i)*dy(i) |
| 258 | + end do |
| 259 | + if (n < 5_ip) then |
| 260 | + ddot = dtemp |
| 261 | + return |
| 262 | + end if |
| 263 | + end if |
| 264 | + mp1 = m + 1_ip |
| 265 | + do i = mp1, n, 5_ip |
| 266 | + dtemp = dtemp + dx(i)*dy(i) + & |
| 267 | + dx(i + 1_ip)*dy(i + 1_ip) + dx(i + 2_ip)*dy(i + 2_ip) + & |
| 268 | + dx(i + 3_ip)*dy(i + 3_ip) + dx(i + 4_ip)*dy(i + 4_ip) |
| 269 | + end do |
| 270 | + else |
| 271 | + ! code for unequal increments or equal increments |
| 272 | + ! not equal to 1 |
| 273 | + ix = 1_ip |
| 274 | + iy = 1_ip |
| 275 | + if (incx < 0_ip) ix = (-n + 1_ip)*incx + 1_ip |
| 276 | + if (incy < 0_ip) iy = (-n + 1_ip)*incy + 1_ip |
| 277 | + do i = 1_ip, n |
| 278 | + dtemp = dtemp + dx(ix)*dy(iy) |
| 279 | + ix = ix + incx |
| 280 | + iy = iy + incy |
| 281 | + end do |
| 282 | + end if |
| 283 | + ddot = dtemp |
| 284 | + |
| 285 | + end function ddot |
| 286 | + |
| 287 | +#endif |
| 288 | + |
| 289 | + end module bspline_blas_module |
0 commit comments