Skip to content

Commit 415e140

Browse files
Merge branch 'master' into try-xLARTG-from-Edward-Anderson
2 parents ffb4666 + 5b07c5e commit 415e140

22 files changed

+1878
-1184
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ script:
5959
-DLAPACKE:BOOL=ON
6060
-DBUILD_TESTING=ON
6161
-DLAPACKE_WITH_TMG:BOOL=ON
62-
-DBUILD_SHARED_LIBS=BOOL:ON
62+
-DBUILD_SHARED_LIBS:BOOL=ON
6363
-DCMAKE_Fortran_FLAGS:STRING="-fimplicit-none -frecursive -fcheck=all"
6464
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
6565
${SRC_DIR}

BLAS/SRC/CMakeLists.txt

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,22 @@
2828
#---------------------------------------------------------
2929
# Level 1 BLAS
3030
#---------------------------------------------------------
31-
set(SBLAS1 isamax.f sasum.f saxpy.f scopy.f sdot.f snrm2.f
31+
32+
set(SBLAS1 isamax.f sasum.f saxpy.f scopy.f sdot.f snrm2.f90
3233
srot.f srotg.f90 sscal.f sswap.f sdsdot.f srotmg.f srotm.f)
3334

34-
set(CBLAS1 scabs1.f scasum.f scnrm2.f icamax.f caxpy.f ccopy.f
35+
set(CBLAS1 scabs1.f scasum.f scnrm2.f90 icamax.f caxpy.f ccopy.f
3536
cdotc.f cdotu.f csscal.f crotg.f90 cscal.f cswap.f csrot.f)
3637

37-
set(DBLAS1 idamax.f dasum.f daxpy.f dcopy.f ddot.f dnrm2.f
38+
set(DBLAS1 idamax.f dasum.f daxpy.f dcopy.f ddot.f dnrm2.f90
3839
drot.f drotg.f90 dscal.f dsdot.f dswap.f drotmg.f drotm.f)
3940

40-
set(ZBLAS1 dcabs1.f dzasum.f dznrm2.f izamax.f zaxpy.f zcopy.f
41+
set(ZBLAS1 dcabs1.f dzasum.f dznrm2.f90 izamax.f zaxpy.f zcopy.f
4142
zdotc.f zdotu.f zdscal.f zrotg.f90 zscal.f zswap.f zdrot.f)
4243

43-
set(CB1AUX isamax.f sasum.f saxpy.f scopy.f snrm2.f sscal.f)
44+
set(CB1AUX isamax.f sasum.f saxpy.f scopy.f snrm2.f90 sscal.f)
4445

45-
set(ZB1AUX idamax.f dasum.f daxpy.f dcopy.f dnrm2.f dscal.f)
46+
set(ZB1AUX idamax.f dasum.f daxpy.f dcopy.f dnrm2.f90 dscal.f)
4647

4748
#---------------------------------------------------------------------
4849
# Auxiliary routines needed by both the Level 2 and Level 3 BLAS

BLAS/SRC/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@
5656
TOPSRCDIR = ../..
5757
include $(TOPSRCDIR)/make.inc
5858

59-
.SUFFIXES: .f90 .o
59+
.SUFFIXES: .F .f90 .o
60+
.F.o:
61+
$(FC) $(FFLAGS) -c -o $@ $<
6062
.f90.o:
6163
$(FC) $(FFLAGS) -c -o $@ $<
6264

BLAS/SRC/dnrm2.f

Lines changed: 0 additions & 129 deletions
This file was deleted.

BLAS/SRC/dnrm2.f90

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
!> \brief \b DNRM2
2+
!
3+
! =========== DOCUMENTATION ===========
4+
!
5+
! Online html documentation available at
6+
! http://www.netlib.org/lapack/explore-html/
7+
!
8+
! Definition:
9+
! ===========
10+
!
11+
! DOUBLE PRECISION FUNCTION DNRM2(N,X,INCX)
12+
!
13+
! .. Scalar Arguments ..
14+
! INTEGER INCX,N
15+
! ..
16+
! .. Array Arguments ..
17+
! DOUBLE PRECISION X(*)
18+
! ..
19+
!
20+
!
21+
!> \par Purpose:
22+
! =============
23+
!>
24+
!> \verbatim
25+
!>
26+
!> DNRM2 returns the euclidean norm of a vector via the function
27+
!> name, so that
28+
!>
29+
!> DNRM2 := sqrt( x'*x )
30+
!> \endverbatim
31+
!
32+
! Arguments:
33+
! ==========
34+
!
35+
!> \param[in] N
36+
!> \verbatim
37+
!> N is INTEGER
38+
!> number of elements in input vector(s)
39+
!> \endverbatim
40+
!>
41+
!> \param[in] X
42+
!> \verbatim
43+
!> X is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) )
44+
!> \endverbatim
45+
!>
46+
!> \param[in] INCX
47+
!> \verbatim
48+
!> INCX is INTEGER, storage spacing between elements of X
49+
!> If INCX > 0, X(1+(i-1)*INCX) = x(i) for 1 <= i <= n
50+
!> If INCX < 0, X(1-(n-i)*INCX) = x(i) for 1 <= i <= n
51+
!> If INCX = 0, x isn't a vector so there is no need to call
52+
!> this subroutine. If you call it anyway, it will count x(1)
53+
!> in the vector norm N times.
54+
!> \endverbatim
55+
!
56+
! Authors:
57+
! ========
58+
!
59+
!> \author Edward Anderson, Lockheed Martin
60+
!
61+
!> \date August 2016
62+
!
63+
!> \ingroup single_blas_level1
64+
!
65+
!> \par Contributors:
66+
! ==================
67+
!>
68+
!> Weslley Pereira, University of Colorado Denver, USA
69+
!
70+
!> \par Further Details:
71+
! =====================
72+
!>
73+
!> \verbatim
74+
!>
75+
!> Anderson E. (2017)
76+
!> Algorithm 978: Safe Scaling in the Level 1 BLAS
77+
!> ACM Trans Math Softw 44:1--28
78+
!> https://doi.org/10.1145/3061665
79+
!>
80+
!> Blue, James L. (1978)
81+
!> A Portable Fortran Program to Find the Euclidean Norm of a Vector
82+
!> ACM Trans Math Softw 4:15--23
83+
!> https://doi.org/10.1145/355769.355771
84+
!>
85+
!> \endverbatim
86+
!>
87+
! =====================================================================
88+
function DNRM2( n, x, incx )
89+
integer, parameter :: wp = kind(1.d0)
90+
real(wp) :: DNRM2
91+
!
92+
! -- Reference BLAS level1 routine (version 3.9.1) --
93+
! -- Reference BLAS is a software package provided by Univ. of Tennessee, --
94+
! -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
95+
! March 2021
96+
!
97+
! .. Constants ..
98+
real(wp), parameter :: zero = 0.0_wp
99+
real(wp), parameter :: one = 1.0_wp
100+
real(wp), parameter :: maxN = huge(0.0_wp)
101+
! ..
102+
! .. Blue's ccaling constants ..
103+
real(wp), parameter :: tsml = real(radix(0._wp), wp)**ceiling( &
104+
(minexponent(0._wp) - 1) * 0.5_wp)
105+
real(wp), parameter :: tbig = real(radix(0._wp), wp)**floor( &
106+
(maxexponent(0._wp) - digits(0._wp) + 1) * 0.5_wp)
107+
real(wp), parameter :: ssml = real(radix(0._wp), wp)**( - floor( &
108+
(minexponent(0._wp) - 1) * 0.5_wp))
109+
real(wp), parameter :: sbig = real(radix(0._wp), wp)**( - ceiling( &
110+
(maxexponent(0._wp) - digits(0._wp) + 1) * 0.5_wp))
111+
! ..
112+
! .. Scalar Arguments ..
113+
integer :: incx, n
114+
! ..
115+
! .. Array Arguments ..
116+
real(wp) :: x(*)
117+
! ..
118+
! .. Local Scalars ..
119+
integer :: i, ix
120+
logical :: notbig
121+
real(wp) :: abig, amed, asml, ax, scl, sumsq, ymax, ymin
122+
!
123+
! Quick return if possible
124+
!
125+
DNRM2 = zero
126+
if( n <= 0 ) return
127+
!
128+
scl = one
129+
sumsq = zero
130+
!
131+
! Compute the sum of squares in 3 accumulators:
132+
! abig -- sums of squares scaled down to avoid overflow
133+
! asml -- sums of squares scaled up to avoid underflow
134+
! amed -- sums of squares that do not require scaling
135+
! The thresholds and multipliers are
136+
! tbig -- values bigger than this are scaled down by sbig
137+
! tsml -- values smaller than this are scaled up by ssml
138+
!
139+
notbig = .true.
140+
asml = zero
141+
amed = zero
142+
abig = zero
143+
ix = 1
144+
if( incx < 0 ) ix = 1 - (n-1)*incx
145+
do i = 1, n
146+
ax = abs(x(ix))
147+
if (ax > tbig) then
148+
abig = abig + (ax*sbig)**2
149+
notbig = .false.
150+
else if (ax < tsml) then
151+
if (notbig) asml = asml + (ax*ssml)**2
152+
else
153+
amed = amed + ax**2
154+
end if
155+
ix = ix + incx
156+
end do
157+
!
158+
! Combine abig and amed or amed and asml if more than one
159+
! accumulator was used.
160+
!
161+
if (abig > zero) then
162+
!
163+
! Combine abig and amed if abig > 0.
164+
!
165+
if ( (amed > zero) .or. (amed > maxN) .or. (amed /= amed) ) then
166+
abig = abig + (amed*sbig)*sbig
167+
end if
168+
scl = one / sbig
169+
sumsq = abig
170+
else if (asml > zero) then
171+
!
172+
! Combine amed and asml if asml > 0.
173+
!
174+
if ( (amed > zero) .or. (amed > maxN) .or. (amed /= amed) ) then
175+
amed = sqrt(amed)
176+
asml = sqrt(asml) / ssml
177+
if (asml > amed) then
178+
ymin = amed
179+
ymax = asml
180+
else
181+
ymin = asml
182+
ymax = amed
183+
end if
184+
scl = one
185+
sumsq = ymax**2*( one + (ymin/ymax)**2 )
186+
else
187+
scl = one / ssml
188+
sumsq = asml
189+
end if
190+
else
191+
!
192+
! Otherwise all values are mid-range
193+
!
194+
scl = one
195+
sumsq = amed
196+
end if
197+
DNRM2 = scl*sqrt( sumsq )
198+
return
199+
end function

0 commit comments

Comments
 (0)