Skip to content

Commit 43613a7

Browse files
Adds safe scaling xLARTG routines proposed in https://doi.org/10.1145/3061665; Let the compiler determine the Fortran layout by the file extension
1 parent 77a0ceb commit 43613a7

File tree

8 files changed

+596
-7
lines changed

8 files changed

+596
-7
lines changed

CMakeLists.txt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,6 @@ set(CMAKE_MODULE_PATH "${LAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH})
1616
# Export all symbols on Windows when building shared libraries
1717
SET(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
1818

19-
# Tell CMake that our Fortran sources are written in fixed format.
20-
set(CMAKE_Fortran_FORMAT FIXED)
21-
2219
# Set a default build type if none was specified
2320
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
2421
message(STATUS "Setting build type to 'Release' as none was specified.")

SRC/CMakeLists.txt

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F
4141
../INSTALL/slamch.f)
4242

4343
set(SCLAUX
44+
la_constants32.f90
4445
sbdsdc.f
4546
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
4647
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
@@ -49,7 +50,7 @@ set(SCLAUX
4950
slapy2.f slapy3.f slarnv.f
5051
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
5152
slarrk.f slarrr.f slaneg.f
52-
slartg.f slaruv.f slas2.f slascl.f
53+
slartg.f90 slaruv.f slas2.f slascl.f
5354
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
5455
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
5556
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
@@ -59,6 +60,7 @@ set(SCLAUX
5960
${SECOND_SRC})
6061

6162
set(DZLAUX
63+
la_constants.f90
6264
dbdsdc.f
6365
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
6466
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
@@ -67,7 +69,7 @@ set(DZLAUX
6769
dlapy2.f dlapy3.f dlarnv.f
6870
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
6971
dlarrk.f dlarrr.f dlaneg.f
70-
dlartg.f dlaruv.f dlas2.f dlascl.f
72+
dlartg.f90 dlaruv.f dlas2.f dlascl.f
7173
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
7274
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
7375
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
@@ -208,7 +210,7 @@ set(CLASRC
208210
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
209211
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
210212
clarf.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f
211-
clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
213+
clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f90 clartv.f
212214
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
213215
claswp.f clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f
214216
clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
@@ -403,7 +405,7 @@ set(ZLASRC
403405
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
404406
zlarcm.f zlarf.f zlarfb.f zlarfb_gett.f
405407
zlarfg.f zlarfgp.f zlarft.f
406-
zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
408+
zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f90 zlartv.f
407409
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
408410
zlassq.f zlaswp.f zlasyf.f zlasyf_rook.f zlasyf_rk.f zlasyf_aa.f
409411
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlauu2.f

SRC/clartg.f90

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
subroutine CLARTG( f, g, c, s, r )
2+
use LA_CONSTANTS32, only: wp, zero, one, two, czero, rtmin, rtmax, &
3+
safmin, safmax
4+
!
5+
! LAPACK auxiliary routine
6+
! E. Anderson
7+
! August 4, 2016
8+
!
9+
! .. Scalar Arguments ..
10+
real(wp) c
11+
complex(wp) f, g, r, s
12+
! ..
13+
!
14+
! Purpose
15+
! =======
16+
!
17+
! CLARTG generates a plane rotation so that
18+
!
19+
! [ C S ] . [ F ] = [ R ]
20+
! [ -conjg(S) C ] [ G ] [ 0 ]
21+
!
22+
! where C is real and C**2 + |S|**2 = 1.
23+
!
24+
! The mathematical formulas used for C and S are
25+
!
26+
! sgn(x) = { x / |x|, x != 0
27+
! { 1, x = 0
28+
!
29+
! R = sgn(F) * sqrt(|F|**2 + |G|**2)
30+
!
31+
! C = |F| / sqrt(|F|**2 + |G|**2)
32+
!
33+
! S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2)
34+
!
35+
! When F and G are real, the formulas simplify to C = F/R and
36+
! S = G/R, and the returned values of C, S, and R should be
37+
! identical to those returned by SLARTG.
38+
!
39+
! The algorithm used to compute these quantities incorporates scaling
40+
! to avoid overflow or underflow in computing the square root of the
41+
! sum of squares.
42+
!
43+
! Arguments
44+
! =========
45+
!
46+
! F (input) COMPLEX
47+
! The first component of vector to be rotated.
48+
!
49+
! G (input) COMPLEX
50+
! The second component of vector to be rotated.
51+
!
52+
! C (output) REAL
53+
! The cosine of the rotation.
54+
!
55+
! S (output) COMPLEX
56+
! The sine of the rotation.
57+
!
58+
! R (output) COMPLEX
59+
! The nonzero component of the rotated vector.
60+
!
61+
! =====================================================================
62+
!
63+
! .. Local Scalars ..
64+
real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
65+
complex(wp) :: fs, gs, t
66+
! ..
67+
! .. Intrinsic Functions ..
68+
intrinsic :: abs, aimag, conjg, max, min, real, sqrt
69+
! ..
70+
! .. Statement Functions ..
71+
real(wp) :: ABSSQ
72+
! ..
73+
! .. Statement Function definitions ..
74+
ABSSQ( t ) = real( t )**2 + aimag( t )**2
75+
! ..
76+
! .. Executable Statements ..
77+
!
78+
if( g == czero ) then
79+
c = one
80+
s = czero
81+
r = f
82+
else if( f == czero ) then
83+
c = zero
84+
g1 = max( abs(real(g)), abs(aimag(g)) )
85+
if( g1 > rtmin .and. g1 < rtmax ) then
86+
!
87+
! Use unscaled algorithm
88+
!
89+
g2 = ABSSQ( g )
90+
d = sqrt( g2 )
91+
s = conjg( g ) / d
92+
r = d
93+
else
94+
!
95+
! Use scaled algorithm
96+
!
97+
u = min( safmax, max( safmin, g1 ) )
98+
uu = one / u
99+
gs = g*uu
100+
g2 = ABSSQ( gs )
101+
d = sqrt( g2 )
102+
s = conjg( gs ) / d
103+
r = d*u
104+
end if
105+
else
106+
f1 = max( abs(real(f)), abs(aimag(f)) )
107+
g1 = max( abs(real(g)), abs(aimag(g)) )
108+
if( f1 > rtmin .and. f1 < rtmax .and. &
109+
g1 > rtmin .and. g1 < rtmax ) then
110+
!
111+
! Use unscaled algorithm
112+
!
113+
f2 = ABSSQ( f )
114+
g2 = ABSSQ( g )
115+
h2 = f2 + g2
116+
if( f2 > rtmin .and. h2 < rtmax ) then
117+
d = sqrt( f2*h2 )
118+
else
119+
d = sqrt( f2 )*sqrt( h2 )
120+
end if
121+
p = 1 / d
122+
c = f2*p
123+
s = conjg( g )*( f*p )
124+
r = f*( h2*p )
125+
else
126+
!
127+
! Use scaled algorithm
128+
!
129+
u = min( safmax, max( safmin, f1, g1 ) )
130+
uu = one / u
131+
gs = g*uu
132+
g2 = ABSSQ( gs )
133+
if( f1*uu < rtmin ) then
134+
!
135+
! f is not well-scaled when scaled by g1.
136+
! Use a different scaling for f.
137+
!
138+
v = min( safmax, max( safmin, f1 ) )
139+
vv = one / v
140+
w = v * uu
141+
fs = f*vv
142+
f2 = ABSSQ( fs )
143+
h2 = f2*w**2 + g2
144+
else
145+
!
146+
! Otherwise use the same scaling for f and g.
147+
!
148+
w = one
149+
fs = f*uu
150+
f2 = ABSSQ( fs )
151+
h2 = f2 + g2
152+
end if
153+
if( f2 > rtmin .and. h2 < rtmax ) then
154+
d = sqrt( f2*h2 )
155+
else
156+
d = sqrt( f2 )*sqrt( h2 )
157+
end if
158+
p = 1 / d
159+
c = ( f2*p )*w
160+
s = conjg( gs )*( fs*p )
161+
r = ( fs*( h2*p ) )*u
162+
end if
163+
end if
164+
return
165+
end subroutine

SRC/dlartg.f90

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
subroutine DLARTG( f, g, c, s, r )
2+
use LA_CONSTANTS, only: wp, zero, half, one, rtmin, rtmax, safmin, safmax
3+
!
4+
! LAPACK auxiliary routine
5+
! E. Anderson
6+
! July 30, 2016
7+
!
8+
! .. Scalar Arguments ..
9+
real(wp) :: c, f, g, r, s
10+
! ..
11+
!
12+
! Purpose
13+
! =======
14+
!
15+
! DLARTG generates a plane rotation so that
16+
!
17+
! [ C S ] . [ F ] = [ R ]
18+
! [ -S C ] [ G ] [ 0 ]
19+
!
20+
! where C**2 + S**2 = 1.
21+
!
22+
! The mathematical formulas used for C and S are
23+
! R = sign(F) * sqrt(F**2 + G**2)
24+
! C = F / R
25+
! S = G / R
26+
! Hence C >= 0. The algorithm used to compute these quantities
27+
! incorporates scaling to avoid overflow or underflow in computing the
28+
! square root of the sum of squares.
29+
!
30+
! This version is discontinuous in R at F = 0 but it returns the same
31+
! C and S as CLARTG for complex inputs (F,0) and (G,0).
32+
!
33+
! Arguments
34+
! =========
35+
!
36+
! F (input) REAL
37+
! The first component of vector to be rotated.
38+
!
39+
! G (input) REAL
40+
! The second component of vector to be rotated.
41+
!
42+
! C (output) REAL
43+
! The cosine of the rotation.
44+
!
45+
! S (output) REAL
46+
! The sine of the rotation.
47+
!
48+
! R (output) REAL
49+
! The nonzero component of the rotated vector.
50+
!
51+
! =====================================================================
52+
!
53+
! .. Local Scalars ..
54+
real(wp) :: d, f1, fs, g1, gs, p, u, uu
55+
! ..
56+
! .. Intrinsic Functions ..
57+
intrinsic :: abs, sign, sqrt
58+
! ..
59+
! .. Executable Statements ..
60+
!
61+
f1 = abs( f )
62+
g1 = abs( g )
63+
if( g == zero ) then
64+
c = one
65+
s = zero
66+
r = f
67+
else if( f == zero ) then
68+
c = zero
69+
s = sign( one, g )
70+
r = g1
71+
else if( f1 > rtmin .and. f1 < rtmax .and. &
72+
g1 > rtmin .and. g1 < rtmax ) then
73+
d = sqrt( f*f + g*g )
74+
p = one / d
75+
c = f1*p
76+
s = g*sign( p, f )
77+
r = sign( d, f )
78+
else
79+
u = min( safmax, max( safmin, f1, g1 ) )
80+
uu = one / u
81+
fs = f*uu
82+
gs = g*uu
83+
d = sqrt( fs*fs + gs*gs )
84+
p = one / d
85+
c = abs( fs )*p
86+
s = gs*sign( p, f )
87+
r = sign( d, f )*u
88+
end if
89+
return
90+
end subroutine

SRC/la_constants.f90

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
module LA_CONSTANTS
2+
!
3+
! -- BLAS/LAPACK module --
4+
! May 06, 2016
5+
!
6+
! Standard constants
7+
!
8+
integer, parameter :: wp = 8
9+
real(wp), parameter :: zero = 0.0_wp
10+
real(wp), parameter :: half = 0.5_wp
11+
real(wp), parameter :: one = 1.0_wp
12+
real(wp), parameter :: two = 2.0_wp
13+
real(wp), parameter :: three = 3.0_wp
14+
real(wp), parameter :: four = 4.0_wp
15+
real(wp), parameter :: eight = 8.0_wp
16+
real(wp), parameter :: ten = 10.0_wp
17+
complex(wp), parameter :: czero = ( 0.0_wp, 0.0_wp )
18+
complex(wp), parameter :: chalf = ( 0.5_wp, 0.0_wp )
19+
complex(wp), parameter :: cone = ( 1.0_wp, 0.0_wp )
20+
character*1, parameter :: sprefix = 'D'
21+
character*1, parameter :: cprefix = 'Z'
22+
!
23+
! Model parameters
24+
!
25+
real(wp), parameter :: eps = 0.11102230246251565404E-015_wp
26+
real(wp), parameter :: ulp = 0.22204460492503130808E-015_wp
27+
real(wp), parameter :: safmin = 0.22250738585072013831E-307_wp
28+
real(wp), parameter :: safmax = 0.44942328371557897693E+308_wp
29+
real(wp), parameter :: smlnum = 0.10020841800044863890E-291_wp
30+
real(wp), parameter :: bignum = 0.99792015476735990583E+292_wp
31+
real(wp), parameter :: rtmin = 0.10010415475915504622E-145_wp
32+
real(wp), parameter :: rtmax = 0.99895953610111751404E+146_wp
33+
!
34+
! Blue's scaling constants
35+
!
36+
real(wp), parameter :: tsml = 0.14916681462400413487E-153_wp
37+
real(wp), parameter :: tbig = 0.19979190722022350281E+147_wp
38+
real(wp), parameter :: ssml = 0.44989137945431963828E+162_wp
39+
real(wp), parameter :: sbig = 0.11113793747425387417E-161_wp
40+
end module LA_CONSTANTS

0 commit comments

Comments
 (0)