Skip to content

Commit 0b68dd6

Browse files
authored
Merge pull request #3834 from martin-frbg/lapack631
Use new algorithms for computing Givens rotations (Reference-LAPACK PR631)
2 parents 9343499 + 7ae4269 commit 0b68dd6

File tree

4 files changed

+247
-133
lines changed

4 files changed

+247
-133
lines changed

lapack-netlib/SRC/clartg.f90

Lines changed: 108 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -30,27 +30,28 @@
3030
!> The mathematical formulas used for C and S are
3131
!>
3232
!> sgn(x) = { x / |x|, x != 0
33-
!> { 1, x = 0
33+
!> { 1, x = 0
3434
!>
3535
!> R = sgn(F) * sqrt(|F|**2 + |G|**2)
3636
!>
3737
!> C = |F| / sqrt(|F|**2 + |G|**2)
3838
!>
3939
!> S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2)
4040
!>
41+
!> Special conditions:
42+
!> If G=0, then C=1 and S=0.
43+
!> If F=0, then C=0 and S is chosen so that R is real.
44+
!>
4145
!> When F and G are real, the formulas simplify to C = F/R and
4246
!> S = G/R, and the returned values of C, S, and R should be
43-
!> identical to those returned by CLARTG.
47+
!> identical to those returned by SLARTG.
4448
!>
4549
!> The algorithm used to compute these quantities incorporates scaling
4650
!> to avoid overflow or underflow in computing the square root of the
4751
!> sum of squares.
4852
!>
49-
!> This is a faster version of the BLAS1 routine CROTG, except for
50-
!> the following differences:
51-
!> F and G are unchanged on return.
52-
!> If G=0, then C=1 and S=0.
53-
!> If F=0, then C=0 and S is chosen so that R is real.
53+
!> This is the same routine CROTG fom BLAS1, except that
54+
!> F and G are unchanged on return.
5455
!>
5556
!> Below, wp=>sp stands for single precision from LA_CONSTANTS module.
5657
!> \endverbatim
@@ -91,22 +92,19 @@
9192
! Authors:
9293
! ========
9394
!
94-
!> \author Edward Anderson, Lockheed Martin
95+
!> \author Weslley Pereira, University of Colorado Denver, USA
9596
!
96-
!> \date August 2016
97+
!> \date December 2021
9798
!
9899
!> \ingroup OTHERauxiliary
99100
!
100-
!> \par Contributors:
101-
! ==================
102-
!>
103-
!> Weslley Pereira, University of Colorado Denver, USA
104-
!
105101
!> \par Further Details:
106102
! =====================
107103
!>
108104
!> \verbatim
109105
!>
106+
!> Based on the algorithm from
107+
!>
110108
!> Anderson E. (2017)
111109
!> Algorithm 978: Safe Scaling in the Level 1 BLAS
112110
!> ACM Trans Math Softw 44:1--28
@@ -117,7 +115,7 @@
117115
subroutine CLARTG( f, g, c, s, r )
118116
use LA_CONSTANTS, &
119117
only: wp=>sp, zero=>szero, one=>sone, two=>stwo, czero, &
120-
rtmin=>srtmin, rtmax=>srtmax, safmin=>ssafmin, safmax=>ssafmax
118+
safmin=>ssafmin, safmax=>ssafmax
121119
!
122120
! -- LAPACK auxiliary routine --
123121
! -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -129,7 +127,7 @@ subroutine CLARTG( f, g, c, s, r )
129127
complex(wp) f, g, r, s
130128
! ..
131129
! .. Local Scalars ..
132-
real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
130+
real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax
133131
complex(wp) :: fs, gs, t
134132
! ..
135133
! .. Intrinsic Functions ..
@@ -141,6 +139,9 @@ subroutine CLARTG( f, g, c, s, r )
141139
! .. Statement Function definitions ..
142140
ABSSQ( t ) = real( t )**2 + aimag( t )**2
143141
! ..
142+
! .. Constants ..
143+
rtmin = sqrt( safmin )
144+
! ..
144145
! .. Executable Statements ..
145146
!
146147
if( g == czero ) then
@@ -149,30 +150,43 @@ subroutine CLARTG( f, g, c, s, r )
149150
r = f
150151
else if( f == czero ) then
151152
c = zero
152-
g1 = max( abs(real(g)), abs(aimag(g)) )
153-
if( g1 > rtmin .and. g1 < rtmax ) then
153+
if( real(g) == zero ) then
154+
r = abs(aimag(g))
155+
s = conjg( g ) / r
156+
elseif( aimag(g) == zero ) then
157+
r = abs(real(g))
158+
s = conjg( g ) / r
159+
else
160+
g1 = max( abs(real(g)), abs(aimag(g)) )
161+
rtmax = sqrt( safmax/2 )
162+
if( g1 > rtmin .and. g1 < rtmax ) then
154163
!
155164
! Use unscaled algorithm
156165
!
157-
g2 = ABSSQ( g )
158-
d = sqrt( g2 )
159-
s = conjg( g ) / d
160-
r = d
161-
else
166+
! The following two lines can be replaced by `d = abs( g )`.
167+
! This algorithm do not use the intrinsic complex abs.
168+
g2 = ABSSQ( g )
169+
d = sqrt( g2 )
170+
s = conjg( g ) / d
171+
r = d
172+
else
162173
!
163174
! Use scaled algorithm
164175
!
165-
u = min( safmax, max( safmin, g1 ) )
166-
uu = one / u
167-
gs = g*uu
168-
g2 = ABSSQ( gs )
169-
d = sqrt( g2 )
170-
s = conjg( gs ) / d
171-
r = d*u
176+
u = min( safmax, max( safmin, g1 ) )
177+
gs = g / u
178+
! The following two lines can be replaced by `d = abs( gs )`.
179+
! This algorithm do not use the intrinsic complex abs.
180+
g2 = ABSSQ( gs )
181+
d = sqrt( g2 )
182+
s = conjg( gs ) / d
183+
r = d*u
184+
end if
172185
end if
173186
else
174187
f1 = max( abs(real(f)), abs(aimag(f)) )
175188
g1 = max( abs(real(g)), abs(aimag(g)) )
189+
rtmax = sqrt( safmax/4 )
176190
if( f1 > rtmin .and. f1 < rtmax .and. &
177191
g1 > rtmin .and. g1 < rtmax ) then
178192
!
@@ -181,52 +195,95 @@ subroutine CLARTG( f, g, c, s, r )
181195
f2 = ABSSQ( f )
182196
g2 = ABSSQ( g )
183197
h2 = f2 + g2
184-
if( f2 > rtmin .and. h2 < rtmax ) then
185-
d = sqrt( f2*h2 )
198+
! safmin <= f2 <= h2 <= safmax
199+
if( f2 >= h2 * safmin ) then
200+
! safmin <= f2/h2 <= 1, and h2/f2 is finite
201+
c = sqrt( f2 / h2 )
202+
r = f / c
203+
rtmax = rtmax * 2
204+
if( f2 > rtmin .and. h2 < rtmax ) then
205+
! safmin <= sqrt( f2*h2 ) <= safmax
206+
s = conjg( g ) * ( f / sqrt( f2*h2 ) )
207+
else
208+
s = conjg( g ) * ( r / h2 )
209+
end if
186210
else
187-
d = sqrt( f2 )*sqrt( h2 )
211+
! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
212+
! Moreover,
213+
! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
214+
! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
215+
! Also,
216+
! g2 >> f2, which means that h2 = g2.
217+
d = sqrt( f2 * h2 )
218+
c = f2 / d
219+
if( c >= safmin ) then
220+
r = f / c
221+
else
222+
! f2 / sqrt(f2 * h2) < safmin, then
223+
! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
224+
r = f * ( h2 / d )
225+
end if
226+
s = conjg( g ) * ( f / d )
188227
end if
189-
p = 1 / d
190-
c = f2*p
191-
s = conjg( g )*( f*p )
192-
r = f*( h2*p )
193228
else
194229
!
195230
! Use scaled algorithm
196231
!
197232
u = min( safmax, max( safmin, f1, g1 ) )
198-
uu = one / u
199-
gs = g*uu
233+
gs = g / u
200234
g2 = ABSSQ( gs )
201-
if( f1*uu < rtmin ) then
235+
if( f1 / u < rtmin ) then
202236
!
203237
! f is not well-scaled when scaled by g1.
204238
! Use a different scaling for f.
205239
!
206240
v = min( safmax, max( safmin, f1 ) )
207-
vv = one / v
208-
w = v * uu
209-
fs = f*vv
241+
w = v / u
242+
fs = f / v
210243
f2 = ABSSQ( fs )
211244
h2 = f2*w**2 + g2
212245
else
213246
!
214247
! Otherwise use the same scaling for f and g.
215248
!
216249
w = one
217-
fs = f*uu
250+
fs = f / u
218251
f2 = ABSSQ( fs )
219252
h2 = f2 + g2
220253
end if
221-
if( f2 > rtmin .and. h2 < rtmax ) then
222-
d = sqrt( f2*h2 )
254+
! safmin <= f2 <= h2 <= safmax
255+
if( f2 >= h2 * safmin ) then
256+
! safmin <= f2/h2 <= 1, and h2/f2 is finite
257+
c = sqrt( f2 / h2 )
258+
r = fs / c
259+
rtmax = rtmax * 2
260+
if( f2 > rtmin .and. h2 < rtmax ) then
261+
! safmin <= sqrt( f2*h2 ) <= safmax
262+
s = conjg( gs ) * ( fs / sqrt( f2*h2 ) )
263+
else
264+
s = conjg( gs ) * ( r / h2 )
265+
end if
223266
else
224-
d = sqrt( f2 )*sqrt( h2 )
267+
! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
268+
! Moreover,
269+
! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
270+
! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
271+
! Also,
272+
! g2 >> f2, which means that h2 = g2.
273+
d = sqrt( f2 * h2 )
274+
c = f2 / d
275+
if( c >= safmin ) then
276+
r = fs / c
277+
else
278+
! f2 / sqrt(f2 * h2) < safmin, then
279+
! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
280+
r = fs * ( h2 / d )
281+
end if
282+
s = conjg( gs ) * ( fs / d )
225283
end if
226-
p = 1 / d
227-
c = ( f2*p )*w
228-
s = conjg( gs )*( fs*p )
229-
r = ( fs*( h2*p ) )*u
284+
! Rescale c and r
285+
c = c * w
286+
r = r * u
230287
end if
231288
end if
232289
return

lapack-netlib/SRC/dlartg.f90

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
! SUBROUTINE DLARTG( F, G, C, S, R )
1212
!
1313
! .. Scalar Arguments ..
14-
! REAL(wp) C, F, G, R, S
14+
! REAL(wp) C, F, G, R, S
1515
! ..
1616
!
1717
!> \par Purpose:
@@ -45,8 +45,6 @@
4545
!> floating point operations (saves work in DBDSQR when
4646
!> there are zeros on the diagonal).
4747
!>
48-
!> If F exceeds G in magnitude, C will be positive.
49-
!>
5048
!> Below, wp=>dp stands for double precision from LA_CONSTANTS module.
5149
!> \endverbatim
5250
!
@@ -112,7 +110,7 @@
112110
subroutine DLARTG( f, g, c, s, r )
113111
use LA_CONSTANTS, &
114112
only: wp=>dp, zero=>dzero, half=>dhalf, one=>done, &
115-
rtmin=>drtmin, rtmax=>drtmax, safmin=>dsafmin, safmax=>dsafmax
113+
safmin=>dsafmin, safmax=>dsafmax
116114
!
117115
! -- LAPACK auxiliary routine --
118116
! -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -123,11 +121,15 @@ subroutine DLARTG( f, g, c, s, r )
123121
real(wp) :: c, f, g, r, s
124122
! ..
125123
! .. Local Scalars ..
126-
real(wp) :: d, f1, fs, g1, gs, p, u, uu
124+
real(wp) :: d, f1, fs, g1, gs, u, rtmin, rtmax
127125
! ..
128126
! .. Intrinsic Functions ..
129127
intrinsic :: abs, sign, sqrt
130128
! ..
129+
! .. Constants ..
130+
rtmin = sqrt( safmin )
131+
rtmax = sqrt( safmax/2 )
132+
! ..
131133
! .. Executable Statements ..
132134
!
133135
f1 = abs( f )
@@ -143,20 +145,18 @@ subroutine DLARTG( f, g, c, s, r )
143145
else if( f1 > rtmin .and. f1 < rtmax .and. &
144146
g1 > rtmin .and. g1 < rtmax ) then
145147
d = sqrt( f*f + g*g )
146-
p = one / d
147-
c = f1*p
148-
s = g*sign( p, f )
148+
c = f1 / d
149149
r = sign( d, f )
150+
s = g / r
150151
else
151152
u = min( safmax, max( safmin, f1, g1 ) )
152-
uu = one / u
153-
fs = f*uu
154-
gs = g*uu
153+
fs = f / u
154+
gs = g / u
155155
d = sqrt( fs*fs + gs*gs )
156-
p = one / d
157-
c = abs( fs )*p
158-
s = gs*sign( p, f )
159-
r = sign( d, f )*u
156+
c = abs( fs ) / d
157+
r = sign( d, f )
158+
s = gs / r
159+
r = r*u
160160
end if
161161
return
162162
end subroutine

0 commit comments

Comments
 (0)