Skip to content

Commit c30e502

Browse files
Merge pull request #631 from weslleyspereira/fix-precision-in-clartgf90-2
New algorithms for computing Givens rotations
2 parents 5c5ddfd + c362fff commit c30e502

File tree

6 files changed

+445
-235
lines changed

6 files changed

+445
-235
lines changed

BLAS/SRC/crotg.f90

Lines changed: 99 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
!> \brief \b CROTG
1+
!> \brief \b CROTG generates a Givens rotation with real cosine and complex sine.
22
!
33
! =========== DOCUMENTATION ===========
44
!
@@ -24,8 +24,8 @@
2424
!> = 1 if x = 0
2525
!> c = |a| / sqrt(|a|**2 + |b|**2)
2626
!> s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2)
27-
!> When a and b are real and r /= 0, the formulas simplify to
2827
!> r = sgn(a)*sqrt(|a|**2 + |b|**2)
28+
!> When a and b are real and r /= 0, the formulas simplify to
2929
!> c = a / r
3030
!> s = b / r
3131
!> the same as in SROTG when |a| > |b|. When |b| >= |a|, the
@@ -65,12 +65,9 @@
6565
! Authors:
6666
! ========
6767
!
68-
!> \author Edward Anderson, Lockheed Martin
68+
!> \author Weslley Pereira, University of Colorado Denver, USA
6969
!
70-
!> \par Contributors:
71-
! ==================
72-
!>
73-
!> Weslley Pereira, University of Colorado Denver, USA
70+
!> \date December 2021
7471
!
7572
!> \ingroup single_blas_level1
7673
!
@@ -79,6 +76,8 @@
7976
!>
8077
!> \verbatim
8178
!>
79+
!> Based on the algorithm from
80+
!>
8281
!> Anderson E. (2017)
8382
!> Algorithm 978: Safe Scaling in the Level 1 BLAS
8483
!> ACM Trans Math Softw 44:1--28
@@ -108,21 +107,14 @@ subroutine CROTG( a, b, c, s )
108107
1-minexponent(0._wp), &
109108
maxexponent(0._wp)-1 &
110109
)
111-
real(wp), parameter :: rtmin = sqrt( real(radix(0._wp),wp)**max( &
112-
minexponent(0._wp)-1, &
113-
1-maxexponent(0._wp) &
114-
) / epsilon(0._wp) )
115-
real(wp), parameter :: rtmax = sqrt( real(radix(0._wp),wp)**max( &
116-
1-minexponent(0._wp), &
117-
maxexponent(0._wp)-1 &
118-
) * epsilon(0._wp) )
110+
real(wp), parameter :: rtmin = sqrt( safmin )
119111
! ..
120112
! .. Scalar Arguments ..
121113
real(wp) :: c
122114
complex(wp) :: a, b, s
123115
! ..
124116
! .. Local Scalars ..
125-
real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
117+
real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmax
126118
complex(wp) :: f, fs, g, gs, r, t
127119
! ..
128120
! .. Intrinsic Functions ..
@@ -144,30 +136,43 @@ subroutine CROTG( a, b, c, s )
144136
r = f
145137
else if( f == czero ) then
146138
c = zero
147-
g1 = max( abs(real(g)), abs(aimag(g)) )
148-
if( g1 > rtmin .and. g1 < rtmax ) then
139+
if( real(g) == zero ) then
140+
r = abs(aimag(g))
141+
s = conjg( g ) / r
142+
elseif( aimag(g) == zero ) then
143+
r = abs(real(g))
144+
s = conjg( g ) / r
145+
else
146+
g1 = max( abs(real(g)), abs(aimag(g)) )
147+
rtmax = sqrt( safmax/2 )
148+
if( g1 > rtmin .and. g1 < rtmax ) then
149149
!
150150
! Use unscaled algorithm
151151
!
152-
g2 = ABSSQ( g )
153-
d = sqrt( g2 )
154-
s = conjg( g ) / d
155-
r = d
156-
else
152+
! The following two lines can be replaced by `d = abs( g )`.
153+
! This algorithm do not use the intrinsic complex abs.
154+
g2 = ABSSQ( g )
155+
d = sqrt( g2 )
156+
s = conjg( g ) / d
157+
r = d
158+
else
157159
!
158160
! Use scaled algorithm
159161
!
160-
u = min( safmax, max( safmin, g1 ) )
161-
uu = one / u
162-
gs = g*uu
163-
g2 = ABSSQ( gs )
164-
d = sqrt( g2 )
165-
s = conjg( gs ) / d
166-
r = d*u
162+
u = min( safmax, max( safmin, g1 ) )
163+
gs = g / u
164+
! The following two lines can be replaced by `d = abs( gs )`.
165+
! This algorithm do not use the intrinsic complex abs.
166+
g2 = ABSSQ( gs )
167+
d = sqrt( g2 )
168+
s = conjg( gs ) / d
169+
r = d*u
170+
end if
167171
end if
168172
else
169173
f1 = max( abs(real(f)), abs(aimag(f)) )
170174
g1 = max( abs(real(g)), abs(aimag(g)) )
175+
rtmax = sqrt( safmax/4 )
171176
if( f1 > rtmin .and. f1 < rtmax .and. &
172177
g1 > rtmin .and. g1 < rtmax ) then
173178
!
@@ -176,52 +181,95 @@ subroutine CROTG( a, b, c, s )
176181
f2 = ABSSQ( f )
177182
g2 = ABSSQ( g )
178183
h2 = f2 + g2
179-
if( f2 > rtmin .and. h2 < rtmax ) then
180-
d = sqrt( f2*h2 )
184+
! safmin <= f2 <= h2 <= safmax
185+
if( f2 >= h2 * safmin ) then
186+
! safmin <= f2/h2 <= 1, and h2/f2 is finite
187+
c = sqrt( f2 / h2 )
188+
r = f / c
189+
rtmax = rtmax * 2
190+
if( f2 > rtmin .and. h2 < rtmax ) then
191+
! safmin <= sqrt( f2*h2 ) <= safmax
192+
s = conjg( g ) * ( f / sqrt( f2*h2 ) )
193+
else
194+
s = conjg( g ) * ( r / h2 )
195+
end if
181196
else
182-
d = sqrt( f2 )*sqrt( h2 )
197+
! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
198+
! Moreover,
199+
! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
200+
! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
201+
! Also,
202+
! g2 >> f2, which means that h2 = g2.
203+
d = sqrt( f2 * h2 )
204+
c = f2 / d
205+
if( c >= safmin ) then
206+
r = f / c
207+
else
208+
! f2 / sqrt(f2 * h2) < safmin, then
209+
! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
210+
r = f * ( h2 / d )
211+
end if
212+
s = conjg( g ) * ( f / d )
183213
end if
184-
p = 1 / d
185-
c = f2*p
186-
s = conjg( g )*( f*p )
187-
r = f*( h2*p )
188214
else
189215
!
190216
! Use scaled algorithm
191217
!
192218
u = min( safmax, max( safmin, f1, g1 ) )
193-
uu = one / u
194-
gs = g*uu
219+
gs = g / u
195220
g2 = ABSSQ( gs )
196-
if( f1*uu < rtmin ) then
221+
if( f1 / u < rtmin ) then
197222
!
198223
! f is not well-scaled when scaled by g1.
199224
! Use a different scaling for f.
200225
!
201226
v = min( safmax, max( safmin, f1 ) )
202-
vv = one / v
203-
w = v * uu
204-
fs = f*vv
227+
w = v / u
228+
fs = f / v
205229
f2 = ABSSQ( fs )
206230
h2 = f2*w**2 + g2
207231
else
208232
!
209233
! Otherwise use the same scaling for f and g.
210234
!
211235
w = one
212-
fs = f*uu
236+
fs = f / u
213237
f2 = ABSSQ( fs )
214238
h2 = f2 + g2
215239
end if
216-
if( f2 > rtmin .and. h2 < rtmax ) then
217-
d = sqrt( f2*h2 )
240+
! safmin <= f2 <= h2 <= safmax
241+
if( f2 >= h2 * safmin ) then
242+
! safmin <= f2/h2 <= 1, and h2/f2 is finite
243+
c = sqrt( f2 / h2 )
244+
r = fs / c
245+
rtmax = rtmax * 2
246+
if( f2 > rtmin .and. h2 < rtmax ) then
247+
! safmin <= sqrt( f2*h2 ) <= safmax
248+
s = conjg( gs ) * ( fs / sqrt( f2*h2 ) )
249+
else
250+
s = conjg( gs ) * ( r / h2 )
251+
end if
218252
else
219-
d = sqrt( f2 )*sqrt( h2 )
253+
! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
254+
! Moreover,
255+
! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
256+
! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
257+
! Also,
258+
! g2 >> f2, which means that h2 = g2.
259+
d = sqrt( f2 * h2 )
260+
c = f2 / d
261+
if( c >= safmin ) then
262+
r = fs / c
263+
else
264+
! f2 / sqrt(f2 * h2) < safmin, then
265+
! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
266+
r = fs * ( h2 / d )
267+
end if
268+
s = conjg( gs ) * ( fs / d )
220269
end if
221-
p = 1 / d
222-
c = ( f2*p )*w
223-
s = conjg( gs )*( fs*p )
224-
r = ( fs*( h2*p ) )*u
270+
! Rescale c and r
271+
c = c * w
272+
r = r * u
225273
end if
226274
end if
227275
a = r

0 commit comments

Comments
 (0)