1
- ! > \brief \b CROTG
1
+ ! > \brief \b CROTG generates a Givens rotation with real cosine and complex sine.
2
2
!
3
3
! =========== DOCUMENTATION ===========
4
4
!
24
24
! > = 1 if x = 0
25
25
! > c = |a| / sqrt(|a|**2 + |b|**2)
26
26
! > s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2)
27
- ! > When a and b are real and r /= 0, the formulas simplify to
28
27
! > r = sgn(a)*sqrt(|a|**2 + |b|**2)
28
+ ! > When a and b are real and r /= 0, the formulas simplify to
29
29
! > c = a / r
30
30
! > s = b / r
31
31
! > the same as in SROTG when |a| > |b|. When |b| >= |a|, the
65
65
! Authors:
66
66
! ========
67
67
!
68
- ! > \author Edward Anderson, Lockheed Martin
68
+ ! > \author Weslley Pereira, University of Colorado Denver, USA
69
69
!
70
- ! > \par Contributors:
71
- ! ==================
72
- ! >
73
- ! > Weslley Pereira, University of Colorado Denver, USA
70
+ ! > \date December 2021
74
71
!
75
72
! > \ingroup single_blas_level1
76
73
!
79
76
! >
80
77
! > \verbatim
81
78
! >
79
+ ! > Based on the algorithm from
80
+ ! >
82
81
! > Anderson E. (2017)
83
82
! > Algorithm 978: Safe Scaling in the Level 1 BLAS
84
83
! > ACM Trans Math Softw 44:1--28
@@ -108,21 +107,14 @@ subroutine CROTG( a, b, c, s )
108
107
1 - minexponent (0._wp ), &
109
108
maxexponent (0._wp )- 1 &
110
109
)
111
- real (wp), parameter :: rtmin = sqrt ( real (radix (0._wp ),wp)** max ( &
112
- minexponent (0._wp )- 1 , &
113
- 1 - maxexponent (0._wp ) &
114
- ) / epsilon (0._wp ) )
115
- real (wp), parameter :: rtmax = sqrt ( real (radix (0._wp ),wp)** max ( &
116
- 1 - minexponent (0._wp ), &
117
- maxexponent (0._wp )- 1 &
118
- ) * epsilon (0._wp ) )
110
+ real (wp), parameter :: rtmin = sqrt ( safmin )
119
111
! ..
120
112
! .. Scalar Arguments ..
121
113
real (wp) :: c
122
114
complex (wp) :: a, b, s
123
115
! ..
124
116
! .. Local Scalars ..
125
- real (wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
117
+ real (wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmax
126
118
complex (wp) :: f, fs, g, gs, r, t
127
119
! ..
128
120
! .. Intrinsic Functions ..
@@ -144,30 +136,43 @@ subroutine CROTG( a, b, c, s )
144
136
r = f
145
137
else if ( f == czero ) then
146
138
c = zero
147
- g1 = max ( abs (real (g)), abs (aimag (g)) )
148
- if ( g1 > rtmin .and. g1 < rtmax ) then
139
+ if ( real (g) == zero ) then
140
+ r = abs (aimag (g))
141
+ s = conjg ( g ) / r
142
+ elseif ( aimag (g) == zero ) then
143
+ r = abs (real (g))
144
+ s = conjg ( g ) / r
145
+ else
146
+ g1 = max ( abs (real (g)), abs (aimag (g)) )
147
+ rtmax = sqrt ( safmax/ 2 )
148
+ if ( g1 > rtmin .and. g1 < rtmax ) then
149
149
!
150
150
! Use unscaled algorithm
151
151
!
152
- g2 = ABSSQ( g )
153
- d = sqrt ( g2 )
154
- s = conjg ( g ) / d
155
- r = d
156
- else
152
+ ! The following two lines can be replaced by `d = abs( g )`.
153
+ ! This algorithm do not use the intrinsic complex abs.
154
+ g2 = ABSSQ( g )
155
+ d = sqrt ( g2 )
156
+ s = conjg ( g ) / d
157
+ r = d
158
+ else
157
159
!
158
160
! Use scaled algorithm
159
161
!
160
- u = min ( safmax, max ( safmin, g1 ) )
161
- uu = one / u
162
- gs = g* uu
163
- g2 = ABSSQ( gs )
164
- d = sqrt ( g2 )
165
- s = conjg ( gs ) / d
166
- r = d* u
162
+ u = min ( safmax, max ( safmin, g1 ) )
163
+ gs = g / u
164
+ ! The following two lines can be replaced by `d = abs( gs )`.
165
+ ! This algorithm do not use the intrinsic complex abs.
166
+ g2 = ABSSQ( gs )
167
+ d = sqrt ( g2 )
168
+ s = conjg ( gs ) / d
169
+ r = d* u
170
+ end if
167
171
end if
168
172
else
169
173
f1 = max ( abs (real (f)), abs (aimag (f)) )
170
174
g1 = max ( abs (real (g)), abs (aimag (g)) )
175
+ rtmax = sqrt ( safmax/ 4 )
171
176
if ( f1 > rtmin .and. f1 < rtmax .and. &
172
177
g1 > rtmin .and. g1 < rtmax ) then
173
178
!
@@ -176,52 +181,95 @@ subroutine CROTG( a, b, c, s )
176
181
f2 = ABSSQ( f )
177
182
g2 = ABSSQ( g )
178
183
h2 = f2 + g2
179
- if ( f2 > rtmin .and. h2 < rtmax ) then
180
- d = sqrt ( f2* h2 )
184
+ ! safmin <= f2 <= h2 <= safmax
185
+ if ( f2 >= h2 * safmin ) then
186
+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
187
+ c = sqrt ( f2 / h2 )
188
+ r = f / c
189
+ rtmax = rtmax * 2
190
+ if ( f2 > rtmin .and. h2 < rtmax ) then
191
+ ! safmin <= sqrt( f2*h2 ) <= safmax
192
+ s = conjg ( g ) * ( f / sqrt ( f2* h2 ) )
193
+ else
194
+ s = conjg ( g ) * ( r / h2 )
195
+ end if
181
196
else
182
- d = sqrt ( f2 )* sqrt ( h2 )
197
+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
198
+ ! Moreover,
199
+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
200
+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
201
+ ! Also,
202
+ ! g2 >> f2, which means that h2 = g2.
203
+ d = sqrt ( f2 * h2 )
204
+ c = f2 / d
205
+ if ( c >= safmin ) then
206
+ r = f / c
207
+ else
208
+ ! f2 / sqrt(f2 * h2) < safmin, then
209
+ ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
210
+ r = f * ( h2 / d )
211
+ end if
212
+ s = conjg ( g ) * ( f / d )
183
213
end if
184
- p = 1 / d
185
- c = f2* p
186
- s = conjg ( g )* ( f* p )
187
- r = f* ( h2* p )
188
214
else
189
215
!
190
216
! Use scaled algorithm
191
217
!
192
218
u = min ( safmax, max ( safmin, f1, g1 ) )
193
- uu = one / u
194
- gs = g* uu
219
+ gs = g / u
195
220
g2 = ABSSQ( gs )
196
- if ( f1* uu < rtmin ) then
221
+ if ( f1 / u < rtmin ) then
197
222
!
198
223
! f is not well-scaled when scaled by g1.
199
224
! Use a different scaling for f.
200
225
!
201
226
v = min ( safmax, max ( safmin, f1 ) )
202
- vv = one / v
203
- w = v * uu
204
- fs = f* vv
227
+ w = v / u
228
+ fs = f / v
205
229
f2 = ABSSQ( fs )
206
230
h2 = f2* w** 2 + g2
207
231
else
208
232
!
209
233
! Otherwise use the same scaling for f and g.
210
234
!
211
235
w = one
212
- fs = f* uu
236
+ fs = f / u
213
237
f2 = ABSSQ( fs )
214
238
h2 = f2 + g2
215
239
end if
216
- if ( f2 > rtmin .and. h2 < rtmax ) then
217
- d = sqrt ( f2* h2 )
240
+ ! safmin <= f2 <= h2 <= safmax
241
+ if ( f2 >= h2 * safmin ) then
242
+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
243
+ c = sqrt ( f2 / h2 )
244
+ r = fs / c
245
+ rtmax = rtmax * 2
246
+ if ( f2 > rtmin .and. h2 < rtmax ) then
247
+ ! safmin <= sqrt( f2*h2 ) <= safmax
248
+ s = conjg ( gs ) * ( fs / sqrt ( f2* h2 ) )
249
+ else
250
+ s = conjg ( gs ) * ( r / h2 )
251
+ end if
218
252
else
219
- d = sqrt ( f2 )* sqrt ( h2 )
253
+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
254
+ ! Moreover,
255
+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
256
+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
257
+ ! Also,
258
+ ! g2 >> f2, which means that h2 = g2.
259
+ d = sqrt ( f2 * h2 )
260
+ c = f2 / d
261
+ if ( c >= safmin ) then
262
+ r = fs / c
263
+ else
264
+ ! f2 / sqrt(f2 * h2) < safmin, then
265
+ ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
266
+ r = fs * ( h2 / d )
267
+ end if
268
+ s = conjg ( gs ) * ( fs / d )
220
269
end if
221
- p = 1 / d
222
- c = ( f2* p )* w
223
- s = conjg ( gs )* ( fs* p )
224
- r = ( fs* ( h2* p ) )* u
270
+ ! Rescale c and r
271
+ c = c * w
272
+ r = r * u
225
273
end if
226
274
end if
227
275
a = r
0 commit comments