1
- ! > \brief \b CROTG
1
+ ! > \brief \b CROTG generates a Givens rotation with real cosine and complex sine.
2
2
!
3
3
! =========== DOCUMENTATION ===========
4
4
!
24
24
! > = 1 if x = 0
25
25
! > c = |a| / sqrt(|a|**2 + |b|**2)
26
26
! > s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2)
27
- ! > When a and b are real and r /= 0, the formulas simplify to
28
27
! > r = sgn(a)*sqrt(|a|**2 + |b|**2)
28
+ ! > When a and b are real and r /= 0, the formulas simplify to
29
29
! > c = a / r
30
30
! > s = b / r
31
- ! > the same as in CROTG when |a| > |b|. When |b| >= |a|, the
32
- ! > sign of c and s will be different from those computed by CROTG
31
+ ! > the same as in SROTG when |a| > |b|. When |b| >= |a|, the
32
+ ! > sign of c and s will be different from those computed by SROTG
33
33
! > if the signs of a and b are not the same.
34
34
! >
35
35
! > \endverbatim
65
65
! Authors:
66
66
! ========
67
67
!
68
- ! > \author Edward Anderson, Lockheed Martin
68
+ ! > \author Weslley Pereira, University of Colorado Denver, USA
69
69
!
70
- ! > \par Contributors:
71
- ! ==================
72
- ! >
73
- ! > Weslley Pereira, University of Colorado Denver, USA
70
+ ! > \date December 2021
74
71
!
75
- ! > \ingroup single_blas_level1
72
+ ! > \ingroup OTHERauxiliary
76
73
!
77
74
! > \par Further Details:
78
75
! =====================
79
76
! >
80
77
! > \verbatim
81
78
! >
79
+ ! > Based on the algorithm from
80
+ ! >
82
81
! > Anderson E. (2017)
83
82
! > Algorithm 978: Safe Scaling in the Level 1 BLAS
84
83
! > ACM Trans Math Softw 44:1--28
@@ -108,21 +107,14 @@ subroutine CROTG( a, b, c, s )
108
107
1 - minexponent (0._wp ), &
109
108
maxexponent (0._wp )- 1 &
110
109
)
111
- real (wp), parameter :: rtmin = sqrt ( real (radix (0._wp ),wp)** max ( &
112
- minexponent (0._wp )- 1 , &
113
- 1 - maxexponent (0._wp ) &
114
- ) / epsilon (0._wp ) )
115
- real (wp), parameter :: rtmax = sqrt ( real (radix (0._wp ),wp)** max ( &
116
- 1 - minexponent (0._wp ), &
117
- maxexponent (0._wp )- 1 &
118
- ) * epsilon (0._wp ) )
110
+ real (wp), parameter :: rtmin = sqrt ( safmin )
119
111
! ..
120
112
! .. Scalar Arguments ..
121
113
real (wp) :: c
122
114
complex (wp) :: a, b, s
123
115
! ..
124
116
! .. Local Scalars ..
125
- real (wp) :: d, f1, f2, g1, g2, h2, u, v, w
117
+ real (wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmax
126
118
complex (wp) :: f, fs, g, gs, r, t
127
119
! ..
128
120
! .. Intrinsic Functions ..
@@ -145,6 +137,7 @@ subroutine CROTG( a, b, c, s )
145
137
else if ( f == czero ) then
146
138
c = zero
147
139
g1 = max ( abs (real (g)), abs (aimag (g)) )
140
+ rtmax = sqrt ( safmax/ 2 )
148
141
if ( g1 > rtmin .and. g1 < rtmax ) then
149
142
!
150
143
! Use unscaled algorithm
@@ -165,6 +158,7 @@ subroutine CROTG( a, b, c, s )
165
158
else
166
159
f1 = max ( abs (real (f)), abs (aimag (f)) )
167
160
g1 = max ( abs (real (g)), abs (aimag (g)) )
161
+ rtmax = sqrt ( safmax/ 4 )
168
162
if ( f1 > rtmin .and. f1 < rtmax .and. &
169
163
g1 > rtmin .and. g1 < rtmax ) then
170
164
!
@@ -173,14 +167,36 @@ subroutine CROTG( a, b, c, s )
173
167
f2 = ABSSQ( f )
174
168
g2 = ABSSQ( g )
175
169
h2 = f2 + g2
176
- if ( f2 > rtmin .and. h2 < rtmax ) then
177
- d = sqrt ( f2* h2 )
170
+ ! safmin <= f2 <= h2 <= safmax
171
+ if ( f2 >= h2 * safmin ) then
172
+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
173
+ c = sqrt ( f2 / h2 )
174
+ r = f / c
175
+ rtmax = rtmax * 2
176
+ if ( f2 > rtmin .and. h2 < rtmax ) then
177
+ ! safmin <= sqrt( f2*h2 ) <= safmax
178
+ s = conjg ( g ) * ( f / sqrt ( f2* h2 ) )
179
+ else
180
+ s = conjg ( g ) * ( r / h2 )
181
+ end if
178
182
else
179
- d = sqrt ( f2 )* sqrt ( h2 )
183
+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
184
+ ! Moreover,
185
+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
186
+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
187
+ ! Also,
188
+ ! g2 >> f2, which means that h2 = g2.
189
+ d = sqrt ( f2 * h2 )
190
+ c = f2 / d
191
+ if ( c >= safmin ) then
192
+ r = f / c
193
+ else
194
+ ! f2 / sqrt(f2 * h2) < safmin, then
195
+ ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
196
+ r = f * ( h2 / d )
197
+ end if
198
+ s = conjg ( g ) * ( f / d )
180
199
end if
181
- c = f2 / d
182
- s = conjg ( g )* ( f / d )
183
- r = f* ( h2 / d )
184
200
else
185
201
!
186
202
! Use scaled algorithm
@@ -207,14 +223,39 @@ subroutine CROTG( a, b, c, s )
207
223
f2 = ABSSQ( fs )
208
224
h2 = f2 + g2
209
225
end if
210
- if ( f2 > rtmin .and. h2 < rtmax ) then
211
- d = sqrt ( f2* h2 )
226
+ ! safmin <= f2 <= h2 <= safmax
227
+ if ( f2 >= h2 * safmin ) then
228
+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
229
+ c = sqrt ( f2 / h2 )
230
+ r = fs / c
231
+ rtmax = rtmax * 2
232
+ if ( f2 > rtmin .and. h2 < rtmax ) then
233
+ ! safmin <= sqrt( f2*h2 ) <= safmax
234
+ s = conjg ( gs ) * ( fs / sqrt ( f2* h2 ) )
235
+ else
236
+ s = conjg ( gs ) * ( r / h2 )
237
+ end if
212
238
else
213
- d = sqrt ( f2 )* sqrt ( h2 )
239
+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
240
+ ! Moreover,
241
+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
242
+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
243
+ ! Also,
244
+ ! g2 >> f2, which means that h2 = g2.
245
+ d = sqrt ( f2 * h2 )
246
+ c = f2 / d
247
+ if ( c >= safmin ) then
248
+ r = fs / c
249
+ else
250
+ ! f2 / sqrt(f2 * h2) < safmin, then
251
+ ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
252
+ r = fs * ( h2 / d )
253
+ end if
254
+ s = conjg ( gs ) * ( fs / d )
214
255
end if
215
- c = ( f2 / d ) * w
216
- s = conjg ( gs ) * ( fs / d )
217
- r = ( fs * ( h2 / d ) ) * u
256
+ ! Rescale c and r
257
+ c = c * w
258
+ r = r * u
218
259
end if
219
260
end if
220
261
a = r
0 commit comments