30
30
! > The mathematical formulas used for C and S are
31
31
! >
32
32
! > sgn(x) = { x / |x|, x != 0
33
- ! > { 1, x = 0
33
+ ! > { 1, x = 0
34
34
! >
35
35
! > R = sgn(F) * sqrt(|F|**2 + |G|**2)
36
36
! >
37
37
! > C = |F| / sqrt(|F|**2 + |G|**2)
38
38
! >
39
39
! > S = sgn(F) * conjg(G) / sqrt(|F|**2 + |G|**2)
40
40
! >
41
+ ! > Special conditions:
42
+ ! > If G=0, then C=1 and S=0.
43
+ ! > If F=0, then C=0 and S is chosen so that R is real.
44
+ ! >
41
45
! > When F and G are real, the formulas simplify to C = F/R and
42
46
! > S = G/R, and the returned values of C, S, and R should be
43
- ! > identical to those returned by CLARTG .
47
+ ! > identical to those returned by SLARTG .
44
48
! >
45
49
! > The algorithm used to compute these quantities incorporates scaling
46
50
! > to avoid overflow or underflow in computing the square root of the
47
51
! > sum of squares.
48
52
! >
49
- ! > This is a faster version of the BLAS1 routine CROTG, except for
50
- ! > the following differences:
51
- ! > F and G are unchanged on return.
52
- ! > If G=0, then C=1 and S=0.
53
- ! > If F=0, then C=0 and S is chosen so that R is real.
53
+ ! > This is the same routine CROTG fom BLAS1, except that
54
+ ! > F and G are unchanged on return.
54
55
! >
55
56
! > Below, wp=>sp stands for single precision from LA_CONSTANTS module.
56
57
! > \endverbatim
91
92
! Authors:
92
93
! ========
93
94
!
94
- ! > \author Edward Anderson, Lockheed Martin
95
+ ! > \author Weslley Pereira, University of Colorado Denver, USA
95
96
!
96
- ! > \date August 2016
97
+ ! > \date December 2021
97
98
!
98
99
! > \ingroup OTHERauxiliary
99
100
!
100
- ! > \par Contributors:
101
- ! ==================
102
- ! >
103
- ! > Weslley Pereira, University of Colorado Denver, USA
104
- !
105
101
! > \par Further Details:
106
102
! =====================
107
103
! >
108
104
! > \verbatim
109
105
! >
106
+ ! > Based on the algorithm from
107
+ ! >
110
108
! > Anderson E. (2017)
111
109
! > Algorithm 978: Safe Scaling in the Level 1 BLAS
112
110
! > ACM Trans Math Softw 44:1--28
117
115
subroutine CLARTG ( f , g , c , s , r )
118
116
use LA_CONSTANTS, &
119
117
only: wp= >sp, zero= >szero, one= >sone, two= >stwo, czero, &
120
- rtmin = >srtmin, rtmax = >srtmax, safmin= >ssafmin, safmax= >ssafmax
118
+ safmin= >ssafmin, safmax= >ssafmax
121
119
!
122
120
! -- LAPACK auxiliary routine --
123
121
! -- LAPACK is a software package provided by Univ. of Tennessee, --
@@ -129,7 +127,7 @@ subroutine CLARTG( f, g, c, s, r )
129
127
complex (wp) f, g, r, s
130
128
! ..
131
129
! .. Local Scalars ..
132
- real (wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
130
+ real (wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmin, rtmax
133
131
complex (wp) :: fs, gs, t
134
132
! ..
135
133
! .. Intrinsic Functions ..
@@ -141,6 +139,9 @@ subroutine CLARTG( f, g, c, s, r )
141
139
! .. Statement Function definitions ..
142
140
ABSSQ( t ) = real ( t )** 2 + aimag ( t )** 2
143
141
! ..
142
+ ! .. Constants ..
143
+ rtmin = sqrt ( safmin )
144
+ ! ..
144
145
! .. Executable Statements ..
145
146
!
146
147
if ( g == czero ) then
@@ -149,30 +150,43 @@ subroutine CLARTG( f, g, c, s, r )
149
150
r = f
150
151
else if ( f == czero ) then
151
152
c = zero
152
- g1 = max ( abs (real (g)), abs (aimag (g)) )
153
- if ( g1 > rtmin .and. g1 < rtmax ) then
153
+ if ( real (g) == zero ) then
154
+ r = abs (aimag (g))
155
+ s = conjg ( g ) / r
156
+ elseif ( aimag (g) == zero ) then
157
+ r = abs (real (g))
158
+ s = conjg ( g ) / r
159
+ else
160
+ g1 = max ( abs (real (g)), abs (aimag (g)) )
161
+ rtmax = sqrt ( safmax/ 2 )
162
+ if ( g1 > rtmin .and. g1 < rtmax ) then
154
163
!
155
164
! Use unscaled algorithm
156
165
!
157
- g2 = ABSSQ( g )
158
- d = sqrt ( g2 )
159
- s = conjg ( g ) / d
160
- r = d
161
- else
166
+ ! The following two lines can be replaced by `d = abs( g )`.
167
+ ! This algorithm do not use the intrinsic complex abs.
168
+ g2 = ABSSQ( g )
169
+ d = sqrt ( g2 )
170
+ s = conjg ( g ) / d
171
+ r = d
172
+ else
162
173
!
163
174
! Use scaled algorithm
164
175
!
165
- u = min ( safmax, max ( safmin, g1 ) )
166
- uu = one / u
167
- gs = g* uu
168
- g2 = ABSSQ( gs )
169
- d = sqrt ( g2 )
170
- s = conjg ( gs ) / d
171
- r = d* u
176
+ u = min ( safmax, max ( safmin, g1 ) )
177
+ gs = g / u
178
+ ! The following two lines can be replaced by `d = abs( gs )`.
179
+ ! This algorithm do not use the intrinsic complex abs.
180
+ g2 = ABSSQ( gs )
181
+ d = sqrt ( g2 )
182
+ s = conjg ( gs ) / d
183
+ r = d* u
184
+ end if
172
185
end if
173
186
else
174
187
f1 = max ( abs (real (f)), abs (aimag (f)) )
175
188
g1 = max ( abs (real (g)), abs (aimag (g)) )
189
+ rtmax = sqrt ( safmax/ 4 )
176
190
if ( f1 > rtmin .and. f1 < rtmax .and. &
177
191
g1 > rtmin .and. g1 < rtmax ) then
178
192
!
@@ -181,52 +195,95 @@ subroutine CLARTG( f, g, c, s, r )
181
195
f2 = ABSSQ( f )
182
196
g2 = ABSSQ( g )
183
197
h2 = f2 + g2
184
- if ( f2 > rtmin .and. h2 < rtmax ) then
185
- d = sqrt ( f2* h2 )
198
+ ! safmin <= f2 <= h2 <= safmax
199
+ if ( f2 >= h2 * safmin ) then
200
+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
201
+ c = sqrt ( f2 / h2 )
202
+ r = f / c
203
+ rtmax = rtmax * 2
204
+ if ( f2 > rtmin .and. h2 < rtmax ) then
205
+ ! safmin <= sqrt( f2*h2 ) <= safmax
206
+ s = conjg ( g ) * ( f / sqrt ( f2* h2 ) )
207
+ else
208
+ s = conjg ( g ) * ( r / h2 )
209
+ end if
186
210
else
187
- d = sqrt ( f2 )* sqrt ( h2 )
211
+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
212
+ ! Moreover,
213
+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
214
+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
215
+ ! Also,
216
+ ! g2 >> f2, which means that h2 = g2.
217
+ d = sqrt ( f2 * h2 )
218
+ c = f2 / d
219
+ if ( c >= safmin ) then
220
+ r = f / c
221
+ else
222
+ ! f2 / sqrt(f2 * h2) < safmin, then
223
+ ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
224
+ r = f * ( h2 / d )
225
+ end if
226
+ s = conjg ( g ) * ( f / d )
188
227
end if
189
- p = 1 / d
190
- c = f2* p
191
- s = conjg ( g )* ( f* p )
192
- r = f* ( h2* p )
193
228
else
194
229
!
195
230
! Use scaled algorithm
196
231
!
197
232
u = min ( safmax, max ( safmin, f1, g1 ) )
198
- uu = one / u
199
- gs = g* uu
233
+ gs = g / u
200
234
g2 = ABSSQ( gs )
201
- if ( f1* uu < rtmin ) then
235
+ if ( f1 / u < rtmin ) then
202
236
!
203
237
! f is not well-scaled when scaled by g1.
204
238
! Use a different scaling for f.
205
239
!
206
240
v = min ( safmax, max ( safmin, f1 ) )
207
- vv = one / v
208
- w = v * uu
209
- fs = f* vv
241
+ w = v / u
242
+ fs = f / v
210
243
f2 = ABSSQ( fs )
211
244
h2 = f2* w** 2 + g2
212
245
else
213
246
!
214
247
! Otherwise use the same scaling for f and g.
215
248
!
216
249
w = one
217
- fs = f* uu
250
+ fs = f / u
218
251
f2 = ABSSQ( fs )
219
252
h2 = f2 + g2
220
253
end if
221
- if ( f2 > rtmin .and. h2 < rtmax ) then
222
- d = sqrt ( f2* h2 )
254
+ ! safmin <= f2 <= h2 <= safmax
255
+ if ( f2 >= h2 * safmin ) then
256
+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
257
+ c = sqrt ( f2 / h2 )
258
+ r = fs / c
259
+ rtmax = rtmax * 2
260
+ if ( f2 > rtmin .and. h2 < rtmax ) then
261
+ ! safmin <= sqrt( f2*h2 ) <= safmax
262
+ s = conjg ( gs ) * ( fs / sqrt ( f2* h2 ) )
263
+ else
264
+ s = conjg ( gs ) * ( r / h2 )
265
+ end if
223
266
else
224
- d = sqrt ( f2 )* sqrt ( h2 )
267
+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
268
+ ! Moreover,
269
+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
270
+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
271
+ ! Also,
272
+ ! g2 >> f2, which means that h2 = g2.
273
+ d = sqrt ( f2 * h2 )
274
+ c = f2 / d
275
+ if ( c >= safmin ) then
276
+ r = fs / c
277
+ else
278
+ ! f2 / sqrt(f2 * h2) < safmin, then
279
+ ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
280
+ r = fs * ( h2 / d )
281
+ end if
282
+ s = conjg ( gs ) * ( fs / d )
225
283
end if
226
- p = 1 / d
227
- c = ( f2* p )* w
228
- s = conjg ( gs )* ( fs* p )
229
- r = ( fs* ( h2* p ) )* u
284
+ ! Rescale c and r
285
+ c = c * w
286
+ r = r * u
230
287
end if
231
288
end if
232
289
return
0 commit comments