Skip to content

Commit fea1d4f

Browse files
authored
Merge pull request #4294 from martin-frbg/lapack909
Fix accumulation in LAPACK ?LASSQ (Reference-LAPACK PR 909)
2 parents 6f11992 + f6ec777 commit fea1d4f

File tree

4 files changed

+160
-164
lines changed

4 files changed

+160
-164
lines changed

lapack-netlib/SRC/classq.f90

Lines changed: 40 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -34,28 +34,15 @@
3434
!>
3535
!> \verbatim
3636
!>
37-
!> CLASSQ returns the values scl and smsq such that
37+
!> CLASSQ returns the values scale_out and sumsq_out such that
3838
!>
39-
!> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
39+
!> (scale_out**2)*sumsq_out = x( 1 )**2 +...+ x( n )**2 + (scale**2)*sumsq,
4040
!>
41-
!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is
41+
!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is
4242
!> assumed to be non-negative.
4343
!>
4444
!> scale and sumsq must be supplied in SCALE and SUMSQ and
45-
!> scl and smsq are overwritten on SCALE and SUMSQ respectively.
46-
!>
47-
!> If scale * sqrt( sumsq ) > tbig then
48-
!> we require: scale >= sqrt( TINY*EPS ) / sbig on entry,
49-
!> and if 0 < scale * sqrt( sumsq ) < tsml then
50-
!> we require: scale <= sqrt( HUGE ) / ssml on entry,
51-
!> where
52-
!> tbig -- upper threshold for values whose square is representable;
53-
!> sbig -- scaling constant for big numbers; \see la_constants.f90
54-
!> tsml -- lower threshold for values whose square is representable;
55-
!> ssml -- scaling constant for small numbers; \see la_constants.f90
56-
!> and
57-
!> TINY*EPS -- tiniest representable number;
58-
!> HUGE -- biggest representable number.
45+
!> scale_out and sumsq_out are overwritten on SCALE and SUMSQ respectively.
5946
!>
6047
!> \endverbatim
6148
!
@@ -72,7 +59,7 @@
7259
!> \verbatim
7360
!> X is COMPLEX array, dimension (1+(N-1)*abs(INCX))
7461
!> The vector for which a scaled sum of squares is computed.
75-
!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
62+
!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
7663
!> \endverbatim
7764
!>
7865
!> \param[in] INCX
@@ -82,24 +69,24 @@
8269
!> If INCX > 0, X(1+(i-1)*INCX) = x(i) for 1 <= i <= n
8370
!> If INCX < 0, X(1-(n-i)*INCX) = x(i) for 1 <= i <= n
8471
!> If INCX = 0, x isn't a vector so there is no need to call
85-
!> this subroutine. If you call it anyway, it will count x(1)
72+
!> this subroutine. If you call it anyway, it will count x(1)
8673
!> in the vector norm N times.
8774
!> \endverbatim
8875
!>
8976
!> \param[in,out] SCALE
9077
!> \verbatim
9178
!> SCALE is REAL
92-
!> On entry, the value scale in the equation above.
93-
!> On exit, SCALE is overwritten with scl , the scaling factor
79+
!> On entry, the value scale in the equation above.
80+
!> On exit, SCALE is overwritten by scale_out, the scaling factor
9481
!> for the sum of squares.
9582
!> \endverbatim
9683
!>
9784
!> \param[in,out] SUMSQ
9885
!> \verbatim
9986
!> SUMSQ is REAL
100-
!> On entry, the value sumsq in the equation above.
101-
!> On exit, SUMSQ is overwritten with smsq , the basic sum of
102-
!> squares from which scl has been factored out.
87+
!> On entry, the value sumsq in the equation above.
88+
!> On exit, SUMSQ is overwritten by sumsq_out, the basic sum of
89+
!> squares from which scale_out has been factored out.
10390
!> \endverbatim
10491
!
10592
! Authors:
@@ -130,10 +117,10 @@
130117
!>
131118
!> \endverbatim
132119
!
133-
!> \ingroup OTHERauxiliary
120+
!> \ingroup lassq
134121
!
135122
! =====================================================================
136-
subroutine CLASSQ( n, x, incx, scl, sumsq )
123+
subroutine CLASSQ( n, x, incx, scale, sumsq )
137124
use LA_CONSTANTS, &
138125
only: wp=>sp, zero=>szero, one=>sone, &
139126
sbig=>ssbig, ssml=>sssml, tbig=>stbig, tsml=>stsml
@@ -145,7 +132,7 @@ subroutine CLASSQ( n, x, incx, scl, sumsq )
145132
!
146133
! .. Scalar Arguments ..
147134
integer :: incx, n
148-
real(wp) :: scl, sumsq
135+
real(wp) :: scale, sumsq
149136
! ..
150137
! .. Array Arguments ..
151138
complex(wp) :: x(*)
@@ -158,10 +145,10 @@ subroutine CLASSQ( n, x, incx, scl, sumsq )
158145
!
159146
! Quick return if possible
160147
!
161-
if( LA_ISNAN(scl) .or. LA_ISNAN(sumsq) ) return
162-
if( sumsq == zero ) scl = one
163-
if( scl == zero ) then
164-
scl = one
148+
if( LA_ISNAN(scale) .or. LA_ISNAN(sumsq) ) return
149+
if( sumsq == zero ) scale = one
150+
if( scale == zero ) then
151+
scale = one
165152
sumsq = zero
166153
end if
167154
if (n <= 0) then
@@ -207,15 +194,27 @@ subroutine CLASSQ( n, x, incx, scl, sumsq )
207194
! Put the existing sum of squares into one of the accumulators
208195
!
209196
if( sumsq > zero ) then
210-
ax = scl*sqrt( sumsq )
197+
ax = scale*sqrt( sumsq )
211198
if (ax > tbig) then
212-
! We assume scl >= sqrt( TINY*EPS ) / sbig
213-
abig = abig + (scl*sbig)**2 * sumsq
199+
if (scale > one) then
200+
scale = scale * sbig
201+
abig = abig + scale * (scale * sumsq)
202+
else
203+
! sumsq > tbig^2 => (sbig * (sbig * sumsq)) is representable
204+
abig = abig + scale * (scale * (sbig * (sbig * sumsq)))
205+
end if
214206
else if (ax < tsml) then
215-
! We assume scl <= sqrt( HUGE ) / ssml
216-
if (notbig) asml = asml + (scl*ssml)**2 * sumsq
207+
if (notbig) then
208+
if (scale < one) then
209+
scale = scale * ssml
210+
asml = asml + scale * (scale * sumsq)
211+
else
212+
! sumsq < tsml^2 => (ssml * (ssml * sumsq)) is representable
213+
asml = asml + scale * (scale * (ssml * (ssml * sumsq)))
214+
end if
215+
end if
217216
else
218-
amed = amed + scl**2 * sumsq
217+
amed = amed + scale * (scale * sumsq)
219218
end if
220219
end if
221220
!
@@ -229,7 +228,7 @@ subroutine CLASSQ( n, x, incx, scl, sumsq )
229228
if (amed > zero .or. LA_ISNAN(amed)) then
230229
abig = abig + (amed*sbig)*sbig
231230
end if
232-
scl = one / sbig
231+
scale = one / sbig
233232
sumsq = abig
234233
else if (asml > zero) then
235234
!
@@ -245,17 +244,17 @@ subroutine CLASSQ( n, x, incx, scl, sumsq )
245244
ymin = asml
246245
ymax = amed
247246
end if
248-
scl = one
247+
scale = one
249248
sumsq = ymax**2*( one + (ymin/ymax)**2 )
250249
else
251-
scl = one / ssml
250+
scale = one / ssml
252251
sumsq = asml
253252
end if
254253
else
255254
!
256255
! Otherwise all values are mid-range or zero
257256
!
258-
scl = one
257+
scale = one
259258
sumsq = amed
260259
end if
261260
return

lapack-netlib/SRC/dlassq.f90

Lines changed: 40 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -34,28 +34,15 @@
3434
!>
3535
!> \verbatim
3636
!>
37-
!> DLASSQ returns the values scl and smsq such that
37+
!> DLASSQ returns the values scale_out and sumsq_out such that
3838
!>
39-
!> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
39+
!> (scale_out**2)*sumsq_out = x( 1 )**2 +...+ x( n )**2 + (scale**2)*sumsq,
4040
!>
41-
!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is
41+
!> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is
4242
!> assumed to be non-negative.
4343
!>
4444
!> scale and sumsq must be supplied in SCALE and SUMSQ and
45-
!> scl and smsq are overwritten on SCALE and SUMSQ respectively.
46-
!>
47-
!> If scale * sqrt( sumsq ) > tbig then
48-
!> we require: scale >= sqrt( TINY*EPS ) / sbig on entry,
49-
!> and if 0 < scale * sqrt( sumsq ) < tsml then
50-
!> we require: scale <= sqrt( HUGE ) / ssml on entry,
51-
!> where
52-
!> tbig -- upper threshold for values whose square is representable;
53-
!> sbig -- scaling constant for big numbers; \see la_constants.f90
54-
!> tsml -- lower threshold for values whose square is representable;
55-
!> ssml -- scaling constant for small numbers; \see la_constants.f90
56-
!> and
57-
!> TINY*EPS -- tiniest representable number;
58-
!> HUGE -- biggest representable number.
45+
!> scale_out and sumsq_out are overwritten on SCALE and SUMSQ respectively.
5946
!>
6047
!> \endverbatim
6148
!
@@ -72,7 +59,7 @@
7259
!> \verbatim
7360
!> X is DOUBLE PRECISION array, dimension (1+(N-1)*abs(INCX))
7461
!> The vector for which a scaled sum of squares is computed.
75-
!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
62+
!> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
7663
!> \endverbatim
7764
!>
7865
!> \param[in] INCX
@@ -82,24 +69,24 @@
8269
!> If INCX > 0, X(1+(i-1)*INCX) = x(i) for 1 <= i <= n
8370
!> If INCX < 0, X(1-(n-i)*INCX) = x(i) for 1 <= i <= n
8471
!> If INCX = 0, x isn't a vector so there is no need to call
85-
!> this subroutine. If you call it anyway, it will count x(1)
72+
!> this subroutine. If you call it anyway, it will count x(1)
8673
!> in the vector norm N times.
8774
!> \endverbatim
8875
!>
8976
!> \param[in,out] SCALE
9077
!> \verbatim
9178
!> SCALE is DOUBLE PRECISION
92-
!> On entry, the value scale in the equation above.
93-
!> On exit, SCALE is overwritten with scl , the scaling factor
79+
!> On entry, the value scale in the equation above.
80+
!> On exit, SCALE is overwritten by scale_out, the scaling factor
9481
!> for the sum of squares.
9582
!> \endverbatim
9683
!>
9784
!> \param[in,out] SUMSQ
9885
!> \verbatim
9986
!> SUMSQ is DOUBLE PRECISION
100-
!> On entry, the value sumsq in the equation above.
101-
!> On exit, SUMSQ is overwritten with smsq , the basic sum of
102-
!> squares from which scl has been factored out.
87+
!> On entry, the value sumsq in the equation above.
88+
!> On exit, SUMSQ is overwritten by sumsq_out, the basic sum of
89+
!> squares from which scale_out has been factored out.
10390
!> \endverbatim
10491
!
10592
! Authors:
@@ -130,10 +117,10 @@
130117
!>
131118
!> \endverbatim
132119
!
133-
!> \ingroup OTHERauxiliary
120+
!> \ingroup lassq
134121
!
135122
! =====================================================================
136-
subroutine DLASSQ( n, x, incx, scl, sumsq )
123+
subroutine DLASSQ( n, x, incx, scale, sumsq )
137124
use LA_CONSTANTS, &
138125
only: wp=>dp, zero=>dzero, one=>done, &
139126
sbig=>dsbig, ssml=>dssml, tbig=>dtbig, tsml=>dtsml
@@ -145,7 +132,7 @@ subroutine DLASSQ( n, x, incx, scl, sumsq )
145132
!
146133
! .. Scalar Arguments ..
147134
integer :: incx, n
148-
real(wp) :: scl, sumsq
135+
real(wp) :: scale, sumsq
149136
! ..
150137
! .. Array Arguments ..
151138
real(wp) :: x(*)
@@ -158,10 +145,10 @@ subroutine DLASSQ( n, x, incx, scl, sumsq )
158145
!
159146
! Quick return if possible
160147
!
161-
if( LA_ISNAN(scl) .or. LA_ISNAN(sumsq) ) return
162-
if( sumsq == zero ) scl = one
163-
if( scl == zero ) then
164-
scl = one
148+
if( LA_ISNAN(scale) .or. LA_ISNAN(sumsq) ) return
149+
if( sumsq == zero ) scale = one
150+
if( scale == zero ) then
151+
scale = one
165152
sumsq = zero
166153
end if
167154
if (n <= 0) then
@@ -198,15 +185,27 @@ subroutine DLASSQ( n, x, incx, scl, sumsq )
198185
! Put the existing sum of squares into one of the accumulators
199186
!
200187
if( sumsq > zero ) then
201-
ax = scl*sqrt( sumsq )
188+
ax = scale*sqrt( sumsq )
202189
if (ax > tbig) then
203-
! We assume scl >= sqrt( TINY*EPS ) / sbig
204-
abig = abig + (scl*sbig)**2 * sumsq
190+
if (scale > one) then
191+
scale = scale * sbig
192+
abig = abig + scale * (scale * sumsq)
193+
else
194+
! sumsq > tbig^2 => (sbig * (sbig * sumsq)) is representable
195+
abig = abig + scale * (scale * (sbig * (sbig * sumsq)))
196+
end if
205197
else if (ax < tsml) then
206-
! We assume scl <= sqrt( HUGE ) / ssml
207-
if (notbig) asml = asml + (scl*ssml)**2 * sumsq
198+
if (notbig) then
199+
if (scale < one) then
200+
scale = scale * ssml
201+
asml = asml + scale * (scale * sumsq)
202+
else
203+
! sumsq < tsml^2 => (ssml * (ssml * sumsq)) is representable
204+
asml = asml + scale * (scale * (ssml * (ssml * sumsq)))
205+
end if
206+
end if
208207
else
209-
amed = amed + scl**2 * sumsq
208+
amed = amed + scale * (scale * sumsq)
210209
end if
211210
end if
212211
!
@@ -220,7 +219,7 @@ subroutine DLASSQ( n, x, incx, scl, sumsq )
220219
if (amed > zero .or. LA_ISNAN(amed)) then
221220
abig = abig + (amed*sbig)*sbig
222221
end if
223-
scl = one / sbig
222+
scale = one / sbig
224223
sumsq = abig
225224
else if (asml > zero) then
226225
!
@@ -236,17 +235,17 @@ subroutine DLASSQ( n, x, incx, scl, sumsq )
236235
ymin = asml
237236
ymax = amed
238237
end if
239-
scl = one
238+
scale = one
240239
sumsq = ymax**2*( one + (ymin/ymax)**2 )
241240
else
242-
scl = one / ssml
241+
scale = one / ssml
243242
sumsq = asml
244243
end if
245244
else
246245
!
247246
! Otherwise all values are mid-range or zero
248247
!
249-
scl = one
248+
scale = one
250249
sumsq = amed
251250
end if
252251
return

0 commit comments

Comments
 (0)