Skip to content

Commit eef896a

Browse files
authored
Merge pull request #420 from thijssteel/shur-swap
Accuracy improvements to xtgex2
2 parents e56b31e + 7294b77 commit eef896a

File tree

13 files changed

+917
-87
lines changed

13 files changed

+917
-87
lines changed

SRC/ctgex2.f

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,8 @@ SUBROUTINE CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
220220
* .. Local Scalars ..
221221
LOGICAL STRONG, WEAK
222222
INTEGER I, M
223-
REAL CQ, CZ, EPS, SA, SB, SCALE, SMLNUM, SS, SUM,
224-
$ THRESH, WS
223+
REAL CQ, CZ, EPS, SA, SB, SCALE, SMLNUM, SUM,
224+
$ THRESHA, THRESHB
225225
COMPLEX CDUM, F, G, SQ, SZ
226226
* ..
227227
* .. Local Arrays ..
@@ -263,8 +263,12 @@ SUBROUTINE CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
263263
SUM = REAL( CONE )
264264
CALL CLACPY( 'Full', M, M, S, LDST, WORK, M )
265265
CALL CLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M )
266-
CALL CLASSQ( 2*M*M, WORK, 1, SCALE, SUM )
266+
CALL CLASSQ( M*M, WORK, 1, SCALE, SUM )
267267
SA = SCALE*SQRT( SUM )
268+
SCALE = DBLE( CZERO )
269+
SUM = DBLE( CONE )
270+
CALL CLASSQ( M*M, WORK(M*M+1), 1, SCALE, SUM )
271+
SB = SCALE*SQRT( SUM )
268272
*
269273
* THRES has been changed from
270274
* THRESH = MAX( TEN*EPS*SA, SMLNUM )
@@ -274,15 +278,16 @@ SUBROUTINE CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
274278
* "Bug" reported by Ondra Kamenik, confirmed by Julie Langou, fixed by
275279
* Jim Demmel and Guillaume Revy. See forum post 1783.
276280
*
277-
THRESH = MAX( TWENTY*EPS*SA, SMLNUM )
281+
THRESHA = MAX( TWENTY*EPS*SA, SMLNUM )
282+
THRESHB = MAX( TWENTY*EPS*SB, SMLNUM )
278283
*
279284
* Compute unitary QL and RQ that swap 1-by-1 and 1-by-1 blocks
280285
* using Givens rotations and perform the swap tentatively.
281286
*
282287
F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 )
283288
G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 )
284-
SA = ABS( S( 2, 2 ) )
285-
SB = ABS( T( 2, 2 ) )
289+
SA = ABS( S( 2, 2 ) ) * ABS( T( 1, 1 ) )
290+
SB = ABS( S( 1, 1 ) ) * ABS( T( 2, 2 ) )
286291
CALL CLARTG( G, F, CZ, SZ, CDUM )
287292
SZ = -SZ
288293
CALL CROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, CZ, CONJG( SZ ) )
@@ -295,10 +300,11 @@ SUBROUTINE CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
295300
CALL CROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, CQ, SQ )
296301
CALL CROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, CQ, SQ )
297302
*
298-
* Weak stability test: |S21| + |T21| <= O(EPS F-norm((S, T)))
303+
* Weak stability test: |S21| <= O(EPS F-norm((A)))
304+
* and |T21| <= O(EPS F-norm((B)))
299305
*
300-
WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) )
301-
WEAK = WS.LE.THRESH
306+
WEAK = ABS( S( 2, 1 ) ).LE.THRESHA .AND.
307+
$ ABS( T( 2, 1 ) ).LE.THRESHB
302308
IF( .NOT.WEAK )
303309
$ GO TO 20
304310
*
@@ -319,11 +325,15 @@ SUBROUTINE CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
319325
WORK( I+4 ) = WORK( I+4 ) - B( J1+I-1, J1 )
320326
WORK( I+6 ) = WORK( I+6 ) - B( J1+I-1, J1+1 )
321327
10 CONTINUE
322-
SCALE = REAL( CZERO )
323-
SUM = REAL( CONE )
324-
CALL CLASSQ( 2*M*M, WORK, 1, SCALE, SUM )
325-
SS = SCALE*SQRT( SUM )
326-
STRONG = SS.LE.THRESH
328+
SCALE = DBLE( CZERO )
329+
SUM = DBLE( CONE )
330+
CALL CLASSQ( M*M, WORK, 1, SCALE, SUM )
331+
SA = SCALE*SQRT( SUM )
332+
SCALE = DBLE( CZERO )
333+
SUM = DBLE( CONE )
334+
CALL CLASSQ( M*M, WORK(M*M+1), 1, SCALE, SUM )
335+
SB = SCALE*SQRT( SUM )
336+
STRONG = SA.LE.THRESHA .AND. SB.LE.THRESHB
327337
IF( .NOT.STRONG )
328338
$ GO TO 20
329339
END IF

SRC/dtgex2.f

Lines changed: 40 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -250,10 +250,11 @@ SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
250250
PARAMETER ( WANDS = .TRUE. )
251251
* ..
252252
* .. Local Scalars ..
253-
LOGICAL DTRONG, WEAK
253+
LOGICAL STRONG, WEAK
254254
INTEGER I, IDUM, LINFO, M
255-
DOUBLE PRECISION BQRA21, BRQA21, DDUM, DNORM, DSCALE, DSUM, EPS,
256-
$ F, G, SA, SB, SCALE, SMLNUM, SS, THRESH, WS
255+
DOUBLE PRECISION BQRA21, BRQA21, DDUM, DNORMA, DNORMB, DSCALE,
256+
$ DSUM, EPS, F, G, SA, SB, SCALE, SMLNUM,
257+
$ THRESHA, THRESHB
257258
* ..
258259
* .. Local Arrays ..
259260
INTEGER IWORK( LDST )
@@ -293,7 +294,7 @@ SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
293294
END IF
294295
*
295296
WEAK = .FALSE.
296-
DTRONG = .FALSE.
297+
STRONG = .FALSE.
297298
*
298299
* Make a local copy of selected block
299300
*
@@ -310,9 +311,12 @@ SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
310311
DSUM = ONE
311312
CALL DLACPY( 'Full', M, M, S, LDST, WORK, M )
312313
CALL DLASSQ( M*M, WORK, 1, DSCALE, DSUM )
314+
DNORMA = DSCALE*SQRT( DSUM )
315+
DSCALE = ZERO
316+
DSUM = ONE
313317
CALL DLACPY( 'Full', M, M, T, LDST, WORK, M )
314318
CALL DLASSQ( M*M, WORK, 1, DSCALE, DSUM )
315-
DNORM = DSCALE*SQRT( DSUM )
319+
DNORMB = DSCALE*SQRT( DSUM )
316320
*
317321
* THRES has been changed from
318322
* THRESH = MAX( TEN*EPS*SA, SMLNUM )
@@ -322,7 +326,8 @@ SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
322326
* "Bug" reported by Ondra Kamenik, confirmed by Julie Langou, fixed by
323327
* Jim Demmel and Guillaume Revy. See forum post 1783.
324328
*
325-
THRESH = MAX( TWENTY*EPS*DNORM, SMLNUM )
329+
THRESHA = MAX( TWENTY*EPS*DNORMA, SMLNUM )
330+
THRESHB = MAX( TWENTY*EPS*DNORMB, SMLNUM )
326331
*
327332
IF( M.EQ.2 ) THEN
328333
*
@@ -333,8 +338,8 @@ SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
333338
*
334339
F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 )
335340
G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 )
336-
SB = ABS( T( 2, 2 ) )
337-
SA = ABS( S( 2, 2 ) )
341+
SA = ABS( S( 2, 2 ) ) * ABS( T( 1, 1 ) )
342+
SB = ABS( S( 1, 1 ) ) * ABS( T( 2, 2 ) )
338343
CALL DLARTG( F, G, IR( 1, 2 ), IR( 1, 1 ), DDUM )
339344
IR( 2, 1 ) = -IR( 1, 2 )
340345
IR( 2, 2 ) = IR( 1, 1 )
@@ -356,18 +361,20 @@ SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
356361
LI( 2, 2 ) = LI( 1, 1 )
357362
LI( 1, 2 ) = -LI( 2, 1 )
358363
*
359-
* Weak stability test:
360-
* |S21| + |T21| <= O(EPS * F-norm((S, T)))
364+
* Weak stability test: |S21| <= O(EPS F-norm((A)))
365+
* and |T21| <= O(EPS F-norm((B)))
361366
*
362-
WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) )
363-
WEAK = WS.LE.THRESH
367+
WEAK = ABS( S( 2, 1 ) ) .LE. THRESHA .AND.
368+
$ ABS( T( 2, 1 ) ) .LE. THRESHB
364369
IF( .NOT.WEAK )
365370
$ GO TO 70
366371
*
367372
IF( WANDS ) THEN
368373
*
369374
* Strong stability test:
370-
* F-norm((A-QL**T*S*QR, B-QL**T*T*QR)) <= O(EPS*F-norm((A,B)))
375+
* F-norm((A-QL**H*S*QR)) <= O(EPS*F-norm((A)))
376+
* and
377+
* F-norm((B-QL**H*T*QR)) <= O(EPS*F-norm((B)))
371378
*
372379
CALL DLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ),
373380
$ M )
@@ -378,17 +385,20 @@ SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
378385
DSCALE = ZERO
379386
DSUM = ONE
380387
CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
388+
SA = DSCALE*SQRT( DSUM )
381389
*
382390
CALL DLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ),
383391
$ M )
384392
CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO,
385393
$ WORK, M )
386394
CALL DGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE,
387395
$ WORK( M*M+1 ), M )
396+
DSCALE = ZERO
397+
DSUM = ONE
388398
CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
389-
SS = DSCALE*SQRT( DSUM )
390-
DTRONG = SS.LE.THRESH
391-
IF( .NOT.DTRONG )
399+
SB = DSCALE*SQRT( DSUM )
400+
STRONG = SA.LE.THRESHA .AND. SB.LE.THRESHB
401+
IF( .NOT.STRONG )
392402
$ GO TO 70
393403
END IF
394404
*
@@ -439,6 +449,8 @@ SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
439449
$ IR( N2+1, N1+1 ), LDST, T, LDST, T( N1+1, N1+1 ),
440450
$ LDST, LI, LDST, SCALE, DSUM, DSCALE, IWORK, IDUM,
441451
$ LINFO )
452+
IF( LINFO.NE.0 )
453+
$ GO TO 70
442454
*
443455
* Compute orthogonal matrix QL:
444456
*
@@ -538,14 +550,14 @@ SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
538550
*
539551
* Decide which method to use.
540552
* Weak stability test:
541-
* F-norm(S21) <= O(EPS * F-norm((S, T)))
553+
* F-norm(S21) <= O(EPS * F-norm((S)))
542554
*
543-
IF( BQRA21.LE.BRQA21 .AND. BQRA21.LE.THRESH ) THEN
555+
IF( BQRA21.LE.BRQA21 .AND. BQRA21.LE.THRESHA ) THEN
544556
CALL DLACPY( 'F', M, M, SCPY, LDST, S, LDST )
545557
CALL DLACPY( 'F', M, M, TCPY, LDST, T, LDST )
546558
CALL DLACPY( 'F', M, M, IRCOP, LDST, IR, LDST )
547559
CALL DLACPY( 'F', M, M, LICOP, LDST, LI, LDST )
548-
ELSE IF( BRQA21.GE.THRESH ) THEN
560+
ELSE IF( BRQA21.GE.THRESHA ) THEN
549561
GO TO 70
550562
END IF
551563
*
@@ -556,7 +568,9 @@ SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
556568
IF( WANDS ) THEN
557569
*
558570
* Strong stability test:
559-
* F-norm((A-QL*S*QR**T, B-QL*T*QR**T)) <= O(EPS*F-norm((A,B)))
571+
* F-norm((A-QL**H*S*QR)) <= O(EPS*F-norm((A)))
572+
* and
573+
* F-norm((B-QL**H*T*QR)) <= O(EPS*F-norm((B)))
560574
*
561575
CALL DLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ),
562576
$ M )
@@ -567,17 +581,20 @@ SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
567581
DSCALE = ZERO
568582
DSUM = ONE
569583
CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
584+
SA = DSCALE*SQRT( DSUM )
570585
*
571586
CALL DLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ),
572587
$ M )
573588
CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO,
574589
$ WORK, M )
575590
CALL DGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE,
576591
$ WORK( M*M+1 ), M )
592+
DSCALE = ZERO
593+
DSUM = ONE
577594
CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
578-
SS = DSCALE*SQRT( DSUM )
579-
DTRONG = ( SS.LE.THRESH )
580-
IF( .NOT.DTRONG )
595+
SB = DSCALE*SQRT( DSUM )
596+
STRONG = SA.LE.THRESHA .AND. SB.LE.THRESHB
597+
IF( .NOT.STRONG )
581598
$ GO TO 70
582599
*
583600
END IF

0 commit comments

Comments
 (0)