Skip to content

Commit d926519

Browse files
authored
Merge pull request #480 from thijssteel/optimal-packing-xlaqr5
Optimal packing xlaqr5
2 parents 643f6db + b4873c1 commit d926519

File tree

16 files changed

+1256
-1659
lines changed

16 files changed

+1256
-1659
lines changed

SRC/chseqr.f

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -320,10 +320,10 @@ SUBROUTINE CHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, W, Z, LDZ,
320320
* . CLAHQR because of insufficient subdiagonal scratch space.
321321
* . (This is a hard limit.) ====
322322
INTEGER NTINY
323-
PARAMETER ( NTINY = 11 )
323+
PARAMETER ( NTINY = 15 )
324324
*
325325
* ==== NL allocates some local workspace to help small matrices
326-
* . through a rare CLAHQR failure. NL > NTINY = 11 is
326+
* . through a rare CLAHQR failure. NL > NTINY = 15 is
327327
* . required and NL <= NMIN = ILAENV(ISPEC=12,...) is recom-
328328
* . mended. (The default value of NMIN is 75.) Using NL = 49
329329
* . allows up to six simultaneous shifts and a 16-by-16

SRC/claqr0.f

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ SUBROUTINE CLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ,
260260
* . CLAHQR because of insufficient subdiagonal scratch space.
261261
* . (This is a hard limit.) ====
262262
INTEGER NTINY
263-
PARAMETER ( NTINY = 11 )
263+
PARAMETER ( NTINY = 15 )
264264
*
265265
* ==== Exceptional deflation windows: try to cure rare
266266
* . slow convergence by varying the size of the
@@ -355,22 +355,22 @@ SUBROUTINE CLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ,
355355
END IF
356356
*
357357
* ==== NWR = recommended deflation window size. At this
358-
* . point, N .GT. NTINY = 11, so there is enough
358+
* . point, N .GT. NTINY = 15, so there is enough
359359
* . subdiagonal workspace for NWR.GE.2 as required.
360360
* . (In fact, there is enough subdiagonal space for
361-
* . NWR.GE.3.) ====
361+
* . NWR.GE.4.) ====
362362
*
363363
NWR = ILAENV( 13, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK )
364364
NWR = MAX( 2, NWR )
365365
NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR )
366366
*
367367
* ==== NSR = recommended number of simultaneous shifts.
368-
* . At this point N .GT. NTINY = 11, so there is at
368+
* . At this point N .GT. NTINY = 15, so there is
369369
* . enough subdiagonal workspace for NSR to be even
370370
* . and greater than or equal to two as required. ====
371371
*
372372
NSR = ILAENV( 15, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK )
373-
NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO )
373+
NSR = MIN( NSR, ( N-3 ) / 6, IHI-ILO )
374374
NSR = MAX( 2, NSR-MOD( NSR, 2 ) )
375375
*
376376
* ==== Estimate optimal workspace ====
@@ -418,7 +418,7 @@ SUBROUTINE CLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ,
418418
* ==== NSMAX = the largest number of simultaneous shifts
419419
* . for which there is sufficient workspace. ====
420420
*
421-
NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 )
421+
NSMAX = MIN( ( N-3 ) / 6, 2*LWORK / 3 )
422422
NSMAX = NSMAX - MOD( NSMAX, 2 )
423423
*
424424
* ==== NDFL: an iteration count restarted at deflation. ====
@@ -659,7 +659,7 @@ SUBROUTINE CLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ,
659659
* . (NVE-by-KDU) vertical work WV arrow along
660660
* . the left-hand-edge. ====
661661
*
662-
KDU = 3*NS - 3
662+
KDU = 2*NS
663663
KU = N - KDU + 1
664664
KWH = KDU + 1
665665
NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1

SRC/claqr4.f

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ SUBROUTINE CLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ,
270270
* . CLAHQR because of insufficient subdiagonal scratch space.
271271
* . (This is a hard limit.) ====
272272
INTEGER NTINY
273-
PARAMETER ( NTINY = 11 )
273+
PARAMETER ( NTINY = 15 )
274274
*
275275
* ==== Exceptional deflation windows: try to cure rare
276276
* . slow convergence by varying the size of the
@@ -365,17 +365,17 @@ SUBROUTINE CLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ,
365365
END IF
366366
*
367367
* ==== NWR = recommended deflation window size. At this
368-
* . point, N .GT. NTINY = 11, so there is enough
368+
* . point, N .GT. NTINY = 15, so there is enough
369369
* . subdiagonal workspace for NWR.GE.2 as required.
370370
* . (In fact, there is enough subdiagonal space for
371-
* . NWR.GE.3.) ====
371+
* . NWR.GE.4.) ====
372372
*
373373
NWR = ILAENV( 13, 'CLAQR4', JBCMPZ, N, ILO, IHI, LWORK )
374374
NWR = MAX( 2, NWR )
375375
NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR )
376376
*
377377
* ==== NSR = recommended number of simultaneous shifts.
378-
* . At this point N .GT. NTINY = 11, so there is at
378+
* . At this point N .GT. NTINY = 15, so there is
379379
* . enough subdiagonal workspace for NSR to be even
380380
* . and greater than or equal to two as required. ====
381381
*
@@ -663,7 +663,7 @@ SUBROUTINE CLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ,
663663
* . (NVE-by-KDU) vertical work WV arrow along
664664
* . the left-hand-edge. ====
665665
*
666-
KDU = 3*NS - 3
666+
KDU = 2*NS
667667
KU = N - KDU + 1
668668
KWH = KDU + 1
669669
NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1

0 commit comments

Comments
 (0)