Skip to content

Commit 0a6cd43

Browse files
committed
Rewrite [ds]hgeqz to use FMA with Householder reflectors
1 parent 9f9295f commit 0a6cd43

File tree

2 files changed

+74
-66
lines changed

2 files changed

+74
-66
lines changed

SRC/dhgeqz.f

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,9 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
337337
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
338338
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
339339
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
340-
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
341-
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
342-
$ WR2
340+
$ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
341+
$ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
342+
$ WABS, WI, WR, WR2
343343
* ..
344344
* .. Local Arrays ..
345345
DOUBLE PRECISION V( 3 )
@@ -1132,25 +1132,27 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
11321132
H( J+2, J-1 ) = ZERO
11331133
END IF
11341134
*
1135+
T2 = TAU*V( 2 )
1136+
T3 = TAU*V( 3 )
11351137
DO 230 JC = J, ILASTM
1136-
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
1137-
$ H( J+2, JC ) )
1138-
H( J, JC ) = H( J, JC ) - TEMP
1139-
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 )
1140-
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 )
1141-
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
1142-
$ T( J+2, JC ) )
1143-
T( J, JC ) = T( J, JC ) - TEMP2
1144-
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 )
1145-
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 )
1138+
TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
1139+
$ H( J+2, JC )
1140+
H( J, JC ) = H( J, JC ) - TEMP*TAU
1141+
H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
1142+
H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
1143+
TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
1144+
$ T( J+2, JC )
1145+
T( J, JC ) = T( J, JC ) - TEMP2*TAU
1146+
T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
1147+
T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
11461148
230 CONTINUE
11471149
IF( ILQ ) THEN
11481150
DO 240 JR = 1, N
1149-
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
1150-
$ Q( JR, J+2 ) )
1151-
Q( JR, J ) = Q( JR, J ) - TEMP
1152-
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
1153-
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
1151+
TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
1152+
$ Q( JR, J+2 )
1153+
Q( JR, J ) = Q( JR, J ) - TEMP*TAU
1154+
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
1155+
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
11541156
240 CONTINUE
11551157
END IF
11561158
*
@@ -1238,27 +1240,29 @@ SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
12381240
*
12391241
* Apply transformations from the right.
12401242
*
1243+
T2 = TAU*V(2)
1244+
T3 = TAU*V(3)
12411245
DO 260 JR = IFRSTM, MIN( J+3, ILAST )
1242-
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
1243-
$ H( JR, J+2 ) )
1244-
H( JR, J ) = H( JR, J ) - TEMP
1245-
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 )
1246-
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 )
1246+
TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
1247+
$ H( JR, J+2 )
1248+
H( JR, J ) = H( JR, J ) - TEMP*TAU
1249+
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
1250+
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
12471251
260 CONTINUE
12481252
DO 270 JR = IFRSTM, J + 2
1249-
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
1250-
$ T( JR, J+2 ) )
1251-
T( JR, J ) = T( JR, J ) - TEMP
1252-
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 )
1253-
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 )
1253+
TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
1254+
$ T( JR, J+2 )
1255+
T( JR, J ) = T( JR, J ) - TEMP*TAU
1256+
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
1257+
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
12541258
270 CONTINUE
12551259
IF( ILZ ) THEN
12561260
DO 280 JR = 1, N
1257-
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
1258-
$ Z( JR, J+2 ) )
1259-
Z( JR, J ) = Z( JR, J ) - TEMP
1260-
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
1261-
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
1261+
TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
1262+
$ Z( JR, J+2 )
1263+
Z( JR, J ) = Z( JR, J ) - TEMP*TAU
1264+
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
1265+
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
12621266
280 CONTINUE
12631267
END IF
12641268
T( J+1, J ) = ZERO

SRC/shgeqz.f

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,9 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
337337
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
338338
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
339339
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
340-
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
341-
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
342-
$ WR2
340+
$ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
341+
$ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
342+
$ WABS, WI, WR, WR2
343343
* ..
344344
* .. Local Arrays ..
345345
REAL V( 3 )
@@ -1132,25 +1132,27 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
11321132
H( J+2, J-1 ) = ZERO
11331133
END IF
11341134
*
1135+
T2 = TAU * V( 2 )
1136+
T3 = TAU * V( 3 )
11351137
DO 230 JC = J, ILASTM
1136-
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
1137-
$ H( J+2, JC ) )
1138-
H( J, JC ) = H( J, JC ) - TEMP
1139-
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 )
1140-
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 )
1141-
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
1142-
$ T( J+2, JC ) )
1143-
T( J, JC ) = T( J, JC ) - TEMP2
1144-
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 )
1145-
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 )
1138+
TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
1139+
$ H( J+2, JC )
1140+
H( J, JC ) = H( J, JC ) - TEMP*TAU
1141+
H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
1142+
H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
1143+
TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
1144+
$ T( J+2, JC )
1145+
T( J, JC ) = T( J, JC ) - TEMP2*TAU
1146+
T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
1147+
T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
11461148
230 CONTINUE
11471149
IF( ILQ ) THEN
11481150
DO 240 JR = 1, N
1149-
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
1150-
$ Q( JR, J+2 ) )
1151-
Q( JR, J ) = Q( JR, J ) - TEMP
1152-
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
1153-
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
1151+
TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
1152+
$ Q( JR, J+2 )
1153+
Q( JR, J ) = Q( JR, J ) - TEMP*TAU
1154+
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
1155+
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
11541156
240 CONTINUE
11551157
END IF
11561158
*
@@ -1238,27 +1240,29 @@ SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT,
12381240
*
12391241
* Apply transformations from the right.
12401242
*
1243+
T2 = TAU*V( 2 )
1244+
T3 = TAU*V( 3 )
12411245
DO 260 JR = IFRSTM, MIN( J+3, ILAST )
1242-
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
1243-
$ H( JR, J+2 ) )
1244-
H( JR, J ) = H( JR, J ) - TEMP
1245-
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 )
1246-
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 )
1246+
TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
1247+
$ H( JR, J+2 )
1248+
H( JR, J ) = H( JR, J ) - TEMP*TAU
1249+
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
1250+
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
12471251
260 CONTINUE
12481252
DO 270 JR = IFRSTM, J + 2
1249-
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
1250-
$ T( JR, J+2 ) )
1251-
T( JR, J ) = T( JR, J ) - TEMP
1252-
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 )
1253-
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 )
1253+
TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
1254+
$ T( JR, J+2 )
1255+
T( JR, J ) = T( JR, J ) - TEMP*TAU
1256+
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
1257+
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
12541258
270 CONTINUE
12551259
IF( ILZ ) THEN
12561260
DO 280 JR = 1, N
1257-
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
1258-
$ Z( JR, J+2 ) )
1259-
Z( JR, J ) = Z( JR, J ) - TEMP
1260-
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
1261-
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
1261+
TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
1262+
$ Z( JR, J+2 )
1263+
Z( JR, J ) = Z( JR, J ) - TEMP*TAU
1264+
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
1265+
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
12621266
280 CONTINUE
12631267
END IF
12641268
T( J+1, J ) = ZERO

0 commit comments

Comments
 (0)