Skip to content

Commit c8b4cec

Browse files
authored
prevent compilers from using FMA (Reference-LAPACK PR 1033)
1 parent 14a8a9a · commit c8b4cec

File tree

2 files changed: +16 -8 lines changed

2 files changed: +16 -8 lines changed

lapack-netlib/SRC/dlanv2.f

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@
109 109
*> \author Univ. of Colorado Denver
110 110
*> \author NAG Ltd.
111 111
*
112-
*> \ingroup doubleOTHERauxiliary
112+
*> \ingroup lanv2
113 113
*
114 114
*> \par Further Details:
115 115
* =====================
@@ -144,7 +144,7 @@ SUBROUTINE DLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN )
144 144
* ..
145 145
* .. Local Scalars ..
146 146
DOUBLE PRECISION AA, BB, BCMAX, BCMIS, CC, CS1, DD, EPS, P, SAB,
147-
$ SAC, SCALE, SIGMA, SN1, TAU, TEMP, Z, SAFMIN,
147+
$ SAC, SCALE, SIGMA, SN1, TAU, TEMP, Z, SAFMIN,
148 148
$ SAFMN2, SAFMX2
149 149
INTEGER COUNT
150 150
* ..
@@ -248,10 +248,14 @@ SUBROUTINE DLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN )
248 248
*
249 249
* Compute [ A  B ] = [ CS  SN ] [ AA  BB ]
250 250
*         [ C  D ]   [-SN  CS ] [ CC  DD ]
251+
*
252+
* Note: Some of the multiplications are wrapped in parentheses to
253+
*       prevent compilers from using FMA instructions. See
254+
*       https://github.com/Reference-LAPACK/lapack/issues/1031.
251 255
*
252 256
A = AA*CS + CC*SN
253-
B = BB*CS + DD*SN
254-
C = -AA*SN + CC*CS
257+
B = ( BB*CS ) + ( DD*SN )
258+
C = -( AA*SN ) + ( CC*CS )
255 259
D = -BB*SN + DD*CS
256 260
*
257 261
TEMP = HALF*( A+D )

lapack-netlib/SRC/slanv2.f

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@
109 109
*> \author Univ. of Colorado Denver
110 110
*> \author NAG Ltd.
111 111
*
112-
*> \ingroup realOTHERauxiliary
112+
*> \ingroup lanv2
113 113
*
114 114
*> \par Further Details:
115 115
* =====================
@@ -144,7 +144,7 @@ SUBROUTINE SLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN )
144 144
* ..
145 145
* .. Local Scalars ..
146 146
REAL AA, BB, BCMAX, BCMIS, CC, CS1, DD, EPS, P, SAB,
147-
$ SAC, SCALE, SIGMA, SN1, TAU, TEMP, Z, SAFMIN,
147+
$ SAC, SCALE, SIGMA, SN1, TAU, TEMP, Z, SAFMIN,
148 148
$ SAFMN2, SAFMX2
149 149
INTEGER COUNT
150 150
* ..
@@ -248,10 +248,14 @@ SUBROUTINE SLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN )
248 248
*
249 249
* Compute [ A  B ] = [ CS  SN ] [ AA  BB ]
250 250
*         [ C  D ]   [-SN  CS ] [ CC  DD ]
251+
*
252+
* Note: Some of the multiplications are wrapped in parentheses to
253+
*       prevent compilers from using FMA instructions. See
254+
*       https://github.com/Reference-LAPACK/lapack/issues/1031.
251 255
*
252 256
A = AA*CS + CC*SN
253-
B = BB*CS + DD*SN
254-
C = -AA*SN + CC*CS
257+
B = ( BB*CS ) + ( DD*SN )
258+
C = -( AA*SN ) + ( CC*CS )
255 259
D = -BB*SN + DD*CS
256 260
*
257 261
TEMP = HALF*( A+D )

0 commit comments

Comments
 (0)