Skip to content

Commit e08743d

Browse files
authored
Update to use safe scaling algorithm from Reference-LAPACK PR 527
1 parent 2edebc5 commit e08743d

File tree

2 files changed

+80
-22
lines changed

2 files changed

+80
-22
lines changed

interface/rotg.c

Lines changed: 45 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#include <math.h>
2+
#include <float.h>
23
#include "common.h"
34
#ifdef FUNCTION_PROFILE
45
#include "functable.h"
56
#endif
67

8+
79
#ifndef CBLAS
810

911
void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
@@ -14,35 +16,53 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
1416

1517
#endif
1618

19+
#ifdef DOUBLE
20+
long double safmin = DBL_MIN;
21+
#else
22+
long double safmin = FLT_MIN;
23+
#endif
24+
1725
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
1826

1927
long double da = *DA;
2028
long double db = *DB;
2129
long double c;
2230
long double s;
23-
long double r, roe, z;
31+
long double r, z;
32+
long double sigma, dascal,dbscal;
2433

2534
long double ada = fabsl(da);
2635
long double adb = fabsl(db);
27-
long double scale = ada + adb;
36+
long double maxab = MAX(ada,adb);
37+
long double safmax;
38+
long double scale;
39+
2840

2941
#ifndef CBLAS
3042
PRINT_DEBUG_NAME;
3143
#else
3244
PRINT_DEBUG_CNAME;
3345
#endif
3446

35-
roe = db;
36-
if (ada > adb) roe = da;
37-
38-
if (scale == ZERO) {
47+
if (adb == ZERO) {
3948
*C = ONE;
4049
*S = ZERO;
41-
*DA = ZERO;
4250
*DB = ZERO;
51+
} else if (ada == ZERO) {
52+
*C = ZERO;
53+
*S = ONE;
54+
*DA = *DB;
55+
*DB = ONE;
4356
} else {
44-
r = sqrt(da * da + db * db);
45-
if (roe < 0) r = -r;
57+
safmax = 1./safmin;
58+
scale = MIN(MAX(safmin,maxab), safmax);
59+
if (ada > adb)
60+
sigma = copysign(1.,da);
61+
else
62+
sigma = copysign(1.,db);
63+
dascal = da / scale;
64+
dbscal = db / scale;
65+
r = sigma * (scale * sqrt(dascal * dascal + dbscal * dbscal));
4666
c = da / r;
4767
s = db / r;
4868
z = ONE;
@@ -65,32 +85,40 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
6585
FLOAT db = *DB;
6686
FLOAT c = *C;
6787
FLOAT s = *S;
68-
FLOAT r, roe, z;
88+
FLOAT sigma;
89+
FLOAT r, z;
6990

7091
FLOAT ada = fabs(da);
7192
FLOAT adb = fabs(db);
72-
FLOAT scale = ada + adb;
93+
FLOAT maxab = MAX(ada,adb);
94+
long double safmax ;
95+
FLOAT scale ;
96+
97+
safmax = 1./safmin;
98+
scale = MIN(MAX(safmin,maxab), safmax);
99+
100+
if (ada > adb)
101+
sigma = sign(1.,da);
102+
else
103+
sigma = sign(1.,db);
73104

74105
#ifndef CBLAS
75106
PRINT_DEBUG_NAME;
76107
#else
77108
PRINT_DEBUG_CNAME;
78109
#endif
79110

80-
roe = db;
81-
if (ada > adb) roe = da;
82111

83-
if (scale == ZERO) {
112+
if (adb == ZERO) {
84113
*C = ONE;
85114
*S = ZERO;
86-
*DA = ZERO;
115+
DA = ZERO;
87116
*DB = ZERO;
88117
} else {
89118
FLOAT aa = da / scale;
90119
FLOAT bb = db / scale;
91120

92-
r = scale * sqrt(aa * aa + bb * bb);
93-
if (roe < 0) r = -r;
121+
r = sigma * scale * sqrt(aa * aa + bb * bb);
94122
c = da / r;
95123
s = db / r;
96124
z = ONE;

interface/zrotg.c

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#include <math.h>
2+
#include <float.h>
23
#include "common.h"
34
#ifdef FUNCTION_PROFILE
45
#include "functable.h"
56
#endif
67

8+
79
#ifndef CBLAS
810
void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
911

@@ -14,6 +16,12 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
1416
FLOAT *S = (FLOAT*) VS;
1517
#endif /* CBLAS */
1618

19+
#ifdef DOUBLE
20+
long double safmin = DBL_MIN;
21+
#else
22+
long double safmin = FLT_MIN;
23+
#endif
24+
1725
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
1826

1927
long double da_r = *(DA + 0);
@@ -23,6 +31,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
2331
long double r;
2432

2533
long double ada = fabsl(da_r) + fabsl(da_i);
34+
long double adb = sqrt(db_r * db_r + db_i * db_i);
2635

2736
PRINT_DEBUG_NAME;
2837

@@ -38,10 +47,24 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
3847
*(DA + 1) = db_i;
3948
} else {
4049
long double alpha_r, alpha_i;
50+
long double safmax = 1./safmin;
51+
long double sigma;
52+
long double maxab = MAX(ada,adb);
53+
long double scale = MIN(MAX(safmin,maxab), safmax);
4154

42-
ada = sqrt(da_r * da_r + da_i * da_i);
4355

44-
r = sqrt(da_r * da_r + da_i * da_i + db_r * db_r + db_i * db_i);
56+
long double aa_r = da_r / scale;
57+
long double aa_i = da_i / scale;
58+
long double bb_r = db_r / scale;
59+
long double bb_i = db_i / scale;
60+
61+
if (ada > adb)
62+
sigma = copysign(1.,da_r);
63+
else
64+
sigma = copysign(1.,db_r);
65+
66+
r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);
67+
4568

4669
alpha_r = da_r / ada;
4770
alpha_i = da_i / ada;
@@ -60,7 +83,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
6083
FLOAT r;
6184

6285
FLOAT ada = fabs(da_r) + fabs(da_i);
63-
FLOAT adb;
86+
FLOAT ada = fabs(db_r) + fabs(db_i);
6487

6588
PRINT_DEBUG_NAME;
6689

@@ -75,6 +98,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
7598
*(DA + 0) = db_r;
7699
*(DA + 1) = db_i;
77100
} else {
101+
long double safmax = 1./safmin;
78102
FLOAT scale;
79103
FLOAT aa_r, aa_i, bb_r, bb_i;
80104
FLOAT alpha_r, alpha_i;
@@ -108,14 +132,20 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
108132
scale = (bb_i / bb_r);
109133
adb = bb_r * sqrt(ONE + scale * scale);
110134
}
111-
scale = ada + adb;
135+
FLOAT maxab = MAX(ada,adb);
136+
scale = MIN(MAX(safmin,maxab), safmax);
112137

113138
aa_r = da_r / scale;
114139
aa_i = da_i / scale;
115140
bb_r = db_r / scale;
116141
bb_i = db_i / scale;
117142

118-
r = scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);
143+
if (ada > adb)
144+
sigma = copysign(1.,da_r);
145+
else
146+
sigma = copysign(1.,db_r);
147+
148+
r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);
119149

120150
alpha_r = da_r / ada;
121151
alpha_i = da_i / ada;

0 commit comments

Comments
 (0)