Skip to content

Commit a2a1845

Browse files
authored
update zrotg
1 parent 04cdf5e commit a2a1845

File tree

1 file changed

+143
-128
lines changed

1 file changed

+143
-128
lines changed

interface/zrotg.c

Lines changed: 143 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -18,149 +18,164 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
1818

1919
#ifdef DOUBLE
2020
long double safmin = DBL_MIN;
21+
long double rtmin = sqrt(DBL_MIN/DBL_EPSILON);
2122
#else
2223
long double safmin = FLT_MIN;
24+
long double rtmin = sqrt(FLT_MIN/FLT_EPSILON);
2325
#endif
2426

25-
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86)
2627

27-
long double da_r = *(DA + 0);
28-
long double da_i = *(DA + 1);
29-
long double db_r = *(DB + 0);
30-
long double db_i = *(DB + 1);
31-
long double r;
28+
FLOAT da_r = *(DA+0);
29+
FLOAT da_i = *(DA+1);
30+
FLOAT db_r = *(DB+0);
31+
FLOAT db_i = *(DB+1);
32+
//long double r;
33+
FLOAT *r, *S1=(FLOAT *)malloc(2*sizeof(FLOAT));
34+
FLOAT *R=(FLOAT *)malloc(2*sizeof(FLOAT));
35+
long double d;
3236

33-
long double ada = fabsl(da_r) + fabsl(da_i);
34-
long double adb = sqrt(db_r * db_r + db_i * db_i);
37+
FLOAT ada = da_r * da_r + da_i * da_i;
38+
FLOAT adb = db_r * db_r + db_i * db_i;
39+
FLOAT adart = sqrt( da_r * da_r + da_i * da_i);
40+
FLOAT adbrt = sqrt( db_r * db_r + db_i * db_i);
3541

3642
PRINT_DEBUG_NAME;
3743

3844
IDEBUG_START;
3945

4046
FUNCTION_PROFILE_START();
4147

42-
if (ada == ZERO) {
43-
*C = ZERO;
44-
*(S + 0) = ONE;
48+
if (db_r == ZERO && db_i == ZERO) {
49+
*C = ONE;
50+
*(S + 0) = ZERO;
4551
*(S + 1) = ZERO;
46-
*(DA + 0) = db_r;
47-
*(DA + 1) = db_i;
48-
} else {
49-
long double alpha_r, alpha_i;
50-
long double safmax = 1./safmin;
51-
long double sigma;
52-
long double maxab = MAX(ada,adb);
53-
long double scale = MIN(MAX(safmin,maxab), safmax);
54-
55-
56-
long double aa_r = da_r / scale;
57-
long double aa_i = da_i / scale;
58-
long double bb_r = db_r / scale;
59-
long double bb_i = db_i / scale;
60-
61-
if (ada > adb)
62-
sigma = copysign(1.,da_r);
63-
else
64-
sigma = copysign(1.,db_r);
65-
66-
r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);
67-
68-
69-
alpha_r = da_r / ada;
70-
alpha_i = da_i / ada;
71-
72-
*(C + 0) = ada / r;
73-
*(S + 0) = (alpha_r * db_r + alpha_i *db_i) / r;
74-
*(S + 1) = (alpha_i * db_r - alpha_r *db_i) / r;
75-
*(DA + 0) = alpha_r * r;
76-
*(DA + 1) = alpha_i * r;
52+
return;
7753
}
78-
#else
79-
FLOAT da_r = *(DA + 0);
80-
FLOAT da_i = *(DA + 1);
81-
FLOAT db_r = *(DB + 0);
82-
FLOAT db_i = *(DB + 1);
83-
FLOAT r;
84-
85-
FLOAT ada = fabs(da_r) + fabs(da_i);
86-
FLOAT adb = fabs(db_r) + fabs(db_i);
87-
88-
PRINT_DEBUG_NAME;
89-
90-
IDEBUG_START;
91-
92-
FUNCTION_PROFILE_START();
93-
94-
if (ada == ZERO) {
95-
*C = ZERO;
96-
*(S + 0) = ONE;
97-
*(S + 1) = ZERO;
98-
*(DA + 0) = db_r;
99-
*(DA + 1) = db_i;
100-
} else {
101-
long double safmax = 1./safmin;
102-
FLOAT scale, sigma;
103-
FLOAT aa_r, aa_i, bb_r, bb_i;
104-
FLOAT alpha_r, alpha_i;
105-
106-
aa_r = fabs(da_r);
107-
aa_i = fabs(da_i);
108-
109-
if (aa_i > aa_r) {
110-
aa_r = fabs(da_i);
111-
aa_i = fabs(da_r);
112-
}
113-
114-
if (aa_r == ZERO) {
115-
ada = 0.;
116-
} else {
117-
scale = (aa_i / aa_r);
118-
ada = aa_r * sqrt(ONE + scale * scale);
119-
}
120-
121-
bb_r = fabs(db_r);
122-
bb_i = fabs(db_i);
123-
124-
if (bb_i > bb_r) {
125-
bb_r = fabs(bb_i);
126-
bb_i = fabs(bb_r);
127-
}
128-
129-
if (bb_r == ZERO) {
130-
adb = 0.;
131-
} else {
132-
scale = (bb_i / bb_r);
133-
adb = bb_r * sqrt(ONE + scale * scale);
134-
}
135-
FLOAT maxab = MAX(ada,adb);
136-
scale = MIN(MAX(safmin,maxab), safmax);
137-
138-
aa_r = da_r / scale;
139-
aa_i = da_i / scale;
140-
bb_r = db_r / scale;
141-
bb_i = db_i / scale;
14254

143-
if (ada > adb)
144-
sigma = copysign(1.,da_r);
145-
else
146-
sigma = copysign(1.,db_r);
147-
148-
r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i);
149-
150-
alpha_r = da_r / ada;
151-
alpha_i = da_i / ada;
152-
153-
*(C + 0) = ada / r;
154-
*(S + 0) = (alpha_r * db_r + alpha_i *db_i) / r;
155-
*(S + 1) = (alpha_i * db_r - alpha_r *db_i) / r;
156-
*(DA + 0) = alpha_r * r;
157-
*(DA + 1) = alpha_i * r;
158-
}
55+
long double safmax = 1./safmin;
56+
#if defined DOUBLE
57+
long double rtmax = safmax /DBL_EPSILON;
58+
#else
59+
long double rtmax = safmax /FLT_EPSILON;
15960
#endif
160-
161-
FUNCTION_PROFILE_END(4, 4, 4);
162-
163-
IDEBUG_END;
164-
165-
return;
61+
*(S1 + 0) = *(DB + 0);
62+
*(S1 + 1) = *(DB + 1) *-1;
63+
if (da_r == ZERO && da_i == ZERO) {
64+
*C = ZERO;
65+
if (db_r == ZERO) {
66+
(*DA) = fabsl(db_i);
67+
*S = *S1 /da_r;
68+
*(S+1) = *(S1+1) /da_r;
69+
return;
70+
} else if ( db_i == ZERO) {
71+
*DA = fabsl(db_r);
72+
*S = *S1 /da_r;
73+
*(S+1) = *(S1+1) /da_r;
74+
return;
75+
} else {
76+
long double g1 = MAX( fabsl(db_r), fabsl(db_i));
77+
rtmax =sqrt(safmax/2.);
78+
if (g1 > rtmin && g1 < rtmax) { // unscaled
79+
d = sqrt(adb);
80+
*S = *S1 /d;
81+
*(S+1) = *(S1+1) /d;
82+
*DA = d ;
83+
*(DA+1) = ZERO;
84+
return;
85+
} else { // scaled algorithm
86+
long double u = MIN ( safmax, MAX ( safmin, g1));
87+
FLOAT gs_r = db_r/u;
88+
FLOAT gs_i = db_i/u;
89+
d = sqrt ( gs_r*gs_r + gs_i*gs_i);
90+
*S = gs_r / d;
91+
*(S + 1) = (gs_i * -1) / d;
92+
*DA = d * u;
93+
*(DA+1) = ZERO;
94+
return;
95+
}
96+
}
97+
} else {
98+
FLOAT f1 = MAX ( fabsl(da_r), fabsl(da_i));
99+
FLOAT g1 = MAX ( fabsl(db_r), fabsl(db_i));
100+
rtmax = sqrt(safmax / 4.);
101+
if ( f1 > rtmin && f1 < rtmax && g1 > rtmin && g1 < rtmax) { //unscaled
102+
long double h = ada + adb;
103+
double adahsq = sqrt(ada * h);
104+
if (ada >= h *safmin) {
105+
*C = sqrt(ada/h);
106+
*R = *DA / *C;
107+
*(R+1) = *(DA+1) / *(C+1);
108+
rtmax *= 2.;
109+
if ( ada > rtmin && h < rtmax) { // no risk of intermediate overflow
110+
*S = *S1 * (*DA / adahsq) - *(S1+1)* (*(DA+1)/adahsq);
111+
*(S+1) = *S1 * (*(DA+1) / adahsq) + *(S1+1) * (*DA/adahsq);
112+
} else {
113+
*S = *S1 * (*R/h) - *(S1+1) * (*(R+1)/h);
114+
*(S+1) = *S1 * (*(R+1)/h) + *(S1+1) * (*(R)/h);
115+
}
116+
} else {
117+
*C = ada / adahsq;
118+
if (*C >= safmin)
119+
*R = *DA / *C;
120+
else
121+
*R = *DA * (h / adahsq);
122+
*S = *S1 * ada / adahsq;
123+
*(S+1) = *(S1+1) * ada / adahsq;
124+
}
125+
*DA=*R;
126+
*(DA+1)=*(R+1);
127+
return;
128+
} else { // scaled
129+
FLOAT fs_r, fs_i, gs_r, gs_i;
130+
long double v,w,f2,g2,h;
131+
long double u = MIN ( safmax, MAX ( safmin, MAX(f1,g1)));
132+
gs_r = db_r/u;
133+
gs_i = db_i/u;
134+
g2 = sqrt ( gs_r*gs_r + gs_i*gs_i);
135+
if (f1 /u < rtmin) {
136+
v = MIN (safmax, MAX (safmin, f1));
137+
w = v / u;
138+
fs_r = *DA/ v;
139+
fs_i = *(DA+1) / v;
140+
f2 = sqrt ( fs_r*fs_r + fs_i*fs_i);
141+
h = f2 * w * w + g2;
142+
} else { // use same scaling for both
143+
w = 1.;
144+
fs_r = *DA/ u;
145+
fs_i = *(DA+1) / u;
146+
f2 = sqrt ( fs_r*fs_r + fs_i*fs_i);
147+
h = f2 + g2;
148+
}
149+
if ( f2 >= h * safmin) {
150+
*C = sqrt ( f2 / h );
151+
*DA = fs_r / *C;
152+
*(DA+1) = fs_i / *C;
153+
rtmax *= 2;
154+
if ( f2 > rtmin && h < rtmax) {
155+
*S = gs_r * (fs_r /sqrt(f2*h)) - gs_i * (fs_i / sqrt(f2*h));
156+
*(S+1) = gs_r * (fs_i /sqrt(f2*h)) + gs_i * -1. * (fs_r / sqrt(f2*h));
157+
} else {
158+
*S = gs_r * (*DA/h) - gs_i * (*(DA+1) / h);
159+
*(S+1) = gs_r * (*(DA+1) /h) + gs_i * -1. * (*DA / h);
160+
}
161+
} else { // intermediates might overflow
162+
d = sqrt ( f2 * h);
163+
*C = f2 /d;
164+
if (*C >= safmin) {
165+
*DA = fs_r / *C;
166+
*(DA+1) = fs_i / *C;
167+
} else {
168+
*DA = fs_r * (h / d);
169+
*(DA+1) = fs_i / (h / d);
170+
}
171+
*S = gs_r * (fs_r /d) - gs_i * (fs_i / d);
172+
*(S+1) = gs_r * (fs_i /d) + gs_i * -1. * (fs_r / d);
173+
}
174+
*C *= w;
175+
*DA *= u;
176+
*(DA+1) *= u;
177+
return;
178+
}
179+
}
166180
}
181+

0 commit comments

Comments
 (0)