@@ -18,149 +18,164 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) {
18
18
19
19
#ifdef DOUBLE
20
20
long double safmin = DBL_MIN ;
21
+ long double rtmin = sqrt (DBL_MIN /DBL_EPSILON );
21
22
#else
22
23
long double safmin = FLT_MIN ;
24
+ long double rtmin = sqrt (FLT_MIN /FLT_EPSILON );
23
25
#endif
24
26
25
- #if defined(__i386__ ) || defined(__x86_64__ ) || defined(__ia64__ ) || defined(_M_X64 ) || defined(_M_IX86 )
26
27
27
- long double da_r = * (DA + 0 );
28
- long double da_i = * (DA + 1 );
29
- long double db_r = * (DB + 0 );
30
- long double db_i = * (DB + 1 );
31
- long double r ;
28
+ FLOAT da_r = * (DA + 0 );
29
+ FLOAT da_i = * (DA + 1 );
30
+ FLOAT db_r = * (DB + 0 );
31
+ FLOAT db_i = * (DB + 1 );
32
+ //long double r;
33
+ FLOAT * r , * S1 = (FLOAT * )malloc (2 * sizeof (FLOAT ));
34
+ FLOAT * R = (FLOAT * )malloc (2 * sizeof (FLOAT ));
35
+ long double d ;
32
36
33
- long double ada = fabsl (da_r ) + fabsl (da_i );
34
- long double adb = sqrt (db_r * db_r + db_i * db_i );
37
+ FLOAT ada = da_r * da_r + da_i * da_i ;
38
+ FLOAT adb = db_r * db_r + db_i * db_i ;
39
+ FLOAT adart = sqrt ( da_r * da_r + da_i * da_i );
40
+ FLOAT adbrt = sqrt ( db_r * db_r + db_i * db_i );
35
41
36
42
PRINT_DEBUG_NAME ;
37
43
38
44
IDEBUG_START ;
39
45
40
46
FUNCTION_PROFILE_START ();
41
47
42
- if (ada == ZERO ) {
43
- * C = ZERO ;
44
- * (S + 0 ) = ONE ;
48
+ if (db_r == ZERO && db_i == ZERO ) {
49
+ * C = ONE ;
50
+ * (S + 0 ) = ZERO ;
45
51
* (S + 1 ) = ZERO ;
46
- * (DA + 0 ) = db_r ;
47
- * (DA + 1 ) = db_i ;
48
- } else {
49
- long double alpha_r , alpha_i ;
50
- long double safmax = 1. /safmin ;
51
- long double sigma ;
52
- long double maxab = MAX (ada ,adb );
53
- long double scale = MIN (MAX (safmin ,maxab ), safmax );
54
-
55
-
56
- long double aa_r = da_r / scale ;
57
- long double aa_i = da_i / scale ;
58
- long double bb_r = db_r / scale ;
59
- long double bb_i = db_i / scale ;
60
-
61
- if (ada > adb )
62
- sigma = copysign (1. ,da_r );
63
- else
64
- sigma = copysign (1. ,db_r );
65
-
66
- r = sigma * scale * sqrt (aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i );
67
-
68
-
69
- alpha_r = da_r / ada ;
70
- alpha_i = da_i / ada ;
71
-
72
- * (C + 0 ) = ada / r ;
73
- * (S + 0 ) = (alpha_r * db_r + alpha_i * db_i ) / r ;
74
- * (S + 1 ) = (alpha_i * db_r - alpha_r * db_i ) / r ;
75
- * (DA + 0 ) = alpha_r * r ;
76
- * (DA + 1 ) = alpha_i * r ;
52
+ return ;
77
53
}
78
- #else
79
- FLOAT da_r = * (DA + 0 );
80
- FLOAT da_i = * (DA + 1 );
81
- FLOAT db_r = * (DB + 0 );
82
- FLOAT db_i = * (DB + 1 );
83
- FLOAT r ;
84
-
85
- FLOAT ada = fabs (da_r ) + fabs (da_i );
86
- FLOAT adb = fabs (db_r ) + fabs (db_i );
87
-
88
- PRINT_DEBUG_NAME ;
89
-
90
- IDEBUG_START ;
91
-
92
- FUNCTION_PROFILE_START ();
93
-
94
- if (ada == ZERO ) {
95
- * C = ZERO ;
96
- * (S + 0 ) = ONE ;
97
- * (S + 1 ) = ZERO ;
98
- * (DA + 0 ) = db_r ;
99
- * (DA + 1 ) = db_i ;
100
- } else {
101
- long double safmax = 1. /safmin ;
102
- FLOAT scale , sigma ;
103
- FLOAT aa_r , aa_i , bb_r , bb_i ;
104
- FLOAT alpha_r , alpha_i ;
105
-
106
- aa_r = fabs (da_r );
107
- aa_i = fabs (da_i );
108
-
109
- if (aa_i > aa_r ) {
110
- aa_r = fabs (da_i );
111
- aa_i = fabs (da_r );
112
- }
113
-
114
- if (aa_r == ZERO ) {
115
- ada = 0. ;
116
- } else {
117
- scale = (aa_i / aa_r );
118
- ada = aa_r * sqrt (ONE + scale * scale );
119
- }
120
-
121
- bb_r = fabs (db_r );
122
- bb_i = fabs (db_i );
123
-
124
- if (bb_i > bb_r ) {
125
- bb_r = fabs (bb_i );
126
- bb_i = fabs (bb_r );
127
- }
128
-
129
- if (bb_r == ZERO ) {
130
- adb = 0. ;
131
- } else {
132
- scale = (bb_i / bb_r );
133
- adb = bb_r * sqrt (ONE + scale * scale );
134
- }
135
- FLOAT maxab = MAX (ada ,adb );
136
- scale = MIN (MAX (safmin ,maxab ), safmax );
137
-
138
- aa_r = da_r / scale ;
139
- aa_i = da_i / scale ;
140
- bb_r = db_r / scale ;
141
- bb_i = db_i / scale ;
142
54
143
- if (ada > adb )
144
- sigma = copysign (1. ,da_r );
145
- else
146
- sigma = copysign (1. ,db_r );
147
-
148
- r = sigma * scale * sqrt (aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i );
149
-
150
- alpha_r = da_r / ada ;
151
- alpha_i = da_i / ada ;
152
-
153
- * (C + 0 ) = ada / r ;
154
- * (S + 0 ) = (alpha_r * db_r + alpha_i * db_i ) / r ;
155
- * (S + 1 ) = (alpha_i * db_r - alpha_r * db_i ) / r ;
156
- * (DA + 0 ) = alpha_r * r ;
157
- * (DA + 1 ) = alpha_i * r ;
158
- }
55
+ long double safmax = 1. /safmin ;
56
+ #if defined DOUBLE
57
+ long double rtmax = safmax /DBL_EPSILON ;
58
+ #else
59
+ long double rtmax = safmax /FLT_EPSILON ;
159
60
#endif
160
-
161
- FUNCTION_PROFILE_END (4 , 4 , 4 );
162
-
163
- IDEBUG_END ;
164
-
165
- return ;
61
+ * (S1 + 0 ) = * (DB + 0 );
62
+ * (S1 + 1 ) = * (DB + 1 ) * -1 ;
63
+ if (da_r == ZERO && da_i == ZERO ) {
64
+ * C = ZERO ;
65
+ if (db_r == ZERO ) {
66
+ (* DA ) = fabsl (db_i );
67
+ * S = * S1 /da_r ;
68
+ * (S + 1 ) = * (S1 + 1 ) /da_r ;
69
+ return ;
70
+ } else if ( db_i == ZERO ) {
71
+ * DA = fabsl (db_r );
72
+ * S = * S1 /da_r ;
73
+ * (S + 1 ) = * (S1 + 1 ) /da_r ;
74
+ return ;
75
+ } else {
76
+ long double g1 = MAX ( fabsl (db_r ), fabsl (db_i ));
77
+ rtmax = sqrt (safmax /2. );
78
+ if (g1 > rtmin && g1 < rtmax ) { // unscaled
79
+ d = sqrt (adb );
80
+ * S = * S1 /d ;
81
+ * (S + 1 ) = * (S1 + 1 ) /d ;
82
+ * DA = d ;
83
+ * (DA + 1 ) = ZERO ;
84
+ return ;
85
+ } else { // scaled algorithm
86
+ long double u = MIN ( safmax , MAX ( safmin , g1 ));
87
+ FLOAT gs_r = db_r /u ;
88
+ FLOAT gs_i = db_i /u ;
89
+ d = sqrt ( gs_r * gs_r + gs_i * gs_i );
90
+ * S = gs_r / d ;
91
+ * (S + 1 ) = (gs_i * -1 ) / d ;
92
+ * DA = d * u ;
93
+ * (DA + 1 ) = ZERO ;
94
+ return ;
95
+ }
96
+ }
97
+ } else {
98
+ FLOAT f1 = MAX ( fabsl (da_r ), fabsl (da_i ));
99
+ FLOAT g1 = MAX ( fabsl (db_r ), fabsl (db_i ));
100
+ rtmax = sqrt (safmax / 4. );
101
+ if ( f1 > rtmin && f1 < rtmax && g1 > rtmin && g1 < rtmax ) { //unscaled
102
+ long double h = ada + adb ;
103
+ double adahsq = sqrt (ada * h );
104
+ if (ada >= h * safmin ) {
105
+ * C = sqrt (ada /h );
106
+ * R = * DA / * C ;
107
+ * (R + 1 ) = * (DA + 1 ) / * (C + 1 );
108
+ rtmax *= 2. ;
109
+ if ( ada > rtmin && h < rtmax ) { // no risk of intermediate overflow
110
+ * S = * S1 * (* DA / adahsq ) - * (S1 + 1 )* (* (DA + 1 )/adahsq );
111
+ * (S + 1 ) = * S1 * (* (DA + 1 ) / adahsq ) + * (S1 + 1 ) * (* DA /adahsq );
112
+ } else {
113
+ * S = * S1 * (* R /h ) - * (S1 + 1 ) * (* (R + 1 )/h );
114
+ * (S + 1 ) = * S1 * (* (R + 1 )/h ) + * (S1 + 1 ) * (* (R )/h );
115
+ }
116
+ } else {
117
+ * C = ada / adahsq ;
118
+ if (* C >= safmin )
119
+ * R = * DA / * C ;
120
+ else
121
+ * R = * DA * (h / adahsq );
122
+ * S = * S1 * ada / adahsq ;
123
+ * (S + 1 ) = * (S1 + 1 ) * ada / adahsq ;
124
+ }
125
+ * DA = * R ;
126
+ * (DA + 1 )= * (R + 1 );
127
+ return ;
128
+ } else { // scaled
129
+ FLOAT fs_r , fs_i , gs_r , gs_i ;
130
+ long double v ,w ,f2 ,g2 ,h ;
131
+ long double u = MIN ( safmax , MAX ( safmin , MAX (f1 ,g1 )));
132
+ gs_r = db_r /u ;
133
+ gs_i = db_i /u ;
134
+ g2 = sqrt ( gs_r * gs_r + gs_i * gs_i );
135
+ if (f1 /u < rtmin ) {
136
+ v = MIN (safmax , MAX (safmin , f1 ));
137
+ w = v / u ;
138
+ fs_r = * DA / v ;
139
+ fs_i = * (DA + 1 ) / v ;
140
+ f2 = sqrt ( fs_r * fs_r + fs_i * fs_i );
141
+ h = f2 * w * w + g2 ;
142
+ } else { // use same scaling for both
143
+ w = 1. ;
144
+ fs_r = * DA / u ;
145
+ fs_i = * (DA + 1 ) / u ;
146
+ f2 = sqrt ( fs_r * fs_r + fs_i * fs_i );
147
+ h = f2 + g2 ;
148
+ }
149
+ if ( f2 >= h * safmin ) {
150
+ * C = sqrt ( f2 / h );
151
+ * DA = fs_r / * C ;
152
+ * (DA + 1 ) = fs_i / * C ;
153
+ rtmax *= 2 ;
154
+ if ( f2 > rtmin && h < rtmax ) {
155
+ * S = gs_r * (fs_r /sqrt (f2 * h )) - gs_i * (fs_i / sqrt (f2 * h ));
156
+ * (S + 1 ) = gs_r * (fs_i /sqrt (f2 * h )) + gs_i * -1. * (fs_r / sqrt (f2 * h ));
157
+ } else {
158
+ * S = gs_r * (* DA /h ) - gs_i * (* (DA + 1 ) / h );
159
+ * (S + 1 ) = gs_r * (* (DA + 1 ) /h ) + gs_i * -1. * (* DA / h );
160
+ }
161
+ } else { // intermediates might overflow
162
+ d = sqrt ( f2 * h );
163
+ * C = f2 /d ;
164
+ if (* C >= safmin ) {
165
+ * DA = fs_r / * C ;
166
+ * (DA + 1 ) = fs_i / * C ;
167
+ } else {
168
+ * DA = fs_r * (h / d );
169
+ * (DA + 1 ) = fs_i / (h / d );
170
+ }
171
+ * S = gs_r * (fs_r /d ) - gs_i * (fs_i / d );
172
+ * (S + 1 ) = gs_r * (fs_i /d ) + gs_i * -1. * (fs_r / d );
173
+ }
174
+ * C *= w ;
175
+ * DA *= u ;
176
+ * (DA + 1 ) *= u ;
177
+ return ;
178
+ }
179
+ }
166
180
}
181
+
0 commit comments