|
7 | 7 | //===----------------------------------------------------------------------===//
|
8 | 8 |
|
9 | 9 | #include <libspirv/spirv.h>
|
| 10 | +#include <clc/math/clc_atan2.h> |
10 | 11 |
|
11 |
| -#include <clc/math/tables.h> |
12 |
| -#include <clc/clcmacro.h> |
13 |
| -#include <clc/math/math.h> |
| 12 | +#define FUNCTION __spirv_ocl_atan2 |
| 13 | +#define __CLC_FUNCTION(x) __clc_atan2 |
| 14 | +#define __CLC_BODY <clc/shared/binary_def.inc> |
14 | 15 |
|
15 |
| -_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_atan2(float y, float x) { |
16 |
| - const float pi = 0x1.921fb6p+1f; |
17 |
| - const float piby2 = 0x1.921fb6p+0f; |
18 |
| - const float piby4 = 0x1.921fb6p-1f; |
19 |
| - const float threepiby4 = 0x1.2d97c8p+1f; |
20 |
| - |
21 |
| - float ax = __spirv_ocl_fabs(x); |
22 |
| - float ay = __spirv_ocl_fabs(y); |
23 |
| - float v = __spirv_ocl_fmin(ax, ay); |
24 |
| - float u = __spirv_ocl_fmax(ax, ay); |
25 |
| - |
26 |
| - // Scale since u could be large, as in "regular" divide |
27 |
| - float s = u > 0x1.0p+96f ? 0x1.0p-32f : 1.0f; |
28 |
| - float vbyu = s * MATH_DIVIDE(v, s * u); |
29 |
| - |
30 |
| - float vbyu2 = vbyu * vbyu; |
31 |
| - |
32 |
| -#define USE_2_2_APPROXIMATION |
33 |
| -#if defined USE_2_2_APPROXIMATION |
34 |
| - float p = __spirv_ocl_mad( |
35 |
| - vbyu2, __spirv_ocl_mad(vbyu2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f), |
36 |
| - -0x1.5554d0p-2f) * |
37 |
| - vbyu2 * vbyu; |
38 |
| - float q = __spirv_ocl_mad( |
39 |
| - vbyu2, __spirv_ocl_mad(vbyu2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f); |
40 |
| -#else |
41 |
| - float p = __spirv_ocl_mad( |
42 |
| - vbyu2, __spirv_ocl_mad(vbyu2, -0x1.55cd22p-5f, -0x1.26cf76p-2f), |
43 |
| - -0x1.55554ep-2f) * |
44 |
| - vbyu2 * vbyu; |
45 |
| - float q = __spirv_ocl_mad( |
46 |
| - vbyu2, |
47 |
| - __spirv_ocl_mad(vbyu2, |
48 |
| - __spirv_ocl_mad(vbyu2, 0x1.9f1304p-5f, 0x1.2656fap-1f), |
49 |
| - 0x1.76b4b8p+0f), |
50 |
| - 1.0f); |
51 |
| -#endif |
52 |
| - |
53 |
| - // Octant 0 result |
54 |
| - float a = __spirv_ocl_mad(p, MATH_RECIP(q), vbyu); |
55 |
| - |
56 |
| - // Fix up 3 other octants |
57 |
| - float at = piby2 - a; |
58 |
| - a = ay > ax ? at : a; |
59 |
| - at = pi - a; |
60 |
| - a = x < 0.0F ? at : a; |
61 |
| - |
62 |
| - // y == 0 => 0 for x >= 0, pi for x < 0 |
63 |
| - at = __clc_as_int(x) < 0 ? pi : 0.0f; |
64 |
| - a = y == 0.0f ? at : a; |
65 |
| - |
66 |
| - // if (!FINITE_ONLY()) { |
67 |
| - // x and y are +- Inf |
68 |
| - at = x > 0.0f ? piby4 : threepiby4; |
69 |
| - a = ax == INFINITY && ay == INFINITY ? at : a; |
70 |
| - |
71 |
| - // x or y is NaN |
72 |
| - a = __spirv_IsNan(x) || __spirv_IsNan(y) ? __clc_as_float(QNANBITPATT_SP32) : a; |
73 |
| - // } |
74 |
| - |
75 |
| - // Fixup sign and return |
76 |
| - return __spirv_ocl_copysign(a, y); |
77 |
| -} |
78 |
| - |
79 |
| -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_atan2, float, |
80 |
| - float); |
81 |
| - |
82 |
| -#ifdef cl_khr_fp64 |
83 |
| - |
84 |
| -#pragma OPENCL EXTENSION cl_khr_fp64 : enable |
85 |
| - |
86 |
| -_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_atan2(double y, double x) { |
87 |
| - const double pi = 3.1415926535897932e+00; /* 0x400921fb54442d18 */ |
88 |
| - const double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */ |
89 |
| - const double piby4 = 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */ |
90 |
| - const double three_piby4 = 2.3561944901923449e+00; /* 0x4002d97c7f3321d2 */ |
91 |
| - const double pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */ |
92 |
| - const double pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */ |
93 |
| - const double piby2_head = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */ |
94 |
| - const double piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */ |
95 |
| - |
96 |
| - double x2 = x; |
97 |
| - int xneg = __clc_as_int2(x).hi < 0; |
98 |
| - int xexp = (__clc_as_int2(x).hi >> 20) & 0x7ff; |
99 |
| - |
100 |
| - double y2 = y; |
101 |
| - int yneg = __clc_as_int2(y).hi < 0; |
102 |
| - int yexp = (__clc_as_int2(y).hi >> 20) & 0x7ff; |
103 |
| - |
104 |
| - int cond2 = (xexp < 1021) & (yexp < 1021); |
105 |
| - int diffexp = yexp - xexp; |
106 |
| - |
107 |
| - // Scale up both x and y if they are both below 1/4 |
108 |
| - double x1 = __spirv_ocl_ldexp(x, 1024); |
109 |
| - int xexp1 = (__clc_as_int2(x1).hi >> 20) & 0x7ff; |
110 |
| - double y1 = __spirv_ocl_ldexp(y, 1024); |
111 |
| - int yexp1 = (__clc_as_int2(y1).hi >> 20) & 0x7ff; |
112 |
| - int diffexp1 = yexp1 - xexp1; |
113 |
| - |
114 |
| - diffexp = cond2 ? diffexp1 : diffexp; |
115 |
| - x = cond2 ? x1 : x; |
116 |
| - y = cond2 ? y1 : y; |
117 |
| - |
118 |
| - // General case: take absolute values of arguments |
119 |
| - double u = __spirv_ocl_fabs(x); |
120 |
| - double v = __spirv_ocl_fabs(y); |
121 |
| - |
122 |
| - // Swap u and v if necessary to obtain 0 < v < u. Compute v/u. |
123 |
| - int swap_vu = u < v; |
124 |
| - double uu = u; |
125 |
| - u = swap_vu ? v : u; |
126 |
| - v = swap_vu ? uu : v; |
127 |
| - |
128 |
| - double vbyu = v / u; |
129 |
| - double q1, q2; |
130 |
| - |
131 |
| - // General values of v/u. Use a look-up table and series expansion. |
132 |
| - |
133 |
| - { |
134 |
| - double val = vbyu > 0.0625 ? vbyu : 0.063; |
135 |
| - int index = __spirv_ConvertFToS_Rint(__spirv_ocl_fma(256.0, val, 0.5)); |
136 |
| - double2 tv = USE_TABLE(atan_jby256_tbl, index - 16); |
137 |
| - q1 = tv.s0; |
138 |
| - q2 = tv.s1; |
139 |
| - double c = (double)index * 0x1.0p-8; |
140 |
| - |
141 |
| - // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1 |
142 |
| - // u_exponent could be EMAX so we have to do it in 2 steps |
143 |
| - int m = -((int)(__clc_as_ulong(u) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64); |
144 |
| - // double um = __amdil_ldexp_f64(u, m); |
145 |
| - // double vm = __amdil_ldexp_f64(v, m); |
146 |
| - double um = __spirv_ocl_ldexp(u, m); |
147 |
| - double vm = __spirv_ocl_ldexp(v, m); |
148 |
| - |
149 |
| - // 26 leading bits of u |
150 |
| - double u1 = __clc_as_double(__clc_as_ulong(um) & 0xfffffffff8000000UL); |
151 |
| - double u2 = um - u1; |
152 |
| - |
153 |
| - double r = MATH_DIVIDE(__spirv_ocl_fma(-c, u2, __spirv_ocl_fma(-c, u1, vm)), |
154 |
| - __spirv_ocl_fma(c, vm, um)); |
155 |
| - |
156 |
| - // Polynomial approximation to atan(r) |
157 |
| - double s = r * r; |
158 |
| - q2 = q2 + __spirv_ocl_fma((s * __spirv_ocl_fma(-s, 0.19999918038989143496, |
159 |
| - 0.33333333333224095522)), |
160 |
| - -r, r); |
161 |
| - } |
162 |
| - |
163 |
| - double q3, q4; |
164 |
| - { |
165 |
| - q3 = 0.0; |
166 |
| - q4 = vbyu; |
167 |
| - } |
168 |
| - |
169 |
| - double q5, q6; |
170 |
| - { |
171 |
| - double u1 = __clc_as_double(__clc_as_ulong(u) & 0xffffffff00000000UL); |
172 |
| - double u2 = u - u1; |
173 |
| - double vu1 = __clc_as_double(__clc_as_ulong(vbyu) & 0xffffffff00000000UL); |
174 |
| - double vu2 = vbyu - vu1; |
175 |
| - |
176 |
| - q5 = 0.0; |
177 |
| - double s = vbyu * vbyu; |
178 |
| - q6 = vbyu + |
179 |
| - __spirv_ocl_fma( |
180 |
| - -vbyu * s, |
181 |
| - __spirv_ocl_fma( |
182 |
| - -s, |
183 |
| - __spirv_ocl_fma( |
184 |
| - -s, |
185 |
| - __spirv_ocl_fma(-s, |
186 |
| - __spirv_ocl_fma(-s, |
187 |
| - 0.90029810285449784439E-01, |
188 |
| - 0.11110736283514525407), |
189 |
| - 0.14285713561807169030), |
190 |
| - 0.19999999999393223405), |
191 |
| - 0.33333333333333170500), |
192 |
| - MATH_DIVIDE( |
193 |
| - __spirv_ocl_fma( |
194 |
| - -u, vu2, |
195 |
| - __spirv_ocl_fma(-u2, vu1, __spirv_ocl_fma(-u1, vu1, v))), |
196 |
| - u)); |
197 |
| - } |
198 |
| - |
199 |
| - q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5; |
200 |
| - q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6; |
201 |
| - |
202 |
| - q1 = vbyu > 0.0625 ? q1 : q3; |
203 |
| - q2 = vbyu > 0.0625 ? q2 : q4; |
204 |
| - |
205 |
| - // Tidy-up according to which quadrant the arguments lie in |
206 |
| - double res1, res2, res3, res4; |
207 |
| - q1 = swap_vu ? piby2_head - q1 : q1; |
208 |
| - q2 = swap_vu ? piby2_tail - q2 : q2; |
209 |
| - q1 = xneg ? pi_head - q1 : q1; |
210 |
| - q2 = xneg ? pi_tail - q2 : q2; |
211 |
| - q1 = q1 + q2; |
212 |
| - res4 = yneg ? -q1 : q1; |
213 |
| - |
214 |
| - res1 = yneg ? -three_piby4 : three_piby4; |
215 |
| - res2 = yneg ? -piby4 : piby4; |
216 |
| - res3 = xneg ? res1 : res2; |
217 |
| - |
218 |
| - res3 = __spirv_IsInf(x2) && __spirv_IsInf(y2) ? res3 : res4; |
219 |
| - res1 = yneg ? -pi : pi; |
220 |
| - |
221 |
| - // abs(x)/abs(y) > 2^56 and x < 0 |
222 |
| - res3 = (diffexp < -56 && xneg) ? res1 : res3; |
223 |
| - |
224 |
| - res4 = MATH_DIVIDE(y, x); |
225 |
| - // x positive and dominant over y by a factor of 2^28 |
226 |
| - res3 = diffexp < -28 && xneg == 0 ? res4 : res3; |
227 |
| - |
228 |
| - // abs(y)/abs(x) > 2^56 |
229 |
| - res4 = yneg ? -piby2 : piby2; // atan(y/x) is insignificant compared to piby2 |
230 |
| - res3 = diffexp > 56 ? res4 : res3; |
231 |
| - |
232 |
| - res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +- pi/2 depending on sign of y |
233 |
| - res4 = xneg ? res1 : y2; |
234 |
| - |
235 |
| - res3 = y2 == 0.0 |
236 |
| - ? res4 |
237 |
| - : res3; // Zero y gives +-0 for positive x and +-pi for negative x |
238 |
| - res3 = __spirv_IsNan(y2) ? y2 : res3; |
239 |
| - res3 = __spirv_IsNan(x2) ? x2 : res3; |
240 |
| - |
241 |
| - return res3; |
242 |
| -} |
243 |
| - |
244 |
| -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_atan2, double, |
245 |
| - double); |
246 |
| - |
247 |
| -#endif |
248 |
| - |
249 |
| -#ifdef cl_khr_fp16 |
250 |
| - |
251 |
| -#pragma OPENCL EXTENSION cl_khr_fp16 : enable |
252 |
| - |
253 |
| -_CLC_DEFINE_BINARY_BUILTIN(half, __spirv_ocl_atan2, __builtin_atan2f16, half, half) |
254 |
| - |
255 |
| -#endif |
| 16 | +#include <clc/math/gentype.inc> |
0 commit comments