Skip to content

Commit 9eb7a9c

Browse files
frasercrmckjsji
authored andcommitted
[libspirv] Fix missing table declaration
These tables were split into head/tail tables upstream. The libspirv versions of atan2/atan2pi were still using the old unified table, which was never defined. A previous pulldown observed a build failure and resolved it by adding a declaration of the old table; this table was never defined so could result in a link failure at build time. The correct fix is to have libspirv's atan2/atan2pi builtins call into CLC's atan2/atan2pi, and at the same time remove the use of the old tables from libspirv. We can now also remove the declaration of the missing table.
1 parent f71f7e9 commit 9eb7a9c

File tree

3 files changed

+10
-470
lines changed

3 files changed

+10
-470
lines changed

libclc/clc/include/clc/math/tables.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_tail);
8282

8383
CLC_TABLE_FUNCTION_DECL(double, ln_tbl_lo);
8484
CLC_TABLE_FUNCTION_DECL(double, ln_tbl_hi);
85-
TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
8685
CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_head);
8786
CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_tail);
8887
CLC_TABLE_FUNCTION_DECL(double, two_to_jby64_ep_tbl_head);

libclc/libspirv/lib/generic/math/atan2.cl

Lines changed: 5 additions & 244 deletions
Original file line numberDiff line numberDiff line change
@@ -7,249 +7,10 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <libspirv/spirv.h>
10+
#include <clc/math/clc_atan2.h>
1011

11-
#include <clc/math/tables.h>
12-
#include <clc/clcmacro.h>
13-
#include <clc/math/math.h>
12+
#define FUNCTION __spirv_ocl_atan2
13+
#define __CLC_FUNCTION(x) __clc_atan2
14+
#define __CLC_BODY <clc/shared/binary_def.inc>
1415

15-
_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_atan2(float y, float x) {
16-
const float pi = 0x1.921fb6p+1f;
17-
const float piby2 = 0x1.921fb6p+0f;
18-
const float piby4 = 0x1.921fb6p-1f;
19-
const float threepiby4 = 0x1.2d97c8p+1f;
20-
21-
float ax = __spirv_ocl_fabs(x);
22-
float ay = __spirv_ocl_fabs(y);
23-
float v = __spirv_ocl_fmin(ax, ay);
24-
float u = __spirv_ocl_fmax(ax, ay);
25-
26-
// Scale since u could be large, as in "regular" divide
27-
float s = u > 0x1.0p+96f ? 0x1.0p-32f : 1.0f;
28-
float vbyu = s * MATH_DIVIDE(v, s * u);
29-
30-
float vbyu2 = vbyu * vbyu;
31-
32-
#define USE_2_2_APPROXIMATION
33-
#if defined USE_2_2_APPROXIMATION
34-
float p = __spirv_ocl_mad(
35-
vbyu2, __spirv_ocl_mad(vbyu2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f),
36-
-0x1.5554d0p-2f) *
37-
vbyu2 * vbyu;
38-
float q = __spirv_ocl_mad(
39-
vbyu2, __spirv_ocl_mad(vbyu2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f);
40-
#else
41-
float p = __spirv_ocl_mad(
42-
vbyu2, __spirv_ocl_mad(vbyu2, -0x1.55cd22p-5f, -0x1.26cf76p-2f),
43-
-0x1.55554ep-2f) *
44-
vbyu2 * vbyu;
45-
float q = __spirv_ocl_mad(
46-
vbyu2,
47-
__spirv_ocl_mad(vbyu2,
48-
__spirv_ocl_mad(vbyu2, 0x1.9f1304p-5f, 0x1.2656fap-1f),
49-
0x1.76b4b8p+0f),
50-
1.0f);
51-
#endif
52-
53-
// Octant 0 result
54-
float a = __spirv_ocl_mad(p, MATH_RECIP(q), vbyu);
55-
56-
// Fix up 3 other octants
57-
float at = piby2 - a;
58-
a = ay > ax ? at : a;
59-
at = pi - a;
60-
a = x < 0.0F ? at : a;
61-
62-
// y == 0 => 0 for x >= 0, pi for x < 0
63-
at = __clc_as_int(x) < 0 ? pi : 0.0f;
64-
a = y == 0.0f ? at : a;
65-
66-
// if (!FINITE_ONLY()) {
67-
// x and y are +- Inf
68-
at = x > 0.0f ? piby4 : threepiby4;
69-
a = ax == INFINITY && ay == INFINITY ? at : a;
70-
71-
// x or y is NaN
72-
a = __spirv_IsNan(x) || __spirv_IsNan(y) ? __clc_as_float(QNANBITPATT_SP32) : a;
73-
// }
74-
75-
// Fixup sign and return
76-
return __spirv_ocl_copysign(a, y);
77-
}
78-
79-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_atan2, float,
80-
float);
81-
82-
#ifdef cl_khr_fp64
83-
84-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
85-
86-
_CLC_OVERLOAD _CLC_DEF double __spirv_ocl_atan2(double y, double x) {
87-
const double pi = 3.1415926535897932e+00; /* 0x400921fb54442d18 */
88-
const double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */
89-
const double piby4 = 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */
90-
const double three_piby4 = 2.3561944901923449e+00; /* 0x4002d97c7f3321d2 */
91-
const double pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */
92-
const double pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */
93-
const double piby2_head = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */
94-
const double piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */
95-
96-
double x2 = x;
97-
int xneg = __clc_as_int2(x).hi < 0;
98-
int xexp = (__clc_as_int2(x).hi >> 20) & 0x7ff;
99-
100-
double y2 = y;
101-
int yneg = __clc_as_int2(y).hi < 0;
102-
int yexp = (__clc_as_int2(y).hi >> 20) & 0x7ff;
103-
104-
int cond2 = (xexp < 1021) & (yexp < 1021);
105-
int diffexp = yexp - xexp;
106-
107-
// Scale up both x and y if they are both below 1/4
108-
double x1 = __spirv_ocl_ldexp(x, 1024);
109-
int xexp1 = (__clc_as_int2(x1).hi >> 20) & 0x7ff;
110-
double y1 = __spirv_ocl_ldexp(y, 1024);
111-
int yexp1 = (__clc_as_int2(y1).hi >> 20) & 0x7ff;
112-
int diffexp1 = yexp1 - xexp1;
113-
114-
diffexp = cond2 ? diffexp1 : diffexp;
115-
x = cond2 ? x1 : x;
116-
y = cond2 ? y1 : y;
117-
118-
// General case: take absolute values of arguments
119-
double u = __spirv_ocl_fabs(x);
120-
double v = __spirv_ocl_fabs(y);
121-
122-
// Swap u and v if necessary to obtain 0 < v < u. Compute v/u.
123-
int swap_vu = u < v;
124-
double uu = u;
125-
u = swap_vu ? v : u;
126-
v = swap_vu ? uu : v;
127-
128-
double vbyu = v / u;
129-
double q1, q2;
130-
131-
// General values of v/u. Use a look-up table and series expansion.
132-
133-
{
134-
double val = vbyu > 0.0625 ? vbyu : 0.063;
135-
int index = __spirv_ConvertFToS_Rint(__spirv_ocl_fma(256.0, val, 0.5));
136-
double2 tv = USE_TABLE(atan_jby256_tbl, index - 16);
137-
q1 = tv.s0;
138-
q2 = tv.s1;
139-
double c = (double)index * 0x1.0p-8;
140-
141-
// We're going to scale u and v by 2^(-u_exponent) to bring them close to 1
142-
// u_exponent could be EMAX so we have to do it in 2 steps
143-
int m = -((int)(__clc_as_ulong(u) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
144-
// double um = __amdil_ldexp_f64(u, m);
145-
// double vm = __amdil_ldexp_f64(v, m);
146-
double um = __spirv_ocl_ldexp(u, m);
147-
double vm = __spirv_ocl_ldexp(v, m);
148-
149-
// 26 leading bits of u
150-
double u1 = __clc_as_double(__clc_as_ulong(um) & 0xfffffffff8000000UL);
151-
double u2 = um - u1;
152-
153-
double r = MATH_DIVIDE(__spirv_ocl_fma(-c, u2, __spirv_ocl_fma(-c, u1, vm)),
154-
__spirv_ocl_fma(c, vm, um));
155-
156-
// Polynomial approximation to atan(r)
157-
double s = r * r;
158-
q2 = q2 + __spirv_ocl_fma((s * __spirv_ocl_fma(-s, 0.19999918038989143496,
159-
0.33333333333224095522)),
160-
-r, r);
161-
}
162-
163-
double q3, q4;
164-
{
165-
q3 = 0.0;
166-
q4 = vbyu;
167-
}
168-
169-
double q5, q6;
170-
{
171-
double u1 = __clc_as_double(__clc_as_ulong(u) & 0xffffffff00000000UL);
172-
double u2 = u - u1;
173-
double vu1 = __clc_as_double(__clc_as_ulong(vbyu) & 0xffffffff00000000UL);
174-
double vu2 = vbyu - vu1;
175-
176-
q5 = 0.0;
177-
double s = vbyu * vbyu;
178-
q6 = vbyu +
179-
__spirv_ocl_fma(
180-
-vbyu * s,
181-
__spirv_ocl_fma(
182-
-s,
183-
__spirv_ocl_fma(
184-
-s,
185-
__spirv_ocl_fma(-s,
186-
__spirv_ocl_fma(-s,
187-
0.90029810285449784439E-01,
188-
0.11110736283514525407),
189-
0.14285713561807169030),
190-
0.19999999999393223405),
191-
0.33333333333333170500),
192-
MATH_DIVIDE(
193-
__spirv_ocl_fma(
194-
-u, vu2,
195-
__spirv_ocl_fma(-u2, vu1, __spirv_ocl_fma(-u1, vu1, v))),
196-
u));
197-
}
198-
199-
q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5;
200-
q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6;
201-
202-
q1 = vbyu > 0.0625 ? q1 : q3;
203-
q2 = vbyu > 0.0625 ? q2 : q4;
204-
205-
// Tidy-up according to which quadrant the arguments lie in
206-
double res1, res2, res3, res4;
207-
q1 = swap_vu ? piby2_head - q1 : q1;
208-
q2 = swap_vu ? piby2_tail - q2 : q2;
209-
q1 = xneg ? pi_head - q1 : q1;
210-
q2 = xneg ? pi_tail - q2 : q2;
211-
q1 = q1 + q2;
212-
res4 = yneg ? -q1 : q1;
213-
214-
res1 = yneg ? -three_piby4 : three_piby4;
215-
res2 = yneg ? -piby4 : piby4;
216-
res3 = xneg ? res1 : res2;
217-
218-
res3 = __spirv_IsInf(x2) && __spirv_IsInf(y2) ? res3 : res4;
219-
res1 = yneg ? -pi : pi;
220-
221-
// abs(x)/abs(y) > 2^56 and x < 0
222-
res3 = (diffexp < -56 && xneg) ? res1 : res3;
223-
224-
res4 = MATH_DIVIDE(y, x);
225-
// x positive and dominant over y by a factor of 2^28
226-
res3 = diffexp < -28 && xneg == 0 ? res4 : res3;
227-
228-
// abs(y)/abs(x) > 2^56
229-
res4 = yneg ? -piby2 : piby2; // atan(y/x) is insignificant compared to piby2
230-
res3 = diffexp > 56 ? res4 : res3;
231-
232-
res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +- pi/2 depending on sign of y
233-
res4 = xneg ? res1 : y2;
234-
235-
res3 = y2 == 0.0
236-
? res4
237-
: res3; // Zero y gives +-0 for positive x and +-pi for negative x
238-
res3 = __spirv_IsNan(y2) ? y2 : res3;
239-
res3 = __spirv_IsNan(x2) ? x2 : res3;
240-
241-
return res3;
242-
}
243-
244-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_atan2, double,
245-
double);
246-
247-
#endif
248-
249-
#ifdef cl_khr_fp16
250-
251-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
252-
253-
_CLC_DEFINE_BINARY_BUILTIN(half, __spirv_ocl_atan2, __builtin_atan2f16, half, half)
254-
255-
#endif
16+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)