|
14 | 14 |
|
15 | 15 | #define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
|
16 | 16 | DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
|
17 |
| - return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \ |
| 17 | + return (RET_TYPE##2)(FUNCTION(x.s0), FUNCTION(x.s1)); \ |
18 | 18 | } \
|
19 | 19 | \
|
20 | 20 | DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
|
21 |
| - return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \ |
| 21 | + return (RET_TYPE##3)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2)); \ |
22 | 22 | } \
|
23 | 23 | \
|
24 | 24 | DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
|
25 |
| - return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \ |
| 25 | + return (RET_TYPE##4)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \ |
| 26 | + FUNCTION(x.s3)); \ |
26 | 27 | } \
|
27 | 28 | \
|
28 | 29 | DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
|
29 |
| - return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \ |
| 30 | + return (RET_TYPE##8)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \ |
| 31 | + FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5), \ |
| 32 | + FUNCTION(x.s6), FUNCTION(x.s7)); \ |
30 | 33 | } \
|
31 | 34 | \
|
32 | 35 | DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
|
33 |
| - return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \ |
| 36 | + return (RET_TYPE##16)( \ |
| 37 | + FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \ |
| 38 | + FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7), \ |
| 39 | + FUNCTION(x.s8), FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \ |
| 40 | + FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), FUNCTION(x.sf)); \ |
34 | 41 | }
|
35 | 42 |
|
36 | 43 | #define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
|
37 | 44 | ARG2_TYPE) \
|
38 | 45 | DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
|
39 |
| - return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \ |
| 46 | + return (RET_TYPE##2)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1)); \ |
40 | 47 | } \
|
41 | 48 | \
|
42 | 49 | DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
|
43 |
| - return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \ |
44 |
| - FUNCTION(x.z, y.z)); \ |
| 50 | + return (RET_TYPE##3)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ |
| 51 | + FUNCTION(x.s2, y.s2)); \ |
45 | 52 | } \
|
46 | 53 | \
|
47 | 54 | DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \
|
48 |
| - return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ |
| 55 | + return (RET_TYPE##4)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ |
| 56 | + FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3)); \ |
49 | 57 | } \
|
50 | 58 | \
|
51 | 59 | DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \
|
52 |
| - return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ |
| 60 | + return (RET_TYPE##8)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \ |
| 61 | + FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \ |
| 62 | + FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \ |
| 63 | + FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7)); \ |
53 | 64 | } \
|
54 | 65 | \
|
55 | 66 | DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \
|
56 |
| - return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \ |
| 67 | + return (RET_TYPE##16)( \ |
| 68 | + FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), \ |
| 69 | + FUNCTION(x.s3, y.s3), FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \ |
| 70 | + FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), FUNCTION(x.s8, y.s8), \ |
| 71 | + FUNCTION(x.s9, y.s9), FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \ |
| 72 | + FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), FUNCTION(x.se, y.se), \ |
| 73 | + FUNCTION(x.sf, y.sf)); \ |
57 | 74 | }
|
58 | 75 |
|
59 | 76 | #define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
|
60 | 77 | ARG2_TYPE) \
|
61 | 78 | DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \
|
62 |
| - return (RET_TYPE##2)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| 79 | + return (RET_TYPE##2)(FUNCTION(x, y.s0), FUNCTION(x, y.s1)); \ |
63 | 80 | } \
|
64 | 81 | \
|
65 | 82 | DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE##3 y) { \
|
66 |
| - return (RET_TYPE##3)(FUNCTION(x, y.x), FUNCTION(x, y.y), \ |
67 |
| - FUNCTION(x, y.z)); \ |
| 83 | + return (RET_TYPE##3)(FUNCTION(x, y.s0), FUNCTION(x, y.s1), \ |
| 84 | + FUNCTION(x, y.s2)); \ |
68 | 85 | } \
|
69 | 86 | \
|
70 | 87 | DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE##4 y) { \
|
71 |
| - return (RET_TYPE##4)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| 88 | + return (RET_TYPE##4)(FUNCTION(x, y.s0), FUNCTION(x, y.s1), \ |
| 89 | + FUNCTION(x, y.s2), FUNCTION(x, y.s3)); \ |
72 | 90 | } \
|
73 | 91 | \
|
74 | 92 | DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE##8 y) { \
|
75 |
| - return (RET_TYPE##8)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| 93 | + return (RET_TYPE##8)(FUNCTION(x, y.s0), FUNCTION(x, y.s1), \ |
| 94 | + FUNCTION(x, y.s2), FUNCTION(x, y.s3), \ |
| 95 | + FUNCTION(x, y.s4), FUNCTION(x, y.s5), \ |
| 96 | + FUNCTION(x, y.s6), FUNCTION(x, y.s7)); \ |
76 | 97 | } \
|
77 | 98 | \
|
78 | 99 | DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE##16 y) { \
|
79 |
| - return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \ |
| 100 | + return (RET_TYPE##16)( \ |
| 101 | + FUNCTION(x, y.s0), FUNCTION(x, y.s1), FUNCTION(x, y.s2), \ |
| 102 | + FUNCTION(x, y.s3), FUNCTION(x, y.s4), FUNCTION(x, y.s5), \ |
| 103 | + FUNCTION(x, y.s6), FUNCTION(x, y.s7), FUNCTION(x, y.s8), \ |
| 104 | + FUNCTION(x, y.s9), FUNCTION(x, y.sa), FUNCTION(x, y.sb), \ |
| 105 | + FUNCTION(x, y.sc), FUNCTION(x, y.sd), FUNCTION(x, y.se), \ |
| 106 | + FUNCTION(x, y.sf)); \ |
80 | 107 | }
|
81 | 108 |
|
82 | 109 | #define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
|
83 | 110 | ARG2_TYPE, ARG3_TYPE) \
|
84 | 111 | DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, \
|
85 | 112 | ARG3_TYPE##2 z) { \
|
86 |
| - return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \ |
| 113 | + return (RET_TYPE##2)(FUNCTION(x.s0, y.s0, z.s0), \ |
| 114 | + FUNCTION(x.s1, y.s1, z.s1)); \ |
87 | 115 | } \
|
88 | 116 | \
|
89 | 117 | DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, \
|
90 | 118 | ARG3_TYPE##3 z) { \
|
91 |
| - return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \ |
92 |
| - FUNCTION(x.z, y.z, z.z)); \ |
| 119 | + return (RET_TYPE##3)(FUNCTION(x.s0, y.s0, z.s0), \ |
| 120 | + FUNCTION(x.s1, y.s1, z.s1), \ |
| 121 | + FUNCTION(x.s2, y.s2, z.s2)); \ |
93 | 122 | } \
|
94 | 123 | \
|
95 | 124 | DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, \
|
96 | 125 | ARG3_TYPE##4 z) { \
|
97 |
| - return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), \ |
98 |
| - FUNCTION(x.hi, y.hi, z.hi)); \ |
| 126 | + return (RET_TYPE##4)( \ |
| 127 | + FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1), \ |
| 128 | + FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3)); \ |
99 | 129 | } \
|
100 | 130 | \
|
101 | 131 | DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, \
|
102 | 132 | ARG3_TYPE##8 z) { \
|
103 |
| - return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), \ |
104 |
| - FUNCTION(x.hi, y.hi, z.hi)); \ |
| 133 | + return (RET_TYPE##8)( \ |
| 134 | + FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1), \ |
| 135 | + FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3), \ |
| 136 | + FUNCTION(x.s4, y.s4, z.s4), FUNCTION(x.s5, y.s5, z.s5), \ |
| 137 | + FUNCTION(x.s6, y.s6, z.s6), FUNCTION(x.s7, y.s7, z.s7)); \ |
105 | 138 | } \
|
106 | 139 | \
|
107 | 140 | DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, \
|
108 | 141 | ARG3_TYPE##16 z) { \
|
109 |
| - return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), \ |
110 |
| - FUNCTION(x.hi, y.hi, z.hi)); \ |
| 142 | + return (RET_TYPE##16)( \ |
| 143 | + FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1), \ |
| 144 | + FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3), \ |
| 145 | + FUNCTION(x.s4, y.s4, z.s4), FUNCTION(x.s5, y.s5, z.s5), \ |
| 146 | + FUNCTION(x.s6, y.s6, z.s6), FUNCTION(x.s7, y.s7, z.s7), \ |
| 147 | + FUNCTION(x.s8, y.s8, z.s8), FUNCTION(x.s9, y.s9, z.s9), \ |
| 148 | + FUNCTION(x.sa, y.sa, z.sa), FUNCTION(x.sb, y.sb, z.sb), \ |
| 149 | + FUNCTION(x.sc, y.sc, z.sc), FUNCTION(x.sd, y.sd, z.sd), \ |
| 150 | + FUNCTION(x.se, y.se, z.se), FUNCTION(x.sf, y.sf, z.sf)); \ |
111 | 151 | }
|
112 | 152 |
|
113 | 153 | #define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
|
114 | 154 | ADDR_SPACE, ARG2_TYPE) \
|
115 | 155 | DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
|
116 | 156 | FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \
|
117 | 157 | ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \
|
118 |
| - return (__CLC_XCONCAT(RET_TYPE, 2))( \ |
119 |
| - FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \ |
120 |
| - FUNCTION(x.y, \ |
121 |
| - (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1))); \ |
| 158 | + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ |
| 159 | + return (__CLC_XCONCAT(RET_TYPE, 2))(FUNCTION(x.s0, ptr), \ |
| 160 | + FUNCTION(x.s1, ptr + 1)); \ |
122 | 161 | } \
|
123 | 162 | \
|
124 | 163 | DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \
|
125 | 164 | FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \
|
126 | 165 | ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \
|
127 |
| - return (__CLC_XCONCAT(RET_TYPE, 3))( \ |
128 |
| - FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \ |
129 |
| - FUNCTION(x.y, \ |
130 |
| - (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)), \ |
131 |
| - FUNCTION(x.z, \ |
132 |
| - (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \ |
| 166 | + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ |
| 167 | + return (__CLC_XCONCAT(RET_TYPE, 3))(FUNCTION(x.s0, ptr), \ |
| 168 | + FUNCTION(x.s1, ptr + 1), \ |
| 169 | + FUNCTION(x.s2, ptr + 2)); \ |
133 | 170 | } \
|
134 | 171 | \
|
135 | 172 | DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \
|
136 | 173 | FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \
|
137 | 174 | ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \
|
| 175 | + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ |
138 | 176 | return (__CLC_XCONCAT(RET_TYPE, 4))( \
|
139 |
| - FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) *)y), \ |
140 |
| - FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ |
141 |
| - ARG2_TYPE, 2) *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \ |
| 177 | + FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \ |
| 178 | + FUNCTION(x.s3, ptr + 3)); \ |
142 | 179 | } \
|
143 | 180 | \
|
144 | 181 | DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \
|
145 | 182 | FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \
|
146 | 183 | ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \
|
| 184 | + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ |
147 | 185 | return (__CLC_XCONCAT(RET_TYPE, 8))( \
|
148 |
| - FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) *)y), \ |
149 |
| - FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ |
150 |
| - ARG2_TYPE, 4) *)((ADDR_SPACE ARG2_TYPE *)y + 4))); \ |
| 186 | + FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \ |
| 187 | + FUNCTION(x.s3, ptr + 3), FUNCTION(x.s4, ptr + 4), \ |
| 188 | + FUNCTION(x.s5, ptr + 5), FUNCTION(x.s6, ptr + 6), \ |
| 189 | + FUNCTION(x.s7, ptr + 7)); \ |
151 | 190 | } \
|
152 | 191 | \
|
153 | 192 | DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \
|
154 | 193 | FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \
|
155 | 194 | ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \
|
| 195 | + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ |
156 | 196 | return (__CLC_XCONCAT(RET_TYPE, 16))( \
|
157 |
| - FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) *)y), \ |
158 |
| - FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \ |
159 |
| - ARG2_TYPE, 8) *)((ADDR_SPACE ARG2_TYPE *)y + 8))); \ |
| 197 | + FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \ |
| 198 | + FUNCTION(x.s3, ptr + 3), FUNCTION(x.s4, ptr + 4), \ |
| 199 | + FUNCTION(x.s5, ptr + 5), FUNCTION(x.s6, ptr + 6), \ |
| 200 | + FUNCTION(x.s7, ptr + 7), FUNCTION(x.s8, ptr + 8), \ |
| 201 | + FUNCTION(x.s9, ptr + 9), FUNCTION(x.sa, ptr + 10), \ |
| 202 | + FUNCTION(x.sb, ptr + 11), FUNCTION(x.sc, ptr + 12), \ |
| 203 | + FUNCTION(x.sd, ptr + 13), FUNCTION(x.se, ptr + 14), \ |
| 204 | + FUNCTION(x.sf, ptr + 15)); \ |
160 | 205 | }
|
161 | 206 |
|
162 | 207 | #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, \
|
|
0 commit comments