Skip to content

Commit 299a278

Browse files
authored
[libclc] Improving vector code generated from scalar code (llvm#140008)
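The previous method splits the vector data into two halves and applies the function to each half; shuffle_vector then concatenates the two results into a vector of the original size. This PR eliminates the use of shuffle_vector by calling the scalar function on each element (.s0, .s1, ...) directly.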
1 parent 22576e2 commit 299a278

File tree

2 files changed

+103
-56
lines changed

2 files changed

+103
-56
lines changed

libclc/clc/include/clc/clcmacro.h

Lines changed: 90 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -14,149 +14,194 @@
1414

1515
#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
1616
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
17-
return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
17+
return (RET_TYPE##2)(FUNCTION(x.s0), FUNCTION(x.s1)); \
1818
} \
1919
\
2020
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
21-
return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
21+
return (RET_TYPE##3)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2)); \
2222
} \
2323
\
2424
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
25-
return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \
25+
return (RET_TYPE##4)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \
26+
FUNCTION(x.s3)); \
2627
} \
2728
\
2829
DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
29-
return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \
30+
return (RET_TYPE##8)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), \
31+
FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5), \
32+
FUNCTION(x.s6), FUNCTION(x.s7)); \
3033
} \
3134
\
3235
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
33-
return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
36+
return (RET_TYPE##16)( \
37+
FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \
38+
FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7), \
39+
FUNCTION(x.s8), FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \
40+
FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), FUNCTION(x.sf)); \
3441
}
3542

3643
#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
3744
ARG2_TYPE) \
3845
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
39-
return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
46+
return (RET_TYPE##2)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1)); \
4047
} \
4148
\
4249
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
43-
return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
44-
FUNCTION(x.z, y.z)); \
50+
return (RET_TYPE##3)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
51+
FUNCTION(x.s2, y.s2)); \
4552
} \
4653
\
4754
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \
48-
return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
55+
return (RET_TYPE##4)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
56+
FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3)); \
4957
} \
5058
\
5159
DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \
52-
return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
60+
return (RET_TYPE##8)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
61+
FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
62+
FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \
63+
FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7)); \
5364
} \
5465
\
5566
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \
56-
return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
67+
return (RET_TYPE##16)( \
68+
FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2), \
69+
FUNCTION(x.s3, y.s3), FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \
70+
FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), FUNCTION(x.s8, y.s8), \
71+
FUNCTION(x.s9, y.s9), FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \
72+
FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), FUNCTION(x.se, y.se), \
73+
FUNCTION(x.sf, y.sf)); \
5774
}
5875

5976
#define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
6077
ARG2_TYPE) \
6178
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \
62-
return (RET_TYPE##2)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
79+
return (RET_TYPE##2)(FUNCTION(x, y.s0), FUNCTION(x, y.s1)); \
6380
} \
6481
\
6582
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE##3 y) { \
66-
return (RET_TYPE##3)(FUNCTION(x, y.x), FUNCTION(x, y.y), \
67-
FUNCTION(x, y.z)); \
83+
return (RET_TYPE##3)(FUNCTION(x, y.s0), FUNCTION(x, y.s1), \
84+
FUNCTION(x, y.s2)); \
6885
} \
6986
\
7087
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE##4 y) { \
71-
return (RET_TYPE##4)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
88+
return (RET_TYPE##4)(FUNCTION(x, y.s0), FUNCTION(x, y.s1), \
89+
FUNCTION(x, y.s2), FUNCTION(x, y.s3)); \
7290
} \
7391
\
7492
DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE##8 y) { \
75-
return (RET_TYPE##8)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
93+
return (RET_TYPE##8)(FUNCTION(x, y.s0), FUNCTION(x, y.s1), \
94+
FUNCTION(x, y.s2), FUNCTION(x, y.s3), \
95+
FUNCTION(x, y.s4), FUNCTION(x, y.s5), \
96+
FUNCTION(x, y.s6), FUNCTION(x, y.s7)); \
7697
} \
7798
\
7899
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE##16 y) { \
79-
return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
100+
return (RET_TYPE##16)( \
101+
FUNCTION(x, y.s0), FUNCTION(x, y.s1), FUNCTION(x, y.s2), \
102+
FUNCTION(x, y.s3), FUNCTION(x, y.s4), FUNCTION(x, y.s5), \
103+
FUNCTION(x, y.s6), FUNCTION(x, y.s7), FUNCTION(x, y.s8), \
104+
FUNCTION(x, y.s9), FUNCTION(x, y.sa), FUNCTION(x, y.sb), \
105+
FUNCTION(x, y.sc), FUNCTION(x, y.sd), FUNCTION(x, y.se), \
106+
FUNCTION(x, y.sf)); \
80107
}
81108

82109
#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
83110
ARG2_TYPE, ARG3_TYPE) \
84111
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, \
85112
ARG3_TYPE##2 z) { \
86-
return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
113+
return (RET_TYPE##2)(FUNCTION(x.s0, y.s0, z.s0), \
114+
FUNCTION(x.s1, y.s1, z.s1)); \
87115
} \
88116
\
89117
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, \
90118
ARG3_TYPE##3 z) { \
91-
return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \
92-
FUNCTION(x.z, y.z, z.z)); \
119+
return (RET_TYPE##3)(FUNCTION(x.s0, y.s0, z.s0), \
120+
FUNCTION(x.s1, y.s1, z.s1), \
121+
FUNCTION(x.s2, y.s2, z.s2)); \
93122
} \
94123
\
95124
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, \
96125
ARG3_TYPE##4 z) { \
97-
return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), \
98-
FUNCTION(x.hi, y.hi, z.hi)); \
126+
return (RET_TYPE##4)( \
127+
FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1), \
128+
FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3)); \
99129
} \
100130
\
101131
DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, \
102132
ARG3_TYPE##8 z) { \
103-
return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), \
104-
FUNCTION(x.hi, y.hi, z.hi)); \
133+
return (RET_TYPE##8)( \
134+
FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1), \
135+
FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3), \
136+
FUNCTION(x.s4, y.s4, z.s4), FUNCTION(x.s5, y.s5, z.s5), \
137+
FUNCTION(x.s6, y.s6, z.s6), FUNCTION(x.s7, y.s7, z.s7)); \
105138
} \
106139
\
107140
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, \
108141
ARG3_TYPE##16 z) { \
109-
return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), \
110-
FUNCTION(x.hi, y.hi, z.hi)); \
142+
return (RET_TYPE##16)( \
143+
FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1), \
144+
FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3), \
145+
FUNCTION(x.s4, y.s4, z.s4), FUNCTION(x.s5, y.s5, z.s5), \
146+
FUNCTION(x.s6, y.s6, z.s6), FUNCTION(x.s7, y.s7, z.s7), \
147+
FUNCTION(x.s8, y.s8, z.s8), FUNCTION(x.s9, y.s9, z.s9), \
148+
FUNCTION(x.sa, y.sa, z.sa), FUNCTION(x.sb, y.sb, z.sb), \
149+
FUNCTION(x.sc, y.sc, z.sc), FUNCTION(x.sd, y.sd, z.sd), \
150+
FUNCTION(x.se, y.se, z.se), FUNCTION(x.sf, y.sf, z.sf)); \
111151
}
112152

113153
#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
114154
ADDR_SPACE, ARG2_TYPE) \
115155
DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
116156
FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \
117157
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \
118-
return (__CLC_XCONCAT(RET_TYPE, 2))( \
119-
FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \
120-
FUNCTION(x.y, \
121-
(ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1))); \
158+
ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
159+
return (__CLC_XCONCAT(RET_TYPE, 2))(FUNCTION(x.s0, ptr), \
160+
FUNCTION(x.s1, ptr + 1)); \
122161
} \
123162
\
124163
DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \
125164
FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \
126165
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \
127-
return (__CLC_XCONCAT(RET_TYPE, 3))( \
128-
FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y), \
129-
FUNCTION(x.y, \
130-
(ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)), \
131-
FUNCTION(x.z, \
132-
(ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \
166+
ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
167+
return (__CLC_XCONCAT(RET_TYPE, 3))(FUNCTION(x.s0, ptr), \
168+
FUNCTION(x.s1, ptr + 1), \
169+
FUNCTION(x.s2, ptr + 2)); \
133170
} \
134171
\
135172
DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \
136173
FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \
137174
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \
175+
ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
138176
return (__CLC_XCONCAT(RET_TYPE, 4))( \
139-
FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) *)y), \
140-
FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
141-
ARG2_TYPE, 2) *)((ADDR_SPACE ARG2_TYPE *)y + 2))); \
177+
FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \
178+
FUNCTION(x.s3, ptr + 3)); \
142179
} \
143180
\
144181
DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \
145182
FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \
146183
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \
184+
ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
147185
return (__CLC_XCONCAT(RET_TYPE, 8))( \
148-
FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) *)y), \
149-
FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
150-
ARG2_TYPE, 4) *)((ADDR_SPACE ARG2_TYPE *)y + 4))); \
186+
FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \
187+
FUNCTION(x.s3, ptr + 3), FUNCTION(x.s4, ptr + 4), \
188+
FUNCTION(x.s5, ptr + 5), FUNCTION(x.s6, ptr + 6), \
189+
FUNCTION(x.s7, ptr + 7)); \
151190
} \
152191
\
153192
DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \
154193
FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \
155194
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \
195+
ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
156196
return (__CLC_XCONCAT(RET_TYPE, 16))( \
157-
FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) *)y), \
158-
FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT( \
159-
ARG2_TYPE, 8) *)((ADDR_SPACE ARG2_TYPE *)y + 8))); \
197+
FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \
198+
FUNCTION(x.s3, ptr + 3), FUNCTION(x.s4, ptr + 4), \
199+
FUNCTION(x.s5, ptr + 5), FUNCTION(x.s6, ptr + 6), \
200+
FUNCTION(x.s7, ptr + 7), FUNCTION(x.s8, ptr + 8), \
201+
FUNCTION(x.s9, ptr + 9), FUNCTION(x.sa, ptr + 10), \
202+
FUNCTION(x.sb, ptr + 11), FUNCTION(x.sc, ptr + 12), \
203+
FUNCTION(x.sd, ptr + 13), FUNCTION(x.se, ptr + 14), \
204+
FUNCTION(x.sf, ptr + 15)); \
160205
}
161206

162207
#define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, \

0 commit comments

Comments
 (0)