@@ -153,6 +153,154 @@ void test_pmdmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector uns
153
153
* ((__dmr1024 * )resp ) = vdmr ;
154
154
}
155
155
156
+ // CHECK-LABEL: void @test_dmxvf16gerx2(
157
+ // CHECK-NEXT: [[ENTRY:.*:]]
158
+ // CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
159
+ // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
160
+ // CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]]
161
+ // CHECK-NEXT: ret void
162
+ //
163
+ void test_dmxvf16gerx2 (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
164
+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
165
+ __vector_pair vp = * ((__vector_pair * )vpp );
166
+ __builtin_mma_dmxvf16gerx2 (& vdmr , vp , vc );
167
+ * ((__dmr1024 * )resp ) = vdmr ;
168
+ }
169
+
170
+ // CHECK-LABEL: void @test_dmxvf16gerx2nn(
171
+ // CHECK-NEXT: [[ENTRY:.*:]]
172
+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
173
+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
174
+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
175
+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
176
+ // CHECK-NEXT: ret void
177
+ //
178
+ void test_dmxvf16gerx2nn (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
179
+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
180
+ __vector_pair vp = * ((__vector_pair * )vpp );
181
+ __builtin_mma_dmxvf16gerx2nn (& vdmr , vp , vc );
182
+ * ((__dmr1024 * )resp ) = vdmr ;
183
+ }
184
+
185
+ // CHECK-LABEL: void @test_dmxvf16gerx2np(
186
+ // CHECK-NEXT: [[ENTRY:.*:]]
187
+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
188
+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
189
+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
190
+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
191
+ // CHECK-NEXT: ret void
192
+ //
193
+ void test_dmxvf16gerx2np (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
194
+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
195
+ __vector_pair vp = * ((__vector_pair * )vpp );
196
+ __builtin_mma_dmxvf16gerx2np (& vdmr , vp , vc );
197
+ * ((__dmr1024 * )resp ) = vdmr ;
198
+ }
199
+
200
+ // CHECK-LABEL: void @test_dmxvf16gerx2pn(
201
+ // CHECK-NEXT: [[ENTRY:.*:]]
202
+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
203
+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
204
+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
205
+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
206
+ // CHECK-NEXT: ret void
207
+ //
208
+ void test_dmxvf16gerx2pn (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
209
+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
210
+ __vector_pair vp = * ((__vector_pair * )vpp );
211
+ __builtin_mma_dmxvf16gerx2pn (& vdmr , vp , vc );
212
+ * ((__dmr1024 * )resp ) = vdmr ;
213
+ }
214
+
215
+ // CHECK-LABEL: void @test_dmxvf16gerx2pp(
216
+ // CHECK-NEXT: [[ENTRY:.*:]]
217
+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
218
+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
219
+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
220
+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
221
+ // CHECK-NEXT: ret void
222
+ //
223
+ void test_dmxvf16gerx2pp (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
224
+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
225
+ __vector_pair vp = * ((__vector_pair * )vpp );
226
+ __builtin_mma_dmxvf16gerx2pp (& vdmr , vp , vc );
227
+ * ((__dmr1024 * )resp ) = vdmr ;
228
+ }
229
+
230
+ // CHECK-LABEL: void @test_pmdmxvf16gerx2(
231
+ // CHECK-NEXT: [[ENTRY:.*:]]
232
+ // CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
233
+ // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
234
+ // CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
235
+ // CHECK-NEXT: ret void
236
+ //
237
+ void test_pmdmxvf16gerx2 (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
238
+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
239
+ __vector_pair vp = * ((__vector_pair * )vpp );
240
+ __builtin_mma_pmdmxvf16gerx2 (& vdmr , vp , vc , 0 , 0 , 0 );
241
+ * ((__dmr1024 * )resp ) = vdmr ;
242
+ }
243
+
244
+ // CHECK-LABEL: void @test_pmdmxvf16gerx2nn(
245
+ // CHECK-NEXT: [[ENTRY:.*:]]
246
+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
247
+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
248
+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
249
+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
250
+ // CHECK-NEXT: ret void
251
+ //
252
+ void test_pmdmxvf16gerx2nn (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
253
+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
254
+ __vector_pair vp = * ((__vector_pair * )vpp );
255
+ __builtin_mma_pmdmxvf16gerx2nn (& vdmr , vp , vc , 0 , 0 , 0 );
256
+ * ((__dmr1024 * )resp ) = vdmr ;
257
+ }
258
+
259
+ // CHECK-LABEL: void @test_pmdmxvf16gerx2np(
260
+ // CHECK-NEXT: [[ENTRY:.*:]]
261
+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
262
+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
263
+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
264
+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
265
+ // CHECK-NEXT: ret void
266
+ //
267
+ void test_pmdmxvf16gerx2np (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
268
+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
269
+ __vector_pair vp = * ((__vector_pair * )vpp );
270
+ __builtin_mma_pmdmxvf16gerx2np (& vdmr , vp , vc , 0 , 0 , 0 );
271
+ * ((__dmr1024 * )resp ) = vdmr ;
272
+ }
273
+
274
+ // CHECK-LABEL: void @test_pmdmxvf16gerx2pn(
275
+ // CHECK-NEXT: [[ENTRY:.*:]]
276
+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
277
+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
278
+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
279
+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
280
+ // CHECK-NEXT: ret void
281
+ //
282
+ void test_pmdmxvf16gerx2pn (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
283
+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
284
+ __vector_pair vp = * ((__vector_pair * )vpp );
285
+ __builtin_mma_pmdmxvf16gerx2pn (& vdmr , vp , vc , 0 , 0 , 0 );
286
+ * ((__dmr1024 * )resp ) = vdmr ;
287
+ }
288
+
289
+ // CHECK-LABEL: void @test_pmdmxvf16gerx2pp(
290
+ // CHECK-NEXT: [[ENTRY:.*:]]
291
+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
292
+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
293
+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
294
+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
295
+ // CHECK-NEXT: ret void
296
+ //
297
+ void test_pmdmxvf16gerx2pp (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
298
+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
299
+ __vector_pair vp = * ((__vector_pair * )vpp );
300
+ __builtin_mma_pmdmxvf16gerx2pp (& vdmr , vp , vc , 0 , 0 , 0 );
301
+ * ((__dmr1024 * )resp ) = vdmr ;
302
+ }
// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
// CHECK: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0}
// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}