Skip to content

Commit 1485d5f

Browse files
committed
Add DMF VSX vector 16-bit floating-point builtins
1 parent 731df58 commit 1485d5f

File tree

3 files changed

+184
-6
lines changed

3 files changed

+184
-6
lines changed

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,6 +1099,10 @@ UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvbf16gerx2, "vW1024*W256V",
10991099
"mma,paired-vector-memops")
11001100
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvbf16gerx2, "vW1024*W256Vi255i15i3",
11011101
"mma,paired-vector-memops")
1102+
UNALIASED_CUSTOM_MMA_BUILTIN(mma_dmxvf16gerx2, "vW1024*W256V",
1103+
"mma,paired-vector-memops")
1104+
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmdmxvf16gerx2, "vW1024*W256Vi255i15i3",
1105+
"mma,paired-vector-memops")
11021106

11031107

11041108
// FIXME: Obviously incomplete.

clang/test/CodeGen/PowerPC/builtins-dmf-vsx-vector-float.c

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,154 @@ void test_pmdmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector uns
153153
*((__dmr1024 *)resp) = vdmr;
154154
}
155155

156+
// CHECK-LABEL: void @test_dmxvf16gerx2(
157+
// CHECK-NEXT: [[ENTRY:.*:]]
158+
// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
159+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
160+
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]]
161+
// CHECK-NEXT: ret void
162+
//
163+
void test_dmxvf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
164+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
165+
__vector_pair vp = *((__vector_pair *)vpp);
166+
__builtin_mma_dmxvf16gerx2(&vdmr, vp, vc);
167+
*((__dmr1024 *)resp) = vdmr;
168+
}
169+
170+
// CHECK-LABEL: void @test_dmxvf16gerx2nn(
171+
// CHECK-NEXT: [[ENTRY:.*:]]
172+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
173+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
174+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
175+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
176+
// CHECK-NEXT: ret void
177+
//
178+
void test_dmxvf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
179+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
180+
__vector_pair vp = *((__vector_pair *)vpp);
181+
__builtin_mma_dmxvf16gerx2nn(&vdmr, vp, vc);
182+
*((__dmr1024 *)resp) = vdmr;
183+
}
184+
185+
// CHECK-LABEL: void @test_dmxvf16gerx2np(
186+
// CHECK-NEXT: [[ENTRY:.*:]]
187+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
188+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
189+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
190+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
191+
// CHECK-NEXT: ret void
192+
//
193+
void test_dmxvf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
194+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
195+
__vector_pair vp = *((__vector_pair *)vpp);
196+
__builtin_mma_dmxvf16gerx2np(&vdmr, vp, vc);
197+
*((__dmr1024 *)resp) = vdmr;
198+
}
199+
200+
// CHECK-LABEL: void @test_dmxvf16gerx2pn(
201+
// CHECK-NEXT: [[ENTRY:.*:]]
202+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
203+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
204+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
205+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
206+
// CHECK-NEXT: ret void
207+
//
208+
void test_dmxvf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
209+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
210+
__vector_pair vp = *((__vector_pair *)vpp);
211+
__builtin_mma_dmxvf16gerx2pn(&vdmr, vp, vc);
212+
*((__dmr1024 *)resp) = vdmr;
213+
}
214+
215+
// CHECK-LABEL: void @test_dmxvf16gerx2pp(
216+
// CHECK-NEXT: [[ENTRY:.*:]]
217+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
218+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
219+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
220+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
221+
// CHECK-NEXT: ret void
222+
//
223+
void test_dmxvf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
224+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
225+
__vector_pair vp = *((__vector_pair *)vpp);
226+
__builtin_mma_dmxvf16gerx2pp(&vdmr, vp, vc);
227+
*((__dmr1024 *)resp) = vdmr;
228+
}
229+
230+
// CHECK-LABEL: void @test_pmdmxvf16gerx2(
231+
// CHECK-NEXT: [[ENTRY:.*:]]
232+
// CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
233+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
234+
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
235+
// CHECK-NEXT: ret void
236+
//
237+
void test_pmdmxvf16gerx2(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
238+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
239+
__vector_pair vp = *((__vector_pair *)vpp);
240+
__builtin_mma_pmdmxvf16gerx2(&vdmr, vp, vc, 0, 0, 0);
241+
*((__dmr1024 *)resp) = vdmr;
242+
}
243+
244+
// CHECK-LABEL: void @test_pmdmxvf16gerx2nn(
245+
// CHECK-NEXT: [[ENTRY:.*:]]
246+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
247+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
248+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
249+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
250+
// CHECK-NEXT: ret void
251+
//
252+
void test_pmdmxvf16gerx2nn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
253+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
254+
__vector_pair vp = *((__vector_pair *)vpp);
255+
__builtin_mma_pmdmxvf16gerx2nn(&vdmr, vp, vc, 0, 0, 0);
256+
*((__dmr1024 *)resp) = vdmr;
257+
}
258+
259+
// CHECK-LABEL: void @test_pmdmxvf16gerx2np(
260+
// CHECK-NEXT: [[ENTRY:.*:]]
261+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
262+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
263+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
264+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
265+
// CHECK-NEXT: ret void
266+
//
267+
void test_pmdmxvf16gerx2np(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
268+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
269+
__vector_pair vp = *((__vector_pair *)vpp);
270+
__builtin_mma_pmdmxvf16gerx2np(&vdmr, vp, vc, 0, 0, 0);
271+
*((__dmr1024 *)resp) = vdmr;
272+
}
273+
274+
// CHECK-LABEL: void @test_pmdmxvf16gerx2pn(
275+
// CHECK-NEXT: [[ENTRY:.*:]]
276+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
277+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
278+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
279+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
280+
// CHECK-NEXT: ret void
281+
//
282+
void test_pmdmxvf16gerx2pn(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
283+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
284+
__vector_pair vp = *((__vector_pair *)vpp);
285+
__builtin_mma_pmdmxvf16gerx2pn(&vdmr, vp, vc, 0, 0, 0);
286+
*((__dmr1024 *)resp) = vdmr;
287+
}
288+
289+
// CHECK-LABEL: void @test_pmdmxvf16gerx2pp(
290+
// CHECK-NEXT: [[ENTRY:.*:]]
291+
// CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
292+
// CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
293+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
294+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
295+
// CHECK-NEXT: ret void
296+
//
297+
void test_pmdmxvf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
298+
__dmr1024 vdmr = *((__dmr1024 *)vdmrp);
299+
__vector_pair vp = *((__vector_pair *)vpp);
300+
__builtin_mma_pmdmxvf16gerx2pp(&vdmr, vp, vc, 0, 0, 0);
301+
*((__dmr1024 *)resp) = vdmr;
302+
}
303+
156304
// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
157305
// CHECK: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0}
158306
// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}

clang/test/CodeGen/PowerPC/ppc-future-mma-builtin-err.c

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,15 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
1212
__builtin_mma_dmxvi8gerx4spp(&vdmr, vp, vc);
1313
__builtin_mma_pmdmxvi8gerx4spp(&vdmr, vp, vc, 0, 0, 0);
1414

15+
// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
16+
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
17+
// CHECK: error: '__builtin_mma_dmxvi8gerx4pp' needs target feature mma,paired-vector-memops
18+
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4pp' needs target feature mma,paired-vector-memops
19+
// CHECK: error: '__builtin_mma_dmxvi8gerx4spp' needs target feature mma,paired-vector-memops
20+
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4spp' needs target feature mma,paired-vector-memops
21+
1522
// DMF VSX Vector bfloat16 GER 2x builtins.
23+
1624
__builtin_mma_dmxvbf16gerx2(&vdmr, vp, vc);
1725
__builtin_mma_dmxvbf16gerx2nn(&vdmr, vp, vc);
1826
__builtin_mma_dmxvbf16gerx2np(&vdmr, vp, vc);
@@ -24,12 +32,6 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
2432
__builtin_mma_pmdmxvbf16gerx2pn(&vdmr, vp, vc, 0, 0, 0);
2533
__builtin_mma_pmdmxvbf16gerx2pp(&vdmr, vp, vc, 0, 0, 0);
2634

27-
// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
28-
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
29-
// CHECK: error: '__builtin_mma_dmxvi8gerx4pp' needs target feature mma,paired-vector-memops
30-
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4pp' needs target feature mma,paired-vector-memops
31-
// CHECK: error: '__builtin_mma_dmxvi8gerx4spp' needs target feature mma,paired-vector-memops
32-
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4spp' needs target feature mma,paired-vector-memops
3335
// CHECK: error: '__builtin_mma_dmxvbf16gerx2' needs target feature mma,paired-vector-memops
3436
// CHECK: error: '__builtin_mma_dmxvbf16gerx2nn' needs target feature mma,paired-vector-memops
3537
// CHECK: error: '__builtin_mma_dmxvbf16gerx2np' needs target feature mma,paired-vector-memops
@@ -40,4 +42,28 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
4042
// CHECK: error: '__builtin_mma_pmdmxvbf16gerx2np' needs target feature mma,paired-vector-memops
4143
// CHECK: error: '__builtin_mma_pmdmxvbf16gerx2pn' needs target feature mma,paired-vector-memops
4244
// CHECK: error: '__builtin_mma_pmdmxvbf16gerx2pp' needs target feature mma,paired-vector-memops
45+
46+
// DMF VSX Vector 16-bit Floating-point GER 2x builtins.
47+
48+
__builtin_mma_dmxvf16gerx2(&vdmr, vp, vc);
49+
__builtin_mma_dmxvf16gerx2nn(&vdmr, vp, vc);
50+
__builtin_mma_dmxvf16gerx2np(&vdmr, vp, vc);
51+
__builtin_mma_dmxvf16gerx2pn(&vdmr, vp, vc);
52+
__builtin_mma_dmxvf16gerx2pp(&vdmr, vp, vc);
53+
__builtin_mma_pmdmxvf16gerx2(&vdmr, vp, vc, 0, 0, 0);
54+
__builtin_mma_pmdmxvf16gerx2nn(&vdmr, vp, vc, 0, 0, 0);
55+
__builtin_mma_pmdmxvf16gerx2np(&vdmr, vp, vc, 0, 0, 0);
56+
__builtin_mma_pmdmxvf16gerx2pn(&vdmr, vp, vc, 0, 0, 0);
57+
__builtin_mma_pmdmxvf16gerx2pp(&vdmr, vp, vc, 0, 0, 0);
58+
59+
// CHECK: error: '__builtin_mma_dmxvf16gerx2' needs target feature mma,paired-vector-memops
60+
// CHECK: error: '__builtin_mma_dmxvf16gerx2nn' needs target feature mma,paired-vector-memops
61+
// CHECK: error: '__builtin_mma_dmxvf16gerx2np' needs target feature mma,paired-vector-memops
62+
// CHECK: error: '__builtin_mma_dmxvf16gerx2pn' needs target feature mma,paired-vector-memops
63+
// CHECK: error: '__builtin_mma_dmxvf16gerx2pp' needs target feature mma,paired-vector-memops
64+
// CHECK: error: '__builtin_mma_pmdmxvf16gerx2' needs target feature mma,paired-vector-memops
65+
// CHECK: error: '__builtin_mma_pmdmxvf16gerx2nn' needs target feature mma,paired-vector-memops
66+
// CHECK: error: '__builtin_mma_pmdmxvf16gerx2np' needs target feature mma,paired-vector-memops
67+
// CHECK: error: '__builtin_mma_pmdmxvf16gerx2pn' needs target feature mma,paired-vector-memops
68+
// CHECK: error: '__builtin_mma_pmdmxvf16gerx2pp' needs target feature mma,paired-vector-memops
4369
}

0 commit comments

Comments
 (0)