
Commit 77fd12a

[AArch64] Add aarch64_neon_vcmla{_rot{90,180,270}} intrinsics.
Add builtins required to implement vcmla and the rotated variants from the ACLE.

Reviewed By: t.p.northover
Differential Revision: https://reviews.llvm.org/D92929
1 parent: 7c59614
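Background note (a summary from the Arm ARM FCMLA pseudocode, not text from this commit): each rotation variant contributes half of a complex product. With even lanes holding real parts and odd lanes imaginary parts, the accumulations are:

```math
\begin{aligned}
\text{rot0}:   && d_{re} \mathrel{+}= a_{re} b_{re}, \quad d_{im} \mathrel{+}= a_{re} b_{im} \\
\text{rot90}:  && d_{re} \mathrel{-}= a_{im} b_{im}, \quad d_{im} \mathrel{+}= a_{im} b_{re} \\
\text{rot180}: && d_{re} \mathrel{-}= a_{re} b_{re}, \quad d_{im} \mathrel{-}= a_{re} b_{im} \\
\text{rot270}: && d_{re} \mathrel{+}= a_{im} b_{im}, \quad d_{im} \mathrel{-}= a_{im} b_{re}
\end{aligned}
```

Issuing rot0 followed by rot90 on the same accumulator therefore computes the full complex multiply-accumulate d += a·b, while rot180 and rot270 supply the negated halves (e.g. rot0 plus rot270 computes d += conj(a)·b).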

3 files changed (+229, −0 lines)

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 5 additions & 0 deletions
```diff
@@ -497,6 +497,11 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
   // v8.3-A Floating-point complex add
   def int_aarch64_neon_vcadd_rot90  : AdvSIMD_2VectorArg_Intrinsic;
   def int_aarch64_neon_vcadd_rot270 : AdvSIMD_2VectorArg_Intrinsic;
+
+  def int_aarch64_neon_vcmla_rot0   : AdvSIMD_3VectorArg_Intrinsic;
+  def int_aarch64_neon_vcmla_rot90  : AdvSIMD_3VectorArg_Intrinsic;
+  def int_aarch64_neon_vcmla_rot180 : AdvSIMD_3VectorArg_Intrinsic;
+  def int_aarch64_neon_vcmla_rot270 : AdvSIMD_3VectorArg_Intrinsic;
 }
 
 let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
```
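As a sketch of how the new intrinsics compose (the function @cmla_f32 below is illustrative only, not part of this commit): chaining rot0 and rot90 on one accumulator expresses a full complex multiply-accumulate, which should select to an fcmla #0 / fcmla #90 pair under -mattr=+v8.3a:

```llvm
; Hypothetical example: acc += a * b on packed complex floats
; (even lanes real, odd lanes imaginary).
define <2 x float> @cmla_f32(<2 x float> %acc, <2 x float> %a, <2 x float> %b) {
entry:
  %t   = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %a, <2 x float> %b)
  %res = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %t, <2 x float> %a, <2 x float> %b)
  ret <2 x float> %res
}

declare <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float>, <2 x float>, <2 x float>)
```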

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 21 additions & 0 deletions
```diff
@@ -934,6 +934,27 @@ let Predicates = [HasComplxNum, HasNEON] in {
   }
 }
 
+multiclass FCMLA_PATS<ValueType ty, RegisterClass Reg> {
+  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
+  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
+  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
+  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
+            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
+}
+
+let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
+  defm : FCMLA_PATS<v4f16, V64>;
+  defm : FCMLA_PATS<v8f16, V128>;
+}
+
+let Predicates = [HasComplxNum, HasNEON] in {
+  defm : FCMLA_PATS<v2f32, V64>;
+  defm : FCMLA_PATS<v4f32, V128>;
+  defm : FCMLA_PATS<v2f64, V128>;
+}
+
 // v8.3a Pointer Authentication
 // These instructions inhabit part of the hint space and so can be used for
 // armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
```
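Note on the patterns: `#` is TableGen string concatenation, so `!cast<Instruction>("FCMLA" # ty)` with `ty = v2f32` resolves to the pre-existing `FCMLAv2f32` instruction definition, and the trailing immediate operand (0 through 3) encodes the rotation in multiples of 90 degrees, which the assembler prints as #0, #90, #180, or #270 (as the tests below check).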
New test file

Lines changed: 203 additions & 0 deletions
```llvm
; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+fullfp16 -o - | FileCheck %s

define <4 x half> @test_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
entry:
; CHECK-LABEL: test_16x4
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #0
;
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <4 x half> @test_rot90_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
entry:
; CHECK-LABEL: test_rot90_16x4
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #90
;
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <4 x half> @test_rot180_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
entry:
; CHECK-LABEL: test_rot180_16x4
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #180
;
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <4 x half> @test_rot270_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
entry:
; CHECK-LABEL: test_rot270_16x4
; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #270
;
  %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %res
}

define <2 x float> @test_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
entry:
; CHECK-LABEL: test_32x2
; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0
;
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot90_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
entry:
; CHECK-LABEL: test_rot90_32x2
; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #90
;
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot180_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
entry:
; CHECK-LABEL: test_rot180_32x2
; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #180
;
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <2 x float> @test_rot270_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
entry:
; CHECK-LABEL: test_rot270_32x2
; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #270
;
  %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %res
}

define <8 x half> @test_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_16x8
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #0
;
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot90_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_rot90_16x8
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #90
;
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot180_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_rot180_16x8
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #180
;
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <8 x half> @test_rot270_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
entry:
; CHECK-LABEL: test_rot270_16x8
; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #270
;
  %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %res
}

define <4 x float> @test_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
; CHECK-LABEL: test_32x4
; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #0
;
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_rot90_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
; CHECK-LABEL: test_rot90_32x4
; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #90
;
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_rot180_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
; CHECK-LABEL: test_rot180_32x4
; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #180
;
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <4 x float> @test_rot270_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
; CHECK-LABEL: test_rot270_32x4
; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #270
;
  %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %res
}

define <2 x double> @test_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: test_64x2
; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #0
;
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot90_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: test_rot90_64x2
; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #90
;
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot180_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: test_rot180_64x2
; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #180
;
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

define <2 x double> @test_rot270_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
; CHECK-LABEL: test_rot270_64x2
; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #270
;
  %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %res
}

declare <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double>, <2 x double>, <2 x double>)
```
