Skip to content

Commit c7a6b11

Browse files
committed
[ARM][AArch64] Add some extra shuffle conversion test coverage. NFC
This adds a big-endian run line for the AArch64 TRN tests and regenerates the check lines. It also adds an extra MVE VMOVN test case and regenerates vector-DAGCombine.ll to make future updates easier.
1 parent d134442 commit c7a6b11

File tree

3 files changed

+407
-72
lines changed

3 files changed

+407
-72
lines changed

llvm/test/CodeGen/AArch64/arm64-trn.ll

Lines changed: 211 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,26 @@
1-
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s --check-prefixes=CHECKLE
3+
; RUN: llc < %s -mtriple=aarch64_be-none-eabi | FileCheck %s --check-prefixes=CHECKBE
24

35
define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
4-
;CHECK-LABEL: vtrni8:
5-
;CHECK: trn1.8b
6-
;CHECK: trn2.8b
7-
;CHECK-NEXT: add.8b
6+
; CHECKLE-LABEL: vtrni8:
7+
; CHECKLE: // %bb.0:
8+
; CHECKLE-NEXT: ldr d0, [x0]
9+
; CHECKLE-NEXT: ldr d1, [x1]
10+
; CHECKLE-NEXT: trn1 v2.8b, v0.8b, v1.8b
11+
; CHECKLE-NEXT: trn2 v0.8b, v0.8b, v1.8b
12+
; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b
13+
; CHECKLE-NEXT: ret
14+
;
15+
; CHECKBE-LABEL: vtrni8:
16+
; CHECKBE: // %bb.0:
17+
; CHECKBE-NEXT: ld1 { v0.8b }, [x0]
18+
; CHECKBE-NEXT: ld1 { v1.8b }, [x1]
19+
; CHECKBE-NEXT: trn1 v2.8b, v0.8b, v1.8b
20+
; CHECKBE-NEXT: trn2 v0.8b, v0.8b, v1.8b
21+
; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b
22+
; CHECKBE-NEXT: rev64 v0.8b, v0.8b
23+
; CHECKBE-NEXT: ret
824
%tmp1 = load <8 x i8>, <8 x i8>* %A
925
%tmp2 = load <8 x i8>, <8 x i8>* %B
1026
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -14,10 +30,24 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
1430
}
1531

1632
define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
17-
;CHECK-LABEL: vtrni16:
18-
;CHECK: trn1.4h
19-
;CHECK: trn2.4h
20-
;CHECK-NEXT: add.4h
33+
; CHECKLE-LABEL: vtrni16:
34+
; CHECKLE: // %bb.0:
35+
; CHECKLE-NEXT: ldr d0, [x0]
36+
; CHECKLE-NEXT: ldr d1, [x1]
37+
; CHECKLE-NEXT: trn1 v2.4h, v0.4h, v1.4h
38+
; CHECKLE-NEXT: trn2 v0.4h, v0.4h, v1.4h
39+
; CHECKLE-NEXT: add v0.4h, v2.4h, v0.4h
40+
; CHECKLE-NEXT: ret
41+
;
42+
; CHECKBE-LABEL: vtrni16:
43+
; CHECKBE: // %bb.0:
44+
; CHECKBE-NEXT: ld1 { v0.4h }, [x0]
45+
; CHECKBE-NEXT: ld1 { v1.4h }, [x1]
46+
; CHECKBE-NEXT: trn1 v2.4h, v0.4h, v1.4h
47+
; CHECKBE-NEXT: trn2 v0.4h, v0.4h, v1.4h
48+
; CHECKBE-NEXT: add v0.4h, v2.4h, v0.4h
49+
; CHECKBE-NEXT: rev64 v0.4h, v0.4h
50+
; CHECKBE-NEXT: ret
2151
%tmp1 = load <4 x i16>, <4 x i16>* %A
2252
%tmp2 = load <4 x i16>, <4 x i16>* %B
2353
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -26,12 +56,49 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
2656
ret <4 x i16> %tmp5
2757
}
2858

59+
; Loads two <4 x i16> vectors, bitcasts each to <8 x i8>, and shuffles them at
; byte granularity with a mask that keeps bytes in adjacent pairs
; (0-1, 8-9, 4-5, 12-13). The little-endian checks expect a single trn1 on
; .4h lanes; the big-endian checks expect the same trn1 followed by a rev64
; on .4h lanes before the return.
define <8 x i8> @vtrni16_viabitcast(<4 x i16> *%A, <4 x i16> *%B) nounwind {
60+
; CHECKLE-LABEL: vtrni16_viabitcast:
61+
; CHECKLE: // %bb.0:
62+
; CHECKLE-NEXT: ldr d0, [x0]
63+
; CHECKLE-NEXT: ldr d1, [x1]
64+
; CHECKLE-NEXT: trn1 v0.4h, v0.4h, v1.4h
65+
; CHECKLE-NEXT: ret
66+
;
67+
; CHECKBE-LABEL: vtrni16_viabitcast:
68+
; CHECKBE: // %bb.0:
69+
; CHECKBE-NEXT: ld1 { v0.4h }, [x0]
70+
; CHECKBE-NEXT: ld1 { v1.4h }, [x1]
71+
; CHECKBE-NEXT: trn1 v0.4h, v0.4h, v1.4h
72+
; CHECKBE-NEXT: rev64 v0.4h, v0.4h
73+
; CHECKBE-NEXT: ret
74+
%l1 = load <4 x i16>, <4 x i16> *%A
75+
%l2 = load <4 x i16>, <4 x i16> *%B
76+
%b1 = bitcast <4 x i16> %l1 to <8 x i8>
77+
%b2 = bitcast <4 x i16> %l2 to <8 x i8>
78+
%tmp3 = shufflevector <8 x i8> %b1, <8 x i8> %b2, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
79+
ret <8 x i8> %tmp3
80+
}
81+
2982
; 2xi32 TRN is redundant with ZIP
3083
define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
31-
;CHECK-LABEL: vtrni32:
32-
;CHECK: zip1.2s
33-
;CHECK: zip2.2s
34-
;CHECK-NEXT: add.2s
84+
; CHECKLE-LABEL: vtrni32:
85+
; CHECKLE: // %bb.0:
86+
; CHECKLE-NEXT: ldr d0, [x0]
87+
; CHECKLE-NEXT: ldr d1, [x1]
88+
; CHECKLE-NEXT: zip1 v2.2s, v0.2s, v1.2s
89+
; CHECKLE-NEXT: zip2 v0.2s, v0.2s, v1.2s
90+
; CHECKLE-NEXT: add v0.2s, v2.2s, v0.2s
91+
; CHECKLE-NEXT: ret
92+
;
93+
; CHECKBE-LABEL: vtrni32:
94+
; CHECKBE: // %bb.0:
95+
; CHECKBE-NEXT: ld1 { v0.2s }, [x0]
96+
; CHECKBE-NEXT: ld1 { v1.2s }, [x1]
97+
; CHECKBE-NEXT: zip1 v2.2s, v0.2s, v1.2s
98+
; CHECKBE-NEXT: zip2 v0.2s, v0.2s, v1.2s
99+
; CHECKBE-NEXT: add v0.2s, v2.2s, v0.2s
100+
; CHECKBE-NEXT: rev64 v0.2s, v0.2s
101+
; CHECKBE-NEXT: ret
35102
%tmp1 = load <2 x i32>, <2 x i32>* %A
36103
%tmp2 = load <2 x i32>, <2 x i32>* %B
37104
%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
@@ -41,10 +108,24 @@ define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
41108
}
42109

43110
define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
44-
;CHECK-LABEL: vtrnf:
45-
;CHECK: zip1.2s
46-
;CHECK: zip2.2s
47-
;CHECK-NEXT: fadd.2s
111+
; CHECKLE-LABEL: vtrnf:
112+
; CHECKLE: // %bb.0:
113+
; CHECKLE-NEXT: ldr d0, [x0]
114+
; CHECKLE-NEXT: ldr d1, [x1]
115+
; CHECKLE-NEXT: zip1 v2.2s, v0.2s, v1.2s
116+
; CHECKLE-NEXT: zip2 v0.2s, v0.2s, v1.2s
117+
; CHECKLE-NEXT: fadd v0.2s, v2.2s, v0.2s
118+
; CHECKLE-NEXT: ret
119+
;
120+
; CHECKBE-LABEL: vtrnf:
121+
; CHECKBE: // %bb.0:
122+
; CHECKBE-NEXT: ld1 { v0.2s }, [x0]
123+
; CHECKBE-NEXT: ld1 { v1.2s }, [x1]
124+
; CHECKBE-NEXT: zip1 v2.2s, v0.2s, v1.2s
125+
; CHECKBE-NEXT: zip2 v0.2s, v0.2s, v1.2s
126+
; CHECKBE-NEXT: fadd v0.2s, v2.2s, v0.2s
127+
; CHECKBE-NEXT: rev64 v0.2s, v0.2s
128+
; CHECKBE-NEXT: ret
48129
%tmp1 = load <2 x float>, <2 x float>* %A
49130
%tmp2 = load <2 x float>, <2 x float>* %B
50131
%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
@@ -54,10 +135,25 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
54135
}
55136

56137
define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
57-
;CHECK-LABEL: vtrnQi8:
58-
;CHECK: trn1.16b
59-
;CHECK: trn2.16b
60-
;CHECK-NEXT: add.16b
138+
; CHECKLE-LABEL: vtrnQi8:
139+
; CHECKLE: // %bb.0:
140+
; CHECKLE-NEXT: ldr q0, [x0]
141+
; CHECKLE-NEXT: ldr q1, [x1]
142+
; CHECKLE-NEXT: trn1 v2.16b, v0.16b, v1.16b
143+
; CHECKLE-NEXT: trn2 v0.16b, v0.16b, v1.16b
144+
; CHECKLE-NEXT: add v0.16b, v2.16b, v0.16b
145+
; CHECKLE-NEXT: ret
146+
;
147+
; CHECKBE-LABEL: vtrnQi8:
148+
; CHECKBE: // %bb.0:
149+
; CHECKBE-NEXT: ld1 { v0.16b }, [x0]
150+
; CHECKBE-NEXT: ld1 { v1.16b }, [x1]
151+
; CHECKBE-NEXT: trn1 v2.16b, v0.16b, v1.16b
152+
; CHECKBE-NEXT: trn2 v0.16b, v0.16b, v1.16b
153+
; CHECKBE-NEXT: add v0.16b, v2.16b, v0.16b
154+
; CHECKBE-NEXT: rev64 v0.16b, v0.16b
155+
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
156+
; CHECKBE-NEXT: ret
61157
%tmp1 = load <16 x i8>, <16 x i8>* %A
62158
%tmp2 = load <16 x i8>, <16 x i8>* %B
63159
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -67,10 +163,25 @@ define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
67163
}
68164

69165
define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
70-
;CHECK-LABEL: vtrnQi16:
71-
;CHECK: trn1.8h
72-
;CHECK: trn2.8h
73-
;CHECK-NEXT: add.8h
166+
; CHECKLE-LABEL: vtrnQi16:
167+
; CHECKLE: // %bb.0:
168+
; CHECKLE-NEXT: ldr q0, [x0]
169+
; CHECKLE-NEXT: ldr q1, [x1]
170+
; CHECKLE-NEXT: trn1 v2.8h, v0.8h, v1.8h
171+
; CHECKLE-NEXT: trn2 v0.8h, v0.8h, v1.8h
172+
; CHECKLE-NEXT: add v0.8h, v2.8h, v0.8h
173+
; CHECKLE-NEXT: ret
174+
;
175+
; CHECKBE-LABEL: vtrnQi16:
176+
; CHECKBE: // %bb.0:
177+
; CHECKBE-NEXT: ld1 { v0.8h }, [x0]
178+
; CHECKBE-NEXT: ld1 { v1.8h }, [x1]
179+
; CHECKBE-NEXT: trn1 v2.8h, v0.8h, v1.8h
180+
; CHECKBE-NEXT: trn2 v0.8h, v0.8h, v1.8h
181+
; CHECKBE-NEXT: add v0.8h, v2.8h, v0.8h
182+
; CHECKBE-NEXT: rev64 v0.8h, v0.8h
183+
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
184+
; CHECKBE-NEXT: ret
74185
%tmp1 = load <8 x i16>, <8 x i16>* %A
75186
%tmp2 = load <8 x i16>, <8 x i16>* %B
76187
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -80,10 +191,25 @@ define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
80191
}
81192

82193
define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
83-
;CHECK-LABEL: vtrnQi32:
84-
;CHECK: trn1.4s
85-
;CHECK: trn2.4s
86-
;CHECK-NEXT: add.4s
194+
; CHECKLE-LABEL: vtrnQi32:
195+
; CHECKLE: // %bb.0:
196+
; CHECKLE-NEXT: ldr q0, [x0]
197+
; CHECKLE-NEXT: ldr q1, [x1]
198+
; CHECKLE-NEXT: trn1 v2.4s, v0.4s, v1.4s
199+
; CHECKLE-NEXT: trn2 v0.4s, v0.4s, v1.4s
200+
; CHECKLE-NEXT: add v0.4s, v2.4s, v0.4s
201+
; CHECKLE-NEXT: ret
202+
;
203+
; CHECKBE-LABEL: vtrnQi32:
204+
; CHECKBE: // %bb.0:
205+
; CHECKBE-NEXT: ld1 { v0.4s }, [x0]
206+
; CHECKBE-NEXT: ld1 { v1.4s }, [x1]
207+
; CHECKBE-NEXT: trn1 v2.4s, v0.4s, v1.4s
208+
; CHECKBE-NEXT: trn2 v0.4s, v0.4s, v1.4s
209+
; CHECKBE-NEXT: add v0.4s, v2.4s, v0.4s
210+
; CHECKBE-NEXT: rev64 v0.4s, v0.4s
211+
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
212+
; CHECKBE-NEXT: ret
87213
%tmp1 = load <4 x i32>, <4 x i32>* %A
88214
%tmp2 = load <4 x i32>, <4 x i32>* %B
89215
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -93,10 +219,25 @@ define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
93219
}
94220

95221
define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
96-
;CHECK-LABEL: vtrnQf:
97-
;CHECK: trn1.4s
98-
;CHECK: trn2.4s
99-
;CHECK-NEXT: fadd.4s
222+
; CHECKLE-LABEL: vtrnQf:
223+
; CHECKLE: // %bb.0:
224+
; CHECKLE-NEXT: ldr q0, [x0]
225+
; CHECKLE-NEXT: ldr q1, [x1]
226+
; CHECKLE-NEXT: trn1 v2.4s, v0.4s, v1.4s
227+
; CHECKLE-NEXT: trn2 v0.4s, v0.4s, v1.4s
228+
; CHECKLE-NEXT: fadd v0.4s, v2.4s, v0.4s
229+
; CHECKLE-NEXT: ret
230+
;
231+
; CHECKBE-LABEL: vtrnQf:
232+
; CHECKBE: // %bb.0:
233+
; CHECKBE-NEXT: ld1 { v0.4s }, [x0]
234+
; CHECKBE-NEXT: ld1 { v1.4s }, [x1]
235+
; CHECKBE-NEXT: trn1 v2.4s, v0.4s, v1.4s
236+
; CHECKBE-NEXT: trn2 v0.4s, v0.4s, v1.4s
237+
; CHECKBE-NEXT: fadd v0.4s, v2.4s, v0.4s
238+
; CHECKBE-NEXT: rev64 v0.4s, v0.4s
239+
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
240+
; CHECKBE-NEXT: ret
100241
%tmp1 = load <4 x float>, <4 x float>* %A
101242
%tmp2 = load <4 x float>, <4 x float>* %B
102243
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -108,10 +249,24 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
108249
; Undef shuffle indices should not prevent matching to VTRN:
109250

110251
define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
111-
;CHECK-LABEL: vtrni8_undef:
112-
;CHECK: trn1.8b
113-
;CHECK: trn2.8b
114-
;CHECK-NEXT: add.8b
252+
; CHECKLE-LABEL: vtrni8_undef:
253+
; CHECKLE: // %bb.0:
254+
; CHECKLE-NEXT: ldr d0, [x0]
255+
; CHECKLE-NEXT: ldr d1, [x1]
256+
; CHECKLE-NEXT: trn1 v2.8b, v0.8b, v1.8b
257+
; CHECKLE-NEXT: trn2 v0.8b, v0.8b, v1.8b
258+
; CHECKLE-NEXT: add v0.8b, v2.8b, v0.8b
259+
; CHECKLE-NEXT: ret
260+
;
261+
; CHECKBE-LABEL: vtrni8_undef:
262+
; CHECKBE: // %bb.0:
263+
; CHECKBE-NEXT: ld1 { v0.8b }, [x0]
264+
; CHECKBE-NEXT: ld1 { v1.8b }, [x1]
265+
; CHECKBE-NEXT: trn1 v2.8b, v0.8b, v1.8b
266+
; CHECKBE-NEXT: trn2 v0.8b, v0.8b, v1.8b
267+
; CHECKBE-NEXT: add v0.8b, v2.8b, v0.8b
268+
; CHECKBE-NEXT: rev64 v0.8b, v0.8b
269+
; CHECKBE-NEXT: ret
115270
%tmp1 = load <8 x i8>, <8 x i8>* %A
116271
%tmp2 = load <8 x i8>, <8 x i8>* %B
117272
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
@@ -121,10 +276,25 @@ define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
121276
}
122277

123278
define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
124-
;CHECK-LABEL: vtrnQi16_undef:
125-
;CHECK: trn1.8h
126-
;CHECK: trn2.8h
127-
;CHECK-NEXT: add.8h
279+
; CHECKLE-LABEL: vtrnQi16_undef:
280+
; CHECKLE: // %bb.0:
281+
; CHECKLE-NEXT: ldr q0, [x0]
282+
; CHECKLE-NEXT: ldr q1, [x1]
283+
; CHECKLE-NEXT: trn1 v2.8h, v0.8h, v1.8h
284+
; CHECKLE-NEXT: trn2 v0.8h, v0.8h, v1.8h
285+
; CHECKLE-NEXT: add v0.8h, v2.8h, v0.8h
286+
; CHECKLE-NEXT: ret
287+
;
288+
; CHECKBE-LABEL: vtrnQi16_undef:
289+
; CHECKBE: // %bb.0:
290+
; CHECKBE-NEXT: ld1 { v0.8h }, [x0]
291+
; CHECKBE-NEXT: ld1 { v1.8h }, [x1]
292+
; CHECKBE-NEXT: trn1 v2.8h, v0.8h, v1.8h
293+
; CHECKBE-NEXT: trn2 v0.8h, v0.8h, v1.8h
294+
; CHECKBE-NEXT: add v0.8h, v2.8h, v0.8h
295+
; CHECKBE-NEXT: rev64 v0.8h, v0.8h
296+
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
297+
; CHECKBE-NEXT: ret
128298
%tmp1 = load <8 x i16>, <8 x i16>* %A
129299
%tmp2 = load <8 x i16>, <8 x i16>* %B
130300
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>

0 commit comments

Comments
 (0)