Skip to content

Commit c6c6723

Browse files
committed
[AArch64] More consistently use buildvector for zero and all-ones constants
The AArch64 backend will use legal BUILDVECTORs for zero vectors or all-ones vectors, so during selection tablegen patterns can rely on immAllZerosV and immAllOnesV pattern frags in patterns like vnot. It was not always consistent though, which this patch attempts to fix by recognizing where constant splat + insert vector element is used. The main outcome of this will be that full vector movi v0.2d, #0000000000000000 will be used as opposed to movi d0, #0, as per https://reviews.llvm.org/D53579. This helps simplify what tablegen will see, to make pattern matching simpler. Differential Revision: https://reviews.llvm.org/D144018
1 parent c21cceb commit c6c6723

10 files changed

+59
-55
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12440,12 +12440,16 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
1244012440
// for each lane.
1244112441
if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
1244212442
// Firstly, try to materialize the splat constant.
12443-
SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
12444-
Val = ConstantBuildVector(Vec, DAG);
12445-
if (!Val) {
12446-
// Otherwise, materialize the constant and splat it.
12447-
Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
12448-
DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
12443+
SDValue Val = DAG.getSplatBuildVector(VT, dl, ConstantValue);
12444+
unsigned BitSize = VT.getScalarSizeInBits();
12445+
APInt ConstantValueAPInt(1, 0);
12446+
if (auto *C = dyn_cast<ConstantSDNode>(ConstantValue))
12447+
ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize);
12448+
if (!isNullConstant(ConstantValue) && !ConstantValueAPInt.isAllOnes()) {
12449+
Val = ConstantBuildVector(Val, DAG);
12450+
if (!Val)
12451+
// Otherwise, materialize the constant and splat it.
12452+
Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
1244912453
}
1245012454

1245112455
// Now insert the non-constant lanes.

llvm/test/CodeGen/AArch64/build-one-lane.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
define <8 x i8> @v8i8z(i8 %t, i8 %s) nounwind {
88
; CHECK-LABEL: v8i8z:
99
; CHECK: // %bb.0:
10-
; CHECK-NEXT: movi d0, #0000000000000000
10+
; CHECK-NEXT: movi v0.2d, #0000000000000000
1111
; CHECK-NEXT: mov v0.b[7], w1
1212
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1313
; CHECK-NEXT: ret
@@ -28,7 +28,7 @@ define <16 x i8> @v16i8z(i8 %t, i8 %s) nounwind {
2828
define <4 x i16> @v4i16z(i16 %t, i16 %s) nounwind {
2929
; CHECK-LABEL: v4i16z:
3030
; CHECK: // %bb.0:
31-
; CHECK-NEXT: movi d0, #0000000000000000
31+
; CHECK-NEXT: movi v0.2d, #0000000000000000
3232
; CHECK-NEXT: mov v0.h[3], w1
3333
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
3434
; CHECK-NEXT: ret
@@ -49,7 +49,7 @@ define <8 x i16> @v8i16z(i16 %t, i16 %s) nounwind {
4949
define <2 x i32> @v2i32z(i32 %t, i32 %s) nounwind {
5050
; CHECK-LABEL: v2i32z:
5151
; CHECK: // %bb.0:
52-
; CHECK-NEXT: movi d0, #0000000000000000
52+
; CHECK-NEXT: movi v0.2d, #0000000000000000
5353
; CHECK-NEXT: mov v0.s[1], w1
5454
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
5555
; CHECK-NEXT: ret
@@ -117,7 +117,7 @@ define <2 x double> @v2f64z(double %t, double %s) nounwind {
117117
define <8 x i8> @v8i8m(i8 %t, i8 %s) nounwind {
118118
; CHECK-LABEL: v8i8m:
119119
; CHECK: // %bb.0:
120-
; CHECK-NEXT: movi d0, #0xffffffffffffffff
120+
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
121121
; CHECK-NEXT: mov v0.b[7], w1
122122
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
123123
; CHECK-NEXT: ret
@@ -138,7 +138,7 @@ define <16 x i8> @v16i8m(i8 %t, i8 %s) nounwind {
138138
define <4 x i16> @v4i16m(i16 %t, i16 %s) nounwind {
139139
; CHECK-LABEL: v4i16m:
140140
; CHECK: // %bb.0:
141-
; CHECK-NEXT: movi d0, #0xffffffffffffffff
141+
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
142142
; CHECK-NEXT: mov v0.h[3], w1
143143
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
144144
; CHECK-NEXT: ret
@@ -159,7 +159,7 @@ define <8 x i16> @v8i16m(i16 %t, i16 %s) nounwind {
159159
define <2 x i32> @v2i32m(i32 %t, i32 %s) nounwind {
160160
; CHECK-LABEL: v2i32m:
161161
; CHECK: // %bb.0:
162-
; CHECK-NEXT: movi d0, #0xffffffffffffffff
162+
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
163163
; CHECK-NEXT: mov v0.s[1], w1
164164
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
165165
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/srem-vector-lkk.ll

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -197,37 +197,37 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
197197
; CHECK: // %bb.0:
198198
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
199199
; CHECK-NEXT: smov w8, v0.h[1]
200-
; CHECK-NEXT: smov w9, v0.h[2]
201-
; CHECK-NEXT: mov w10, #30865
200+
; CHECK-NEXT: mov w9, #30865
201+
; CHECK-NEXT: movk w9, #51306, lsl #16
202+
; CHECK-NEXT: smov w10, v0.h[2]
202203
; CHECK-NEXT: mov w11, #17097
203-
; CHECK-NEXT: movk w10, #51306, lsl #16
204-
; CHECK-NEXT: movk w11, #45590, lsl #16
205204
; CHECK-NEXT: mov w12, #654
206-
; CHECK-NEXT: smull x10, w8, w10
207-
; CHECK-NEXT: smull x11, w9, w11
208-
; CHECK-NEXT: lsr x10, x10, #32
205+
; CHECK-NEXT: movk w11, #45590, lsl #16
206+
; CHECK-NEXT: smull x9, w8, w9
207+
; CHECK-NEXT: smull x11, w10, w11
208+
; CHECK-NEXT: lsr x9, x9, #32
209+
; CHECK-NEXT: add w9, w9, w8
209210
; CHECK-NEXT: lsr x11, x11, #32
210-
; CHECK-NEXT: add w10, w10, w8
211-
; CHECK-NEXT: add w11, w11, w9
212-
; CHECK-NEXT: asr w13, w10, #9
213-
; CHECK-NEXT: add w10, w13, w10, lsr #31
214-
; CHECK-NEXT: asr w13, w11, #4
215-
; CHECK-NEXT: add w11, w13, w11, lsr #31
216-
; CHECK-NEXT: smov w13, v0.h[3]
217-
; CHECK-NEXT: msub w8, w10, w12, w8
218-
; CHECK-NEXT: movi d0, #0000000000000000
219-
; CHECK-NEXT: mov w12, #47143
220-
; CHECK-NEXT: mov w10, #23
221-
; CHECK-NEXT: movk w12, #24749, lsl #16
222-
; CHECK-NEXT: msub w9, w11, w10, w9
223-
; CHECK-NEXT: smull x10, w13, w12
211+
; CHECK-NEXT: asr w13, w9, #9
212+
; CHECK-NEXT: add w11, w11, w10
213+
; CHECK-NEXT: add w9, w13, w9, lsr #31
214+
; CHECK-NEXT: mov w13, #23
215+
; CHECK-NEXT: msub w8, w9, w12, w8
216+
; CHECK-NEXT: asr w9, w11, #4
217+
; CHECK-NEXT: smov w12, v0.h[3]
218+
; CHECK-NEXT: add w9, w9, w11, lsr #31
219+
; CHECK-NEXT: movi v0.2d, #0000000000000000
220+
; CHECK-NEXT: mov w11, #47143
221+
; CHECK-NEXT: movk w11, #24749, lsl #16
222+
; CHECK-NEXT: msub w9, w9, w13, w10
223+
; CHECK-NEXT: smull x10, w12, w11
224224
; CHECK-NEXT: mov v0.h[1], w8
225225
; CHECK-NEXT: lsr x8, x10, #63
226226
; CHECK-NEXT: asr x10, x10, #43
227227
; CHECK-NEXT: add w8, w10, w8
228228
; CHECK-NEXT: mov w10, #5423
229229
; CHECK-NEXT: mov v0.h[2], w9
230-
; CHECK-NEXT: msub w8, w8, w10, w13
230+
; CHECK-NEXT: msub w8, w8, w10, w12
231231
; CHECK-NEXT: mov v0.h[3], w8
232232
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
233233
; CHECK-NEXT: ret
@@ -240,25 +240,25 @@ define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) {
240240
; CHECK-LABEL: dont_fold_srem_i16_smax:
241241
; CHECK: // %bb.0:
242242
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
243-
; CHECK-NEXT: smov w8, v0.h[2]
244-
; CHECK-NEXT: mov w9, #17097
245-
; CHECK-NEXT: movk w9, #45590, lsl #16
243+
; CHECK-NEXT: smov w9, v0.h[2]
244+
; CHECK-NEXT: mov w8, #17097
245+
; CHECK-NEXT: movk w8, #45590, lsl #16
246246
; CHECK-NEXT: smov w10, v0.h[1]
247247
; CHECK-NEXT: smov w12, v0.h[3]
248-
; CHECK-NEXT: movi d1, #0000000000000000
249248
; CHECK-NEXT: mov w11, #23
250-
; CHECK-NEXT: smull x9, w8, w9
251-
; CHECK-NEXT: lsr x9, x9, #32
252-
; CHECK-NEXT: add w9, w9, w8
253-
; CHECK-NEXT: asr w13, w9, #4
254-
; CHECK-NEXT: add w9, w13, w9, lsr #31
249+
; CHECK-NEXT: movi v1.2d, #0000000000000000
250+
; CHECK-NEXT: smull x8, w9, w8
251+
; CHECK-NEXT: lsr x8, x8, #32
252+
; CHECK-NEXT: add w8, w8, w9
253+
; CHECK-NEXT: asr w13, w8, #4
254+
; CHECK-NEXT: add w8, w13, w8, lsr #31
255255
; CHECK-NEXT: negs w13, w10
256256
; CHECK-NEXT: and w10, w10, #0x7fff
257257
; CHECK-NEXT: and w13, w13, #0x7fff
258258
; CHECK-NEXT: csneg w10, w10, w13, mi
259259
; CHECK-NEXT: mov w13, #47143
260260
; CHECK-NEXT: movk w13, #24749, lsl #16
261-
; CHECK-NEXT: msub w8, w9, w11, w8
261+
; CHECK-NEXT: msub w8, w8, w11, w9
262262
; CHECK-NEXT: smull x9, w12, w13
263263
; CHECK-NEXT: mov v1.h[1], w10
264264
; CHECK-NEXT: lsr x10, x9, #63

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -572,8 +572,8 @@ define void @masked_gather_v2f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
572572
; CHECK-LABEL: masked_gather_v2f16:
573573
; CHECK: // %bb.0:
574574
; CHECK-NEXT: ldr s1, [x0]
575-
; CHECK-NEXT: movi d0, #0000000000000000
576575
; CHECK-NEXT: ptrue p0.d, vl4
576+
; CHECK-NEXT: movi v0.2d, #0000000000000000
577577
; CHECK-NEXT: fcmeq v1.4h, v1.4h, #0.0
578578
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
579579
; CHECK-NEXT: fmov w8, s1

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define <2 x half> @masked_load_v2f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
1313
; CHECK-LABEL: masked_load_v2f16:
1414
; CHECK: // %bb.0:
1515
; CHECK-NEXT: ldr s1, [x0]
16-
; CHECK-NEXT: movi d0, #0000000000000000
17-
; CHECK-NEXT: ldr s2, [x1]
1816
; CHECK-NEXT: ptrue p0.h, vl4
17+
; CHECK-NEXT: ldr s2, [x1]
18+
; CHECK-NEXT: movi v0.2d, #0000000000000000
1919
; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h
2020
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
2121
; CHECK-NEXT: fmov w8, s1

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -534,8 +534,8 @@ define void @masked_scatter_v2f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
534534
; CHECK-LABEL: masked_scatter_v2f16:
535535
; CHECK: // %bb.0:
536536
; CHECK-NEXT: ldr s1, [x0]
537-
; CHECK-NEXT: movi d0, #0000000000000000
538537
; CHECK-NEXT: ptrue p0.d, vl4
538+
; CHECK-NEXT: movi v0.2d, #0000000000000000
539539
; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0
540540
; CHECK-NEXT: uunpklo z1.s, z1.h
541541
; CHECK-NEXT: sshll v2.4s, v2.4h, #0

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define void @masked_store_v2f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
1313
; CHECK-LABEL: masked_store_v2f16:
1414
; CHECK: // %bb.0:
1515
; CHECK-NEXT: ldr s1, [x0]
16-
; CHECK-NEXT: movi d0, #0000000000000000
17-
; CHECK-NEXT: ldr s2, [x1]
1816
; CHECK-NEXT: ptrue p0.h, vl4
17+
; CHECK-NEXT: ldr s2, [x1]
18+
; CHECK-NEXT: movi v0.2d, #0000000000000000
1919
; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h
2020
; CHECK-NEXT: sshll v2.4s, v2.4h, #0
2121
; CHECK-NEXT: fmov w8, s2

llvm/test/CodeGen/AArch64/urem-vector-lkk.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,12 +158,12 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
158158
; CHECK-NEXT: mov w11, #25645
159159
; CHECK-NEXT: mov w12, #654
160160
; CHECK-NEXT: movk w11, #2849, lsl #16
161-
; CHECK-NEXT: movi d1, #0000000000000000
162-
; CHECK-NEXT: umull x9, w8, w9
163161
; CHECK-NEXT: mov w13, #5560
164-
; CHECK-NEXT: umull x11, w10, w11
162+
; CHECK-NEXT: umull x9, w8, w9
165163
; CHECK-NEXT: movk w13, #12, lsl #16
164+
; CHECK-NEXT: umull x11, w10, w11
166165
; CHECK-NEXT: lsr x9, x9, #32
166+
; CHECK-NEXT: movi v1.2d, #0000000000000000
167167
; CHECK-NEXT: lsr x11, x11, #32
168168
; CHECK-NEXT: msub w8, w9, w12, w8
169169
; CHECK-NEXT: umov w9, v0.h[3]

llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ define i128 @test_v1i128(<1 x i128> %a) nounwind {
8585
define i8 @test_v3i8(<3 x i8> %a) nounwind {
8686
; CHECK-LABEL: test_v3i8:
8787
; CHECK: // %bb.0:
88-
; CHECK-NEXT: movi d0, #0000000000000000
88+
; CHECK-NEXT: movi v0.2d, #0000000000000000
8989
; CHECK-NEXT: mov v0.h[0], w0
9090
; CHECK-NEXT: mov v0.h[1], w1
9191
; CHECK-NEXT: mov v0.h[2], w2

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ define i64 @test_v2i64(<2 x i64> %a) nounwind {
9999
define i8 @test_v3i8(<3 x i8> %a) nounwind {
100100
; CHECK-LABEL: test_v3i8:
101101
; CHECK: // %bb.0:
102-
; CHECK-NEXT: movi d0, #0000000000000000
102+
; CHECK-NEXT: movi v0.2d, #0000000000000000
103103
; CHECK-NEXT: mov v0.h[0], w0
104104
; CHECK-NEXT: mov v0.h[1], w1
105105
; CHECK-NEXT: mov v0.h[2], w2

0 commit comments

Comments
 (0)