
Commit 042a7a5

[riscv] Use X0 for destination of VSETVLI instruction if result unused
If the GPR destination register of a VSETVLI instruction is unused, we can replace it with X0. This discards the result and thus reduces register pressure.

Since, after the core insertion/lowering algorithm has run, many user-written VSETVLIs will have their GPR result unused (as VTYPE/VLEN is now explicitly read instead), this kicks in for most tests which involve a vsetvli intrinsic for fixed-length vectorization. (Vectorization with scalable (vscale) vectors generally uses the GPR result to know how far to e.g. advance pointers in a loop, and those uses are not removed.) When inserting VSETVLIs to lower pseudos, we already prefer the X0 form anyway.

Differential Revision: https://reviews.llvm.org/D124961
1 parent: c7a6b11 · commit: 042a7a5
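To make the effect concrete, here is a small hand-written illustration; it is not part of the commit, and the function names, constants, and exact register assignments are hypothetical, modeled on the tests updated below. When the scalar result of the vsetvli intrinsic is unused, the destination of the emitted vsetvli becomes zero (x0); when the result is still used, the GPR destination is kept.

declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)

; Result discarded: expected codegen changes from
;   vsetvli a0, a0, e64, m1, ta, mu
; to
;   vsetvli zero, a0, e64, m1, ta, mu
define void @discarded_result(i64 %avl) nounwind {
  call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 0) ; sew=e64, lmul=m1
  ret void
}

; Result used (returned here), so the write to the GPR is kept:
;   vsetvli a0, a0, e64, m1, ta, mu
define i64 @used_result(i64 %avl) nounwind {
  %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 0)
  ret i64 %vl
}

This is the same pattern as the redundant_vsetvli tests below, where the intrinsic result only feeds the VL operand of a vector intrinsic and is dead once VL/VTYPE are read explicitly.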

7 files changed: 39 additions & 23 deletions


llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 16 additions & 0 deletions
@@ -1218,6 +1218,22 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
     // predecessors.
     for (MachineBasicBlock &MBB : MF)
       emitVSETVLIs(MBB);
+
+    // Once we're fully done rewriting all the instructions, do a final pass
+    // through to check for VSETVLIs which write to an unused destination.
+    // For the non X0, X0 variant, we can replace the destination register
+    // with X0 to reduce register pressure. This is really a generic
+    // optimization which can be applied to any dead def (TODO: generalize).
+    for (MachineBasicBlock &MBB : MF) {
+      for (MachineInstr &MI : MBB) {
+        if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+            MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+          Register VRegDef = MI.getOperand(0).getReg();
+          if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
+            MI.getOperand(0).setReg(RISCV::X0);
+        }
+      }
+    }
   }
 
   BlockInfo.clear();

llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll

Lines changed: 4 additions & 4 deletions
@@ -9,7 +9,7 @@ declare i32 @llvm.riscv.vsetvlimax.opt.i32(i32, i32)
 define void @test_vsetvli_e64mf8(i32 %avl) nounwind {
 ; CHECK-LABEL: test_vsetvli_e64mf8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, a0, e64, mf8, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, mf8, ta, mu
 ; CHECK-NEXT: ret
   call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 3, i32 5)
   ret void
@@ -18,7 +18,7 @@ define void @test_vsetvli_e64mf8(i32 %avl) nounwind {
 define void @test_vsetvli_e8mf2_zero_avl() nounwind {
 ; CHECK-LABEL: test_vsetvli_e8mf2_zero_avl:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli a0, 0, e8, mf2, ta, mu
+; CHECK-NEXT: vsetivli zero, 0, e8, mf2, ta, mu
 ; CHECK-NEXT: ret
   call i32 @llvm.riscv.vsetvli.i32(i32 0, i32 0, i32 7)
   ret void
@@ -101,7 +101,7 @@ declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>, <vsca
 define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
 ; CHECK-LABEL: redundant_vsetvli:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vle32.v v8, (a1)
 ; CHECK-NEXT: ret
   %vl = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
@@ -117,7 +117,7 @@ define <vscale x 4 x i32> @repeated_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr)
 ; CHECK-LABEL: repeated_vsetvli:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
-; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vle32.v v8, (a1)
 ; CHECK-NEXT: ret
   %vl0 = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)

llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll

Lines changed: 5 additions & 5 deletions
@@ -9,7 +9,7 @@ declare i64 @llvm.riscv.vsetvlimax.opt.i64(i64, i64)
 define void @test_vsetvli_e8m1(i64 %avl) nounwind {
 ; CHECK-LABEL: test_vsetvli_e8m1:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, a0, e8, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
 ; CHECK-NEXT: ret
   call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 0, i64 0)
   ret void
@@ -18,7 +18,7 @@ define void @test_vsetvli_e8m1(i64 %avl) nounwind {
 define void @test_vsetvli_e16mf4(i64 %avl) nounwind {
 ; CHECK-LABEL: test_vsetvli_e16mf4:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: ret
   call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 1, i64 6)
   ret void
@@ -27,7 +27,7 @@ define void @test_vsetvli_e16mf4(i64 %avl) nounwind {
 define void @test_vsetvli_e32mf8_zero_avl() nounwind {
 ; CHECK-LABEL: test_vsetvli_e32mf8_zero_avl:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli a0, 0, e16, mf4, ta, mu
+; CHECK-NEXT: vsetivli zero, 0, e16, mf4, ta, mu
 ; CHECK-NEXT: ret
   call i64 @llvm.riscv.vsetvli.i64(i64 0, i64 1, i64 6)
   ret void
@@ -119,7 +119,7 @@ declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>, <vsca
 define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr) nounwind {
 ; CHECK-LABEL: redundant_vsetvli:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vle32.v v8, (a1)
 ; CHECK-NEXT: ret
   %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
@@ -135,7 +135,7 @@ define <vscale x 4 x i32> @repeated_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr)
 ; CHECK-LABEL: repeated_vsetvli:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
-; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vle32.v v8, (a1)
 ; CHECK-NEXT: ret
   %vl0 = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll

Lines changed: 5 additions & 5 deletions
@@ -23,7 +23,7 @@ declare void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>*
 define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: beqz a1, .LBB0_2
 ; CHECK-NEXT: # %bb.1: # %if.then
 ; CHECK-NEXT: vfadd.vv v8, v8, v9
@@ -54,7 +54,7 @@ if.end: ; preds = %if.else, %if.then
 define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: beqz a1, .LBB1_2
 ; CHECK-NEXT: # %bb.1: # %if.then
 ; CHECK-NEXT: vfadd.vv v9, v8, v9
@@ -180,7 +180,7 @@ define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-LABEL: test5:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: bnez a2, .LBB4_3
 ; CHECK-NEXT: # %bb.1: # %if.else
 ; CHECK-NEXT: vfsub.vv v9, v8, v9
@@ -244,7 +244,7 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-NEXT: andi a1, a1, 2
 ; CHECK-NEXT: beqz a1, .LBB5_4
 ; CHECK-NEXT: .LBB5_2: # %if.then4
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
 ; CHECK-NEXT: vlse64.v v9, (a0), zero
@@ -261,7 +261,7 @@ define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-NEXT: andi a1, a1, 2
 ; CHECK-NEXT: bnez a1, .LBB5_2
 ; CHECK-NEXT: .LBB5_4: # %if.else5
-; CHECK-NEXT: vsetvli a0, a0, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: lui a0, %hi(.LCPI5_2)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_2)
 ; CHECK-NEXT: vlse32.v v9, (a0), zero

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir

Lines changed: 1 addition & 1 deletion
@@ -428,7 +428,7 @@ body: |
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
-; CHECK-NEXT: [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
+; CHECK-NEXT: $x0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0
 ; CHECK-NEXT: BEQ [[COPY3]], [[COPY4]], %bb.2
 ; CHECK-NEXT: PseudoBR %bb.1

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll

Lines changed: 7 additions & 7 deletions
@@ -18,7 +18,7 @@ declare <vscale x 1 x i64> @llvm.riscv.vle.mask.nxv1i64(
 define <vscale x 1 x double> @test1(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vfadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
 entry:
@@ -34,7 +34,7 @@ entry:
 define <vscale x 1 x double> @test2(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vfadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
 entry:
@@ -50,7 +50,7 @@ entry:
 define <vscale x 1 x i64> @test3(i64 %avl, <vscale x 1 x i64> %a, <vscale x 1 x i64>* %b, <vscale x 1 x i1> %c) nounwind {
 ; CHECK-LABEL: test3:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vle64.v v8, (a1), v0.t
 ; CHECK-NEXT: ret
 entry:
@@ -67,7 +67,7 @@ entry:
 define <vscale x 1 x i64> @test4(i64 %avl, <vscale x 1 x i64> %a, <vscale x 1 x i64>* %b, <vscale x 1 x i1> %c) nounwind {
 ; CHECK-LABEL: test4:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vle64.v v8, (a1), v0.t
 ; CHECK-NEXT: ret
 entry:
@@ -85,7 +85,7 @@ entry:
 define <vscale x 1 x i1> @test5(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %avl) nounwind {
 ; CHECK-LABEL: test5:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmseq.vv v8, v8, v9
 ; CHECK-NEXT: vmand.mm v0, v8, v0
 ; CHECK-NEXT: ret
@@ -165,7 +165,7 @@ entry:
 define <vscale x 1 x i64> @test8(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i1> %mask) nounwind {
 ; CHECK-LABEL: test8:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli a1, 6, e64, m1, tu, mu
+; CHECK-NEXT: vsetivli zero, 6, e64, m1, tu, mu
 ; CHECK-NEXT: vmv.s.x v8, a0
 ; CHECK-NEXT: ret
 entry:
@@ -209,7 +209,7 @@ entry:
 define <vscale x 1 x double> @test11(<vscale x 1 x double> %a, double %b) nounwind {
 ; CHECK-LABEL: test11:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli a0, 6, e64, m1, tu, mu
+; CHECK-NEXT: vsetivli zero, 6, e64, m1, tu, mu
 ; CHECK-NEXT: vfmv.s.f v8, fa0
 ; CHECK-NEXT: ret
 entry:

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir

Lines changed: 1 addition & 1 deletion
@@ -349,7 +349,7 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8
-; CHECK-NEXT: [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
+; CHECK-NEXT: $x0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
 ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
 ; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]]
 ; CHECK-NEXT: PseudoRET implicit $v8
