
Commit 6550f28

[RISCV][VLOPT] Support vslide{up,down} (#146710)
For vslideup and vslidedown, vl controls the elements that are written, just like for other vector instructions, so unless I'm missing something it should be safe to reduce their VL. For vslidedown, the specification states that elements past vl may be read.

We already reduce the VL of vslideup and vslidedown in RISCVVectorPeephole::tryToReduceVL, where we just check for RISCVII::elementsDependOnVL. Eventually we should replace the whitelist with RISCVII::elementsDependOnVL once we have test coverage. I've also added an assert to double-check the instructions we currently support.

This helps reduce vl toggles for fixed-order recurrences vectorized with EVL tail folding.
1 parent: c6abab2
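To make the saved toggle concrete, here is a minimal before/after sketch distilled from the vslideup_vx test added in llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll below (register assignments as in that test). Before the change, the slide runs at VLMAX and the consumer needs its own vsetvli:

  vsetvli a2, zero, e32, m2, ta, ma   # vl = VLMAX for the slide
  vslideup.vx v10, v8, a0
  vsetvli zero, a1, e32, m2, ta, ma   # toggle vl back to a1 for the user
  vadd.vv v8, v10, v10

After the optimization, the slide's vl is reduced to the consumer's vl (a1) and the toggle disappears:

  vsetvli zero, a1, e32, m2, ta, ma
  vslideup.vx v10, v8, a0
  vadd.vv v8, v10, v10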

4 files changed: +131 additions, −42 deletions

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 9 additions & 0 deletions

@@ -1040,6 +1040,12 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VMSOF_M:
   case RISCV::VIOTA_M:
   case RISCV::VID_V:
+  // Vector Slide Instructions
+  case RISCV::VSLIDEUP_VX:
+  case RISCV::VSLIDEUP_VI:
+  case RISCV::VSLIDEDOWN_VX:
+  case RISCV::VSLIDEDOWN_VI:
+  // TODO: Handle v[f]slide1up, but not v[f]slide1down.
   // Vector Single-Width Floating-Point Add/Subtract Instructions
   case RISCV::VFADD_VF:
   case RISCV::VFADD_VV:
@@ -1256,6 +1262,9 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
     return false;
   }

+  assert(!RISCVII::elementsDependOnVL(RISCV::getRVVMCOpcode(MI.getOpcode())) &&
+         "Instruction shouldn't be supported if elements depend on VL");
+
   assert(MI.getOperand(0).isReg() &&
          isVectorRegClass(MI.getOperand(0).getReg(), MRI) &&
          "All supported instructions produce a vector register result");

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll

Lines changed: 12 additions & 12 deletions

@@ -25,8 +25,8 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT: vslide1down.vx v10, v10, a1
 ; RV32-NEXT: vslide1down.vx v10, v10, a4
 ; RV32-NEXT: vslide1down.vx v10, v10, a2
-; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vand.vi v10, v10, 1
 ; RV32-NEXT: vmsne.vi v0, v10, 0
 ; RV32-NEXT: vsetvli zero, zero, e32, m2, tu, mu
@@ -56,8 +56,8 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT: vslide1down.vx v10, v10, a1
 ; RV64-NEXT: vslide1down.vx v10, v10, a4
 ; RV64-NEXT: vslide1down.vx v10, v10, a2
-; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vand.vi v10, v10, 1
 ; RV64-NEXT: vmsne.vi v0, v10, 0
 ; RV64-NEXT: vsetvli zero, zero, e32, m2, tu, mu
@@ -95,8 +95,8 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT: vslide1down.vx v10, v10, a1
 ; RV32-NEXT: vslide1down.vx v10, v10, a4
 ; RV32-NEXT: vslide1down.vx v10, v10, a2
-; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vand.vi v10, v10, 1
 ; RV32-NEXT: vmsne.vi v0, v10, 0
 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -126,8 +126,8 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT: vslide1down.vx v10, v10, a1
 ; RV64-NEXT: vslide1down.vx v10, v10, a4
 ; RV64-NEXT: vslide1down.vx v10, v10, a2
-; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vand.vi v10, v10, 1
 ; RV64-NEXT: vmsne.vi v0, v10, 0
 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -166,8 +166,8 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT: vslide1down.vx v10, v10, a0
 ; RV32-NEXT: vslide1down.vx v10, v10, a3
 ; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vand.vi v10, v10, 1
 ; RV32-NEXT: vmsne.vi v0, v10, 0
 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -197,8 +197,8 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT: vslide1down.vx v10, v10, a0
 ; RV64-NEXT: vslide1down.vx v10, v10, a3
 ; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vand.vi v10, v10, 1
 ; RV64-NEXT: vmsne.vi v0, v10, 0
 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -236,8 +236,8 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT: vslide1down.vx v10, v10, a1
 ; RV32-NEXT: vslide1down.vx v10, v10, a4
 ; RV32-NEXT: vslide1down.vx v10, v10, a2
-; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vand.vi v10, v10, 1
 ; RV32-NEXT: vmsne.vi v0, v10, 0
 ; RV32-NEXT: vsetvli zero, zero, e32, m2, tu, mu
@@ -267,8 +267,8 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT: vslide1down.vx v10, v10, a1
 ; RV64-NEXT: vslide1down.vx v10, v10, a4
 ; RV64-NEXT: vslide1down.vx v10, v10, a2
-; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vand.vi v10, v10, 1
 ; RV64-NEXT: vmsne.vi v0, v10, 0
 ; RV64-NEXT: vsetvli zero, zero, e32, m2, tu, mu
@@ -306,8 +306,8 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT: vslide1down.vx v10, v10, a0
 ; RV32-NEXT: vslide1down.vx v10, v10, a3
 ; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vand.vi v10, v10, 1
 ; RV32-NEXT: vmsne.vi v0, v10, 0
 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -337,8 +337,8 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT: vslide1down.vx v10, v10, a0
 ; RV64-NEXT: vslide1down.vx v10, v10, a3
 ; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vand.vi v10, v10, 1
 ; RV64-NEXT: vmsne.vi v0, v10, 0
 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -377,8 +377,8 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT: vslide1down.vx v10, v10, a0
 ; RV32-NEXT: vslide1down.vx v10, v10, a3
 ; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v10, 2
 ; RV32-NEXT: vand.vi v10, v10, 1
 ; RV32-NEXT: vmsne.vi v0, v10, 0
 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
@@ -408,8 +408,8 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT: vslide1down.vx v10, v10, a0
 ; RV64-NEXT: vslide1down.vx v10, v10, a3
 ; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v10, v10, 2
 ; RV64-NEXT: vand.vi v10, v10, 1
 ; RV64-NEXT: vmsne.vi v0, v10, 0
 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma

llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

Lines changed: 80 additions & 0 deletions

@@ -3434,6 +3434,86 @@ define <vscale x 4 x i32> @vid.v(<vscale x 4 x i32> %c, iXLen %vl) {
   ret <vscale x 4 x i32> %2
 }

+define <vscale x 4 x i32> @vslideup_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
+; NOVLOPT-LABEL: vslideup_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vslideup.vx v10, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vslideup_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vslideup.vx v10, v8, a0
+; VLOPT-NEXT: vadd.vv v8, v10, v10
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vslideup(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vslideup_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vslideup_vi:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vslideup.vi v10, v8, 2
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vslideup_vi:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vslideup.vi v10, v8, 2
+; VLOPT-NEXT: vadd.vv v8, v10, v10
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vslideup(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 2, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vslidedown_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
+; NOVLOPT-LABEL: vslidedown_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vslidedown.vx v8, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vslidedown_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vslidedown.vx v8, v8, a0
+; VLOPT-NEXT: vadd.vv v8, v8, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vslidedown(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vslidedown_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vslidedown_vi:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vslidedown.vi v8, v8, 2
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vslidedown_vi:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vslidedown.vi v8, v8, 2
+; VLOPT-NEXT: vadd.vv v8, v8, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vslidedown(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 2, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
 define <vscale x 4 x float> @vfadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vfadd_vv:
 ; NOVLOPT: # %bb.0:

llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll

Lines changed: 30 additions & 30 deletions

@@ -536,37 +536,37 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
 ; RV32-NEXT: vmv1r.v v8, v0
+; RV32-NEXT: slli a2, a1, 1
 ; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: li a2, -1
-; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: li a1, -1
+; RV32-NEXT: vmerge.vim v10, v9, 1, v0
+; RV32-NEXT: vwaddu.vv v11, v10, v10
+; RV32-NEXT: vwmaccu.vx v11, a1, v10
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
 ; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: srli a1, a1, 2
 ; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT: vmerge.vim v11, v9, 1, v0
-; RV32-NEXT: vwaddu.vv v12, v11, v11
-; RV32-NEXT: vwmaccu.vx v12, a2, v11
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: srli a2, a2, 2
-; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vmsne.vi v0, v11, 0
 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vx v11, v12, a2
+; RV32-NEXT: vslidedown.vx v11, v11, a1
+; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0
 ; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
 ; RV32-NEXT: vmsne.vi v0, v11, 0
-; RV32-NEXT: slli a3, a1, 1
 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslideup.vx v10, v9, a2
-; RV32-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; RV32-NEXT: vslideup.vx v10, v9, a1
 ; RV32-NEXT: vmsne.vi v0, v10, 0
 ; RV32-NEXT: vle32.v v10, (a0), v0.t
 ; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
 ; RV32-NEXT: vnsrl.wx v13, v10, a1
 ; RV32-NEXT: vmv.x.s a1, v10
 ; RV32-NEXT: vnsrl.wi v12, v10, 0
-; RV32-NEXT: srli a3, a3, 1
+; RV32-NEXT: srli a2, a2, 1
 ; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma
 ; RV32-NEXT: vsseg2e32.v v12, (a0), v0.t
 ; RV32-NEXT: mv a0, a1
 ; RV32-NEXT: ret
@@ -657,30 +657,30 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1>
 ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
 ; RV32-NEXT: vmv1r.v v9, v0
 ; RV32-NEXT: vmv1r.v v0, v8
+; RV32-NEXT: slli a1, a1, 1
 ; RV32-NEXT: vmv.v.i v8, 0
 ; RV32-NEXT: li a2, -1
-; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT: vmerge.vim v11, v8, 1, v0
+; RV32-NEXT: vmerge.vim v10, v8, 1, v0
 ; RV32-NEXT: vmv1r.v v0, v9
 ; RV32-NEXT: vmerge.vim v9, v8, 1, v0
-; RV32-NEXT: vwaddu.vv v12, v9, v11
-; RV32-NEXT: vwmaccu.vx v12, a2, v11
+; RV32-NEXT: vwaddu.vv v11, v9, v10
+; RV32-NEXT: vwmaccu.vx v11, a2, v10
 ; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.i v9, 0
 ; RV32-NEXT: srli a2, a2, 2
-; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vx v9, v12, a2
-; RV32-NEXT: vmerge.vim v10, v10, 1, v0
 ; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT: vmsne.vi v0, v9, 0
-; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-NEXT: vmsne.vi v0, v11, 0
 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslideup.vx v10, v8, a2
+; RV32-NEXT: vslidedown.vx v10, v11, a2
 ; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT: vmerge.vim v9, v9, 1, v0
+; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
 ; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT: vslideup.vx v9, v8, a2
+; RV32-NEXT: vmsne.vi v0, v9, 0
 ; RV32-NEXT: vle32.v v10, (a0), v0.t
 ; RV32-NEXT: li a0, 32
 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
