
Commit 9702d37

[RISCV] Support scalable vector vp.reverse/splice with Zvfhmin/Zvfbfmin. (#145588)
1 parent 9e3bb5b commit 9702d37

File tree

3 files changed: +220 -2 lines changed


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 2 additions & 0 deletions
@@ -1156,6 +1156,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                     ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
                     ISD::VECTOR_COMPRESS},
                    VT, Custom);
+      setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
+      setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
       MVT EltVT = VT.getVectorElementType();
       if (isTypeLegal(EltVT))
         setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
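
The two setOperationAction calls added above mark the experimental VP splice and reverse nodes for custom lowering on the scalable FP vector types handled in this block, which, going by the commit title, covers the case where only Zvfhmin/Zvfbfmin (rather than full Zvfh) are enabled. A minimal sketch of what this allows, mirroring the tests below (the function name here is illustrative, not part of the patch):

; Reverse the first %evl lanes of a scalable bf16 vector under an all-ones mask.
define <vscale x 2 x bfloat> @example_reverse(<vscale x 2 x bfloat> %src, i32 zeroext %evl) {
  %dst = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 2 x bfloat> %dst
}

The lowering only needs same-element-width data movement (vrgather/vslide at e16), not half/bf16 arithmetic, which is presumably why a single set of CHECK lines in the tests below covers both the Zvfh and Zvfhmin RUN configurations; vp.splice is exercised analogously in vp-splice.ll.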

llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll

Lines changed: 176 additions & 1 deletion
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfh -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s

define <vscale x 1 x half> @test_vp_reverse_nxv1f16_masked(<vscale x 1 x half> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1f16_masked:
@@ -435,3 +436,177 @@ define <vscale x 32 x half> @test_vp_reverse_nxv32f16(<vscale x 32 x half> %src,
%dst = call <vscale x 32 x half> @llvm.experimental.vp.reverse.nxv32f16(<vscale x 32 x half> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
ret <vscale x 32 x half> %dst
}
+
+define <vscale x 1 x bfloat> @test_vp_reverse_nxv1bf16_masked(<vscale x 1 x bfloat> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+%dst = call <vscale x 1 x bfloat> @llvm.experimental.vp.reverse.nxv1bf16(<vscale x 1 x bfloat> %src, <vscale x 1 x i1> %mask, i32 %evl)
+ret <vscale x 1 x bfloat> %dst
+}
+
+define <vscale x 1 x bfloat> @test_vp_reverse_nxv1bf16(<vscale x 1 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+
+%dst = call <vscale x 1 x bfloat> @llvm.experimental.vp.reverse.nxv1bf16(<vscale x 1 x bfloat> %src, <vscale x 1 x i1> splat (i1 1), i32 %evl)
+ret <vscale x 1 x bfloat> %dst
+}
+
+define <vscale x 2 x bfloat> @test_vp_reverse_nxv2bf16_masked(<vscale x 2 x bfloat> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+%dst = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> %src, <vscale x 2 x i1> %mask, i32 %evl)
+ret <vscale x 2 x bfloat> %dst
+}
+
+define <vscale x 2 x bfloat> @test_vp_reverse_nxv2bf16(<vscale x 2 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+
+%dst = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
+ret <vscale x 2 x bfloat> %dst
+}
+
+define <vscale x 4 x bfloat> @test_vp_reverse_nxv4bf16_masked(<vscale x 4 x bfloat> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+%dst = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> %src, <vscale x 4 x i1> %mask, i32 %evl)
+ret <vscale x 4 x bfloat> %dst
+}
+
+define <vscale x 4 x bfloat> @test_vp_reverse_nxv4bf16(<vscale x 4 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+
+%dst = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
+ret <vscale x 4 x bfloat> %dst
+}
+
+define <vscale x 8 x bfloat> @test_vp_reverse_nxv8bf16_masked(<vscale x 8 x bfloat> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+%dst = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> %src, <vscale x 8 x i1> %mask, i32 %evl)
+ret <vscale x 8 x bfloat> %dst
+}
+
+define <vscale x 8 x bfloat> @test_vp_reverse_nxv8bf16(<vscale x 8 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+
+%dst = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
+ret <vscale x 8 x bfloat> %dst
+}
+
+define <vscale x 16 x bfloat> @test_vp_reverse_nxv16bf16_masked(<vscale x 16 x bfloat> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+%dst = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> %src, <vscale x 16 x i1> %mask, i32 %evl)
+ret <vscale x 16 x bfloat> %dst
+}
+
+define <vscale x 16 x bfloat> @test_vp_reverse_nxv16bf16(<vscale x 16 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+
+%dst = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
+ret <vscale x 16 x bfloat> %dst
+}
+
+define <vscale x 32 x bfloat> @test_vp_reverse_nxv32bf16_masked(<vscale x 32 x bfloat> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+%dst = call <vscale x 32 x bfloat> @llvm.experimental.vp.reverse.nxv32bf16(<vscale x 32 x bfloat> %src, <vscale x 32 x i1> %mask, i32 %evl)
+ret <vscale x 32 x bfloat> %dst
+}
+
+define <vscale x 32 x bfloat> @test_vp_reverse_nxv32bf16(<vscale x 32 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a1
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+
+%dst = call <vscale x 32 x bfloat> @llvm.experimental.vp.reverse.nxv32bf16(<vscale x 32 x bfloat> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
+ret <vscale x 32 x bfloat> %dst
+}

llvm/test/CodeGen/RISCV/rvv/vp-splice.ll

Lines changed: 42 additions & 1 deletion
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh -verify-machineinstrs \
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs \
+; RUN: < %s | FileCheck %s
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs \
; RUN: < %s | FileCheck %s

define <vscale x 2 x i64> @test_vp_splice_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
@@ -464,3 +466,42 @@ define <vscale x 2 x half> @test_vp_splice_nxv2f16_masked(<vscale x 2 x half> %v
%v = call <vscale x 2 x half> @llvm.experimental.vp.splice.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, i32 5, <vscale x 2 x i1> %mask, i32 %evla, i32 %evlb)
ret <vscale x 2 x half> %v
}
+
+define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, -5
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 5
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a0
+; CHECK-NEXT: ret
+%v = call <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 5, <vscale x 2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
+ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16_negative_offset(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_nxv2bf16_negative_offset:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, -5
+; CHECK-NEXT: vsetivli zero, 5, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 5
+; CHECK-NEXT: ret
+%v = call <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 -5, <vscale x 2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
+ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16_masked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_nxv2bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, -5
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t
+; CHECK-NEXT: ret
+%v = call <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 5, <vscale x 2 x i1> %mask, i32 %evla, i32 %evlb)
+ret <vscale x 2 x bfloat> %v
+}
