Skip to content

Commit f6ab1f0

Browse files
tclin914 and circYuan authored
[RISCV] Support LLVM IR intrinsics for XAndesVBFHCvt (#145321)
This patch adds LLVM IR intrinsic support for XAndesVBFHCvt. The documentation for the intrinsics can be found at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-widening-convert-intrinsicsxandesvbfhcvt
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-narrowing-convert-intrinsicsxandesvbfhcvt
Vector bf16 load/store intrinsics are also enabled when +xandesvbfhcvt is specified. The corresponding LLVM IR intrinsic test cases will be added in a follow-up patch. The clang part will be added in a later patch.
Co-authored-by: Tony Chuan-Yue Yuan <yuan593@andestech.com>
1 parent f0d898f commit f6ab1f0

File tree

5 files changed

+224
-1
lines changed

5 files changed

+224
-1
lines changed

llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
let TargetPrefix = "riscv" in {
14+
// Andes Vector BFloat16 Conversion Extension
15+
def int_riscv_nds_vfwcvt_s_bf16 : RISCVConversionUnMasked;
16+
def int_riscv_nds_vfncvt_bf16_s : RISCVConversionUnMaskedRoundingMode;
17+
1418
// Andes Vector Packed FP16 Extension
1519
defm nds_vfpmadt : RISCVBinaryAAXRoundingMode;
1620
defm nds_vfpmadb : RISCVBinaryAAXRoundingMode;

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
215215
for (MVT VT : F16VecVTs)
216216
addRegClassForRVV(VT);
217217

218-
if (Subtarget.hasVInstructionsBF16Minimal())
218+
if (Subtarget.hasVInstructionsBF16Minimal() ||
219+
Subtarget.hasVendorXAndesVBFHCvt())
219220
for (MVT VT : BF16VecVTs)
220221
addRegClassForRVV(VT);
221222

llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,46 @@ class NDSRVInstVBFHCvt<bits<7> funct7, bits<5> vs1, string opcodestr>
384384
// Multiclass
385385
//===----------------------------------------------------------------------===//
386386

387+
// Emits one unmasked unary pseudo per entry of MxListFW. Each record is named
// "_" # m.MX (appended to the prefix of the instantiating defm), is tagged
// with VLMul = m.value and SEW = 16, and carries an "@earlyclobber $rd"
// constraint. The class arguments are passed as (m.wvrclass, m.vrclass),
// presumably (widened result class, source class) -- confirm against the
// VPseudoUnaryNoMask definition.
multiclass VPseudoVWCVT_S_BF16 {
388+
defvar constraint = "@earlyclobber $rd";
389+
foreach m = MxListFW in {
390+
let VLMul = m.value, SEW=16 in
391+
def "_" # m.MX : VPseudoUnaryNoMask<m.wvrclass, m.vrclass, constraint>,
392+
// NOTE(review): scheduling uses the "WriteVFWCvtIToFV"/"ReadVFWCvtIToFV"
// names for this conversion -- confirm this is the intended sched class.
SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, 16,
393+
forcePassthruRead=true>;
394+
}
395+
}
396+
397+
// Emits one unmasked unary pseudo with a rounding-mode variant
// (VPseudoUnaryNoMaskRoundingMode) per entry of MxListFW. Each record is
// named "_" # m.MX (appended to the prefix of the instantiating defm), is
// tagged with VLMul = m.value and SEW = 16, and carries an
// "@earlyclobber $rd" constraint. The class arguments are passed as
// (m.vrclass, m.wvrclass), i.e. in the opposite order to VPseudoVWCVT_S_BF16;
// presumably (narrow result class, wide source class) -- confirm against the
// VPseudoUnaryNoMaskRoundingMode definition.
multiclass VPseudoVNCVT_BF16_S {
398+
defvar constraint = "@earlyclobber $rd";
399+
foreach m = MxListFW in {
400+
let VLMul = m.value, SEW=16 in
401+
def "_" # m.MX : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.wvrclass, constraint>,
402+
// Scheduling is described with the "WriteVFNCvtFToFV"/"ReadVFNCvtFToFV"
// names at SEW 16.
SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16,
403+
forcePassthruRead=true>;
404+
}
405+
}
406+
407+
// Emits one anonymous VPatUnaryNoMask pattern per entry of
// AllWidenableBFloatToFloatVectors, guarded by HasVendorXAndesVBFHCvt.
//   intrinsic   - intrinsic record name, forwarded unchanged.
//   instruction - pseudo name stem, forwarded unchanged; the literal "BF16" is
//                 passed as the next argument (presumably appended to the stem
//                 to form the pseudo name -- confirm in VPatUnaryNoMask).
// For each pair, fvti is the entry's Vti and fwti its Wti. The pattern is
// given fwti.Vector / fwti.RegClass first and fvti.Vector / fvti.RegClass
// second, with Log2SEW and LMul taken from fvti.
multiclass VPatConversionS_BF16<string intrinsic, string instruction> {
408+
foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
409+
defvar fvti = fvtiToFWti.Vti;
410+
defvar fwti = fvtiToFWti.Wti;
411+
let Predicates = [HasVendorXAndesVBFHCvt] in
412+
def : VPatUnaryNoMask<intrinsic, instruction, "BF16", fwti.Vector, fvti.Vector,
413+
fvti.Log2SEW, fvti.LMul, fwti.RegClass, fvti.RegClass>;
414+
}
415+
}
416+
417+
// Emits one anonymous VPatUnaryNoMaskRoundingMode pattern per entry of
// AllWidenableBFloatToFloatVectors, guarded by HasVendorXAndesVBFHCvt.
//   intrinsic   - intrinsic record name, forwarded unchanged.
//   instruction - pseudo name stem, forwarded unchanged; the literal "S" is
//                 passed as the next argument (presumably appended to the stem
//                 to form the pseudo name -- confirm in
//                 VPatUnaryNoMaskRoundingMode).
// For each pair, fvti is the entry's Vti and fwti its Wti. The pattern is
// given fvti.Vector / fvti.RegClass first and fwti.Vector / fwti.RegClass
// second (the reverse of VPatConversionS_BF16), with Log2SEW and LMul taken
// from fvti.
multiclass VPatConversionBF16_S<string intrinsic, string instruction> {
418+
foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
419+
defvar fvti = fvtiToFWti.Vti;
420+
defvar fwti = fvtiToFWti.Wti;
421+
let Predicates = [HasVendorXAndesVBFHCvt] in
422+
def : VPatUnaryNoMaskRoundingMode<intrinsic, instruction, "S", fvti.Vector, fwti.Vector,
423+
fvti.Log2SEW, fvti.LMul, fvti.RegClass, fwti.RegClass>;
424+
}
425+
}
426+
387427
let fprclass = !cast<RegisterClass>("FPR32") in
388428
def SCALAR_F16_FPR32 : FPR_Info<16>;
389429

@@ -547,6 +587,14 @@ def : Sh2AddPat<NDS_LEA_W_ZE>;
547587
def : Sh3AddPat<NDS_LEA_D_ZE>;
548588
} // Predicates = [HasVendorXAndesPerf, IsRV64]
549589

590+
// Instantiate the XAndesVBFHCvt conversion pseudos. Both defm prefixes are
// extended with "_" # m.MX by their multiclasses, and every resulting record
// is guarded by the HasVendorXAndesVBFHCvt predicate.
let Predicates = [HasVendorXAndesVBFHCvt] in {
591+
defm PseudoNDS_VFWCVT_S_BF16 : VPseudoVWCVT_S_BF16;
592+
defm PseudoNDS_VFNCVT_BF16_S : VPseudoVNCVT_BF16_S;
593+
} // Predicates = [HasVendorXAndesVBFHCvt]
594+
595+
// Selection patterns for the two conversion intrinsics. Only the pseudo name
// stem is passed here ("PseudoNDS_VFWCVT_S", "PseudoNDS_VFNCVT_BF16"); the
// pattern multiclasses supply "BF16" and "S" respectively as a separate
// argument, presumably completing the PseudoNDS_VFWCVT_S_BF16 /
// PseudoNDS_VFNCVT_BF16_S names -- confirm in the VPatUnaryNoMask* classes.
defm : VPatConversionS_BF16<"int_riscv_nds_vfwcvt_s_bf16", "PseudoNDS_VFWCVT_S">;
596+
defm : VPatConversionBF16_S<"int_riscv_nds_vfncvt_bf16_s", "PseudoNDS_VFNCVT_BF16">;
597+
550598
let Predicates = [HasVendorXAndesVPackFPH],
551599
mayRaiseFPException = true in {
552600
defm PseudoNDS_VFPMADT : VPseudoVFPMAD_VF_RM;
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+xandesvbfhcvt \
3+
; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
4+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+xandesvbfhcvt \
5+
; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
6+
7+
define <vscale x 1 x bfloat> @intrinsic_vfncvt_bf16.s_nxv1bf16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
8+
; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv1bf16_nxv1f32:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
11+
; CHECK-NEXT: nds.vfncvt.bf16.s v9, v8
12+
; CHECK-NEXT: vmv1r.v v8, v9
13+
; CHECK-NEXT: ret
14+
entry:
15+
%a = call <vscale x 1 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv1bf16.nxv1f32(
16+
<vscale x 1 x bfloat> undef,
17+
<vscale x 1 x float> %0,
18+
iXLen 7, iXLen %1)
19+
20+
ret <vscale x 1 x bfloat> %a
21+
}
22+
23+
define <vscale x 2 x bfloat> @intrinsic_vfncvt_bf16.s_nxv2bf16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
24+
; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv2bf16_nxv2f32:
25+
; CHECK: # %bb.0: # %entry
26+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
27+
; CHECK-NEXT: nds.vfncvt.bf16.s v9, v8
28+
; CHECK-NEXT: vmv1r.v v8, v9
29+
; CHECK-NEXT: ret
30+
entry:
31+
%a = call <vscale x 2 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv2bf16.nxv2f32(
32+
<vscale x 2 x bfloat> undef,
33+
<vscale x 2 x float> %0,
34+
iXLen 7, iXLen %1)
35+
36+
ret <vscale x 2 x bfloat> %a
37+
}
38+
39+
define <vscale x 4 x bfloat> @intrinsic_vfncvt_bf16.s_nxv4bf16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
40+
; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv4bf16_nxv4f32:
41+
; CHECK: # %bb.0: # %entry
42+
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
43+
; CHECK-NEXT: nds.vfncvt.bf16.s v10, v8
44+
; CHECK-NEXT: vmv.v.v v8, v10
45+
; CHECK-NEXT: ret
46+
entry:
47+
%a = call <vscale x 4 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv4bf16.nxv4f32(
48+
<vscale x 4 x bfloat> undef,
49+
<vscale x 4 x float> %0,
50+
iXLen 7, iXLen %1)
51+
52+
ret <vscale x 4 x bfloat> %a
53+
}
54+
55+
define <vscale x 8 x bfloat> @intrinsic_vfncvt_bf16.s_nxv8bf16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
56+
; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv8bf16_nxv8f32:
57+
; CHECK: # %bb.0: # %entry
58+
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
59+
; CHECK-NEXT: nds.vfncvt.bf16.s v12, v8
60+
; CHECK-NEXT: vmv.v.v v8, v12
61+
; CHECK-NEXT: ret
62+
entry:
63+
%a = call <vscale x 8 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv8bf16.nxv8f32(
64+
<vscale x 8 x bfloat> undef,
65+
<vscale x 8 x float> %0,
66+
iXLen 7, iXLen %1)
67+
68+
ret <vscale x 8 x bfloat> %a
69+
}
70+
71+
define <vscale x 16 x bfloat> @intrinsic_vfncvt_bf16.s_nxv16bf16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
72+
; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv16bf16_nxv16f32:
73+
; CHECK: # %bb.0: # %entry
74+
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
75+
; CHECK-NEXT: nds.vfncvt.bf16.s v16, v8
76+
; CHECK-NEXT: vmv.v.v v8, v16
77+
; CHECK-NEXT: ret
78+
entry:
79+
%a = call <vscale x 16 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv16bf16.nxv16f32(
80+
<vscale x 16 x bfloat> undef,
81+
<vscale x 16 x float> %0,
82+
iXLen 7, iXLen %1)
83+
84+
ret <vscale x 16 x bfloat> %a
85+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+xandesvbfhcvt \
3+
; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
4+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+xandesvbfhcvt \
5+
; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
6+
7+
define <vscale x 1 x float> @intrinsic_vfwcvt_s.bf16_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
8+
; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv1f32_nxv1bf16:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
11+
; CHECK-NEXT: nds.vfwcvt.s.bf16 v9, v8
12+
; CHECK-NEXT: vmv1r.v v8, v9
13+
; CHECK-NEXT: ret
14+
entry:
15+
%a = call <vscale x 1 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv1f32.nxv1bf16(
16+
<vscale x 1 x float> undef,
17+
<vscale x 1 x bfloat> %0,
18+
iXLen %1)
19+
20+
ret <vscale x 1 x float> %a
21+
}
22+
23+
define <vscale x 2 x float> @intrinsic_vfwcvt_s.bf16_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
24+
; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv2f32_nxv2bf16:
25+
; CHECK: # %bb.0: # %entry
26+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
27+
; CHECK-NEXT: nds.vfwcvt.s.bf16 v9, v8
28+
; CHECK-NEXT: vmv1r.v v8, v9
29+
; CHECK-NEXT: ret
30+
entry:
31+
%a = call <vscale x 2 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv2f32.nxv2bf16(
32+
<vscale x 2 x float> undef,
33+
<vscale x 2 x bfloat> %0,
34+
iXLen %1)
35+
36+
ret <vscale x 2 x float> %a
37+
}
38+
39+
define <vscale x 4 x float> @intrinsic_vfwcvt_s.bf16_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
40+
; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv4f32_nxv4bf16:
41+
; CHECK: # %bb.0: # %entry
42+
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
43+
; CHECK-NEXT: vmv1r.v v10, v8
44+
; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v10
45+
; CHECK-NEXT: ret
46+
entry:
47+
%a = call <vscale x 4 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv4f32.nxv4bf16(
48+
<vscale x 4 x float> undef,
49+
<vscale x 4 x bfloat> %0,
50+
iXLen %1)
51+
52+
ret <vscale x 4 x float> %a
53+
}
54+
55+
define <vscale x 8 x float> @intrinsic_vfwcvt_s.bf16_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
56+
; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv8f32_nxv8bf16:
57+
; CHECK: # %bb.0: # %entry
58+
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
59+
; CHECK-NEXT: vmv2r.v v12, v8
60+
; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v12
61+
; CHECK-NEXT: ret
62+
entry:
63+
%a = call <vscale x 8 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv8f32.nxv8bf16(
64+
<vscale x 8 x float> undef,
65+
<vscale x 8 x bfloat> %0,
66+
iXLen %1)
67+
68+
ret <vscale x 8 x float> %a
69+
}
70+
71+
define <vscale x 16 x float> @intrinsic_vfwcvt_s.bf16_nxv16f32_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
72+
; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv16f32_nxv16bf16:
73+
; CHECK: # %bb.0: # %entry
74+
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
75+
; CHECK-NEXT: vmv4r.v v16, v8
76+
; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v16
77+
; CHECK-NEXT: ret
78+
entry:
79+
%a = call <vscale x 16 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv16f32.nxv16bf16(
80+
<vscale x 16 x float> undef,
81+
<vscale x 16 x bfloat> %0,
82+
iXLen %1)
83+
84+
ret <vscale x 16 x float> %a
85+
}

0 commit comments

Comments
 (0)