Skip to content

Commit b9024a1

Browse files
author
Simon Moll
committed
Merge commit '5527139302d9b0416b9fa7f1b84760d6acacda12' into merge/vp-rvv-select
2 parents a3b812b + 5527139 commit b9024a1

File tree

4 files changed

+204
-0
lines changed

4 files changed

+204
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ class VectorLegalizer {
133133
/// Implement vselect in terms of XOR, AND, OR when blend is not
134134
/// supported by the target.
135135
SDValue ExpandVSELECT(SDNode *Node);
136+
SDValue ExpandVP_SELECT(SDNode *Node);
136137
SDValue ExpandSELECT(SDNode *Node);
137138
std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
138139
SDValue ExpandStore(SDNode *N);
@@ -727,6 +728,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
727728
case ISD::VSELECT:
728729
Results.push_back(ExpandVSELECT(Node));
729730
return;
731+
case ISD::VP_SELECT:
732+
Results.push_back(ExpandVP_SELECT(Node));
733+
return;
730734
case ISD::SELECT:
731735
Results.push_back(ExpandSELECT(Node));
732736
return;
@@ -1227,6 +1231,37 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
12271231
return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
12281232
}
12291233

1234+
/// Expand a VP_SELECT node into predicated bitwise logic:
///
///   result = (Op1 & Cond) | (Op2 & ~Cond)
///
/// Operands of \p Node, in order: the i1 condition vector, the value chosen
/// where the condition is true, the value chosen where it is false, and the
/// explicit vector length (EVL).
///
/// The bitwise form is only valid when the selected values are themselves
/// i1 vectors; for any other element type, or when the target marks any of
/// VP_AND / VP_OR / VP_XOR as Expand for the condition type, the node is
/// unrolled instead.
///
/// \returns the replacement value for \p Node.
SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
  // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
  // do not support it natively.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);
  SDValue EVL = Node->getOperand(3);

  EVT VT = Mask.getValueType();

  // If we can't even use the basic vector operations of
  // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
  if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
    return DAG.UnrollVectorOp(Node);

  // This operation also isn't safe when the operands aren't also booleans.
  if (Op1.getValueType().getVectorElementType() != MVT::i1)
    return DAG.UnrollVectorOp(Node);

  // The select condition is a data operand, not a predicate: lanes where the
  // condition is false must still produce Op2. VP operations leave lanes with
  // a false predicate undefined, so the helper operations below must be
  // predicated with an all-ones mask and limited only by EVL. Using the
  // condition itself as the predicate would make ~Cond (and therefore the
  // final result) undefined on exactly the lanes that select Op2.
  SDValue Ones = DAG.getAllOnesConstant(DL, VT);
  SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);

  Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
  Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
  return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
}
1264+
12301265
void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
12311266
SmallVectorImpl<SDValue> &Results) {
12321267
// Attempt to expand using TargetLowering.

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
561561
setOperationAction(ISD::SELECT, VT, Custom);
562562
setOperationAction(ISD::SELECT_CC, VT, Expand);
563563
setOperationAction(ISD::VSELECT, VT, Expand);
564+
setOperationAction(ISD::VP_SELECT, VT, Expand);
564565

565566
setOperationAction(ISD::VP_AND, VT, Custom);
566567
setOperationAction(ISD::VP_OR, VT, Custom);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,76 @@
44
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
55
; RUN: -verify-machineinstrs < %s | FileCheck %s
66

7+
declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
8+
9+
define <1 x i1> @select_v1i1(<1 x i1> %a, <1 x i1> %b, <1 x i1> %c, i32 zeroext %evl) {
10+
; CHECK-LABEL: select_v1i1:
11+
; CHECK: # %bb.0:
12+
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
13+
; CHECK-NEXT: vmandn.mm v9, v9, v0
14+
; CHECK-NEXT: vmand.mm v8, v8, v0
15+
; CHECK-NEXT: vmor.mm v0, v8, v9
16+
; CHECK-NEXT: ret
17+
%v = call <1 x i1> @llvm.vp.select.v1i1(<1 x i1> %a, <1 x i1> %b, <1 x i1> %c, i32 %evl)
18+
ret <1 x i1> %v
19+
}
20+
21+
declare <2 x i1> @llvm.vp.select.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32)
22+
23+
define <2 x i1> @select_v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %c, i32 zeroext %evl) {
24+
; CHECK-LABEL: select_v2i1:
25+
; CHECK: # %bb.0:
26+
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
27+
; CHECK-NEXT: vmandn.mm v9, v9, v0
28+
; CHECK-NEXT: vmand.mm v8, v8, v0
29+
; CHECK-NEXT: vmor.mm v0, v8, v9
30+
; CHECK-NEXT: ret
31+
%v = call <2 x i1> @llvm.vp.select.v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %c, i32 %evl)
32+
ret <2 x i1> %v
33+
}
34+
35+
declare <4 x i1> @llvm.vp.select.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
36+
37+
define <4 x i1> @select_v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c, i32 zeroext %evl) {
38+
; CHECK-LABEL: select_v4i1:
39+
; CHECK: # %bb.0:
40+
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
41+
; CHECK-NEXT: vmandn.mm v9, v9, v0
42+
; CHECK-NEXT: vmand.mm v8, v8, v0
43+
; CHECK-NEXT: vmor.mm v0, v8, v9
44+
; CHECK-NEXT: ret
45+
%v = call <4 x i1> @llvm.vp.select.v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c, i32 %evl)
46+
ret <4 x i1> %v
47+
}
48+
49+
declare <8 x i1> @llvm.vp.select.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
50+
51+
define <8 x i1> @select_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, i32 zeroext %evl) {
52+
; CHECK-LABEL: select_v8i1:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
55+
; CHECK-NEXT: vmandn.mm v9, v9, v0
56+
; CHECK-NEXT: vmand.mm v8, v8, v0
57+
; CHECK-NEXT: vmor.mm v0, v8, v9
58+
; CHECK-NEXT: ret
59+
%v = call <8 x i1> @llvm.vp.select.v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, i32 %evl)
60+
ret <8 x i1> %v
61+
}
62+
63+
declare <16 x i1> @llvm.vp.select.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32)
64+
65+
define <16 x i1> @select_v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %c, i32 zeroext %evl) {
66+
; CHECK-LABEL: select_v16i1:
67+
; CHECK: # %bb.0:
68+
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
69+
; CHECK-NEXT: vmandn.mm v9, v9, v0
70+
; CHECK-NEXT: vmand.mm v8, v8, v0
71+
; CHECK-NEXT: vmor.mm v0, v8, v9
72+
; CHECK-NEXT: ret
73+
%v = call <16 x i1> @llvm.vp.select.v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %c, i32 %evl)
74+
ret <16 x i1> %v
75+
}
76+
777
declare <2 x i8> @llvm.vp.select.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
878

979
define <2 x i8> @select_v2i8(<2 x i1> %a, <2 x i8> %b, <2 x i8> %c, i32 zeroext %evl) {

llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,104 @@
44
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
55
; RUN: -verify-machineinstrs < %s | FileCheck %s
66

7+
declare <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
8+
9+
define <vscale x 1 x i1> @select_nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i1> %c, i32 zeroext %evl) {
10+
; CHECK-LABEL: select_nxv1i1:
11+
; CHECK: # %bb.0:
12+
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
13+
; CHECK-NEXT: vmandn.mm v9, v9, v0
14+
; CHECK-NEXT: vmand.mm v8, v8, v0
15+
; CHECK-NEXT: vmor.mm v0, v8, v9
16+
; CHECK-NEXT: ret
17+
%v = call <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i1> %c, i32 %evl)
18+
ret <vscale x 1 x i1> %v
19+
}
20+
21+
declare <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32)
22+
23+
define <vscale x 2 x i1> @select_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c, i32 zeroext %evl) {
24+
; CHECK-LABEL: select_nxv2i1:
25+
; CHECK: # %bb.0:
26+
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
27+
; CHECK-NEXT: vmandn.mm v9, v9, v0
28+
; CHECK-NEXT: vmand.mm v8, v8, v0
29+
; CHECK-NEXT: vmor.mm v0, v8, v9
30+
; CHECK-NEXT: ret
31+
%v = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c, i32 %evl)
32+
ret <vscale x 2 x i1> %v
33+
}
34+
35+
declare <vscale x 4 x i1> @llvm.vp.select.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32)
36+
37+
define <vscale x 4 x i1> @select_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, <vscale x 4 x i1> %c, i32 zeroext %evl) {
38+
; CHECK-LABEL: select_nxv4i1:
39+
; CHECK: # %bb.0:
40+
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
41+
; CHECK-NEXT: vmandn.mm v9, v9, v0
42+
; CHECK-NEXT: vmand.mm v8, v8, v0
43+
; CHECK-NEXT: vmor.mm v0, v8, v9
44+
; CHECK-NEXT: ret
45+
%v = call <vscale x 4 x i1> @llvm.vp.select.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, <vscale x 4 x i1> %c, i32 %evl)
46+
ret <vscale x 4 x i1> %v
47+
}
48+
49+
declare <vscale x 8 x i1> @llvm.vp.select.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32)
50+
51+
define <vscale x 8 x i1> @select_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, <vscale x 8 x i1> %c, i32 zeroext %evl) {
52+
; CHECK-LABEL: select_nxv8i1:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
55+
; CHECK-NEXT: vmandn.mm v9, v9, v0
56+
; CHECK-NEXT: vmand.mm v8, v8, v0
57+
; CHECK-NEXT: vmor.mm v0, v8, v9
58+
; CHECK-NEXT: ret
59+
%v = call <vscale x 8 x i1> @llvm.vp.select.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, <vscale x 8 x i1> %c, i32 %evl)
60+
ret <vscale x 8 x i1> %v
61+
}
62+
63+
declare <vscale x 16 x i1> @llvm.vp.select.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32)
64+
65+
define <vscale x 16 x i1> @select_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c, i32 zeroext %evl) {
66+
; CHECK-LABEL: select_nxv16i1:
67+
; CHECK: # %bb.0:
68+
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
69+
; CHECK-NEXT: vmandn.mm v9, v9, v0
70+
; CHECK-NEXT: vmand.mm v8, v8, v0
71+
; CHECK-NEXT: vmor.mm v0, v8, v9
72+
; CHECK-NEXT: ret
73+
%v = call <vscale x 16 x i1> @llvm.vp.select.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c, i32 %evl)
74+
ret <vscale x 16 x i1> %v
75+
}
76+
77+
declare <vscale x 32 x i1> @llvm.vp.select.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32)
78+
79+
define <vscale x 32 x i1> @select_nxv32i1(<vscale x 32 x i1> %a, <vscale x 32 x i1> %b, <vscale x 32 x i1> %c, i32 zeroext %evl) {
80+
; CHECK-LABEL: select_nxv32i1:
81+
; CHECK: # %bb.0:
82+
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
83+
; CHECK-NEXT: vmandn.mm v9, v9, v0
84+
; CHECK-NEXT: vmand.mm v8, v8, v0
85+
; CHECK-NEXT: vmor.mm v0, v8, v9
86+
; CHECK-NEXT: ret
87+
%v = call <vscale x 32 x i1> @llvm.vp.select.nxv32i1(<vscale x 32 x i1> %a, <vscale x 32 x i1> %b, <vscale x 32 x i1> %c, i32 %evl)
88+
ret <vscale x 32 x i1> %v
89+
}
90+
91+
declare <vscale x 64 x i1> @llvm.vp.select.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
92+
93+
define <vscale x 64 x i1> @select_nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b, <vscale x 64 x i1> %c, i32 zeroext %evl) {
94+
; CHECK-LABEL: select_nxv64i1:
95+
; CHECK: # %bb.0:
96+
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
97+
; CHECK-NEXT: vmandn.mm v9, v9, v0
98+
; CHECK-NEXT: vmand.mm v8, v8, v0
99+
; CHECK-NEXT: vmor.mm v0, v8, v9
100+
; CHECK-NEXT: ret
101+
%v = call <vscale x 64 x i1> @llvm.vp.select.nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b, <vscale x 64 x i1> %c, i32 %evl)
102+
ret <vscale x 64 x i1> %v
103+
}
104+
7105
declare <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
8106

9107
define <vscale x 1 x i8> @select_nxv1i8(<vscale x 1 x i1> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, i32 zeroext %evl) {

0 commit comments

Comments
 (0)