Skip to content

Commit 95bf5ac

Browse files
author
Simon Moll
committed
[VE] select|vp.merge|vp.select v256 isel and tests
Use the `VMRG` instruction for all three operations (`select`, `vp.merge` and `vp.select`) for now. `vp_select` will be used in passthru patterns. Reviewed By: kaz7 Differential Revision: https://reviews.llvm.org/D117206
1 parent d97fb55 commit 95bf5ac

File tree

7 files changed

+475
-1
lines changed

7 files changed

+475
-1
lines changed

llvm/lib/Target/VE/VEISelLowering.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1720,7 +1720,7 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
17201720
case ISD::EXTRACT_VECTOR_ELT:
17211721
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
17221722

1723-
#define ADD_BINARY_VVP_OP(VVP_NAME, VP_NAME, ISD_NAME) case ISD::ISD_NAME:
1723+
#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
17241724
#include "VVPNodes.def"
17251725
return lowerToVVP(Op, DAG);
17261726
}
@@ -2729,6 +2729,11 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
27292729
assert(LegalVecVT.isSimple());
27302730
return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0),
27312731
Op->getOperand(1), Mask, AVL);
2732+
} else if (VVPOpcode == VEISD::VVP_SELECT) {
2733+
auto Mask = Op->getOperand(0);
2734+
auto OnTrue = Op->getOperand(1);
2735+
auto OnFalse = Op->getOperand(2);
2736+
return DAG.getNode(VVPOpcode, DL, LegalVecVT, OnTrue, OnFalse, Mask, AVL);
27322737
}
27332738
llvm_unreachable("lowerToVVP called for unexpected SDNode.");
27342739
}

llvm/lib/Target/VE/VVPInstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@ def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
3939
IsVLVT<4>
4040
]>;
4141

42+
// Select(OnTrue, OnFalse, SelMask, vl)
43+
def SDTSelectVVP : SDTypeProfile<1, 4, [ // vp_select, vp_merge
44+
SDTCisVec<0>,
45+
SDTCisSameNumEltsAs<0, 3>,
46+
SDTCisSameAs<0, 1>,
47+
SDTCisSameAs<1, 2>,
48+
IsVLVT<4>
49+
]>;
50+
4251
// Binary operator commutative pattern.
4352
class vvp_commutative<SDNode RootOp> :
4453
PatFrags<
@@ -79,3 +88,5 @@ def c_vvp_fmul : vvp_commutative<vvp_fmul>;
7988
def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
8089

8190
// } Binary Operators
91+
92+
def vvp_select : SDNode<"VEISD::VVP_SELECT", SDTSelectVVP>;

llvm/lib/Target/VE/VVPInstrPatternsVec.td

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,3 +191,35 @@ defm : Binary_rv_vv_ShortLong<vvp_fsub,
191191
defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,
192192
f64, v256f64, "VFDIVD",
193193
f32, v256f32, "VFDIVS">;
194+
195+
multiclass Merge_mvv<
196+
SDPatternOperator OpNode,
197+
ValueType DataVT, ValueType MaskVT,
198+
string OpBaseName> {
199+
// Masked.
200+
def : Pat<(OpNode
201+
DataVT:$vtrue, DataVT:$vfalse,
202+
MaskVT:$vm,
203+
i32:$avl),
204+
(!cast<Instruction>(OpBaseName#"vvml_v")
205+
$vfalse, $vtrue, $vm, $avl, $vfalse)>;
206+
}
207+
208+
multiclass Merge_mvv_ShortLong<
209+
SDPatternOperator OpNode,
210+
ValueType LongDataVT, ValueType ShortDataVT,
211+
string OpBaseName> {
212+
defm : Merge_mvv<OpNode,
213+
LongDataVT, v256i1,
214+
OpBaseName>;
215+
defm : Merge_mvv<OpNode,
216+
ShortDataVT, v256i1,
217+
OpBaseName>;
218+
}
219+
220+
defm : Merge_mvv_ShortLong<vvp_select,
221+
v256f64,
222+
v256f32, "VMRG">;
223+
defm : Merge_mvv_ShortLong<vvp_select,
224+
v256i64,
225+
v256i32, "VMRG">;

llvm/lib/Target/VE/VVPNodes.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ ADD_BINARY_VVP_OP_COMPACT(FSUB)
5959
ADD_BINARY_VVP_OP_COMPACT(FMUL)
6060
ADD_BINARY_VVP_OP_COMPACT(FDIV)
6161

62+
// Shuffles.
63+
ADD_VVP_OP(VVP_SELECT,VSELECT)
64+
HANDLE_VP_TO_VVP(VP_SELECT, VVP_SELECT)
65+
HANDLE_VP_TO_VVP(VP_MERGE, VVP_SELECT)
66+
6267
#undef ADD_BINARY_VVP_OP
6368
#undef ADD_BINARY_VVP_OP_COMPACT
6469
#undef ADD_VVP_OP
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
3+
4+
declare <256 x i32> @llvm.vec.select.v256i32(<256 x i1>, <256 x i32>, <256 x i32>, i32)
5+
6+
define fastcc <256 x i32> @test_vec_select_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m) {
7+
; CHECK-LABEL: test_vec_select_v256i32_vv:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: lea %s0, 256
10+
; CHECK-NEXT: lvl %s0
11+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
12+
; CHECK-NEXT: lea %s16, 256
13+
; CHECK-NEXT: lvl %s16
14+
; CHECK-NEXT: vor %v0, (0)1, %v1
15+
; CHECK-NEXT: b.l.t (, %s10)
16+
%r0 = select <256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1
17+
ret <256 x i32> %r0
18+
}
19+
20+
define fastcc <256 x i32> @test_vec_select_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m) {
21+
; CHECK-LABEL: test_vec_select_v256i32_vr:
22+
; CHECK: # %bb.0:
23+
; CHECK-NEXT: and %s0, %s0, (32)0
24+
; CHECK-NEXT: lea %s1, 256
25+
; CHECK-NEXT: lvl %s1
26+
; CHECK-NEXT: vbrd %v1, %s0
27+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
28+
; CHECK-NEXT: lea %s16, 256
29+
; CHECK-NEXT: lvl %s16
30+
; CHECK-NEXT: vor %v0, (0)1, %v1
31+
; CHECK-NEXT: b.l.t (, %s10)
32+
%xins = insertelement <256 x i32> undef, i32 %s1, i32 0
33+
%i1 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
34+
%r0 = select <256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1
35+
ret <256 x i32> %r0
36+
}
37+
38+
declare <256 x float> @llvm.vec.select.v256f32(<256 x i1>, <256 x float>, <256 x float>, i32)
39+
40+
define fastcc <256 x float> @test_vec_select_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m) {
41+
; CHECK-LABEL: test_vec_select_v256f32_vv:
42+
; CHECK: # %bb.0:
43+
; CHECK-NEXT: lea %s0, 256
44+
; CHECK-NEXT: lvl %s0
45+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
46+
; CHECK-NEXT: lea %s16, 256
47+
; CHECK-NEXT: lvl %s16
48+
; CHECK-NEXT: vor %v0, (0)1, %v1
49+
; CHECK-NEXT: b.l.t (, %s10)
50+
%r0 = select <256 x i1> %m, <256 x float> %i0, <256 x float> %i1
51+
ret <256 x float> %r0
52+
}
53+
54+
define fastcc <256 x float> @test_vec_select_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m) {
55+
; CHECK-LABEL: test_vec_select_v256f32_vr:
56+
; CHECK: # %bb.0:
57+
; CHECK-NEXT: lea %s1, 256
58+
; CHECK-NEXT: lvl %s1
59+
; CHECK-NEXT: vbrd %v1, %s0
60+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
61+
; CHECK-NEXT: lea %s16, 256
62+
; CHECK-NEXT: lvl %s16
63+
; CHECK-NEXT: vor %v0, (0)1, %v1
64+
; CHECK-NEXT: b.l.t (, %s10)
65+
%xins = insertelement <256 x float> undef, float %s1, i32 0
66+
%i1 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
67+
%r0 = select <256 x i1> %m, <256 x float> %i0, <256 x float> %i1
68+
ret <256 x float> %r0
69+
}
70+
71+
declare <256 x double> @llvm.vec.select.v256f64(<256 x i1>, <256 x double>, <256 x double>, i32)
72+
73+
define fastcc <256 x double> @test_vec_select_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m) {
74+
; CHECK-LABEL: test_vec_select_v256f64_vv:
75+
; CHECK: # %bb.0:
76+
; CHECK-NEXT: lea %s0, 256
77+
; CHECK-NEXT: lvl %s0
78+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
79+
; CHECK-NEXT: lea %s16, 256
80+
; CHECK-NEXT: lvl %s16
81+
; CHECK-NEXT: vor %v0, (0)1, %v1
82+
; CHECK-NEXT: b.l.t (, %s10)
83+
%r0 = select <256 x i1> %m, <256 x double> %i0, <256 x double> %i1
84+
ret <256 x double> %r0
85+
}
86+
87+
define fastcc <256 x double> @test_vec_select_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m) {
88+
; CHECK-LABEL: test_vec_select_v256f64_vr:
89+
; CHECK: # %bb.0:
90+
; CHECK-NEXT: lea %s1, 256
91+
; CHECK-NEXT: lvl %s1
92+
; CHECK-NEXT: vbrd %v1, %s0
93+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
94+
; CHECK-NEXT: lea %s16, 256
95+
; CHECK-NEXT: lvl %s16
96+
; CHECK-NEXT: vor %v0, (0)1, %v1
97+
; CHECK-NEXT: b.l.t (, %s10)
98+
%xins = insertelement <256 x double> undef, double %s1, i32 0
99+
%i1 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
100+
%r0 = select <256 x i1> %m, <256 x double> %i0, <256 x double> %i1
101+
ret <256 x double> %r0
102+
}
103+
104+
declare <256 x i64> @llvm.vec.select.v256i64(<256 x i1>, <256 x i64>, <256 x i64>, i32)
105+
106+
define fastcc <256 x i64> @test_vec_select_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m) {
107+
; CHECK-LABEL: test_vec_select_v256i64_vv:
108+
; CHECK: # %bb.0:
109+
; CHECK-NEXT: lea %s0, 256
110+
; CHECK-NEXT: lvl %s0
111+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
112+
; CHECK-NEXT: lea %s16, 256
113+
; CHECK-NEXT: lvl %s16
114+
; CHECK-NEXT: vor %v0, (0)1, %v1
115+
; CHECK-NEXT: b.l.t (, %s10)
116+
%r0 = select <256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1
117+
ret <256 x i64> %r0
118+
}
119+
120+
define fastcc <256 x i64> @test_vec_select_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m) {
121+
; CHECK-LABEL: test_vec_select_v256i64_vr:
122+
; CHECK: # %bb.0:
123+
; CHECK-NEXT: lea %s1, 256
124+
; CHECK-NEXT: lvl %s1
125+
; CHECK-NEXT: vbrd %v1, %s0
126+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
127+
; CHECK-NEXT: lea %s16, 256
128+
; CHECK-NEXT: lvl %s16
129+
; CHECK-NEXT: vor %v0, (0)1, %v1
130+
; CHECK-NEXT: b.l.t (, %s10)
131+
%xins = insertelement <256 x i64> undef, i64 %s1, i32 0
132+
%i1 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
133+
%r0 = select <256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1
134+
ret <256 x i64> %r0
135+
}
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
3+
4+
declare <256 x i32> @llvm.vp.merge.v256i32(<256 x i1>, <256 x i32>, <256 x i32>, i32)
5+
6+
define fastcc <256 x i32> @test_vp_merge_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %pivot) {
7+
; CHECK-LABEL: test_vp_merge_v256i32_vv:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: and %s0, %s0, (32)0
10+
; CHECK-NEXT: lvl %s0
11+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
12+
; CHECK-NEXT: lea %s16, 256
13+
; CHECK-NEXT: lvl %s16
14+
; CHECK-NEXT: vor %v0, (0)1, %v1
15+
; CHECK-NEXT: b.l.t (, %s10)
16+
%r0 = call <256 x i32> @llvm.vp.merge.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
17+
ret <256 x i32> %r0
18+
}
19+
20+
define fastcc <256 x i32> @test_vp_merge_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %pivot) {
21+
; CHECK-LABEL: test_vp_merge_v256i32_vr:
22+
; CHECK: # %bb.0:
23+
; CHECK-NEXT: and %s1, %s1, (32)0
24+
; CHECK-NEXT: and %s0, %s0, (32)0
25+
; CHECK-NEXT: lea %s2, 256
26+
; CHECK-NEXT: lvl %s2
27+
; CHECK-NEXT: vbrd %v1, %s0
28+
; CHECK-NEXT: lvl %s1
29+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
30+
; CHECK-NEXT: lea %s16, 256
31+
; CHECK-NEXT: lvl %s16
32+
; CHECK-NEXT: vor %v0, (0)1, %v1
33+
; CHECK-NEXT: b.l.t (, %s10)
34+
%xins = insertelement <256 x i32> undef, i32 %s1, i32 0
35+
%i1 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
36+
%r0 = call <256 x i32> @llvm.vp.merge.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
37+
ret <256 x i32> %r0
38+
}
39+
40+
declare <256 x float> @llvm.vp.merge.v256f32(<256 x i1>, <256 x float>, <256 x float>, i32)
41+
42+
define fastcc <256 x float> @test_vp_merge_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %pivot) {
43+
; CHECK-LABEL: test_vp_merge_v256f32_vv:
44+
; CHECK: # %bb.0:
45+
; CHECK-NEXT: and %s0, %s0, (32)0
46+
; CHECK-NEXT: lvl %s0
47+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
48+
; CHECK-NEXT: lea %s16, 256
49+
; CHECK-NEXT: lvl %s16
50+
; CHECK-NEXT: vor %v0, (0)1, %v1
51+
; CHECK-NEXT: b.l.t (, %s10)
52+
%r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
53+
ret <256 x float> %r0
54+
}
55+
56+
define fastcc <256 x float> @test_vp_merge_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %pivot) {
57+
; CHECK-LABEL: test_vp_merge_v256f32_vr:
58+
; CHECK: # %bb.0:
59+
; CHECK-NEXT: and %s1, %s1, (32)0
60+
; CHECK-NEXT: lea %s2, 256
61+
; CHECK-NEXT: lvl %s2
62+
; CHECK-NEXT: vbrd %v1, %s0
63+
; CHECK-NEXT: lvl %s1
64+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
65+
; CHECK-NEXT: lea %s16, 256
66+
; CHECK-NEXT: lvl %s16
67+
; CHECK-NEXT: vor %v0, (0)1, %v1
68+
; CHECK-NEXT: b.l.t (, %s10)
69+
%xins = insertelement <256 x float> undef, float %s1, i32 0
70+
%i1 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
71+
%r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
72+
ret <256 x float> %r0
73+
}
74+
75+
declare <256 x double> @llvm.vp.merge.v256f64(<256 x i1>, <256 x double>, <256 x double>, i32)
76+
77+
define fastcc <256 x double> @test_vp_merge_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %pivot) {
78+
; CHECK-LABEL: test_vp_merge_v256f64_vv:
79+
; CHECK: # %bb.0:
80+
; CHECK-NEXT: and %s0, %s0, (32)0
81+
; CHECK-NEXT: lvl %s0
82+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
83+
; CHECK-NEXT: lea %s16, 256
84+
; CHECK-NEXT: lvl %s16
85+
; CHECK-NEXT: vor %v0, (0)1, %v1
86+
; CHECK-NEXT: b.l.t (, %s10)
87+
%r0 = call <256 x double> @llvm.vp.merge.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
88+
ret <256 x double> %r0
89+
}
90+
91+
define fastcc <256 x double> @test_vp_merge_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %pivot) {
92+
; CHECK-LABEL: test_vp_merge_v256f64_vr:
93+
; CHECK: # %bb.0:
94+
; CHECK-NEXT: and %s1, %s1, (32)0
95+
; CHECK-NEXT: lea %s2, 256
96+
; CHECK-NEXT: lvl %s2
97+
; CHECK-NEXT: vbrd %v1, %s0
98+
; CHECK-NEXT: lvl %s1
99+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
100+
; CHECK-NEXT: lea %s16, 256
101+
; CHECK-NEXT: lvl %s16
102+
; CHECK-NEXT: vor %v0, (0)1, %v1
103+
; CHECK-NEXT: b.l.t (, %s10)
104+
%xins = insertelement <256 x double> undef, double %s1, i32 0
105+
%i1 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
106+
%r0 = call <256 x double> @llvm.vp.merge.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
107+
ret <256 x double> %r0
108+
}
109+
110+
declare <256 x i64> @llvm.vp.merge.v256i64(<256 x i1>, <256 x i64>, <256 x i64>, i32)
111+
112+
define fastcc <256 x i64> @test_vp_merge_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %pivot) {
113+
; CHECK-LABEL: test_vp_merge_v256i64_vv:
114+
; CHECK: # %bb.0:
115+
; CHECK-NEXT: and %s0, %s0, (32)0
116+
; CHECK-NEXT: lvl %s0
117+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
118+
; CHECK-NEXT: lea %s16, 256
119+
; CHECK-NEXT: lvl %s16
120+
; CHECK-NEXT: vor %v0, (0)1, %v1
121+
; CHECK-NEXT: b.l.t (, %s10)
122+
%r0 = call <256 x i64> @llvm.vp.merge.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
123+
ret <256 x i64> %r0
124+
}
125+
126+
define fastcc <256 x i64> @test_vp_merge_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %pivot) {
127+
; CHECK-LABEL: test_vp_merge_v256i64_vr:
128+
; CHECK: # %bb.0:
129+
; CHECK-NEXT: and %s1, %s1, (32)0
130+
; CHECK-NEXT: lea %s2, 256
131+
; CHECK-NEXT: lvl %s2
132+
; CHECK-NEXT: vbrd %v1, %s0
133+
; CHECK-NEXT: lvl %s1
134+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
135+
; CHECK-NEXT: lea %s16, 256
136+
; CHECK-NEXT: lvl %s16
137+
; CHECK-NEXT: vor %v0, (0)1, %v1
138+
; CHECK-NEXT: b.l.t (, %s10)
139+
%xins = insertelement <256 x i64> undef, i64 %s1, i32 0
140+
%i1 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
141+
%r0 = call <256 x i64> @llvm.vp.merge.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
142+
ret <256 x i64> %r0
143+
}

0 commit comments

Comments
 (0)