Skip to content

Commit b9f31da

Browse files
authored
Merge pull request #141 from sx-aurora-dev/merge/ve-merge-isel
Merge/ve merge isel
2 parents 2ab1d52 + 67bc134 commit b9f31da

File tree

7 files changed

+380
-21
lines changed

7 files changed

+380
-21
lines changed

llvm/lib/Target/VE/VEISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ class VETargetLowering final : public TargetLowering, public VELoweringInfo {
308308
SDValue combineEntryToken_VVP(SDNode *N, DAGCombinerInfo &DCI) const;
309309
// Expand SETCC operands directly used in vector arithmetic ops.
310310
SDValue lowerSETCCInVectorArithmetic(SDValue Op, SelectionDAG &DAG) const;
311-
SDValue expandSELECT(SDValue Op, SmallVectorImpl<SDValue> &LegalOperands,
311+
SDValue expandSELECT(SDValue MaskV, SDValue OnTrueV, SDValue OnFalseV,
312312
EVT LegalResVT, CustomDAG &CDAG, SDValue AVL) const;
313313

314314
/// Custom Lower for VVP {

llvm/lib/Target/VE/VVPISelLowering.cpp

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,16 +1022,11 @@ SDValue VETargetLowering::lowerVVP_TRUNCATE(SDValue Op,
10221022
return Op.getOperand(0);
10231023
}
10241024

1025-
SDValue VETargetLowering::expandSELECT(SDValue Op,
1026-
SmallVectorImpl<SDValue> &LegalOperands,
1025+
SDValue VETargetLowering::expandSELECT(SDValue MaskV, SDValue OnTrueV, SDValue OnFalseV,
10271026
EVT LegalResVT, CustomDAG &CDAG,
10281027
SDValue AVL) const {
1029-
SDValue MaskV = LegalOperands[0];
1030-
SDValue OnTrueV = LegalOperands[1];
1031-
SDValue OnFalseV = LegalOperands[2];
1032-
10331028
// Expand vNi1 selects into a boolean expression
1034-
if (isMaskType(Op.getValueType())) {
1029+
if (isMaskType(LegalResVT)) {
10351030
auto NotMaskV = CDAG.createNot(MaskV, LegalResVT);
10361031

10371032
return CDAG.getNode(
@@ -1542,6 +1537,51 @@ SDValue VETargetLowering::splitVectorArithmetic(SDValue Op,
15421537
return CDAG.createPack(MVT::v512i1, LoRes, HiRes, AVL);
15431538
}
15441539

1540+
/// Return the mask (condition) operand of the select-like node \p Op.
///
/// ISD::VSELECT, ISD::SELECT, ISD::VP_SELECT and ISD::VP_MERGE carry it as
/// operand 0; VEISD::VVP_SELECT carries it as operand 2. For any other opcode
/// an empty (default-constructed) SDValue is returned.
// NOTE(review): this definition has no `static`; confirm whether it is meant
// to be file-local before changing its linkage.
SDValue getSelectMask(SDValue Op) {
1541+
switch (Op->getOpcode()) {
1542+
case ISD::VSELECT:
1543+
case ISD::SELECT:
1544+
case ISD::VP_SELECT:
1545+
case ISD::VP_MERGE:
1546+
return Op->getOperand(0);
1547+
case VEISD::VVP_SELECT:
1548+
return Op->getOperand(2);
1549+
default:
1550+
break;
1551+
}
1552+
return SDValue();
1553+
}
1554+
1555+
/// Return the on-true value operand of the select-like node \p Op.
///
/// ISD::VSELECT, ISD::SELECT, ISD::VP_SELECT and ISD::VP_MERGE carry it as
/// operand 1; VEISD::VVP_SELECT carries it as operand 0. For any other opcode
/// an empty (default-constructed) SDValue is returned.
// NOTE(review): this definition has no `static`; confirm whether it is meant
// to be file-local before changing its linkage.
SDValue getSelectOnTrueVal(SDValue Op) {
1556+
switch (Op->getOpcode()) {
1557+
case ISD::VSELECT:
1558+
case ISD::SELECT:
1559+
case ISD::VP_SELECT:
1560+
case ISD::VP_MERGE:
1561+
return Op->getOperand(1);
1562+
case VEISD::VVP_SELECT:
1563+
return Op->getOperand(0);
1564+
default:
1565+
break;
1566+
}
1567+
return SDValue();
1568+
}
1569+
1570+
/// Return the on-false value operand of the select-like node \p Op.
///
/// ISD::VSELECT, ISD::SELECT, ISD::VP_SELECT and ISD::VP_MERGE carry it as
/// operand 2; VEISD::VVP_SELECT carries it as operand 1. For any other opcode
/// an empty (default-constructed) SDValue is returned.
// NOTE(review): this definition has no `static`; confirm whether it is meant
// to be file-local before changing its linkage.
SDValue getSelectOnFalseVal(SDValue Op) {
1571+
switch (Op->getOpcode()) {
1572+
case ISD::VSELECT:
1573+
case ISD::SELECT:
1574+
case ISD::VP_SELECT:
1575+
case ISD::VP_MERGE:
1576+
return Op->getOperand(2);
1577+
case VEISD::VVP_SELECT:
1578+
return Op->getOperand(1);
1579+
default:
1580+
break;
1581+
}
1582+
return SDValue();
1583+
}
1584+
15451585
SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG,
15461586
VVPExpansionMode Mode) const {
15471587

@@ -1669,7 +1709,11 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG,
16691709
MaskingArgs.Mask, MaskingArgs.AVL});
16701710
}
16711711
case VEISD::VVP_SELECT: {
1672-
return expandSELECT(Op, LegalOperands, ResVecTy, CDAG, MaskingArgs.AVL);
1712+
SDValue CondMask = getSelectMask(Op);
1713+
SDValue OnTrue = getSelectOnTrueVal(Op);
1714+
SDValue OnFalse = getSelectOnFalseVal(Op);
1715+
return expandSELECT(CondMask, OnTrue, OnFalse, ResVecTy, CDAG,
1716+
MaskingArgs.AVL);
16731717
}
16741718
default:
16751719
llvm_unreachable("Unexpected ternary operator!");
@@ -2101,7 +2145,11 @@ SDValue VETargetLowering::lowerVPToVVP(SDValue Op, SelectionDAG &DAG,
21012145
OpVec.push_back(Op->getOperand(1));
21022146
OpVec.push_back(Mask);
21032147
OpVec.push_back(AVL);
2104-
2148+
} else if (*VVPOC == VEISD::VVP_SELECT) {
2149+
OpVec.push_back(getSelectOnTrueVal(Op));
2150+
OpVec.push_back(getSelectOnFalseVal(Op));
2151+
OpVec.push_back(Mask);
2152+
OpVec.push_back(AVL);
21052153
} else {
21062154
// Default.
21072155
unsigned NumOps = Op.getNumOperands();

llvm/lib/Target/VE/VVPInstrInfo.td

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,13 @@ def SDTGatherVVP: SDTypeProfile<1, 3, [ // vvp gather
6464
SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 2>, IsVLVT<3>
6565
]>;
6666

67-
// select (OnTrue, OnFalse, CondMask, VLen)
68-
def SDTSelectVVP : SDTypeProfile<1, 4, [ // vp_select
69-
SDTCisVec<0>, SDTCisSameNumEltsAs<0, 3>, SDTCisSameAs<0,1>, SDTCisSameAs<1, 2>, IsVLVT<4>
67+
// Select(OnTrue, OnFalse, SelMask, vl)
68+
def SDTSelectVVP : SDTypeProfile<1, 4, [ // vp_select, vp_merge
69+
SDTCisVec<0>,
70+
SDTCisSameNumEltsAs<0, 3>,
71+
SDTCisSameAs<0, 1>,
72+
SDTCisSameAs<1, 2>,
73+
IsVLVT<4>
7074
]>;
7175

7276
// setcc (lhs, rhs, cc, mask, vl)
@@ -164,7 +168,7 @@ def vvp_ffmsn : SDNode<"VEISD::VVP_FFMSN", SDTFPTernaryOpVVP>; // (x - (y*z)
164168
// def vvp_ffman : SDNode<"VEISD::VVP_FFMAN", SDTFPTernaryOpVVP>; // -((y*z) + x)
165169

166170
// select (OnT,OnF,SelM,Pivot) = (lane < Pivot && SelM[lane]) ? OnT[lane] : OnF[lane]
167-
def vvp_select : SDNode<"VEISD::VVP_SELECT", SDTSelectVVP>;
171+
def vvp_select : SDNode<"VEISD::VVP_SELECT", SDTSelectVVP>;
168172

169173
// setcc (lhs, rhs, cc, mask, vl)
170174
def vvp_setcc : SDNode<"VEISD::VVP_SETCC", SDTSetCCVVP>;

llvm/lib/Target/VE/VVPInstrPatternsVec.td

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -404,15 +404,37 @@ defm : Ternary_ShortLong<c_vvp_ffmsn,
404404
// TODO: vvp_ffman
405405

406406
///// Selection /////
407-
multiclass VectorSelect<ValueType DataVT> {
408-
def :Pat<(DataVT (vvp_select DataVT:$vtrue, DataVT:$vfalse, v256i1:$vm, i32:$pivot)),
409-
(VMRGvvml_v $vfalse, $vtrue, $vm, $pivot, $vfalse)>;
407+
// Selection pattern for a merge/select node with operands
// (vtrue, vfalse, mask, avl).
//   OpNode     - the pattern operator to match.
//   DataVT     - value type of both data operands.
//   MaskVT     - value type of the mask operand.
//   OpBaseName - instruction name prefix; the instruction selected is
//                OpBaseName#"vvml_v".
// Note that the instruction receives $vfalse before $vtrue, and $vfalse is
// passed again as the last operand (presumably the pass-through value -
// TODO confirm against the instruction definition).
multiclass Merge_mvv<
408+
SDPatternOperator OpNode,
409+
ValueType DataVT, ValueType MaskVT,
410+
string OpBaseName> {
411+
// Masked.
412+
def : Pat<(OpNode
413+
DataVT:$vtrue, DataVT:$vfalse,
414+
MaskVT:$vm,
415+
i32:$avl),
416+
(!cast<Instruction>(OpBaseName#"vvml_v")
417+
$vfalse, $vtrue, $vm, $avl, $vfalse)>;
410418
}
411419

412-
defm : VectorSelect<v256f64>;
413-
defm : VectorSelect<v256i64>;
414-
defm : VectorSelect<v256i32>;
415-
defm : VectorSelect<v256f32>;
420+
// Instantiates Merge_mvv twice for the same OpNode and OpBaseName: once with
// LongDataVT and once with ShortDataVT as the data type. Both instantiations
// use a v256i1 mask.
multiclass Merge_mvv_ShortLong<
421+
SDPatternOperator OpNode,
422+
ValueType LongDataVT, ValueType ShortDataVT,
423+
string OpBaseName> {
424+
defm : Merge_mvv<OpNode,
425+
LongDataVT, v256i1,
426+
OpBaseName>;
427+
defm : Merge_mvv<OpNode,
428+
ShortDataVT, v256i1,
429+
OpBaseName>;
430+
}
431+
432+
defm : Merge_mvv_ShortLong<vvp_select,
433+
v256f64,
434+
v256f32, "VMRG">;
435+
defm : Merge_mvv_ShortLong<vvp_select,
436+
v256i64,
437+
v256i32, "VMRG">;
416438

417439
multiclass VectorSelect_Packed<ValueType PackedVT> {
418440
def : Pat<(PackedVT (vvp_select PackedVT:$vtrue, PackedVT:$vfalse, v512i1:$vm, i32:$pivot)),

llvm/lib/Target/VE/VVPNodes.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ REGISTER_TERNARY_VVP_OP(VVP_FFMSN) REGISTER_PACKED(VVP_FFMSN)
160160

161161
// Select
162162
ADD_TERNARY_VVP_OP(VVP_SELECT,VSELECT) HANDLE_VP_TO_VVP(VP_SELECT, VVP_SELECT) REGISTER_PACKED(VVP_SELECT)
163+
HANDLE_VP_TO_VVP(VP_MERGE, VVP_SELECT)
163164
ADD_TERNARY_VVP_OP(VVP_SETCC,SETCC) HANDLE_VP_TO_VVP(VP_SETCC, VVP_SETCC)
164165

165166
// sint <> fp
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
3+
4+
declare <256 x i32> @llvm.vp.merge.v256i32(<256 x i1>, <256 x i32>, <256 x i32>, i32)
5+
6+
define fastcc <256 x i32> @test_vp_merge_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %pivot) {
7+
; CHECK-LABEL: test_vp_merge_v256i32_vv:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: and %s0, %s0, (32)0
10+
; CHECK-NEXT: lvl %s0
11+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
12+
; CHECK-NEXT: lea %s16, 256
13+
; CHECK-NEXT: lvl %s16
14+
; CHECK-NEXT: vor %v0, (0)1, %v1
15+
; CHECK-NEXT: b.l.t (, %s10)
16+
%r0 = call <256 x i32> @llvm.vp.merge.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
17+
ret <256 x i32> %r0
18+
}
19+
20+
define fastcc <256 x i32> @test_vp_merge_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %pivot) {
21+
; CHECK-LABEL: test_vp_merge_v256i32_vr:
22+
; CHECK: # %bb.0:
23+
; CHECK-NEXT: and %s1, %s1, (32)0
24+
; CHECK-NEXT: lea %s2, 256
25+
; CHECK-NEXT: lvl %s2
26+
; CHECK-NEXT: vbrd %v1, %s0
27+
; CHECK-NEXT: lvl %s1
28+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
29+
; CHECK-NEXT: lea %s16, 256
30+
; CHECK-NEXT: lvl %s16
31+
; CHECK-NEXT: vor %v0, (0)1, %v1
32+
; CHECK-NEXT: b.l.t (, %s10)
33+
%xins = insertelement <256 x i32> undef, i32 %s1, i32 0
34+
%i1 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
35+
%r0 = call <256 x i32> @llvm.vp.merge.v256i32(<256 x i1> %m, <256 x i32> %i0, <256 x i32> %i1, i32 %pivot)
36+
ret <256 x i32> %r0
37+
}
38+
39+
declare <256 x float> @llvm.vp.merge.v256f32(<256 x i1>, <256 x float>, <256 x float>, i32)
40+
41+
define fastcc <256 x float> @test_vp_merge_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %pivot) {
42+
; CHECK-LABEL: test_vp_merge_v256f32_vv:
43+
; CHECK: # %bb.0:
44+
; CHECK-NEXT: and %s0, %s0, (32)0
45+
; CHECK-NEXT: lvl %s0
46+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
47+
; CHECK-NEXT: lea %s16, 256
48+
; CHECK-NEXT: lvl %s16
49+
; CHECK-NEXT: vor %v0, (0)1, %v1
50+
; CHECK-NEXT: b.l.t (, %s10)
51+
%r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
52+
ret <256 x float> %r0
53+
}
54+
55+
define fastcc <256 x float> @test_vp_merge_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %pivot) {
56+
; CHECK-LABEL: test_vp_merge_v256f32_vr:
57+
; CHECK: # %bb.0:
58+
; CHECK-NEXT: and %s1, %s1, (32)0
59+
; CHECK-NEXT: lea %s2, 256
60+
; CHECK-NEXT: lvl %s2
61+
; CHECK-NEXT: vbrd %v1, %s0
62+
; CHECK-NEXT: lvl %s1
63+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
64+
; CHECK-NEXT: lea %s16, 256
65+
; CHECK-NEXT: lvl %s16
66+
; CHECK-NEXT: vor %v0, (0)1, %v1
67+
; CHECK-NEXT: b.l.t (, %s10)
68+
%xins = insertelement <256 x float> undef, float %s1, i32 0
69+
%i1 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
70+
%r0 = call <256 x float> @llvm.vp.merge.v256f32(<256 x i1> %m, <256 x float> %i0, <256 x float> %i1, i32 %pivot)
71+
ret <256 x float> %r0
72+
}
73+
74+
declare <256 x double> @llvm.vp.merge.v256f64(<256 x i1>, <256 x double>, <256 x double>, i32)
75+
76+
define fastcc <256 x double> @test_vp_merge_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %pivot) {
77+
; CHECK-LABEL: test_vp_merge_v256f64_vv:
78+
; CHECK: # %bb.0:
79+
; CHECK-NEXT: and %s0, %s0, (32)0
80+
; CHECK-NEXT: lvl %s0
81+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
82+
; CHECK-NEXT: lea %s16, 256
83+
; CHECK-NEXT: lvl %s16
84+
; CHECK-NEXT: vor %v0, (0)1, %v1
85+
; CHECK-NEXT: b.l.t (, %s10)
86+
%r0 = call <256 x double> @llvm.vp.merge.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
87+
ret <256 x double> %r0
88+
}
89+
90+
define fastcc <256 x double> @test_vp_merge_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %pivot) {
91+
; CHECK-LABEL: test_vp_merge_v256f64_vr:
92+
; CHECK: # %bb.0:
93+
; CHECK-NEXT: and %s1, %s1, (32)0
94+
; CHECK-NEXT: lea %s2, 256
95+
; CHECK-NEXT: lvl %s2
96+
; CHECK-NEXT: vbrd %v1, %s0
97+
; CHECK-NEXT: lvl %s1
98+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
99+
; CHECK-NEXT: lea %s16, 256
100+
; CHECK-NEXT: lvl %s16
101+
; CHECK-NEXT: vor %v0, (0)1, %v1
102+
; CHECK-NEXT: b.l.t (, %s10)
103+
%xins = insertelement <256 x double> undef, double %s1, i32 0
104+
%i1 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
105+
%r0 = call <256 x double> @llvm.vp.merge.v256f64(<256 x i1> %m, <256 x double> %i0, <256 x double> %i1, i32 %pivot)
106+
ret <256 x double> %r0
107+
}
108+
109+
declare <256 x i64> @llvm.vp.merge.v256i64(<256 x i1>, <256 x i64>, <256 x i64>, i32)
110+
111+
define fastcc <256 x i64> @test_vp_merge_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %pivot) {
112+
; CHECK-LABEL: test_vp_merge_v256i64_vv:
113+
; CHECK: # %bb.0:
114+
; CHECK-NEXT: and %s0, %s0, (32)0
115+
; CHECK-NEXT: lvl %s0
116+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
117+
; CHECK-NEXT: lea %s16, 256
118+
; CHECK-NEXT: lvl %s16
119+
; CHECK-NEXT: vor %v0, (0)1, %v1
120+
; CHECK-NEXT: b.l.t (, %s10)
121+
%r0 = call <256 x i64> @llvm.vp.merge.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
122+
ret <256 x i64> %r0
123+
}
124+
125+
define fastcc <256 x i64> @test_vp_merge_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %pivot) {
126+
; CHECK-LABEL: test_vp_merge_v256i64_vr:
127+
; CHECK: # %bb.0:
128+
; CHECK-NEXT: and %s1, %s1, (32)0
129+
; CHECK-NEXT: lea %s2, 256
130+
; CHECK-NEXT: lvl %s2
131+
; CHECK-NEXT: vbrd %v1, %s0
132+
; CHECK-NEXT: lvl %s1
133+
; CHECK-NEXT: vmrg %v1, %v1, %v0, %vm1
134+
; CHECK-NEXT: lea %s16, 256
135+
; CHECK-NEXT: lvl %s16
136+
; CHECK-NEXT: vor %v0, (0)1, %v1
137+
; CHECK-NEXT: b.l.t (, %s10)
138+
%xins = insertelement <256 x i64> undef, i64 %s1, i32 0
139+
%i1 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
140+
%r0 = call <256 x i64> @llvm.vp.merge.v256i64(<256 x i1> %m, <256 x i64> %i0, <256 x i64> %i1, i32 %pivot)
141+
ret <256 x i64> %r0
142+
}

0 commit comments

Comments
 (0)