Skip to content

Commit 23524b6

Browse files
authored
Merge pull request #148 from sx-aurora-dev/merge/ve-packed-broadcast
Merge/ve packed broadcast
2 parents 235c4a8 + 71f90c2 commit 23524b6

File tree

10 files changed

+106
-75
lines changed

10 files changed

+106
-75
lines changed

llvm/lib/Target/VE/ShuffleSynthesis.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ bool MaskShuffleAnalysis::analyzeVectorSources(bool &AllTrue) const {
355355
// materialize the code to synthesize this operation
356356
SDValue MaskShuffleAnalysis::synthesize(VECustomDAG &CDAG, EVT LegalMaskVT) {
357357
Packing PackFlag =
358-
isPackedType(LegalMaskVT) ? Packing::Dense : Packing::Normal;
358+
isPackedVectorType(LegalMaskVT) ? Packing::Dense : Packing::Normal;
359359

360360
// this view reflects exactly those insertions that are non-constant and have
361361
// a MVT::i32 type
@@ -741,7 +741,7 @@ struct PatternShuffleOp final : public AbstractShuffleOp {
741741
EVT LegalResVT =
742742
PartialV.getValueType(); // LegalizeVectorType(Op.getValueType(),
743743
// Op, DAG, Mode);
744-
bool Packed = isPackedType(LegalResVT);
744+
bool Packed = isPackedVectorType(LegalResVT);
745745
unsigned NativeNumElems = LegalResVT.getVectorNumElements();
746746

747747
EVT ElemTy = PartialV.getValueType().getVectorElementType();
@@ -1014,7 +1014,7 @@ struct ConstantElemOp final : public AbstractShuffleOp {
10141014
cast<FixedVectorType>(VecConstant->getType())->getElementType();
10151015
uint64_t Stride = (ElemTy->getPrimitiveSizeInBits().getFixedSize() + 7) /
10161016
8; // FIXME: should be using the DataLayout
1017-
Packing P = isPackedType(LegalResVT) ? Packing::Dense : Packing::Normal;
1017+
Packing P = isPackedVectorType(LegalResVT) ? Packing::Dense : Packing::Normal;
10181018
SDValue MaskV = CDAG.createUniformConstMask(
10191019
P, LegalResVT.getVectorNumElements(), true);
10201020
SDValue StrideV = CDAG.getConstant(Stride, MVT::i64);

llvm/lib/Target/VE/VECustomDAG.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ namespace llvm {
2828
/// Packing {
2929

3030
bool isPackedMaskType(EVT SomeVT) {
31-
return isPackedType(SomeVT) && isMaskType(SomeVT);
31+
return isPackedVectorType(SomeVT) && isMaskType(SomeVT);
3232
}
3333
template <> Packing getPackingForMaskBits(const LaneBits MB) {
3434
return Packing::Normal;
@@ -81,7 +81,7 @@ MVT getUnpackSourceType(EVT VT, PackElem Elem) {
8181

8282
Packing getPackingForVT(EVT VT) {
8383
assert(VT.isVector());
84-
return isPackedType(VT) ? Packing::Dense : Packing::Normal;
84+
return isPackedVectorType(VT) ? Packing::Dense : Packing::Normal;
8585
}
8686

8787
// True, iff this is a VEC_UNPACK_LO/HI, VEC_SWAP or VEC_PACK.
@@ -278,7 +278,7 @@ unsigned getScalarReductionOpcode(unsigned VVPOC, bool IsMask) {
278278
}
279279

280280
bool supportsPackedMode(unsigned Opcode, EVT IdiomVT) {
281-
bool IsPackedOp = isPackedType(IdiomVT);
281+
bool IsPackedOp = isPackedVectorType(IdiomVT);
282282
bool IsMaskOp = IdiomVT.getVectorElementType() == MVT::i1;
283283

284284
#if 0
@@ -639,7 +639,7 @@ VecLenOpt minVectorLength(VecLenOpt A, VecLenOpt B) {
639639
}
640640

641641
EVT splitType(LLVMContext &Ctx, EVT PackedVT, PackElem P) {
642-
assert(isPackedType(PackedVT));
642+
assert(isPackedVectorType(PackedVT));
643643
unsigned PackedNumEls = PackedVT.getVectorNumElements();
644644

645645
unsigned OneExtra = P == PackElem::Hi ? PackedNumEls % 2 : 0;
@@ -650,7 +650,7 @@ EVT splitType(LLVMContext &Ctx, EVT PackedVT, PackElem P) {
650650
// Whether direct codegen for this type will result in a packed operation
651651
// (requiring a packed VL param..)
652652

653-
bool isPackedType(EVT SomeVT) {
653+
bool isPackedVectorType(EVT SomeVT) {
654654
if (!SomeVT.isVector())
655655
return false;
656656
return SomeVT.getVectorNumElements() > StandardVectorWidth;
@@ -669,7 +669,7 @@ static SDValue supplementPackedReplication(SDValue Op, SelectionDAG &DAG) {
669669
auto VLOp = Op.getOperand(1);
670670

671671
// v256x broadcast (element has to be i64/f64 always)
672-
if (!isPackedType(VT))
672+
if (!isPackedVectorType(VT))
673673
return Op;
674674

675675
LLVM_DEBUG(dbgs() << "Legalize packed broadcast\n");
@@ -822,7 +822,7 @@ SDValue VECustomDAG::createElementShift(EVT ResVT, SDValue Src, int Offset,
822822

823823
// vector shift
824824
EVT VecVT = Src.getValueType();
825-
assert(!isPackedType(VecVT) && "TODO implement");
825+
assert(!isPackedVectorType(VecVT) && "TODO implement");
826826
assert(!isMaskType(VecVT));
827827
return createVMV(ResVT, Src, getConstant(Offset, MVT::i32),
828828
createUniformConstMask(Packing::Normal,
@@ -1076,7 +1076,7 @@ SDValue VECustomDAG::createMaskCast(SDValue VectorV, SDValue AVL) const {
10761076
if (isMaskType(VectorV.getValueType()))
10771077
return VectorV;
10781078

1079-
if (isPackedType(VectorV.getValueType())) {
1079+
if (isPackedVectorType(VectorV.getValueType())) {
10801080
auto ValVT = VectorV.getValueType();
10811081
auto LoPart =
10821082
createUnpack(splitVectorType(ValVT), VectorV, PackElem::Lo, AVL);

llvm/lib/Target/VE/VECustomDAG.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ EVT splitType(EVT);
9292

9393
// Whether direct codegen for this type will result in a packed operation
9494
// (requiring a packed VL param..)
95-
bool isPackedType(EVT SomeVT);
95+
bool isPackedVectorType(EVT SomeVT);
9696
bool isMaskType(EVT VT);
9797
bool isPackedMaskType(EVT SomeVT);
9898
bool isOverPackedType(EVT VT);

llvm/lib/Target/VE/VEISelLowering.cpp

Lines changed: 1 addition & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,9 @@
1111
//
1212
//===----------------------------------------------------------------------===//
1313

14-
#include "VECustomDAG.h"
1514
#include "VEISelLowering.h"
1615
#include "MCTargetDesc/VEMCExpr.h"
17-
#include "VEISelLowering.h"
16+
#include "VECustomDAG.h"
1817
#include "VEInstrBuilder.h"
1918
#include "VEMachineFunctionInfo.h"
2019
#include "VERegisterInfo.h"
@@ -2906,43 +2905,6 @@ SDValue VETargetLowering::generateEquivalentSub(SDNode *N, bool Signed,
29062905
return Final;
29072906
}
29082907

2909-
/// This function is called when we have proved that a SETCC node can be
2910-
/// replaced by EQV/XOR+CMOV instead of CMP+LEA+CMOV
2911-
static SDValue generateEquivalentBitOp(SDNode *N, unsigned Cmp,
2912-
SelectionDAG &DAG) {
2913-
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
2914-
2915-
SDLoc DL(N);
2916-
auto Op0 = N->getOperand(0);
2917-
auto Op1 = N->getOperand(1);
2918-
EVT SrcVT = Op0.getValueType();
2919-
EVT VT = N->getValueType(0);
2920-
assert(SrcVT.isScalarInteger() &&
2921-
"Scalar integer is expected as inputs of ISD::SETCC.");
2922-
assert(VT == MVT::i32 && "i32 is expected as a result of ISD::SETCC.");
2923-
2924-
// Compare or equiv integers.
2925-
auto CmpNode = DAG.getNode(Cmp, DL, SrcVT, Op0, Op1);
2926-
2927-
// Adjust register size for CMOV's base register.
2928-
// CMOV cmp, 1, base (=cmp)
2929-
auto Base = CmpNode;
2930-
if (VT != SrcVT) {
2931-
// Cmp is equal to 0 iff it is used as base register, so safe to use
2932-
// INSERT_SUBREG/EXTRACT_SUBRAG.
2933-
SDValue Sub_i32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
2934-
Base = SDValue(
2935-
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, Base, Sub_i32),
2936-
0);
2937-
}
2938-
// Set 1 iff comparison result is not equal to 0.
2939-
auto Cmoved =
2940-
DAG.getNode(VEISD::CMOV, DL, VT, CmpNode, DAG.getConstant(1, DL, VT),
2941-
Base, DAG.getConstant(VECC::CC_INE, DL, MVT::i32));
2942-
2943-
return Cmoved;
2944-
}
2945-
29462908
/// This function is called when we have proved that a SETCC node can be
29472909
/// replaced by CMP+CMOV or CMP+LEA+CMOV.
29482910
SDValue VETargetLowering::generateEquivalentCmp(SDNode *N, bool UseCompAsBase,

llvm/lib/Target/VE/VEInstrInfo.td

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1721,6 +1721,7 @@ def f2l : OutPatFrag<(ops node:$exp),
17211721
def l2f : OutPatFrag<(ops node:$exp),
17221722
(EXTRACT_SUBREG $exp, sub_f32)>;
17231723

1724+
// Zero out subregisters.
17241725
def zero_i32 : OutPatFrag<(ops node:$expr),
17251726
(ANDrm $expr, 32)>;
17261727
def zero_f32 : OutPatFrag<(ops node:$expr),
@@ -2725,10 +2726,15 @@ def vec_pack : SDNode<"VEISD::VEC_PACK", SDTypeProfile<1, 3, [SDTCi
27252726
// exchange the odd-even pairs in a v512.32
27262727
def vec_swap : SDNode<"VEISD::VEC_SWAP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0,1>, IsVLVT<2>]>>;
27272728

2728-
// replicate lower 32bit to upper 32bit (f32 scalar replication)
2729-
def repl_f32 : SDNode<"VEISD::REPL_F32", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisFP<1>]>>;
2730-
// replicate upper 32bit to lower 32 bit (i32 scalar replication)
2731-
def repl_i32 : SDNode<"VEISD::REPL_I32", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>>;
2729+
// replicate upper 32bit to lower 32bit (f32 scalar replication).
2730+
def repl_f32 : SDNode<"VEISD::REPL_F32",
2731+
SDTypeProfile<1, 1,
2732+
[SDTCisInt<0>, SDTCisFP<1>]>>;
2733+
// replicate lower 32bit to upper 32bit (i32 scalar replication).
2734+
def repl_i32 : SDNode<"VEISD::REPL_I32",
2735+
SDTypeProfile<1, 1,
2736+
[SDTCisInt<0>, SDTCisInt<1>]>>;
2737+
27322738

27332739
// Whether this is an all-true mask (assuming undef-bits above VL are all-true).
27342740
def true_mask : PatLeaf<

llvm/lib/Target/VE/VEInstrPatternsVec.td

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ def: Pat<(i64 (repl_i32 i32:$val)),
2626
(SLLri (i2l $val), 32))>;
2727

2828

29-
3029
///// Mask insert, extract, popcount /////
3130

3231
// Mask Insert & Extract
@@ -238,11 +237,10 @@ defm : patterns_elem32<v256f32, f32, simm7fp, LO7FP, l2f, f2l>;
238237
defm : patterns_elem64<v256i64, i64, simm7, LO7>;
239238
defm : patterns_elem64<v256f64, f64, simm7fp, LO7FP>;
240239

241-
242-
def: Pat<(v512i32 (vec_broadcast i64:$val, i32:$vl)),
243-
(PVBRDrl $val, $vl)>;
244-
def: Pat<(v512f32 (vec_broadcast i64:$val, i32:$vl)),
245-
(PVBRDrl $val, $vl)>;
240+
defm : vbrd_elem64<v512i32, i64, simm7, LO7>;
241+
defm : vbrd_elem64<v512f32, i64, simm7, LO7>;
242+
defm : vbrd_elem64<v512i32, f64, simm7fp, LO7FP>;
243+
defm : vbrd_elem64<v512f32, f64, simm7fp, LO7FP>;
246244

247245

248246
///// vec_seq /////

llvm/lib/Target/VE/VVPISelLowering.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ static const MVT PackedVectorVTs[] = {MVT::v512i32, MVT::v512f32, MVT::v512f64,
431431
void VETargetLowering::initRegisterClasses_VVP() {
432432
// VVP-based backend.
433433
for (MVT VecVT : AllVectorVTs)
434-
if (!isPackedType(VecVT) || Subtarget->hasPackedMode())
434+
if (!isPackedVectorType(VecVT) || Subtarget->hasPackedMode())
435435
addRegisterClass(VecVT, &VE::V64RegClass);
436436

437437
addRegisterClass(MVT::v256i1, &VE::VMRegClass);
@@ -687,7 +687,7 @@ void VETargetLowering::initVPUActions() {
687687
ForAll_setOperationAction(VectorTransformOCs, MaskVT, Custom);
688688

689689
// Custom split packed mask operations.
690-
if (isPackedType(MaskVT))
690+
if (isPackedVectorType(MaskVT))
691691
ForAll_setOperationAction(IntArithOCs, MaskVT, Custom);
692692
}
693693

@@ -1747,15 +1747,15 @@ SDValue VETargetLowering::legalizeInternalLoadStoreOp(SDValue Op,
17471747
EVT DataVT = *getIdiomaticType(Op.getNode());
17481748

17491749
// Ignore the VLD mask as an optimization.
1750-
if (!isPackedType(DataVT) &&
1750+
if (!isPackedVectorType(DataVT) &&
17511751
(Op->getOpcode() == VEISD::VVP_LOAD && OptimizeVectorMemory)) {
17521752
auto AllTrueMask = CDAG.createUniformConstMask(MVT::v256i1, true);
17531753
return CDAG.getVVPLoad(Op.getValueType(), Op.getOperand(0),
17541754
Op.getOperand(1), Op.getOperand(2), AllTrueMask,
17551755
Op.getOperand(4));
17561756
}
17571757

1758-
if (!isPackedType(DataVT)) {
1758+
if (!isPackedVectorType(DataVT)) {
17591759
LLVM_DEBUG(dbgs() << "Legal!\n");
17601760
return Op;
17611761
}
@@ -1805,7 +1805,7 @@ SDValue VETargetLowering::legalizeVM_POPCOUNT(SDValue Op,
18051805
LLVM_DEBUG(dbgs() << "::LegalizeVM_POPCOUNT\n";);
18061806
auto Mask = Op->getOperand(0);
18071807
auto AVL = Op->getOperand(1);
1808-
if (!isPackedType(Mask.getValueType()))
1808+
if (!isPackedVectorType(Mask.getValueType()))
18091809
return Op;
18101810

18111811
VECustomDAG CDAG(*this, DAG, Op);
@@ -2464,7 +2464,7 @@ SDValue VETargetLowering::lowerVectorShuffleOp(SDValue Op, SelectionDAG &DAG,
24642464
if (Res)
24652465
return Res;
24662466

2467-
assert(isPackedType(LegalResVT) &&
2467+
assert(isPackedVectorType(LegalResVT) &&
24682468
"normal and over-packed EVTs should have been lowered by now!");
24692469
return splitVectorShuffle(Op, CDAG, Mode);
24702470
}

llvm/test/CodeGen/VE/Packed/broadcast_packed.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ define x86_regcallcc <1024 x i32> @brdv512i32x2(i32) {
1111
; CHECK-NEXT: or %s0, %s0, %s1
1212
; CHECK-NEXT: lea %s1, 129
1313
; CHECK-NEXT: lvl %s1
14-
; CHECK-NEXT: pvbrd %v0, %s0
14+
; CHECK-NEXT: vbrd %v0, %s0
1515
; CHECK-NEXT: lea %s1, 130
1616
; CHECK-NEXT: lvl %s1
17-
; CHECK-NEXT: pvbrd %v1, %s0
17+
; CHECK-NEXT: vbrd %v1, %s0
1818
; CHECK-NEXT: b.l.t (, %s10)
1919
%val = insertelement <1024 x i32> undef, i32 %0, i32 0
2020
%r0 = insertelement <1024 x i32> %val, i32 %0, i32 1
@@ -34,7 +34,7 @@ define x86_regcallcc <512 x i32> @brdv512i32(i32) {
3434
; CHECK-NEXT: or %s0, %s0, %s1
3535
; CHECK-NEXT: or %s1, 1, (0)1
3636
; CHECK-NEXT: lvl %s1
37-
; CHECK-NEXT: pvbrd %v0, %s0
37+
; CHECK-NEXT: vbrd %v0, %s0
3838
; CHECK-NEXT: b.l.t (, %s10)
3939
%val = insertelement <512 x i32> undef, i32 %0, i32 0
4040
%ret = insertelement <512 x i32> %val, i32 %0, i32 1
@@ -50,7 +50,7 @@ define x86_regcallcc <512 x float> @brdv512f32(float) {
5050
; CHECK-NEXT: or %s0, %s0, %s1
5151
; CHECK-NEXT: or %s1, 1, (0)1
5252
; CHECK-NEXT: lvl %s1
53-
; CHECK-NEXT: pvbrd %v0, %s0
53+
; CHECK-NEXT: vbrd %v0, %s0
5454
; CHECK-NEXT: b.l.t (, %s10)
5555
%val = insertelement <512 x float> undef, float %0, i32 0
5656
%ret = insertelement <512 x float> %val, float %0, i32 1
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
3+
4+
define fastcc <512 x i32> @brd_v512i32(i32 %s) {
5+
; CHECK-LABEL: brd_v512i32:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: and %s0, %s0, (32)0
8+
; CHECK-NEXT: sll %s1, %s0, 32
9+
; CHECK-NEXT: and %s0, %s0, (32)0
10+
; CHECK-NEXT: or %s0, %s0, %s1
11+
; CHECK-NEXT: lea %s1, 256
12+
; CHECK-NEXT: lvl %s1
13+
; CHECK-NEXT: vbrd %v0, %s0
14+
; CHECK-NEXT: b.l.t (, %s10)
15+
%val = insertelement <512 x i32> undef, i32 %s, i32 0
16+
%ret = shufflevector <512 x i32> %val, <512 x i32> undef, <512 x i32> zeroinitializer
17+
ret <512 x i32> %ret
18+
}
19+
20+
define fastcc <512 x i32> @brdi_v512i32() {
21+
; CHECK-LABEL: brdi_v512i32:
22+
; CHECK: # %bb.0:
23+
; CHECK-NEXT: or %s0, 17, (0)1
24+
; CHECK-NEXT: sll %s1, %s0, 32
25+
; CHECK-NEXT: and %s0, %s0, (32)0
26+
; CHECK-NEXT: or %s0, %s0, %s1
27+
; CHECK-NEXT: lea %s1, 256
28+
; CHECK-NEXT: lvl %s1
29+
; CHECK-NEXT: vbrd %v0, %s0
30+
; CHECK-NEXT: b.l.t (, %s10)
31+
%val = insertelement <512 x i32> undef, i32 17, i32 0
32+
%ret = shufflevector <512 x i32> %val, <512 x i32> undef, <512 x i32> zeroinitializer
33+
ret <512 x i32> %ret
34+
}
35+
36+
define fastcc <512 x float> @brd_v512f32(float %s) {
37+
; CHECK-LABEL: brd_v512f32:
38+
; CHECK: # %bb.0:
39+
; CHECK-NEXT: and %s1, %s0, (32)1
40+
; CHECK-NEXT: srl %s0, %s0, 32
41+
; CHECK-NEXT: or %s0, %s0, %s1
42+
; CHECK-NEXT: lea %s1, 256
43+
; CHECK-NEXT: lvl %s1
44+
; CHECK-NEXT: vbrd %v0, %s0
45+
; CHECK-NEXT: b.l.t (, %s10)
46+
%val = insertelement <512 x float> undef, float %s, i32 0
47+
%ret = shufflevector <512 x float> %val, <512 x float> undef, <512 x i32> zeroinitializer
48+
ret <512 x float> %ret
49+
}
50+
51+
define fastcc <512 x float> @brdi_v512f32() {
52+
; CHECK-LABEL: brdi_v512f32:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: lea.sl %s0, 0
55+
; CHECK-NEXT: and %s1, %s0, (32)1
56+
; CHECK-NEXT: srl %s0, %s0, 32
57+
; CHECK-NEXT: or %s0, %s0, %s1
58+
; CHECK-NEXT: lea %s1, 256
59+
; CHECK-NEXT: lvl %s1
60+
; CHECK-NEXT: vbrd %v0, %s0
61+
; CHECK-NEXT: b.l.t (, %s10)
62+
%val = insertelement <512 x float> undef, float 0.e+00, i32 0
63+
%ret = shufflevector <512 x float> %val, <512 x float> undef, <512 x i32> zeroinitializer
64+
ret <512 x float> %ret
65+
}

0 commit comments

Comments
 (0)