
Commit 8734f0e

Author: Simon Moll (committed)
[VE] Backport strided load/store codegen to hpce/develop
1 parent 3094f96 commit 8734f0e

File tree

7 files changed, +425 -80 lines


llvm/lib/IR/IntrinsicInst.cpp

Lines changed: 6 additions & 0 deletions

@@ -618,10 +618,14 @@ static VPIntrinsic::ShortTypeVec getVPIntrinsicTypes(Intrinsic::ID ID,
     return VPIntrinsic::ShortTypeVec{VectorTy};
 
   case Intrinsic::experimental_vp_strided_load:
+    return VPIntrinsic::ShortTypeVec{VecRetTy,
+                                     Type::getInt64Ty(VectorTy->getContext())};
   case Intrinsic::vp_gather:
   case Intrinsic::vp_load:
     return VPIntrinsic::ShortTypeVec{VecRetTy, VecPtrTy};
   case Intrinsic::experimental_vp_strided_store:
+    return VPIntrinsic::ShortTypeVec{VectorTy,
+                                     Type::getInt64Ty(VectorTy->getContext())};
   case Intrinsic::vp_scatter:
   case Intrinsic::vp_store:
     return VPIntrinsic::ShortTypeVec{VectorTy, VecPtrTy};

@@ -657,6 +661,8 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
       (VPID == Intrinsic::experimental_vp_splice);
   bool IsMemoryOp =
       (VPID == Intrinsic::vp_store) || (VPID == Intrinsic::vp_load) ||
+      (VPID == Intrinsic::experimental_vp_strided_store) ||
+      (VPID == Intrinsic::experimental_vp_strided_load) ||
       (VPID == Intrinsic::vp_scatter) || (VPID == Intrinsic::vp_gather);
   bool IsCastOp =
       (VPID == Intrinsic::vp_fptosi) || (VPID == Intrinsic::vp_fptoui) ||
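
The ShortTypeVec entries above are the overload types used to mangle the intrinsic name when a declaration is materialized. As a minimal sketch (not part of this commit; the helper name declareStridedLoad is hypothetical), a declaration overloaded on the result vector type and an i64 stride, matching the patch, could be requested like this:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Hypothetical helper, not from this commit: declare
// llvm.experimental.vp.strided.load overloaded on the result vector type and
// an i64 stride, mirroring the ShortTypeVec built in getVPIntrinsicTypes.
static Function *declareStridedLoad(Module &M, VectorType *VecTy) {
  Type *StrideTy = Type::getInt64Ty(M.getContext());
  return Intrinsic::getDeclaration(&M, Intrinsic::experimental_vp_strided_load,
                                   {VecTy, StrideTy});
}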

llvm/lib/Target/VE/CustomDAG.cpp

Lines changed: 4 additions & 3 deletions

@@ -580,6 +580,10 @@ Optional<unsigned> getVVPForVP(unsigned VPOC) {
 }
 
 Optional<EVT> getIdiomaticType(SDNode *Op) {
+  // For memory ops -> the transfered data type
+  if (auto MemN = dyn_cast<MemSDNode>(Op))
+    return MemN->getMemoryVT();
+
   // For reductions -> the reduced vector type
   PosOpt RedVecPos = getReductionVectorParamPos(Op->getOpcode());
   if (RedVecPos)

@@ -597,9 +601,6 @@ Optional<EVT> getIdiomaticType(SDNode *Op) {
   // translation code.
   switch (OC) {
   default:
-    // For memory ops -> the transfered data type
-    if (auto MemN = dyn_cast<MemSDNode>(Op))
-      return MemN->getMemoryVT();
    return None;
 
  // Standard ISD.

llvm/lib/Target/VE/VEISelLowering.cpp

Lines changed: 75 additions & 75 deletions

@@ -125,6 +125,81 @@ void VETargetLowering::initRegisterClasses() {
   }
 }
 
+SDValue
+VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+                              bool IsVarArg,
+                              const SmallVectorImpl<ISD::OutputArg> &Outs,
+                              const SmallVectorImpl<SDValue> &OutVals,
+                              const SDLoc &DL, SelectionDAG &DAG) const {
+  // CCValAssign - represent the assignment of the return value to locations.
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slot.
+  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+                 *DAG.getContext());
+
+  // Analyze return values.
+  CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
+
+  SDValue Flag;
+  SmallVector<SDValue, 4> RetOps(1, Chain);
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    assert(!VA.needsCustom() && "Unexpected custom lowering");
+    SDValue OutVal = OutVals[i];
+
+    // Integer return values must be sign or zero extended by the callee.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    case CCValAssign::ZExt:
+      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    case CCValAssign::AExt:
+      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    case CCValAssign::BCvt: {
+      // Convert a float return value to i64 with padding.
+      //     63     31   0
+      //    +------+------+
+      //    | float|   0  |
+      //    +------+------+
+      assert(VA.getLocVT() == MVT::i64);
+      assert(VA.getValVT() == MVT::f32);
+      SDValue Undef = SDValue(
+          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
+      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
+      OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+                                          MVT::i64, Undef, OutVal, Sub_f32),
+                       0);
+      break;
+    }
+    default:
+      llvm_unreachable("Unknown loc info!");
+    }
+
+    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
+
+    // Guarantee that all emitted copies are stuck together with flags.
+    Flag = Chain.getValue(1);
+    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+  }
+
+  RetOps[0] = Chain; // Update chain.
+
+  // Add the flag if we have it.
+  if (Flag.getNode())
+    RetOps.push_back(Flag);
+
+  return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
+}
+
 SDValue VETargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,

@@ -590,81 +665,6 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   return Chain;
 }
 
-SDValue
-VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
-                              bool IsVarArg,
-                              const SmallVectorImpl<ISD::OutputArg> &Outs,
-                              const SmallVectorImpl<SDValue> &OutVals,
-                              const SDLoc &DL, SelectionDAG &DAG) const {
-  // CCValAssign - represent the assignment of the return value to locations.
-  SmallVector<CCValAssign, 16> RVLocs;
-
-  // CCState - Info about the registers and stack slot.
-  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
-                 *DAG.getContext());
-
-  // Analyze return values.
-  CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
-
-  SDValue Flag;
-  SmallVector<SDValue, 4> RetOps(1, Chain);
-
-  // Copy the result values into the output registers.
-  for (unsigned i = 0; i != RVLocs.size(); ++i) {
-    CCValAssign &VA = RVLocs[i];
-    assert(VA.isRegLoc() && "Can only return in registers!");
-    assert(!VA.needsCustom() && "Unexpected custom lowering");
-    SDValue OutVal = OutVals[i];
-
-    // Integer return values must be sign or zero extended by the callee.
-    switch (VA.getLocInfo()) {
-    case CCValAssign::Full:
-      break;
-    case CCValAssign::SExt:
-      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
-      break;
-    case CCValAssign::ZExt:
-      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
-      break;
-    case CCValAssign::AExt:
-      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
-      break;
-    case CCValAssign::BCvt: {
-      // Convert a float return value to i64 with padding.
-      //     63     31   0
-      //    +------+------+
-      //    | float|   0  |
-      //    +------+------+
-      assert(VA.getLocVT() == MVT::i64);
-      assert(VA.getValVT() == MVT::f32);
-      SDValue Undef = SDValue(
-          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
-      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
-      OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
-                                          MVT::i64, Undef, OutVal, Sub_f32),
-                       0);
-      break;
-    }
-    default:
-      llvm_unreachable("Unknown loc info!");
-    }
-
-    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
-
-    // Guarantee that all emitted copies are stuck together with flags.
-    Flag = Chain.getValue(1);
-    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
-  }
-
-  RetOps[0] = Chain; // Update chain.
-
-  // Add the flag if we have it.
-  if (Flag.getNode())
-    RetOps.push_back(Flag);
-
-  return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
-}
-
 bool VETargetLowering::isOffsetFoldingLegal(
     const GlobalAddressSDNode *GA) const {
   // VE uses 64 bit addressing, so we need multiple instructions to generate

llvm/lib/Target/VE/VVPISelLowering.cpp

Lines changed: 10 additions & 2 deletions

@@ -214,6 +214,11 @@ static SDValue getLoadStoreStride(SDValue Op, CustomDAG &CDAG) {
     return Op->getOperand(2);
   }
 
+  if (auto *StoreN = dyn_cast<VPStridedStoreSDNode>(Op.getNode()))
+    return StoreN->getStride();
+  if (auto *StoreN = dyn_cast<VPStridedLoadSDNode>(Op.getNode()))
+    return StoreN->getStride();
+
   if (isa<MemSDNode>(Op.getNode())) {
     // Regular MLOAD/MSTORE/LOAD/STORE
     // No stride argument -> use the contiguous element size as stride.

@@ -239,6 +244,8 @@ static SDValue getStoredValue(SDValue Op) {
   if (auto *StoreN = dyn_cast<VPStoreSDNode>(Op.getNode())) {
     return StoreN->getValue();
   }
+  if (auto *StoreN = dyn_cast<VPStridedStoreSDNode>(Op.getNode()))
+    return StoreN->getValue();
   if (auto *StoreN = dyn_cast<MaskedScatterSDNode>(Op.getNode())) {
     return StoreN->getValue();
   }

@@ -2089,6 +2096,8 @@ SDValue VETargetLowering::lowerVPToVVP(SDValue Op, SelectionDAG &DAG,
 
   case ISD::VP_LOAD:
   case ISD::VP_STORE:
+  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
     return lowerVVP_MLOAD_MSTORE(Op, DAG, VVPExpansionMode::ToNativeWidth);
 
   case ISD::VP_GATHER:

@@ -2207,8 +2216,7 @@ SDValue VETargetLowering::lowerVVP_MLOAD_MSTORE(SDValue Op, SelectionDAG &DAG,
     Mask = CDAG.createUniformConstMask(P, true);
   }
 
-  uint64_t ElemBytes = LegalDataVT.getVectorElementType().getStoreSize();
-  auto StrideV = CDAG.getConstant(ElemBytes, MVT::i64);
+  auto StrideV = getLoadStoreStride(Op, CDAG);
 
   if (IsLoad) {
     // Emit.
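
For context, a strided VP load that reaches this lowering path could be emitted at the IR level roughly as sketched below. This is not code from the commit; the operand order (pointer, stride, mask, evl) follows the upstream definition of llvm.experimental.vp.strided.load and is treated as an assumption here.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

// Sketch only: emit llvm.experimental.vp.strided.load so that instruction
// selection produces the strided VP SDNode whose stride operand is picked up
// by getLoadStoreStride above. The operand order is an assumption.
static Value *emitStridedVPLoad(IRBuilder<> &B, VectorType *VecTy, Value *Ptr,
                                Value *StrideI64, Value *Mask, Value *EVL) {
  Type *StrideTy = Type::getInt64Ty(B.getContext());
  return B.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
                           {VecTy, StrideTy}, {Ptr, StrideI64, Mask, EVL});
}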

llvm/lib/Target/VE/VVPNodes.def

Lines changed: 2 additions & 0 deletions

@@ -117,6 +117,8 @@ ADD_VVP_OP(VVP_SCATTER, MSCATTER) HANDLE_VP_TO_VVP(VP_SCATTER, VVP_SCATTER)
 
 ADD_VVP_OP(VVP_LOAD,LOAD) HANDLE_VP_TO_VVP(VP_LOAD, VVP_LOAD) REGISTER_PACKED(VVP_LOAD)
 ADD_VVP_OP(VVP_STORE,STORE) HANDLE_VP_TO_VVP(VP_STORE, VVP_STORE) REGISTER_PACKED(VVP_STORE)
+HANDLE_VP_TO_VVP(EXPERIMENTAL_VP_STRIDED_LOAD, VVP_LOAD)
+HANDLE_VP_TO_VVP(EXPERIMENTAL_VP_STRIDED_STORE, VVP_STORE)
 
 // standard SIMD operators
 // int arith
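
The two new HANDLE_VP_TO_VVP entries route the strided VP opcodes onto the existing VVP_LOAD/VVP_STORE lowering. A minimal sketch of how such a .def table is conventionally consumed follows (it mirrors the X-macro pattern behind getVVPForVP in CustomDAG.cpp; the function name mapVPToVVP is illustrative, and it assumes VVPNodes.def supplies empty defaults for the other macros, as LLVM .def files usually do):

#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "VEISelLowering.h" // assumed to declare the VEISD::VVP_* opcodes

// Each HANDLE_VP_TO_VVP(VPOPC, VVPNAME) entry in the table expands to one
// case, so the new EXPERIMENTAL_VP_STRIDED_* opcodes now map to VVP_LOAD and
// VVP_STORE respectively.
static llvm::Optional<unsigned> mapVPToVVP(unsigned VPOpcode) {
  switch (VPOpcode) {
  default:
    return llvm::None;
#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME)                                       \
  case llvm::ISD::VPOPC:                                                       \
    return llvm::VEISD::VVPNAME;
#include "VVPNodes.def"
  }
}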
