```diff
@@ -19,10 +19,10 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstSimplifyFolder.h"
 #include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
```
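This include swap mirrors the body change further down: the free `ConstantFold*` helpers from `ConstantFolding.h` give way to a `TargetFolder`. As a minimal sketch (my illustration, not part of the patch), the folder is constructed from the module's `DataLayout`, and each `Fold*` call returns either the folded value or `nullptr`:

```cpp
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Hypothetical helper: try to fold `L + R`. A nullptr result means the
// operands didn't fold, which is exactly the case the patch handles later
// by falling back to the IRBuilder.
Value *tryFoldAdd(const DataLayout &DL, Value *L, Value *R) {
  TargetFolder Folder(DL);
  return Folder.FoldBinOp(Instruction::Add, L, R);
}
```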
```diff
@@ -1093,12 +1093,14 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
     return false;
 
   // TODO: Allow intrinsics with different argument types
-  // TODO: Allow intrinsics with scalar arguments
-  if (II && (!isTriviallyVectorizable(II->getIntrinsicID()) ||
-             !all_of(II->args(), [&II](Value *Arg) {
-               return Arg->getType() == II->getType();
-             })))
-    return false;
+  if (II) {
+    if (!isTriviallyVectorizable(II->getIntrinsicID()))
+      return false;
+    for (auto [Idx, Arg] : enumerate(II->args()))
+      if (Arg->getType() != II->getType() &&
+          !isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Idx, &TTI))
+        return false;
+  }
 
   // Do not convert the vector condition of a vector select into a scalar
   // condition. That may cause problems for codegen because of differences in
```
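The old guard rejected any intrinsic whose argument type differed from the result type; the rewrite keeps the `isTriviallyVectorizable` check but lets mismatched operands through when the intrinsic declares them scalar. A hedged example (`llvm.powi` is my choice of illustration; the patch doesn't single out an intrinsic): powi's exponent stays a scalar `i32` even for vector bases, which the helper below should report:

```cpp
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Illustrative check: operand 1 of llvm.powi (the exponent) is defined as a
// scalar operand of the vector intrinsic, so the new per-operand loop no
// longer bails on it.
bool powiExponentIsScalar(const TargetTransformInfo *TTI) {
  return isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi,
                                            /*ScalarOpdIdx=*/1, TTI);
}
```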
```diff
@@ -1111,19 +1113,18 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
 
   // Match constant vectors or scalars being inserted into constant vectors:
   // vec_op [VecC0 | (inselt VecC0, V0, Index)], ...
-  SmallVector<Constant *> VecCs;
-  SmallVector<Value *> ScalarOps;
+  SmallVector<Value *> VecCs, ScalarOps;
   std::optional<uint64_t> Index;
 
   auto Ops = II ? II->args() : I.operands();
-  for (Value *Op : Ops) {
+  for (auto [OpNum, Op] : enumerate(Ops)) {
     Constant *VecC;
     Value *V;
     uint64_t InsIdx = 0;
-    VectorType *OpTy = cast<VectorType>(Op->getType());
-    if (match(Op, m_InsertElt(m_Constant(VecC), m_Value(V),
-                              m_ConstantInt(InsIdx)))) {
+    if (match(Op.get(), m_InsertElt(m_Constant(VecC), m_Value(V),
+                                    m_ConstantInt(InsIdx)))) {
       // Bail if any inserts are out of bounds.
+      VectorType *OpTy = cast<VectorType>(Op->getType());
       if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
         return false;
       // All inserts must have the same index.
```
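A note on the `Op.get()` calls introduced here: `enumerate(Ops)` iterates the use list, so `Op` is a `Use` rather than a `Value *`, and the matcher needs the underlying value. A self-contained sketch of the same idiom (assuming the usual LLVM headers):

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Illustrative only: walk operands with their indices and peel
// `insertelement VecC, V, InsIdx` operands into their three parts.
void walkOperands(Instruction &I) {
  for (auto [OpNum, Op] : enumerate(I.operands())) {
    Constant *VecC;
    Value *V;
    uint64_t InsIdx = 0;
    if (match(Op.get(), m_InsertElt(m_Constant(VecC), m_Value(V),
                                    m_ConstantInt(InsIdx)))) {
      // OpNum, VecC, V, and InsIdx now describe the peeled operand.
    }
  }
}
```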
```diff
@@ -1134,7 +1135,11 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
         return false;
       VecCs.push_back(VecC);
       ScalarOps.push_back(V);
-    } else if (match(Op, m_Constant(VecC))) {
+    } else if (II && isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
+                                                        OpNum, &TTI)) {
+      VecCs.push_back(Op.get());
+      ScalarOps.push_back(Op.get());
+    } else if (match(Op.get(), m_Constant(VecC))) {
       VecCs.push_back(VecC);
       ScalarOps.push_back(nullptr);
     } else {
```
```diff
@@ -1178,25 +1183,27 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
   // Fold the vector constants in the original vectors into a new base vector to
   // get more accurate cost modelling.
   Value *NewVecC = nullptr;
+  TargetFolder Folder(*DL);
   if (CI)
-    NewVecC = ConstantFoldCompareInstOperands(CI->getPredicate(), VecCs[0],
-                                              VecCs[1], *DL);
+    NewVecC = Folder.FoldCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
   else if (UO)
-    NewVecC = ConstantFoldUnaryOpOperand(Opcode, VecCs[0], *DL);
+    NewVecC =
+        Folder.FoldUnOpFMF(UO->getOpcode(), VecCs[0], UO->getFastMathFlags());
   else if (BO)
-    NewVecC = ConstantFoldBinaryOpOperands(Opcode, VecCs[0], VecCs[1], *DL);
+    NewVecC = Folder.FoldBinOp(BO->getOpcode(), VecCs[0], VecCs[1]);
   else if (II->arg_size() == 2)
-    NewVecC = ConstantFoldBinaryIntrinsic(II->getIntrinsicID(), VecCs[0],
-                                          VecCs[1], II->getType(), II);
+    NewVecC = Folder.FoldBinaryIntrinsic(II->getIntrinsicID(), VecCs[0],
+                                         VecCs[1], II->getType(), &I);
 
   // Get cost estimate for the insert element. This cost will factor into
   // both sequences.
   InstructionCost OldCost = VectorOpCost;
   InstructionCost NewCost =
       ScalarOpCost + TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
                                             CostKind, *Index, NewVecC);
-  for (auto [Op, VecC, Scalar] : zip(Ops, VecCs, ScalarOps)) {
-    if (!Scalar)
+  for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
+    if (!Scalar || (II && isVectorIntrinsicWithScalarOpAtArg(
+                              II->getIntrinsicID(), Idx, &TTI)))
       continue;
     InstructionCost InsertCost = TTI.getVectorInstrCost(
         Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
```
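The cost loop switches from `zip()` to the multi-range form of `llvm::enumerate`, which zips the ranges and prepends the element index; that index is what lets the loop skip insertelement costs for operands the intrinsic already takes as scalars. A small standalone sketch of the idiom:

```cpp
#include "llvm/ADT/STLExtras.h"
#include <vector>

// Illustrative only: multi-range enumerate() yields (index, elem0, elem1)
// tuples, i.e. zip() plus a running counter.
int sumWithIndex(const std::vector<int> &A, const std::vector<int> &B) {
  int Sum = 0;
  for (auto [Idx, X, Y] : llvm::enumerate(A, B))
    Sum += static_cast<int>(Idx) + X + Y;
  return Sum;
}
```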
```diff
@@ -1240,16 +1247,12 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
 
   // Create a new base vector if the constant folding failed.
   if (!NewVecC) {
-    SmallVector<Value *> VecCValues;
-    VecCValues.reserve(VecCs.size());
-    append_range(VecCValues, VecCs);
     if (CI)
       NewVecC = Builder.CreateCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
     else if (UO || BO)
-      NewVecC = Builder.CreateNAryOp(Opcode, VecCValues);
+      NewVecC = Builder.CreateNAryOp(Opcode, VecCs);
     else
-      NewVecC =
-          Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), VecCValues);
+      NewVecC = Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), VecCs);
   }
   Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, *Index);
   replaceValue(I, *Insert);
```
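Taken together, the folder hunk and this fallback form a try-fold-then-build pattern: the `TargetFolder` produces `NewVecC` when the peeled bases fold to a constant, and the `IRBuilder` materializes the vector op only when they don't. A condensed sketch of that shape (simplified from the code above; the helper name is mine):

```cpp
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Illustrative only: constant-fold if possible, otherwise emit the
// instruction, mirroring how scalarizeOpOrCmp builds its new base vector.
Value *foldOrCreateBinOp(IRBuilderBase &Builder, const DataLayout &DL,
                         Instruction::BinaryOps Opc, Value *L, Value *R) {
  TargetFolder Folder(DL);
  if (Value *Folded = Folder.FoldBinOp(Opc, L, R))
    return Folded;
  return Builder.CreateBinOp(Opc, L, R);
}
```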