Skip to content

Commit 7931a8f

Browse files
authored
[VectorCombine] Scalarize vector intrinsics with scalar arguments (#146530)
Some intrinsics like llvm.abs or llvm.powi have a scalar argument even when the overloaded type is a vector. This patch handles these in scalarizeOpOrCmp to allow scalarizing them. In the test, the leftover vector powi isn't folded away to poison; this should be fixed in a separate patch.
1 parent 05ebb36 commit 7931a8f

File tree

2 files changed

+34
-31
lines changed

2 files changed

+34
-31
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 31 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,10 @@
1919
#include "llvm/ADT/Statistic.h"
2020
#include "llvm/Analysis/AssumptionCache.h"
2121
#include "llvm/Analysis/BasicAliasAnalysis.h"
22-
#include "llvm/Analysis/ConstantFolding.h"
2322
#include "llvm/Analysis/GlobalsModRef.h"
2423
#include "llvm/Analysis/InstSimplifyFolder.h"
2524
#include "llvm/Analysis/Loads.h"
25+
#include "llvm/Analysis/TargetFolder.h"
2626
#include "llvm/Analysis/TargetTransformInfo.h"
2727
#include "llvm/Analysis/ValueTracking.h"
2828
#include "llvm/Analysis/VectorUtils.h"
@@ -1093,12 +1093,14 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
10931093
return false;
10941094

10951095
// TODO: Allow intrinsics with different argument types
1096-
// TODO: Allow intrinsics with scalar arguments
1097-
if (II && (!isTriviallyVectorizable(II->getIntrinsicID()) ||
1098-
!all_of(II->args(), [&II](Value *Arg) {
1099-
return Arg->getType() == II->getType();
1100-
})))
1101-
return false;
1096+
if (II) {
1097+
if (!isTriviallyVectorizable(II->getIntrinsicID()))
1098+
return false;
1099+
for (auto [Idx, Arg] : enumerate(II->args()))
1100+
if (Arg->getType() != II->getType() &&
1101+
!isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Idx, &TTI))
1102+
return false;
1103+
}
11021104

11031105
// Do not convert the vector condition of a vector select into a scalar
11041106
// condition. That may cause problems for codegen because of differences in
@@ -1111,19 +1113,18 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
11111113

11121114
// Match constant vectors or scalars being inserted into constant vectors:
11131115
// vec_op [VecC0 | (inselt VecC0, V0, Index)], ...
1114-
SmallVector<Constant *> VecCs;
1115-
SmallVector<Value *> ScalarOps;
1116+
SmallVector<Value *> VecCs, ScalarOps;
11161117
std::optional<uint64_t> Index;
11171118

11181119
auto Ops = II ? II->args() : I.operands();
1119-
for (Value *Op : Ops) {
1120+
for (auto [OpNum, Op] : enumerate(Ops)) {
11201121
Constant *VecC;
11211122
Value *V;
11221123
uint64_t InsIdx = 0;
1123-
VectorType *OpTy = cast<VectorType>(Op->getType());
1124-
if (match(Op, m_InsertElt(m_Constant(VecC), m_Value(V),
1125-
m_ConstantInt(InsIdx)))) {
1124+
if (match(Op.get(), m_InsertElt(m_Constant(VecC), m_Value(V),
1125+
m_ConstantInt(InsIdx)))) {
11261126
// Bail if any inserts are out of bounds.
1127+
VectorType *OpTy = cast<VectorType>(Op->getType());
11271128
if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
11281129
return false;
11291130
// All inserts must have the same index.
@@ -1134,7 +1135,11 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
11341135
return false;
11351136
VecCs.push_back(VecC);
11361137
ScalarOps.push_back(V);
1137-
} else if (match(Op, m_Constant(VecC))) {
1138+
} else if (II && isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1139+
OpNum, &TTI)) {
1140+
VecCs.push_back(Op.get());
1141+
ScalarOps.push_back(Op.get());
1142+
} else if (match(Op.get(), m_Constant(VecC))) {
11381143
VecCs.push_back(VecC);
11391144
ScalarOps.push_back(nullptr);
11401145
} else {
@@ -1178,25 +1183,27 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
11781183
// Fold the vector constants in the original vectors into a new base vector to
11791184
// get more accurate cost modelling.
11801185
Value *NewVecC = nullptr;
1186+
TargetFolder Folder(*DL);
11811187
if (CI)
1182-
NewVecC = ConstantFoldCompareInstOperands(CI->getPredicate(), VecCs[0],
1183-
VecCs[1], *DL);
1188+
NewVecC = Folder.FoldCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
11841189
else if (UO)
1185-
NewVecC = ConstantFoldUnaryOpOperand(Opcode, VecCs[0], *DL);
1190+
NewVecC =
1191+
Folder.FoldUnOpFMF(UO->getOpcode(), VecCs[0], UO->getFastMathFlags());
11861192
else if (BO)
1187-
NewVecC = ConstantFoldBinaryOpOperands(Opcode, VecCs[0], VecCs[1], *DL);
1193+
NewVecC = Folder.FoldBinOp(BO->getOpcode(), VecCs[0], VecCs[1]);
11881194
else if (II->arg_size() == 2)
1189-
NewVecC = ConstantFoldBinaryIntrinsic(II->getIntrinsicID(), VecCs[0],
1190-
VecCs[1], II->getType(), II);
1195+
NewVecC = Folder.FoldBinaryIntrinsic(II->getIntrinsicID(), VecCs[0],
1196+
VecCs[1], II->getType(), &I);
11911197

11921198
// Get cost estimate for the insert element. This cost will factor into
11931199
// both sequences.
11941200
InstructionCost OldCost = VectorOpCost;
11951201
InstructionCost NewCost =
11961202
ScalarOpCost + TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
11971203
CostKind, *Index, NewVecC);
1198-
for (auto [Op, VecC, Scalar] : zip(Ops, VecCs, ScalarOps)) {
1199-
if (!Scalar)
1204+
for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
1205+
if (!Scalar || (II && isVectorIntrinsicWithScalarOpAtArg(
1206+
II->getIntrinsicID(), Idx, &TTI)))
12001207
continue;
12011208
InstructionCost InsertCost = TTI.getVectorInstrCost(
12021209
Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
@@ -1240,16 +1247,12 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
12401247

12411248
// Create a new base vector if the constant folding failed.
12421249
if (!NewVecC) {
1243-
SmallVector<Value *> VecCValues;
1244-
VecCValues.reserve(VecCs.size());
1245-
append_range(VecCValues, VecCs);
12461250
if (CI)
12471251
NewVecC = Builder.CreateCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
12481252
else if (UO || BO)
1249-
NewVecC = Builder.CreateNAryOp(Opcode, VecCValues);
1253+
NewVecC = Builder.CreateNAryOp(Opcode, VecCs);
12501254
else
1251-
NewVecC =
1252-
Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), VecCValues);
1255+
NewVecC = Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), VecCs);
12531256
}
12541257
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, *Index);
12551258
replaceValue(I, *Insert);

llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -152,12 +152,12 @@ define <vscale x 4 x float> @fma_scalable(float %x, float %y, float %z) {
152152
ret <vscale x 4 x float> %v
153153
}
154154

155-
; TODO: We should be able to scalarize this if we preserve the scalar argument.
156155
define <4 x float> @scalar_argument(float %x) {
157156
; CHECK-LABEL: define <4 x float> @scalar_argument(
158157
; CHECK-SAME: float [[X:%.*]]) {
159-
; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i32 0
160-
; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[X_INSERT]], i32 42)
158+
; CHECK-NEXT: [[V_SCALAR:%.*]] = call float @llvm.powi.f32.i32(float [[X]], i32 42)
159+
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 42)
160+
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x float> [[TMP1]], float [[V_SCALAR]], i64 0
161161
; CHECK-NEXT: ret <4 x float> [[V]]
162162
;
163163
%x.insert = insertelement <4 x float> poison, float %x, i32 0

0 commit comments

Comments
 (0)