Skip to content

Commit be7a107

Browse files
committed
[ARM] Teach the Arm cost model that a Shift can be folded into other instructions
This attempts to teach the cost model in Arm that code such as:
  %s = shl i32 %a, 3
  %r = and i32 %s, %b
can, under Arm or Thumb2, become:
  and r0, r1, r2, lsl #3
so the cost of the shift can essentially be free.

To do this without trying to artificially adjust the cost of the "and" instruction, it needs to get the users of the shl and check if they are a type of instruction that the shift can be folded into. And so it needs to have access to the actual instruction in getArithmeticInstrCost, which, if available, is added as an extra parameter, much like getCastInstrCost.

We otherwise limit it to shifts with a single user, which should hopefully handle most of the cases. The list of instructions that the shift can be folded into includes ADC, ADD, AND, BIC, CMP, EOR, MVN, ORR, ORN, RSB, SBC and SUB. This translates to Add, Sub, And, Or, Xor and ICmp.

Differential Revision: https://reviews.llvm.org/D70966
1 parent f008b5b commit be7a107

28 files changed

+157
-102
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -901,12 +901,15 @@ class TargetTransformInfo {
901901
/// \p Args is an optional argument which holds the instruction operands
902902
/// values so the TTI can analyze those values searching for special
903903
/// cases or optimizations based on those values.
904+
/// \p CxtI is the optional original context instruction, if one exists, to
905+
/// provide even more information.
904906
int getArithmeticInstrCost(
905907
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
906908
OperandValueKind Opd2Info = OK_AnyValue,
907909
OperandValueProperties Opd1PropInfo = OP_None,
908910
OperandValueProperties Opd2PropInfo = OP_None,
909-
ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
911+
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
912+
const Instruction *CxtI = nullptr) const;
910913

911914
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
912915
/// The index and subtype parameters are used by the subvector insertion and
@@ -1309,12 +1312,11 @@ class TargetTransformInfo::Concept {
13091312
virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
13101313

13111314
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1312-
virtual unsigned
1313-
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1314-
OperandValueKind Opd2Info,
1315-
OperandValueProperties Opd1PropInfo,
1316-
OperandValueProperties Opd2PropInfo,
1317-
ArrayRef<const Value *> Args) = 0;
1315+
virtual unsigned getArithmeticInstrCost(
1316+
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1317+
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
1318+
OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
1319+
const Instruction *CxtI = nullptr) = 0;
13181320
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
13191321
Type *SubTp) = 0;
13201322
virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -1709,14 +1711,15 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
17091711
BlockFrequencyInfo *BFI) override {
17101712
return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
17111713
}
1712-
unsigned
1713-
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1714-
OperandValueKind Opd2Info,
1715-
OperandValueProperties Opd1PropInfo,
1716-
OperandValueProperties Opd2PropInfo,
1717-
ArrayRef<const Value *> Args) override {
1714+
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
1715+
OperandValueKind Opd1Info,
1716+
OperandValueKind Opd2Info,
1717+
OperandValueProperties Opd1PropInfo,
1718+
OperandValueProperties Opd2PropInfo,
1719+
ArrayRef<const Value *> Args,
1720+
const Instruction *CxtI = nullptr) override {
17181721
return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1719-
Opd1PropInfo, Opd2PropInfo, Args);
1722+
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
17201723
}
17211724
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
17221725
Type *SubTp) override {

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,8 @@ class TargetTransformInfoImplBase {
430430
TTI::OperandValueKind Opd2Info,
431431
TTI::OperandValueProperties Opd1PropInfo,
432432
TTI::OperandValueProperties Opd2PropInfo,
433-
ArrayRef<const Value *> Args) {
433+
ArrayRef<const Value *> Args,
434+
const Instruction *CxtI = nullptr) {
434435
return 1;
435436
}
436437

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
633633
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
634634
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
635635
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
636-
ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
636+
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
637+
const Instruction *CxtI = nullptr) {
637638
// Check if any of the operands are vector operands.
638639
const TargetLoweringBase *TLI = getTLI();
639640
int ISD = TLI->InstructionOpcodeToISD(Opcode);

llvm/include/llvm/IR/Instruction.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ class Instruction : public User,
129129
bool isUnaryOp() const { return isUnaryOp(getOpcode()); }
130130
bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
131131
bool isIntDivRem() const { return isIntDivRem(getOpcode()); }
132-
bool isShift() { return isShift(getOpcode()); }
132+
bool isShift() const { return isShift(getOpcode()); }
133133
bool isCast() const { return isCast(getOpcode()); }
134134
bool isFuncletPad() const { return isFuncletPad(getOpcode()); }
135135
bool isExceptionalTerminator() const {

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -592,10 +592,10 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
592592
int TargetTransformInfo::getArithmeticInstrCost(
593593
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
594594
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
595-
OperandValueProperties Opd2PropInfo,
596-
ArrayRef<const Value *> Args) const {
597-
int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
598-
Opd1PropInfo, Opd2PropInfo, Args);
595+
OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
596+
const Instruction *CxtI) const {
597+
int Cost = TTIImpl->getArithmeticInstrCost(
598+
Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI);
599599
assert(Cost >= 0 && "TTI should not produce negative costs!");
600600
return Cost;
601601
}
@@ -1183,7 +1183,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
11831183
Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
11841184
SmallVector<const Value *, 2> Operands(I->operand_values());
11851185
return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
1186-
Op1VP, Op2VP, Operands);
1186+
Op1VP, Op2VP, Operands, I);
11871187
}
11881188
case Instruction::FNeg: {
11891189
TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
@@ -1193,7 +1193,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
11931193
Op2VP = OP_None;
11941194
SmallVector<const Value *, 2> Operands(I->operand_values());
11951195
return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
1196-
Op1VP, Op2VP, Operands);
1196+
Op1VP, Op2VP, Operands, I);
11971197
}
11981198
case Instruction::Select: {
11991199
const SelectInst *SI = cast<SelectInst>(I);

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,8 @@ int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
484484
int AArch64TTIImpl::getArithmeticInstrCost(
485485
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
486486
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
487-
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
487+
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
488+
const Instruction *CxtI) {
488489
// Legalize the type.
489490
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
490491

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
124124
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
125125
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
126126
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
127-
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
127+
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
128+
const Instruction *CxtI = nullptr);
128129

129130
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
130131

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -338,10 +338,13 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
338338
}
339339
}
340340

341-
int GCNTTIImpl::getArithmeticInstrCost(
342-
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
343-
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
344-
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) {
341+
int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
342+
TTI::OperandValueKind Opd1Info,
343+
TTI::OperandValueKind Opd2Info,
344+
TTI::OperandValueProperties Opd1PropInfo,
345+
TTI::OperandValueProperties Opd2PropInfo,
346+
ArrayRef<const Value *> Args,
347+
const Instruction *CxtI) {
345348
EVT OrigTy = TLI->getValueType(DL, Ty);
346349
if (!OrigTy.isSimple()) {
347350
return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
@@ -801,7 +804,7 @@ unsigned GCNTTIImpl::getUserCost(const User *U,
801804
case Instruction::FNeg: {
802805
return getArithmeticInstrCost(I->getOpcode(), I->getType(),
803806
TTI::OK_AnyValue, TTI::OK_AnyValue,
804-
TTI::OP_None, TTI::OP_None, Operands);
807+
TTI::OP_None, TTI::OP_None, Operands, I);
805808
}
806809
default:
807810
break;

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -172,12 +172,13 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
172172
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
173173

174174
int getArithmeticInstrCost(
175-
unsigned Opcode, Type *Ty,
176-
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
177-
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
178-
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
179-
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
180-
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
175+
unsigned Opcode, Type *Ty,
176+
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
177+
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
178+
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
179+
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
180+
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
181+
const Instruction *CxtI = nullptr);
181182

182183
unsigned getCFInstrCost(unsigned Opcode);
183184

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -642,11 +642,13 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
642642
return BaseCost * BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
643643
}
644644

645-
int ARMTTIImpl::getArithmeticInstrCost(
646-
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
647-
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
648-
TTI::OperandValueProperties Opd2PropInfo,
649-
ArrayRef<const Value *> Args) {
645+
int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
646+
TTI::OperandValueKind Op1Info,
647+
TTI::OperandValueKind Op2Info,
648+
TTI::OperandValueProperties Opd1PropInfo,
649+
TTI::OperandValueProperties Opd2PropInfo,
650+
ArrayRef<const Value *> Args,
651+
const Instruction *CxtI) {
650652
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
651653
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
652654

@@ -714,6 +716,33 @@ int ARMTTIImpl::getArithmeticInstrCost(
714716
return Cost;
715717
}
716718

719+
// If this operation is a shift on arm/thumb2, it might well be folded into
720+
// the following instruction, hence having a cost of 0.
721+
auto LooksLikeAFreeShift = [&]() {
722+
if (ST->isThumb1Only() || Ty->isVectorTy())
723+
return false;
724+
725+
if (!CxtI || !CxtI->hasOneUse() || !CxtI->isShift())
726+
return false;
727+
if (Op2Info != TargetTransformInfo::OK_UniformConstantValue)
728+
return false;
729+
730+
// Folded into an ADC/ADD/AND/BIC/CMP/EOR/MVN/ORR/ORN/RSB/SBC/SUB
731+
switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
732+
case Instruction::Add:
733+
case Instruction::Sub:
734+
case Instruction::And:
735+
case Instruction::Xor:
736+
case Instruction::Or:
737+
case Instruction::ICmp:
738+
return true;
739+
default:
740+
return false;
741+
}
742+
};
743+
if (LooksLikeAFreeShift())
744+
return 0;
745+
717746
int BaseCost = ST->hasMVEIntegerOps() && Ty->isVectorTy()
718747
? ST->getMVEVectorCostFactor()
719748
: 1;

0 commit comments

Comments
 (0)