@@ -329,10 +329,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
329
329
}
330
330
331
331
static const CostKindTblEntry GLMCostTable[] = { // FDIV-only entries, one per MVT (f32, v4f32, f64, v2f64); this hunk replaces each single-value cost list with four values - presumably {recip-throughput, latency, code-size, size+latency}, TODO confirm against the CostKindTblEntry definition
332
- { ISD::FDIV, MVT::f32 , { 18 } }, // divss
333
- { ISD::FDIV, MVT::v4f32, { 35 } }, // divps
334
- { ISD::FDIV, MVT::f64 , { 33 } }, // divsd
335
- { ISD::FDIV, MVT::v2f64, { 65 } }, // divpd
332
+ { ISD::FDIV, MVT::f32 , { 18 , 19 , 1 , 1 } }, // divss (first value unchanged from the removed entry)
333
+ { ISD::FDIV, MVT::v4f32, { 35 , 36 , 1 , 1 } }, // divps (first value unchanged from the removed entry)
334
+ { ISD::FDIV, MVT::f64 , { 33 , 34 , 1 , 1 } }, // divsd (first value unchanged from the removed entry)
335
+ { ISD::FDIV, MVT::v2f64, { 65 , 66 , 1 , 1 } }, // divpd (first value unchanged from the removed entry)
336
336
};
337
337
338
338
if (ST->useGLMDivSqrtCosts ())
@@ -347,10 +347,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
347
347
{ ISD::FMUL, MVT::f32 , { 1 , 4 , 1 , 1 } }, // mulss
348
348
{ ISD::FMUL, MVT::v2f64, { 4 , 7 , 1 , 1 } }, // mulpd
349
349
{ ISD::FMUL, MVT::v4f32, { 2 , 5 , 1 , 1 } }, // mulps
350
- { ISD::FDIV, MVT::f32 , { 17 } }, // divss
351
- { ISD::FDIV, MVT::v4f32, { 39 } }, // divps
352
- { ISD::FDIV, MVT::f64 , { 32 } }, // divsd
353
- { ISD::FDIV, MVT::v2f64, { 69 } }, // divpd
350
+ { ISD::FDIV, MVT::f32 , { 17 , 19 , 1 , 1 } }, // divss
351
+ { ISD::FDIV, MVT::v4f32, { 39 , 39 , 1 , 6 } }, // divps
352
+ { ISD::FDIV, MVT::f64 , { 32 , 34 , 1 , 1 } }, // divsd
353
+ { ISD::FDIV, MVT::v2f64, { 69 , 69 , 1 , 6 } }, // divpd
354
354
{ ISD::FADD, MVT::v2f64, { 2 , 4 , 1 , 1 } }, // addpd
355
355
{ ISD::FSUB, MVT::v2f64, { 2 , 4 , 1 , 1 } }, // subpd
356
356
// v2i64/v4i64 mul is custom lowered as a series of long:
@@ -717,10 +717,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
717
717
{ ISD::FMUL, MVT::v2f64, { 1 , 4 , 1 , 1 } }, // Skylake from http://www.agner.org/
718
718
{ ISD::FMUL, MVT::f64 , { 1 , 4 , 1 , 1 } }, // Skylake from http://www.agner.org/
719
719
720
- { ISD::FDIV, MVT::f64 , { 4 } }, // Skylake from http://www.agner.org/
721
- { ISD::FDIV, MVT::v2f64, { 4 } }, // Skylake from http://www.agner.org/
722
- { ISD::FDIV, MVT::v4f64, { 8 } }, // Skylake from http://www.agner.org/
723
- { ISD::FDIV, MVT::v8f64, { 16 } }, // Skylake from http://www.agner.org/
720
+ { ISD::FDIV, MVT::f64 , { 4 , 14 , 1 , 1 } }, // Skylake from http://www.agner.org/
721
+ { ISD::FDIV, MVT::v2f64, { 4 , 14 , 1 , 1 } }, // Skylake from http://www.agner.org/
722
+ { ISD::FDIV, MVT::v4f64, { 8 , 14 , 1 , 1 } }, // Skylake from http://www.agner.org/
723
+ { ISD::FDIV, MVT::v8f64, { 16 , 23 , 1 , 3 } }, // Skylake from http://www.agner.org/
724
724
725
725
{ ISD::FNEG, MVT::v16f32, { 1 , 1 , 1 , 2 } }, // Skylake from http://www.agner.org/
726
726
{ ISD::FADD, MVT::v16f32, { 1 , 4 , 1 , 1 } }, // Skylake from http://www.agner.org/
@@ -732,10 +732,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
732
732
{ ISD::FMUL, MVT::v4f32, { 1 , 4 , 1 , 1 } }, // Skylake from http://www.agner.org/
733
733
{ ISD::FMUL, MVT::f32 , { 1 , 4 , 1 , 1 } }, // Skylake from http://www.agner.org/
734
734
735
- { ISD::FDIV, MVT::f32 , { 3 } }, // Skylake from http://www.agner.org/
736
- { ISD::FDIV, MVT::v4f32, { 3 } }, // Skylake from http://www.agner.org/
737
- { ISD::FDIV, MVT::v8f32, { 5 } }, // Skylake from http://www.agner.org/
738
- { ISD::FDIV, MVT::v16f32, { 10 } }, // Skylake from http://www.agner.org/
735
+ { ISD::FDIV, MVT::f32 , { 3 , 11 , 1 , 1 } }, // Skylake from http://www.agner.org/
736
+ { ISD::FDIV, MVT::v4f32, { 3 , 11 , 1 , 1 } }, // Skylake from http://www.agner.org/
737
+ { ISD::FDIV, MVT::v8f32, { 5 , 11 , 1 , 1 } }, // Skylake from http://www.agner.org/
738
+ { ISD::FDIV, MVT::v16f32, { 10 , 18 , 1 , 3 } }, // Skylake from http://www.agner.org/
739
739
};
740
740
741
741
if (ST->hasAVX512 ())
@@ -924,12 +924,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
924
924
{ ISD::FMUL, MVT::v4f64, { 1 , 5 , 1 , 2 } }, // vmulpd
925
925
{ ISD::FMUL, MVT::v8f32, { 1 , 5 , 1 , 2 } }, // vmulps
926
926
927
- { ISD::FDIV, MVT::f32 , { 7 } }, // Haswell from http://www.agner.org/
928
- { ISD::FDIV, MVT::v4f32, { 7 } }, // Haswell from http://www.agner.org/
929
- { ISD::FDIV, MVT::v8f32, { 14 } }, // Haswell from http://www.agner.org/
930
- { ISD::FDIV, MVT::f64 , { 14 } }, // Haswell from http://www.agner.org/
931
- { ISD::FDIV, MVT::v2f64, { 14 } }, // Haswell from http://www.agner.org/
932
- { ISD::FDIV, MVT::v4f64, { 28 } }, // Haswell from http://www.agner.org/
927
+ { ISD::FDIV, MVT::f32 , { 7 , 13 , 1 , 1 } }, // vdivss
928
+ { ISD::FDIV, MVT::v4f32, { 7 , 13 , 1 , 1 } }, // vdivps
929
+ { ISD::FDIV, MVT::v8f32, { 14 , 21 , 1 , 3 } }, // vdivps
930
+ { ISD::FDIV, MVT::f64 , { 14 , 20 , 1 , 1 } }, // vdivsd
931
+ { ISD::FDIV, MVT::v2f64, { 14 , 20 , 1 , 1 } }, // vdivpd
932
+ { ISD::FDIV, MVT::v4f64, { 28 , 35 , 1 , 3 } }, // vdivpd
933
933
};
934
934
935
935
// Look for AVX2 lowering tricks for custom cases.
@@ -1016,12 +1016,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
1016
1016
{ ISD::FMUL, MVT::v4f64, { 4 , 5 , 1 , 2 } }, // BTVER2 from http://www.agner.org/
1017
1017
{ ISD::FMUL, MVT::v8f32, { 2 , 5 , 1 , 2 } }, // BTVER2 from http://www.agner.org/
1018
1018
1019
- { ISD::FDIV, MVT::f32 , { 14 } }, // SNB from http://www.agner.org/
1020
- { ISD::FDIV, MVT::v4f32, { 14 } }, // SNB from http://www.agner.org/
1021
- { ISD::FDIV, MVT::v8f32, { 28 } }, // SNB from http://www.agner.org/
1022
- { ISD::FDIV, MVT::f64 , { 22 } }, // SNB from http://www.agner.org/
1023
- { ISD::FDIV, MVT::v2f64, { 22 } }, // SNB from http://www.agner.org/
1024
- { ISD::FDIV, MVT::v4f64, { 44 } }, // SNB from http://www.agner.org/
1019
+ { ISD::FDIV, MVT::f32 , { 14 , 14 , 1 , 1 } }, // SNB from http://www.agner.org/
1020
+ { ISD::FDIV, MVT::v4f32, { 14 , 14 , 1 , 1 } }, // SNB from http://www.agner.org/
1021
+ { ISD::FDIV, MVT::v8f32, { 28 , 29 , 1 , 3 } }, // SNB from http://www.agner.org/
1022
+ { ISD::FDIV, MVT::f64 , { 22 , 22 , 1 , 1 } }, // SNB from http://www.agner.org/
1023
+ { ISD::FDIV, MVT::v2f64, { 22 , 22 , 1 , 1 } }, // SNB from http://www.agner.org/
1024
+ { ISD::FDIV, MVT::v4f64, { 44 , 45 , 1 , 3 } }, // SNB from http://www.agner.org/
1025
1025
};
1026
1026
1027
1027
if (ST->hasAVX ())
@@ -1045,10 +1045,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
1045
1045
{ ISD::FMUL, MVT::v2f64, { 1 , 5 , 1 , 1 } }, // Nehalem from http://www.agner.org/
1046
1046
{ ISD::FMUL, MVT::v4f32, { 1 , 5 , 1 , 1 } }, // Nehalem from http://www.agner.org/
1047
1047
1048
- { ISD::FDIV, MVT::f32 , { 14 } }, // Nehalem from http://www.agner.org/
1049
- { ISD::FDIV, MVT::v4f32, { 14 } }, // Nehalem from http://www.agner.org/
1050
- { ISD::FDIV, MVT::f64 , { 22 } }, // Nehalem from http://www.agner.org/
1051
- { ISD::FDIV, MVT::v2f64, { 22 } }, // Nehalem from http://www.agner.org/
1048
+ { ISD::FDIV, MVT::f32 , { 14 , 14 , 1 , 1 } }, // Nehalem from http://www.agner.org/
1049
+ { ISD::FDIV, MVT::v4f32, { 14 , 14 , 1 , 1 } }, // Nehalem from http://www.agner.org/
1050
+ { ISD::FDIV, MVT::f64 , { 22 , 22 , 1 , 1 } }, // Nehalem from http://www.agner.org/
1051
+ { ISD::FDIV, MVT::v2f64, { 22 , 22 , 1 , 1 } }, // Nehalem from http://www.agner.org/
1052
1052
1053
1053
{ ISD::MUL, MVT::v2i64, { 6 } } // 3*pmuludq/3*shift/2*add
1054
1054
};
@@ -1116,10 +1116,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
1116
1116
{ ISD::MUL, MVT::v4i32, { 6 } }, // 3*pmuludq/4*shuffle
1117
1117
{ ISD::MUL, MVT::v2i64, { 8 } }, // 3*pmuludq/3*shift/2*add
1118
1118
1119
- { ISD::FDIV, MVT::f32 , { 23 } }, // Pentium IV from http://www.agner.org/
1120
- { ISD::FDIV, MVT::v4f32, { 39 } }, // Pentium IV from http://www.agner.org/
1121
- { ISD::FDIV, MVT::f64 , { 38 } }, // Pentium IV from http://www.agner.org/
1122
- { ISD::FDIV, MVT::v2f64, { 69 } }, // Pentium IV from http://www.agner.org/
1119
+ { ISD::FDIV, MVT::f32 , { 23 , 23 , 1 , 1 } }, // Pentium IV from http://www.agner.org/
1120
+ { ISD::FDIV, MVT::v4f32, { 39 , 39 , 1 , 1 } }, // Pentium IV from http://www.agner.org/
1121
+ { ISD::FDIV, MVT::f64 , { 38 , 38 , 1 , 1 } }, // Pentium IV from http://www.agner.org/
1122
+ { ISD::FDIV, MVT::v2f64, { 69 , 69 , 1 , 1 } }, // Pentium IV from http://www.agner.org/
1123
1123
1124
1124
{ ISD::FNEG, MVT::f32 , { 1 , 1 , 1 , 1 } }, // Pentium IV from http://www.agner.org/
1125
1125
{ ISD::FNEG, MVT::f64 , { 1 , 1 , 1 , 1 } }, // Pentium IV from http://www.agner.org/
@@ -1144,8 +1144,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
1144
1144
return LT.first * KindCost.value ();
1145
1145
1146
1146
static const CostKindTblEntry SSE1CostTable[] = {
1147
- { ISD::FDIV, MVT::f32 , { 17 } }, // Pentium III from http://www.agner.org/
1148
- { ISD::FDIV, MVT::v4f32, { 34 } }, // Pentium III from http://www.agner.org/
1147
+ { ISD::FDIV, MVT::f32 , { 17 , 18 , 1 , 1 } }, // Pentium III from http://www.agner.org/
1148
+ { ISD::FDIV, MVT::v4f32, { 34 , 48 , 1 , 1 } }, // Pentium III from http://www.agner.org/
1149
1149
1150
1150
{ ISD::FNEG, MVT::f32 , { 2 , 2 , 1 , 2 } }, // Pentium III from http://www.agner.org/
1151
1151
{ ISD::FNEG, MVT::v4f32, { 2 , 2 , 1 , 2 } }, // Pentium III from http://www.agner.org/
@@ -1189,7 +1189,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
1189
1189
{ ISD::FADD, MVT::f64 , { 2 , 3 , 1 , 1 } }, // (x87)
1190
1190
{ ISD::FSUB, MVT::f64 , { 2 , 3 , 1 , 1 } }, // (x87)
1191
1191
{ ISD::FMUL, MVT::f64 , { 2 , 5 , 1 , 1 } }, // (x87)
1192
- { ISD::FDIV, MVT::f64 , { 38 } }, // (x87)
1192
+ { ISD::FDIV, MVT::f64 , { 38 , 38 , 1 , 1 } }, // (x87)
1193
1193
};
1194
1194
1195
1195
if (const auto *Entry = CostTableLookup (X86CostTbl, ISD, LT.second ))
@@ -5649,6 +5649,15 @@ bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
5649
5649
return TLI->isOperationLegal (IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT);
5650
5650
}
5651
5651
5652
+ bool X86TTIImpl::isExpensiveToSpeculativelyExecute (const Instruction* I) { // X86-specific wrapper: answers true for FDiv itself and forwards every other instruction to BaseT's implementation of the same query
5653
+ // FDIV is always expensive, even if it has a very low uop count.
5654
+ // TODO: Still necessary for recent CPUs with low latency/throughput fdiv?
5655
+ if (I->getOpcode () == Instruction::FDiv) // decision is made on the opcode alone; operand type and subtarget are not inspected here
5656
+ return true ;
5657
+
5658
+ return BaseT::isExpensiveToSpeculativelyExecute (I); // non-FDiv instructions: use the base-class answer unchanged
5659
+ }
5660
+
5652
5661
bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero (Type *Ty) { // Ty is accepted but never read in this body
5653
5662
return false ; // constant answer: false for every type
5654
5663
}
0 commit comments