Skip to content

Commit 5aee272

Browse files
committed
[CostModel][X86] Add CostKinds handling for fdiv ops
This was achieved with an updated version of the 'cost-tables vs llvm-mca' script (D103695). As we're using 'typical' worst-case values, not all cost entries come from a single CPU - e.g. the latency/throughput come from Haswell but the size-latency (uops) from zen1/alderlake-e due to 'double pumping'. As the uop count (used for TCK_SizeAndLatency) for divss/divps is typically so low, we need to override isExpensiveToSpeculativelyExecute to ensure we keep fdiv calls behind branches - although for some very recent CPU targets it might not be necessary any more and could be relaxed.
1 parent 22e1f66 commit 5aee272

File tree

4 files changed

+212
-59
lines changed

4 files changed

+212
-59
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 48 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -329,10 +329,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
329329
}
330330

331331
static const CostKindTblEntry GLMCostTable[] = {
332-
{ ISD::FDIV, MVT::f32, { 18 } }, // divss
333-
{ ISD::FDIV, MVT::v4f32, { 35 } }, // divps
334-
{ ISD::FDIV, MVT::f64, { 33 } }, // divsd
335-
{ ISD::FDIV, MVT::v2f64, { 65 } }, // divpd
332+
{ ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } }, // divss
333+
{ ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } }, // divps
334+
{ ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } }, // divsd
335+
{ ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } }, // divpd
336336
};
337337

338338
if (ST->useGLMDivSqrtCosts())
@@ -347,10 +347,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
347347
{ ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } }, // mulss
348348
{ ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } }, // mulpd
349349
{ ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } }, // mulps
350-
{ ISD::FDIV, MVT::f32, { 17 } }, // divss
351-
{ ISD::FDIV, MVT::v4f32, { 39 } }, // divps
352-
{ ISD::FDIV, MVT::f64, { 32 } }, // divsd
353-
{ ISD::FDIV, MVT::v2f64, { 69 } }, // divpd
350+
{ ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } }, // divss
351+
{ ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } }, // divps
352+
{ ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } }, // divsd
353+
{ ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } }, // divpd
354354
{ ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } }, // addpd
355355
{ ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } }, // subpd
356356
// v2i64/v4i64 mul is custom lowered as a series of long:
@@ -717,10 +717,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
717717
{ ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
718718
{ ISD::FMUL, MVT::f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
719719

720-
{ ISD::FDIV, MVT::f64, { 4 } }, // Skylake from http://www.agner.org/
721-
{ ISD::FDIV, MVT::v2f64, { 4 } }, // Skylake from http://www.agner.org/
722-
{ ISD::FDIV, MVT::v4f64, { 8 } }, // Skylake from http://www.agner.org/
723-
{ ISD::FDIV, MVT::v8f64, { 16 } }, // Skylake from http://www.agner.org/
720+
{ ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } }, // Skylake from http://www.agner.org/
721+
{ ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } }, // Skylake from http://www.agner.org/
722+
{ ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } }, // Skylake from http://www.agner.org/
723+
{ ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } }, // Skylake from http://www.agner.org/
724724

725725
{ ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } }, // Skylake from http://www.agner.org/
726726
{ ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
@@ -732,10 +732,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
732732
{ ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
733733
{ ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
734734

735-
{ ISD::FDIV, MVT::f32, { 3 } }, // Skylake from http://www.agner.org/
736-
{ ISD::FDIV, MVT::v4f32, { 3 } }, // Skylake from http://www.agner.org/
737-
{ ISD::FDIV, MVT::v8f32, { 5 } }, // Skylake from http://www.agner.org/
738-
{ ISD::FDIV, MVT::v16f32, { 10 } }, // Skylake from http://www.agner.org/
735+
{ ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } }, // Skylake from http://www.agner.org/
736+
{ ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } }, // Skylake from http://www.agner.org/
737+
{ ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } }, // Skylake from http://www.agner.org/
738+
{ ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } }, // Skylake from http://www.agner.org/
739739
};
740740

741741
if (ST->hasAVX512())
@@ -924,12 +924,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
924924
{ ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } }, // vmulpd
925925
{ ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } }, // vmulps
926926

927-
{ ISD::FDIV, MVT::f32, { 7 } }, // Haswell from http://www.agner.org/
928-
{ ISD::FDIV, MVT::v4f32, { 7 } }, // Haswell from http://www.agner.org/
929-
{ ISD::FDIV, MVT::v8f32, { 14 } }, // Haswell from http://www.agner.org/
930-
{ ISD::FDIV, MVT::f64, { 14 } }, // Haswell from http://www.agner.org/
931-
{ ISD::FDIV, MVT::v2f64, { 14 } }, // Haswell from http://www.agner.org/
932-
{ ISD::FDIV, MVT::v4f64, { 28 } }, // Haswell from http://www.agner.org/
927+
{ ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } }, // vdivss
928+
{ ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } }, // vdivps
929+
{ ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } }, // vdivps
930+
{ ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } }, // vdivsd
931+
{ ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } }, // vdivpd
932+
{ ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } }, // vdivpd
933933
};
934934

935935
// Look for AVX2 lowering tricks for custom cases.
@@ -1016,12 +1016,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
10161016
{ ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } }, // BTVER2 from http://www.agner.org/
10171017
{ ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } }, // BTVER2 from http://www.agner.org/
10181018

1019-
{ ISD::FDIV, MVT::f32, { 14 } }, // SNB from http://www.agner.org/
1020-
{ ISD::FDIV, MVT::v4f32, { 14 } }, // SNB from http://www.agner.org/
1021-
{ ISD::FDIV, MVT::v8f32, { 28 } }, // SNB from http://www.agner.org/
1022-
{ ISD::FDIV, MVT::f64, { 22 } }, // SNB from http://www.agner.org/
1023-
{ ISD::FDIV, MVT::v2f64, { 22 } }, // SNB from http://www.agner.org/
1024-
{ ISD::FDIV, MVT::v4f64, { 44 } }, // SNB from http://www.agner.org/
1019+
{ ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } }, // SNB from http://www.agner.org/
1020+
{ ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } }, // SNB from http://www.agner.org/
1021+
{ ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } }, // SNB from http://www.agner.org/
1022+
{ ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } }, // SNB from http://www.agner.org/
1023+
{ ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } }, // SNB from http://www.agner.org/
1024+
{ ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } }, // SNB from http://www.agner.org/
10251025
};
10261026

10271027
if (ST->hasAVX())
@@ -1045,10 +1045,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
10451045
{ ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } }, // Nehalem from http://www.agner.org/
10461046
{ ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } }, // Nehalem from http://www.agner.org/
10471047

1048-
{ ISD::FDIV, MVT::f32, { 14 } }, // Nehalem from http://www.agner.org/
1049-
{ ISD::FDIV, MVT::v4f32, { 14 } }, // Nehalem from http://www.agner.org/
1050-
{ ISD::FDIV, MVT::f64, { 22 } }, // Nehalem from http://www.agner.org/
1051-
{ ISD::FDIV, MVT::v2f64, { 22 } }, // Nehalem from http://www.agner.org/
1048+
{ ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } }, // Nehalem from http://www.agner.org/
1049+
{ ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } }, // Nehalem from http://www.agner.org/
1050+
{ ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } }, // Nehalem from http://www.agner.org/
1051+
{ ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } }, // Nehalem from http://www.agner.org/
10521052

10531053
{ ISD::MUL, MVT::v2i64, { 6 } } // 3*pmuludq/3*shift/2*add
10541054
};
@@ -1116,10 +1116,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
11161116
{ ISD::MUL, MVT::v4i32, { 6 } }, // 3*pmuludq/4*shuffle
11171117
{ ISD::MUL, MVT::v2i64, { 8 } }, // 3*pmuludq/3*shift/2*add
11181118

1119-
{ ISD::FDIV, MVT::f32, { 23 } }, // Pentium IV from http://www.agner.org/
1120-
{ ISD::FDIV, MVT::v4f32, { 39 } }, // Pentium IV from http://www.agner.org/
1121-
{ ISD::FDIV, MVT::f64, { 38 } }, // Pentium IV from http://www.agner.org/
1122-
{ ISD::FDIV, MVT::v2f64, { 69 } }, // Pentium IV from http://www.agner.org/
1119+
{ ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } }, // Pentium IV from http://www.agner.org/
1120+
{ ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } }, // Pentium IV from http://www.agner.org/
1121+
{ ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } }, // Pentium IV from http://www.agner.org/
1122+
{ ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } }, // Pentium IV from http://www.agner.org/
11231123

11241124
{ ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } }, // Pentium IV from http://www.agner.org/
11251125
{ ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } }, // Pentium IV from http://www.agner.org/
@@ -1144,8 +1144,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
11441144
return LT.first * KindCost.value();
11451145

11461146
static const CostKindTblEntry SSE1CostTable[] = {
1147-
{ ISD::FDIV, MVT::f32, { 17 } }, // Pentium III from http://www.agner.org/
1148-
{ ISD::FDIV, MVT::v4f32, { 34 } }, // Pentium III from http://www.agner.org/
1147+
{ ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } }, // Pentium III from http://www.agner.org/
1148+
{ ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } }, // Pentium III from http://www.agner.org/
11491149

11501150
{ ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } }, // Pentium III from http://www.agner.org/
11511151
{ ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } }, // Pentium III from http://www.agner.org/
@@ -1189,7 +1189,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
11891189
{ ISD::FADD, MVT::f64, { 2, 3, 1, 1 } }, // (x87)
11901190
{ ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } }, // (x87)
11911191
{ ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } }, // (x87)
1192-
{ ISD::FDIV, MVT::f64, { 38 } }, // (x87)
1192+
{ ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } }, // (x87)
11931193
};
11941194

11951195
if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, LT.second))
@@ -5649,6 +5649,15 @@ bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) {
56495649
return TLI->isOperationLegal(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT);
56505650
}
56515651

5652+
bool X86TTIImpl::isExpensiveToSpeculativelyExecute(const Instruction* I) {
5653+
// FDIV is always reported as expensive: its uop count (used for TCK_SizeAndLatency) is very low, so we force 'true' to keep fdiv behind branches.
5654+
// TODO: Still necessary for recent CPUs with low latency/throughput fdiv? If not, this override could be relaxed for those targets.
5655+
if (I->getOpcode() == Instruction::FDiv)
5656+
return true;
5657+
5658+
return BaseT::isExpensiveToSpeculativelyExecute(I);
5659+
}
5660+
56525661
bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
56535662
return false;
56545663
}

llvm/lib/Target/X86/X86TargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
254254
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
255255
const SmallBitVector &OpcodeMask) const;
256256
bool hasDivRemOp(Type *DataType, bool IsSigned);
257+
bool isExpensiveToSpeculativelyExecute(const Instruction *I);
257258
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
258259
bool areInlineCompatible(const Function *Caller,
259260
const Function *Callee) const;

llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll

Lines changed: 87 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -511,16 +511,93 @@ define i32 @fmul(i32 %arg) {
511511
}
512512

513513
define i32 @fdiv(i32 %arg) {
514-
; CHECK-LABEL: 'fdiv'
515-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = fdiv float undef, undef
516-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fdiv <4 x float> undef, undef
517-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef
518-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef
519-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef
520-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef
521-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
522-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef
523-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
514+
; SSE1-LABEL: 'fdiv'
515+
; SSE1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %F32 = fdiv float undef, undef
516+
; SSE1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F32 = fdiv <4 x float> undef, undef
517+
; SSE1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F32 = fdiv <8 x float> undef, undef
518+
; SSE1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16F32 = fdiv <16 x float> undef, undef
519+
; SSE1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %F64 = fdiv double undef, undef
520+
; SSE1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2F64 = fdiv <2 x double> undef, undef
521+
; SSE1-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V4F64 = fdiv <4 x double> undef, undef
522+
; SSE1-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %V8F64 = fdiv <8 x double> undef, undef
523+
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
524+
;
525+
; SSE2-LABEL: 'fdiv'
526+
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %F32 = fdiv float undef, undef
527+
; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4F32 = fdiv <4 x float> undef, undef
528+
; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8F32 = fdiv <8 x float> undef, undef
529+
; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V16F32 = fdiv <16 x float> undef, undef
530+
; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %F64 = fdiv double undef, undef
531+
; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V2F64 = fdiv <2 x double> undef, undef
532+
; SSE2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V4F64 = fdiv <4 x double> undef, undef
533+
; SSE2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V8F64 = fdiv <8 x double> undef, undef
534+
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
535+
;
536+
; SSE42-LABEL: 'fdiv'
537+
; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef
538+
; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef
539+
; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = fdiv <8 x float> undef, undef
540+
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = fdiv <16 x float> undef, undef
541+
; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef
542+
; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef
543+
; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = fdiv <4 x double> undef, undef
544+
; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = fdiv <8 x double> undef, undef
545+
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
546+
;
547+
; AVX1-LABEL: 'fdiv'
548+
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef
549+
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef
550+
; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8F32 = fdiv <8 x float> undef, undef
551+
; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16F32 = fdiv <16 x float> undef, undef
552+
; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef
553+
; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef
554+
; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4F64 = fdiv <4 x double> undef, undef
555+
; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8F64 = fdiv <8 x double> undef, undef
556+
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
557+
;
558+
; AVX2-LABEL: 'fdiv'
559+
; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %F32 = fdiv float undef, undef
560+
; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = fdiv <4 x float> undef, undef
561+
; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = fdiv <8 x float> undef, undef
562+
; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16F32 = fdiv <16 x float> undef, undef
563+
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F64 = fdiv double undef, undef
564+
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = fdiv <2 x double> undef, undef
565+
; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4F64 = fdiv <4 x double> undef, undef
566+
; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8F64 = fdiv <8 x double> undef, undef
567+
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
568+
;
569+
; AVX512-LABEL: 'fdiv'
570+
; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %F32 = fdiv float undef, undef
571+
; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = fdiv <4 x float> undef, undef
572+
; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8F32 = fdiv <8 x float> undef, undef
573+
; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F32 = fdiv <16 x float> undef, undef
574+
; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = fdiv double undef, undef
575+
; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = fdiv <2 x double> undef, undef
576+
; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = fdiv <4 x double> undef, undef
577+
; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F64 = fdiv <8 x double> undef, undef
578+
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
579+
;
580+
; SLM-LABEL: 'fdiv'
581+
; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = fdiv float undef, undef
582+
; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4F32 = fdiv <4 x float> undef, undef
583+
; SLM-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8F32 = fdiv <8 x float> undef, undef
584+
; SLM-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V16F32 = fdiv <16 x float> undef, undef
585+
; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = fdiv double undef, undef
586+
; SLM-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V2F64 = fdiv <2 x double> undef, undef
587+
; SLM-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V4F64 = fdiv <4 x double> undef, undef
588+
; SLM-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V8F64 = fdiv <8 x double> undef, undef
589+
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
590+
;
591+
; GLM-LABEL: 'fdiv'
592+
; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = fdiv float undef, undef
593+
; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4F32 = fdiv <4 x float> undef, undef
594+
; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8F32 = fdiv <8 x float> undef, undef
595+
; GLM-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16F32 = fdiv <16 x float> undef, undef
596+
; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = fdiv double undef, undef
597+
; GLM-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V2F64 = fdiv <2 x double> undef, undef
598+
; GLM-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V4F64 = fdiv <4 x double> undef, undef
599+
; GLM-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V8F64 = fdiv <8 x double> undef, undef
600+
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
524601
;
525602
%F32 = fdiv float undef, undef
526603
%V4F32 = fdiv <4 x float> undef, undef

0 commit comments

Comments
 (0)