Skip to content

Commit bddbd40

Browse files
committed
[X86] Fix fdiv throughput/latency/uops counts
Matches znver1/2 numbers from AMD SoG + Agner - no additional uops for folded instructions, and znver1 double pumps 256-bit vectors.
Matches skylake/icelake throughput numbers from Intel AoM + Agner/instlatx64.
Noticed while adding fdiv CostKinds support.
1 parent f43c814 commit bddbd40

25 files changed

+294
-510
lines changed

llvm/lib/Target/X86/X86SchedIceLake.td

Lines changed: 4 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -286,12 +286,12 @@ defm : ICXWriteResPair<WriteFMul64Y, [ICXPort01], 4, [1], 1, 7>;
286286
defm : ICXWriteResPair<WriteFMul64Z, [ICXPort05], 4, [1], 1, 7>;
287287

288288
defm : ICXWriteResPair<WriteFDiv, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
289-
//defm : ICXWriteResPair<WriteFDivX, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
289+
defm : ICXWriteResPair<WriteFDivX, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
290290
defm : ICXWriteResPair<WriteFDivY, [ICXPort0,ICXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
291291
defm : ICXWriteResPair<WriteFDivZ, [ICXPort0,ICXPort5,ICXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles.
292-
//defm : ICXWriteResPair<WriteFDiv64, [ICXPort0,ICXFPDivider], 14, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
293-
//defm : ICXWriteResPair<WriteFDiv64X, [ICXPort0,ICXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles.
294-
//defm : ICXWriteResPair<WriteFDiv64Y, [ICXPort0,ICXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles.
292+
defm : ICXWriteResPair<WriteFDiv64, [ICXPort0,ICXFPDivider], 14, [1,4], 1, 5>; // 10-14 cycles. // Floating point division.
293+
defm : ICXWriteResPair<WriteFDiv64X, [ICXPort0,ICXFPDivider], 14, [1,4], 1, 6>; // 10-14 cycles.
294+
defm : ICXWriteResPair<WriteFDiv64Y, [ICXPort0,ICXFPDivider], 14, [1,8], 1, 7>; // 10-14 cycles.
295295
defm : ICXWriteResPair<WriteFDiv64Z, [ICXPort0,ICXPort5,ICXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles.
296296

297297
defm : ICXWriteResPair<WriteFSqrt, [ICXPort0,ICXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
@@ -1893,13 +1893,6 @@ def ICXWriteResGroup157 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,
18931893
}
18941894
def: InstRW<[ICXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;
18951895

1896-
def ICXWriteResGroup159 : SchedWriteRes<[ICXPort0,ICXFPDivider]> {
1897-
let Latency = 11;
1898-
let NumMicroOps = 1;
1899-
let ResourceCycles = [1,3];
1900-
}
1901-
def : SchedAlias<WriteFDivX, ICXWriteResGroup159>; // TODO - convert to ZnWriteResFpuPair
1902-
19031896
def ICXWriteResGroup160 : SchedWriteRes<[ICXPort0,ICXPort23]> {
19041897
let Latency = 11;
19051898
let NumMicroOps = 2;
@@ -2063,21 +2056,6 @@ def ICXWriteResGroup183 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
20632056
def: InstRW<[ICXWriteResGroup183], (instregex "VPERMI2W128rm(b?)",
20642057
"VPERMT2W128rm(b?)")>;
20652058

2066-
def ICXWriteResGroup184 : SchedWriteRes<[ICXPort0,ICXFPDivider]> {
2067-
let Latency = 14;
2068-
let NumMicroOps = 1;
2069-
let ResourceCycles = [1,3];
2070-
}
2071-
def : SchedAlias<WriteFDiv64, ICXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair
2072-
def : SchedAlias<WriteFDiv64X, ICXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair
2073-
2074-
def ICXWriteResGroup184_1 : SchedWriteRes<[ICXPort0,ICXFPDivider]> {
2075-
let Latency = 14;
2076-
let NumMicroOps = 1;
2077-
let ResourceCycles = [1,5];
2078-
}
2079-
def : SchedAlias<WriteFDiv64Y, ICXWriteResGroup184_1>; // TODO - convert to ZnWriteResFpuPair
2080-
20812059
def ICXWriteResGroup187 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
20822060
let Latency = 14;
20832061
let NumMicroOps = 3;
@@ -2150,13 +2128,6 @@ def ICXWriteResGroup200 : SchedWriteRes<[ICXPort1, ICXPort05, ICXPort6]> {
21502128
}
21512129
def: InstRW<[ICXWriteResGroup200], (instrs VZEROALL)>;
21522130

2153-
def ICXWriteResGroup201 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
2154-
let Latency = 17;
2155-
let NumMicroOps = 2;
2156-
let ResourceCycles = [1,1,5];
2157-
}
2158-
def : SchedAlias<WriteFDivXLd, ICXWriteResGroup201>; // TODO - convert to ZnWriteResFpuPair
2159-
21602131
def ICXWriteResGroup202 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156]> {
21612132
let Latency = 17;
21622133
let NumMicroOps = 15;
@@ -2185,13 +2156,6 @@ def ICXWriteResGroup208 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort237,ICXPort06
21852156
}
21862157
def: InstRW<[ICXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>;
21872158

2188-
def ICXWriteResGroup209 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
2189-
let Latency = 19;
2190-
let NumMicroOps = 2;
2191-
let ResourceCycles = [1,1,4];
2192-
}
2193-
def : SchedAlias<WriteFDiv64Ld, ICXWriteResGroup209>; // TODO - convert to ZnWriteResFpuPair
2194-
21952159
def ICXWriteResGroup211 : SchedWriteRes<[ICXPort23,ICXPort01]> {
21962160
let Latency = 22;
21972161
let NumMicroOps = 4;
@@ -2213,13 +2177,6 @@ def ICXWriteResGroup215 : SchedWriteRes<[ICXPort0]> {
22132177
}
22142178
def: InstRW<[ICXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
22152179

2216-
def ICXWriteResGroup216 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
2217-
let Latency = 20;
2218-
let NumMicroOps = 2;
2219-
let ResourceCycles = [1,1,4];
2220-
}
2221-
def : SchedAlias<WriteFDiv64XLd, ICXWriteResGroup216>; // TODO - convert to ZnWriteResFpuPair
2222-
22232180
def ICXWriteGatherEVEX2 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
22242181
let Latency = 17;
22252182
let NumMicroOps = 5; // 2 uops perform multiple loads
@@ -2270,13 +2227,6 @@ def ICXWriteResGroup220 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort0156]> {
22702227
}
22712228
def: InstRW<[ICXWriteResGroup220], (instrs MWAITrr)>;
22722229

2273-
def ICXWriteResGroup222 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
2274-
let Latency = 21;
2275-
let NumMicroOps = 2;
2276-
let ResourceCycles = [1,1,8];
2277-
}
2278-
def : SchedAlias<WriteFDiv64YLd, ICXWriteResGroup222>; // TODO - convert to ZnWriteResFpuPair
2279-
22802230
def ICXWriteResGroup223 : SchedWriteRes<[ICXPort0,ICXPort23]> {
22812231
let Latency = 22;
22822232
let NumMicroOps = 2;

llvm/lib/Target/X86/X86SchedSkylakeClient.td

Lines changed: 4 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -278,12 +278,12 @@ defm : SKLWriteResPair<WriteFMul64Y, [SKLPort01], 4, [1], 1, 7>;
278278
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
279279

280280
defm : SKLWriteResPair<WriteFDiv, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 5>; // Floating point division.
281-
//defm : SKLWriteResPair<WriteFDivX, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>;
281+
defm : SKLWriteResPair<WriteFDivX, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>;
282282
defm : SKLWriteResPair<WriteFDivY, [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>;
283283
defm : X86WriteResPairUnsupported<WriteFDivZ>;
284-
//defm : SKLWriteResPair<WriteFDiv64, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 5>; // Floating point double division.
285-
//defm : SKLWriteResPair<WriteFDiv64X, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 6>;
286-
//defm : SKLWriteResPair<WriteFDiv64Y, [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>;
284+
defm : SKLWriteResPair<WriteFDiv64, [SKLPort0,SKLFPDivider], 14, [1,4], 1, 5>; // Floating point double division.
285+
defm : SKLWriteResPair<WriteFDiv64X, [SKLPort0,SKLFPDivider], 14, [1,4], 1, 6>;
286+
defm : SKLWriteResPair<WriteFDiv64Y, [SKLPort0,SKLFPDivider], 14, [1,8], 1, 7>;
287287
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
288288

289289
defm : SKLWriteResPair<WriteFSqrt, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
@@ -1385,13 +1385,6 @@ def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,
13851385
}
13861386
def: InstRW<[SKLWriteResGroup143], (instregex "XCHG(8|16|32|64)rm")>;
13871387

1388-
def SKLWriteResGroup145 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
1389-
let Latency = 11;
1390-
let NumMicroOps = 1;
1391-
let ResourceCycles = [1,3];
1392-
}
1393-
def : SchedAlias<WriteFDivX, SKLWriteResGroup145>; // TODO - convert to ZnWriteResFpuPair
1394-
13951388
def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort23]> {
13961389
let Latency = 11;
13971390
let NumMicroOps = 2;
@@ -1487,21 +1480,6 @@ def SKLWriteResGroup163 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
14871480
}
14881481
def: InstRW<[SKLWriteResGroup163], (instrs VCVTDQ2PDYrm)>;
14891482

1490-
def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
1491-
let Latency = 14;
1492-
let NumMicroOps = 1;
1493-
let ResourceCycles = [1,3];
1494-
}
1495-
def : SchedAlias<WriteFDiv64, SKLWriteResGroup166>; // TODO - convert to ZnWriteResFpuPair
1496-
def : SchedAlias<WriteFDiv64X, SKLWriteResGroup166>; // TODO - convert to ZnWriteResFpuPair
1497-
1498-
def SKLWriteResGroup166_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> {
1499-
let Latency = 14;
1500-
let NumMicroOps = 1;
1501-
let ResourceCycles = [1,5];
1502-
}
1503-
def : SchedAlias<WriteFDiv64Y, SKLWriteResGroup166_1>; // TODO - convert to ZnWriteResFpuPair
1504-
15051483
def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
15061484
let Latency = 14;
15071485
let NumMicroOps = 3;
@@ -1544,13 +1522,6 @@ def SKLWriteResGroup178 : SchedWriteRes<[SKLPort0156]> {
15441522
}
15451523
def: InstRW<[SKLWriteResGroup178], (instrs VZEROALL)>;
15461524

1547-
def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
1548-
let Latency = 17;
1549-
let NumMicroOps = 2;
1550-
let ResourceCycles = [1,1,5];
1551-
}
1552-
def : SchedAlias<WriteFDivXLd, SKLWriteResGroup179>; // TODO - convert to ZnWriteResFpuPair
1553-
15541525
def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
15551526
let Latency = 17;
15561527
let NumMicroOps = 15;
@@ -1572,27 +1543,13 @@ def SKLWriteResGroup185 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06
15721543
}
15731544
def: InstRW<[SKLWriteResGroup185], (instregex "RCR(8|16|32|64)mCL")>;
15741545

1575-
def SKLWriteResGroup186 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
1576-
let Latency = 19;
1577-
let NumMicroOps = 2;
1578-
let ResourceCycles = [1,1,4];
1579-
}
1580-
def : SchedAlias<WriteFDiv64Ld, SKLWriteResGroup186>; // TODO - convert to ZnWriteResFpuPair
1581-
15821546
def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> {
15831547
let Latency = 20;
15841548
let NumMicroOps = 1;
15851549
let ResourceCycles = [1];
15861550
}
15871551
def: InstRW<[SKLWriteResGroup189], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
15881552

1589-
def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
1590-
let Latency = 20;
1591-
let NumMicroOps = 2;
1592-
let ResourceCycles = [1,1,4];
1593-
}
1594-
def : SchedAlias<WriteFDiv64XLd, SKLWriteResGroup190>; // TODO - convert to ZnWriteResFpuPair
1595-
15961553
def SKLWriteResGroup192 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
15971554
let Latency = 20;
15981555
let NumMicroOps = 8;
@@ -1607,13 +1564,6 @@ def SKLWriteResGroup193 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort0156]> {
16071564
}
16081565
def: InstRW<[SKLWriteResGroup193], (instrs MWAITrr)>;
16091566

1610-
def SKLWriteResGroup195 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> {
1611-
let Latency = 21;
1612-
let NumMicroOps = 2;
1613-
let ResourceCycles = [1,1,8];
1614-
}
1615-
def : SchedAlias<WriteFDiv64YLd, SKLWriteResGroup195>; // TODO - convert to ZnWriteResFpuPair
1616-
16171567
def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0,SKLPort23]> {
16181568
let Latency = 22;
16191569
let NumMicroOps = 2;

llvm/lib/Target/X86/X86SchedSkylakeServer.td

Lines changed: 4 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -278,12 +278,12 @@ defm : SKXWriteResPair<WriteFMul64Y, [SKXPort01], 4, [1], 1, 7>;
278278
defm : SKXWriteResPair<WriteFMul64Z, [SKXPort05], 4, [1], 1, 7>;
279279

280280
defm : SKXWriteResPair<WriteFDiv, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
281-
//defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
281+
defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
282282
defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
283283
defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles.
284-
//defm : SKXWriteResPair<WriteFDiv64, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
285-
//defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles.
286-
//defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles.
284+
defm : SKXWriteResPair<WriteFDiv64, [SKXPort0,SKXFPDivider], 14, [1,4], 1, 5>; // 10-14 cycles. // Floating point division.
285+
defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,4], 1, 6>; // 10-14 cycles.
286+
defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,8], 1, 7>; // 10-14 cycles.
287287
defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles.
288288

289289
defm : SKXWriteResPair<WriteFSqrt, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
@@ -1874,13 +1874,6 @@ def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,
18741874
}
18751875
def: InstRW<[SKXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;
18761876

1877-
def SKXWriteResGroup159 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
1878-
let Latency = 11;
1879-
let NumMicroOps = 1;
1880-
let ResourceCycles = [1,3];
1881-
}
1882-
def : SchedAlias<WriteFDivX, SKXWriteResGroup159>; // TODO - convert to ZnWriteResFpuPair
1883-
18841877
def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> {
18851878
let Latency = 11;
18861879
let NumMicroOps = 2;
@@ -2044,21 +2037,6 @@ def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
20442037
def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2W128rm(b?)",
20452038
"VPERMT2W128rm(b?)")>;
20462039

2047-
def SKXWriteResGroup184 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
2048-
let Latency = 14;
2049-
let NumMicroOps = 1;
2050-
let ResourceCycles = [1,3];
2051-
}
2052-
def : SchedAlias<WriteFDiv64, SKXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair
2053-
def : SchedAlias<WriteFDiv64X, SKXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair
2054-
2055-
def SKXWriteResGroup184_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> {
2056-
let Latency = 14;
2057-
let NumMicroOps = 1;
2058-
let ResourceCycles = [1,5];
2059-
}
2060-
def : SchedAlias<WriteFDiv64Y, SKXWriteResGroup184_1>; // TODO - convert to ZnWriteResFpuPair
2061-
20622040
def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
20632041
let Latency = 14;
20642042
let NumMicroOps = 3;
@@ -2131,13 +2109,6 @@ def SKXWriteResGroup200 : SchedWriteRes<[SKXPort1, SKXPort05, SKXPort6]> {
21312109
}
21322110
def: InstRW<[SKXWriteResGroup200], (instrs VZEROALL)>;
21332111

2134-
def SKXWriteResGroup201 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
2135-
let Latency = 17;
2136-
let NumMicroOps = 2;
2137-
let ResourceCycles = [1,1,5];
2138-
}
2139-
def : SchedAlias<WriteFDivXLd, SKXWriteResGroup201>; // TODO - convert to ZnWriteResFpuPair
2140-
21412112
def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> {
21422113
let Latency = 17;
21432114
let NumMicroOps = 15;
@@ -2166,13 +2137,6 @@ def SKXWriteResGroup208 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06
21662137
}
21672138
def: InstRW<[SKXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>;
21682139

2169-
def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
2170-
let Latency = 19;
2171-
let NumMicroOps = 2;
2172-
let ResourceCycles = [1,1,4];
2173-
}
2174-
def : SchedAlias<WriteFDiv64Ld, SKXWriteResGroup209>; // TODO - convert to ZnWriteResFpuPair
2175-
21762140
def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort01]> {
21772141
let Latency = 22;
21782142
let NumMicroOps = 4;
@@ -2194,13 +2158,6 @@ def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> {
21942158
}
21952159
def: InstRW<[SKXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
21962160

2197-
def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
2198-
let Latency = 20;
2199-
let NumMicroOps = 2;
2200-
let ResourceCycles = [1,1,4];
2201-
}
2202-
def : SchedAlias<WriteFDiv64XLd, SKXWriteResGroup216>; // TODO - convert to ZnWriteResFpuPair
2203-
22042161
def SKXWriteGatherEVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
22052162
let Latency = 17;
22062163
let NumMicroOps = 5; // 2 uops perform multiple loads
@@ -2251,13 +2208,6 @@ def SKXWriteResGroup220 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort0156]> {
22512208
}
22522209
def: InstRW<[SKXWriteResGroup220], (instrs MWAITrr)>;
22532210

2254-
def SKXWriteResGroup222 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
2255-
let Latency = 21;
2256-
let NumMicroOps = 2;
2257-
let ResourceCycles = [1,1,8];
2258-
}
2259-
def : SchedAlias<WriteFDiv64YLd, SKXWriteResGroup222>; // TODO - convert to ZnWriteResFpuPair
2260-
22612211
def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
22622212
let Latency = 22;
22632213
let NumMicroOps = 2;

llvm/lib/Target/X86/X86ScheduleZnver1.td

Lines changed: 6 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -319,13 +319,13 @@ defm : ZnWriteResFpuPair<WriteCvtI2SD, [ZnFPU3], 5>;
319319
defm : ZnWriteResFpuPair<WriteCvtI2PD, [ZnFPU3], 5>;
320320
defm : ZnWriteResFpuPair<WriteCvtI2PDY, [ZnFPU3], 5>;
321321
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
322-
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
323-
defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>;
324-
//defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>;
322+
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 10, [3]>;
323+
defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 10, [3]>;
324+
defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 10, [6], 2>;
325325
defm : X86WriteResPairUnsupported<WriteFDivZ>;
326-
defm : ZnWriteResFpuPair<WriteFDiv64, [ZnFPU3], 15>;
327-
defm : ZnWriteResFpuPair<WriteFDiv64X, [ZnFPU3], 15>;
328-
//defm : ZnWriteResFpuPair<WriteFDiv64Y, [ZnFPU3], 15>;
326+
defm : ZnWriteResFpuPair<WriteFDiv64, [ZnFPU3], 13, [5]>;
327+
defm : ZnWriteResFpuPair<WriteFDiv64X, [ZnFPU3], 13, [5]>;
328+
defm : ZnWriteResFpuPair<WriteFDiv64Y, [ZnFPU3], 15, [9], 2>;
329329
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
330330
defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
331331
defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
@@ -1430,40 +1430,6 @@ def : InstRW<[ZnWriteSHA256RNDS2Ld], (instrs SHA256RNDS2rm)>;
14301430

14311431
//-- Arithmetic instructions --//
14321432

1433-
// VDIVPS.
1434-
// TODO - convert to ZnWriteResFpuPair
1435-
// y,y,y.
1436-
def ZnWriteVDIVPSYr : SchedWriteRes<[ZnFPU3]> {
1437-
let Latency = 12;
1438-
let ResourceCycles = [12];
1439-
}
1440-
def : SchedAlias<WriteFDivY, ZnWriteVDIVPSYr>;
1441-
1442-
// y,y,m256.
1443-
def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
1444-
let Latency = 19;
1445-
let NumMicroOps = 2;
1446-
let ResourceCycles = [1, 19];
1447-
}
1448-
def : SchedAlias<WriteFDivYLd, ZnWriteVDIVPSYLd>;
1449-
1450-
// VDIVPD.
1451-
// TODO - convert to ZnWriteResFpuPair
1452-
// y,y,y.
1453-
def ZnWriteVDIVPDY : SchedWriteRes<[ZnFPU3]> {
1454-
let Latency = 15;
1455-
let ResourceCycles = [15];
1456-
}
1457-
def : SchedAlias<WriteFDiv64Y, ZnWriteVDIVPDY>;
1458-
1459-
// y,y,m256.
1460-
def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
1461-
let Latency = 22;
1462-
let NumMicroOps = 2;
1463-
let ResourceCycles = [1,22];
1464-
}
1465-
def : SchedAlias<WriteFDiv64YLd, ZnWriteVDIVPDYLd>;
1466-
14671433
// DPPS.
14681434
// x,x,i / v,v,v,i.
14691435
def : SchedAlias<WriteDPPS, ZnWriteMicrocoded>;

0 commit comments

Comments
 (0)