Skip to content

Commit e6c9cd9

Browse files
authored
[BOLT] Drop parsing sample PC when processing LBR perf data (llvm#123420)
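Remove the option to generate AutoFDO data (`autofdo`, unused) and the `use-event-pc` option (not beneficial). With both gone, the sample PC (`ip` field) no longer has to be requested from `perf script` or parsed for LBR samples, which cuts perf2bolt time for an 11GB perf.data by 40s (11:10 -> 10:30).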
1 parent 485c80e commit e6c9cd9

File tree

2 files changed

+9
-104
lines changed

2 files changed

+9
-104
lines changed

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ class DataAggregator : public DataReader {
8080
private:
8181
struct PerfBranchSample {
8282
SmallVector<LBREntry, 32> LBR;
83-
uint64_t PC;
8483
};
8584

8685
struct PerfBasicSample {
@@ -334,9 +333,6 @@ class DataAggregator : public DataReader {
334333
/// Process all branch events.
335334
void processBranchEvents();
336335

337-
/// This member function supports generating data for AutoFDO LLVM tools.
338-
std::error_code writeAutoFDOData(StringRef OutputFilename);
339-
340336
/// Parse the full output generated by perf script to report non-LBR samples.
341337
std::error_code parseBasicEvents();
342338

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 9 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -108,15 +108,6 @@ TimeAggregator("time-aggr",
108108
cl::ZeroOrMore,
109109
cl::cat(AggregatorCategory));
110110

111-
static cl::opt<bool>
112-
UseEventPC("use-event-pc",
113-
cl::desc("use event PC in combination with LBR sampling"),
114-
cl::cat(AggregatorCategory));
115-
116-
static cl::opt<bool> WriteAutoFDOData(
117-
"autofdo", cl::desc("generate autofdo textual data instead of bolt data"),
118-
cl::cat(AggregatorCategory));
119-
120111
} // namespace opts
121112

122113
namespace {
@@ -187,15 +178,13 @@ void DataAggregator::start() {
187178
/*Wait = */false);
188179
} else if (!opts::ITraceAggregation.empty()) {
189180
std::string ItracePerfScriptArgs = llvm::formatv(
190-
"script -F pid,ip,brstack --itrace={0}", opts::ITraceAggregation);
181+
"script -F pid,brstack --itrace={0}", opts::ITraceAggregation);
191182
launchPerfProcess("branch events with itrace", MainEventsPPI,
192183
ItracePerfScriptArgs.c_str(),
193184
/*Wait = */ false);
194185
} else {
195-
launchPerfProcess("branch events",
196-
MainEventsPPI,
197-
"script -F pid,ip,brstack",
198-
/*Wait = */false);
186+
launchPerfProcess("branch events", MainEventsPPI, "script -F pid,brstack",
187+
/*Wait = */ false);
199188
}
200189

201190
// Note: we launch script for mem events regardless of the option, as the
@@ -381,67 +370,6 @@ void DataAggregator::parsePreAggregated() {
381370
}
382371
}
383372

384-
std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
385-
outs() << "PERF2BOLT: writing data for autofdo tools...\n";
386-
NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
387-
TimerGroupDesc, opts::TimeAggregator);
388-
389-
std::error_code EC;
390-
raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
391-
if (EC)
392-
return EC;
393-
394-
// Format:
395-
// number of unique traces
396-
// from_1-to_1:count_1
397-
// from_2-to_2:count_2
398-
// ......
399-
// from_n-to_n:count_n
400-
// number of unique sample addresses
401-
// addr_1:count_1
402-
// addr_2:count_2
403-
// ......
404-
// addr_n:count_n
405-
// number of unique LBR entries
406-
// src_1->dst_1:count_1
407-
// src_2->dst_2:count_2
408-
// ......
409-
// src_n->dst_n:count_n
410-
411-
const uint64_t FirstAllocAddress = this->BC->FirstAllocAddress;
412-
413-
// AutoFDO addresses are relative to the first allocated loadable program
414-
// segment
415-
auto filterAddress = [&FirstAllocAddress](uint64_t Address) -> uint64_t {
416-
if (Address < FirstAllocAddress)
417-
return 0;
418-
return Address - FirstAllocAddress;
419-
};
420-
421-
OutFile << FallthroughLBRs.size() << "\n";
422-
for (const auto &[Trace, Info] : FallthroughLBRs) {
423-
OutFile << formatv("{0:x-}-{1:x-}:{2}\n", filterAddress(Trace.From),
424-
filterAddress(Trace.To),
425-
Info.InternCount + Info.ExternCount);
426-
}
427-
428-
OutFile << BasicSamples.size() << "\n";
429-
for (const auto [PC, HitCount] : BasicSamples)
430-
OutFile << formatv("{0:x-}:{1}\n", filterAddress(PC), HitCount);
431-
432-
OutFile << BranchLBRs.size() << "\n";
433-
for (const auto &[Trace, Info] : BranchLBRs) {
434-
OutFile << formatv("{0:x-}->{1:x-}:{2}\n", filterAddress(Trace.From),
435-
filterAddress(Trace.To), Info.TakenCount);
436-
}
437-
438-
outs() << "PERF2BOLT: wrote " << FallthroughLBRs.size() << " unique traces, "
439-
<< BasicSamples.size() << " sample addresses and " << BranchLBRs.size()
440-
<< " unique branches to " << OutputFilename << "\n";
441-
442-
return std::error_code();
443-
}
444-
445373
void DataAggregator::filterBinaryMMapInfo() {
446374
if (opts::FilterPID) {
447375
auto MMapInfoIter = BinaryMMapInfo.find(opts::FilterPID);
@@ -583,15 +511,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
583511
(opts::BasicAggregation && parseBasicEvents()))
584512
errs() << "PERF2BOLT: failed to parse samples\n";
585513

586-
// We can finish early if the goal is just to generate data for autofdo
587-
if (opts::WriteAutoFDOData) {
588-
if (std::error_code EC = writeAutoFDOData(opts::OutputFilename))
589-
errs() << "Error writing autofdo data to file: " << EC.message() << "\n";
590-
591-
deleteTempFiles();
592-
exit(0);
593-
}
594-
595514
// Special handling for memory events
596515
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
597516
return Error::success();
@@ -1158,14 +1077,6 @@ ErrorOr<DataAggregator::PerfBranchSample> DataAggregator::parseBranchSample() {
11581077
return make_error_code(errc::no_such_process);
11591078
}
11601079

1161-
while (checkAndConsumeFS()) {
1162-
}
1163-
1164-
ErrorOr<uint64_t> PCRes = parseHexField(FieldSeparator, true);
1165-
if (std::error_code EC = PCRes.getError())
1166-
return EC;
1167-
Res.PC = PCRes.get();
1168-
11691080
if (checkAndConsumeNewLine())
11701081
return Res;
11711082

@@ -1472,9 +1383,9 @@ std::error_code DataAggregator::printLBRHeatMap() {
14721383
uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
14731384
bool NeedsSkylakeFix) {
14741385
uint64_t NumTraces{0};
1475-
// LBRs are stored in reverse execution order. NextPC refers to the next
1476-
// recorded executed PC.
1477-
uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
1386+
// LBRs are stored in reverse execution order. NextLBR refers to the next
1387+
// executed branch record.
1388+
const LBREntry *NextLBR = nullptr;
14781389
uint32_t NumEntry = 0;
14791390
for (const LBREntry &LBR : Sample.LBR) {
14801391
++NumEntry;
@@ -1486,10 +1397,10 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
14861397
// chronological order)
14871398
if (NeedsSkylakeFix && NumEntry <= 2)
14881399
continue;
1489-
if (NextPC) {
1400+
if (NextLBR) {
14901401
// Record fall-through trace.
14911402
const uint64_t TraceFrom = LBR.To;
1492-
const uint64_t TraceTo = NextPC;
1403+
const uint64_t TraceTo = NextLBR->From;
14931404
const BinaryFunction *TraceBF =
14941405
getBinaryFunctionContainingAddress(TraceFrom);
14951406
if (TraceBF && TraceBF->containsAddress(TraceTo)) {
@@ -1524,7 +1435,7 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
15241435
}
15251436
++NumTraces;
15261437
}
1527-
NextPC = LBR.From;
1438+
NextLBR = &LBR;
15281439

15291440
uint64_t From = getBinaryFunctionContainingAddress(LBR.From) ? LBR.From : 0;
15301441
uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
@@ -1561,8 +1472,6 @@ std::error_code DataAggregator::parseBranchEvents() {
15611472
++NumSamples;
15621473

15631474
PerfBranchSample &Sample = SampleRes.get();
1564-
if (opts::WriteAutoFDOData)
1565-
++BasicSamples[Sample.PC];
15661475

15671476
if (Sample.LBR.empty()) {
15681477
++NumSamplesNoLBR;

0 commit comments

Comments
 (0)