Skip to content

Commit 8e31e0a

Browse files
feat: Add support for measuring per-TU indexing time (#152)
1 parent 1c4bbbe commit 8e31e0a

File tree

10 files changed

+187
-14
lines changed

10 files changed

+187
-14
lines changed

indexer/CliOptions.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ struct CliOptions {
2828
std::string scipClangExecutablePath;
2929
std::string temporaryOutputDir;
3030
std::string indexOutputPath;
31+
std::string statsFilePath;
3132

3233
std::chrono::seconds receiveTimeout;
3334
uint32_t numWorkers;
@@ -49,6 +50,8 @@ struct CliOptions {
4950

5051
std::string workerMode;
5152

53+
bool measureStatistics;
54+
5255
std::string preprocessorHistoryLogPath;
5356

5457
// An opaque ID provided by the driver for a worker to identify the

indexer/Driver.cc

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "indexer/Path.h"
4747
#include "indexer/RAII.h"
4848
#include "indexer/ScipExtras.h"
49+
#include "indexer/Statistics.h"
4950
#include "indexer/Timer.h"
5051

5152
namespace boost_ip = boost::interprocess;
@@ -116,6 +117,7 @@ struct DriverOptions {
116117
RootPath projectRootPath;
117118
AbsolutePath compdbPath;
118119
AbsolutePath indexOutputPath;
120+
AbsolutePath statsFilePath;
119121
size_t numWorkers;
120122
std::chrono::seconds receiveTimeout;
121123
bool deterministic;
@@ -132,7 +134,7 @@ struct DriverOptions {
132134
explicit DriverOptions(std::string driverId, const CliOptions &cliOpts)
133135
: workerExecutablePath(),
134136
projectRootPath(AbsolutePath("/"), RootKind::Project), compdbPath(),
135-
indexOutputPath(), numWorkers(cliOpts.numWorkers),
137+
indexOutputPath(), statsFilePath(), numWorkers(cliOpts.numWorkers),
136138
receiveTimeout(cliOpts.receiveTimeout),
137139
deterministic(cliOpts.deterministic),
138140
preprocessorRecordHistoryFilterRegex(
@@ -151,12 +153,12 @@ struct DriverOptions {
151153
RootPath{AbsolutePath{std::move(cwd)}, RootKind::Project};
152154

153155
auto setAbsolutePath = [this](const std::string &path, AbsolutePath &out) {
154-
if (llvm::sys::path::is_absolute(path)) {
155-
out = AbsolutePath(std::string(path));
156-
return;
157-
}
158-
out = this->projectRootPath.makeAbsolute(
159-
RootRelativePathRef(path, RootKind::Project));
156+
out = path.empty()
157+
? AbsolutePath()
158+
: (llvm::sys::path::is_absolute(path)
159+
? AbsolutePath(std::string(path))
160+
: this->projectRootPath.makeAbsolute(
161+
RootRelativePathRef(path, RootKind::Project)));
160162
};
161163

162164
// Strictly speaking, there is a TOCTOU problem here, as scip-clang
@@ -183,6 +185,7 @@ struct DriverOptions {
183185

184186
setAbsolutePath(cliOpts.indexOutputPath, this->indexOutputPath);
185187
setAbsolutePath(cliOpts.compdbPath, this->compdbPath);
188+
setAbsolutePath(cliOpts.statsFilePath, this->statsFilePath);
186189

187190
auto makeDirs = [](const StdPath &path, const char *name) {
188191
std::error_code error;
@@ -223,6 +226,9 @@ struct DriverOptions {
223226
if (this->deterministic) {
224227
args.push_back("--deterministic");
225228
}
229+
if (!this->statsFilePath.asStringRef().empty()) {
230+
args.push_back("--measure-statistics");
231+
}
226232
if (!this->preprocessorRecordHistoryFilterRegex.empty()) {
227233
args.push_back(fmt::format("--preprocessor-record-history-filter={}",
228234
this->preprocessorRecordHistoryFilterRegex));
@@ -346,6 +352,10 @@ class Scheduler final {
346352
public:
347353
using Process = boost::process::child;
348354

355+
const absl::flat_hash_map<JobId, IndexJob> &getJobMap() const {
356+
return this->allJobList;
357+
}
358+
349359
void checkInvariants() const {
350360
ENFORCE(
351361
this->wipJobs.size() + this->idleWorkers.size() == this->workers.size(),
@@ -566,6 +576,7 @@ class Driver {
566576
Scheduler scheduler;
567577
FileIndexingPlanner planner;
568578

579+
std::vector<std::pair<JobId, IndexingStatistics>> allStatistics;
569580
std::vector<AbsolutePath> indexPartPaths;
570581

571582
/// Total number of commands in the compilation database.
@@ -595,6 +606,36 @@ class Driver {
595606
}
596607
}
597608

609+
void emitStatsFile() {
610+
if (this->options.statsFilePath.asStringRef().empty()) {
611+
return;
612+
}
613+
std::vector<std::pair<uint32_t, StatsEntry>> perJobStats{};
614+
auto &jobMap = this->scheduler.getJobMap();
615+
for (auto &pair : this->allStatistics) {
616+
auto &[jobId, stats] = pair;
617+
auto semaJobId = JobId::newTask(jobId.taskId());
618+
auto it = jobMap.find(semaJobId);
619+
ENFORCE(it != jobMap.end());
620+
ENFORCE(it->second.kind == IndexJob::Kind::SemanticAnalysis);
621+
perJobStats.emplace_back(
622+
jobId.taskId(),
623+
StatsEntry{it->second.semanticAnalysis.command.Filename,
624+
std::move(stats)});
625+
}
626+
absl::c_sort(perJobStats, [](const auto &p1, const auto &p2) -> bool {
627+
ENFORCE(p1.first != p2.first,
628+
"got multiple StatEntry values for the same TU");
629+
return p1.first < p2.first;
630+
});
631+
std::vector<StatsEntry> stats{};
632+
for (auto &pair : perJobStats) {
633+
stats.emplace_back(std::move(pair.second));
634+
}
635+
StatsEntry::emitAll(std::move(stats),
636+
this->options.statsFilePath.asStringRef());
637+
}
638+
598639
void run() {
599640
ManualTimer total, indexing, merging;
600641
unsigned numTus;
@@ -607,6 +648,8 @@ class Driver {
607648
TIME_IT(merging, this->emitScipIndex());
608649
spdlog::debug("indexing complete; driver shutting down now, kthxbai");
609650
});
651+
this->emitStatsFile();
652+
610653
using secs = std::chrono::seconds;
611654
fmt::print("Finished indexing {} translation units in {:.1f}s (indexing: "
612655
"{:.1f}s, merging: {:.1f}s).\n",
@@ -822,11 +865,17 @@ class Driver {
822865
}));
823866
break;
824867
}
825-
case IndexJob::Kind::EmitIndex:
868+
case IndexJob::Kind::EmitIndex: {
869+
auto &result = response.result.emitIndex;
870+
if (!this->options.statsFilePath.asStringRef().empty()) {
871+
this->allStatistics.emplace_back(response.jobId,
872+
std::move(result.statistics));
873+
}
826874
this->indexPartPaths.emplace_back(
827875
std::move(response.result.emitIndex.indexPartPath));
828876
break;
829877
}
878+
}
830879
}
831880

832881
void processOneJobResult() {

indexer/IpcMessages.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,11 +147,12 @@ bool fromJSON(const llvm::json::Value &jsonValue, HashValue &h,
147147
return false;
148148
}
149149

150-
DERIVE_SERIALIZE_1_NEWTYPE(scip_clang::EmitIndexJobResult, indexPartPath)
150+
DERIVE_SERIALIZE_1_NEWTYPE(scip_clang::IndexingStatistics, totalTimeMicros)
151151
DERIVE_SERIALIZE_1_NEWTYPE(scip_clang::EmitIndexJobDetails, filesToBeIndexed)
152152
DERIVE_SERIALIZE_1_NEWTYPE(scip_clang::IpcTestMessage, content)
153153
DERIVE_SERIALIZE_1_NEWTYPE(scip_clang::SemanticAnalysisJobDetails, command)
154154

155+
DERIVE_SERIALIZE_2(scip_clang::EmitIndexJobResult, statistics, indexPartPath)
155156
DERIVE_SERIALIZE_2(scip_clang::PreprocessedFileInfo, path, hashValue)
156157
DERIVE_SERIALIZE_2(scip_clang::PreprocessedFileInfoMulti, path, hashValues)
157158
DERIVE_SERIALIZE_2(scip_clang::IndexJobRequest, id, job)

indexer/IpcMessages.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,13 @@ struct SemanticAnalysisJobResult {
148148
};
149149
SERIALIZABLE(SemanticAnalysisJobResult)
150150

151+
struct IndexingStatistics {
152+
uint64_t totalTimeMicros;
153+
};
154+
SERIALIZABLE(IndexingStatistics)
155+
151156
struct EmitIndexJobResult {
157+
IndexingStatistics statistics;
152158
AbsolutePath indexPartPath;
153159
};
154160
SERIALIZABLE(EmitIndexJobResult)

indexer/Statistics.cc

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#include <string_view>
2+
#include <vector>
3+
4+
#include "llvm/Support/JSON.h"
5+
6+
#include "indexer/Statistics.h"
7+
8+
namespace scip_clang {
9+
10+
llvm::json::Value toJSON(const StatsEntry &entry) {
11+
auto &stats = entry.stats;
12+
return llvm::json::Object{
13+
{"filepath", entry.path},
14+
{"stats",
15+
llvm::json::Object{
16+
{"total_time_s", double(stats.totalTimeMicros) / 1'000'000.0},
17+
}}};
18+
}
19+
20+
// static
21+
void StatsEntry::emitAll(std::vector<StatsEntry> &&stats,
22+
std::string_view path) {
23+
std::error_code error;
24+
llvm::raw_fd_ostream out(path, error);
25+
ENFORCE(!error);
26+
llvm::json::OStream jsonStream(out);
27+
jsonStream.array([&]() {
28+
for (auto &statEntry : stats) {
29+
jsonStream.value(llvm::json::Value(statEntry));
30+
}
31+
});
32+
jsonStream.flush();
33+
}
34+
35+
} // namespace scip_clang

indexer/Statistics.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#ifndef SCIP_CLANG_STATISTICS_H
2+
#define SCIP_CLANG_STATISTICS_H
3+
4+
#include "llvm/Support/JSON.h"
5+
6+
#include "indexer/IpcMessages.h"
7+
8+
namespace scip_clang {
9+
10+
struct StatsEntry {
11+
std::string path;
12+
IndexingStatistics stats;
13+
14+
static void emitAll(std::vector<StatsEntry> &&stats, std::string_view path);
15+
};
16+
17+
llvm::json::Value toJSON(const StatsEntry &entry);
18+
19+
} // namespace scip_clang
20+
21+
#endif

indexer/Worker.cc

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "indexer/Logging.h"
5252
#include "indexer/Path.h"
5353
#include "indexer/ScipExtras.h"
54+
#include "indexer/Statistics.h"
5455
#include "indexer/SymbolFormatter.h"
5556
#include "indexer/Timer.h"
5657
#include "indexer/Worker.h"
@@ -1072,15 +1073,17 @@ WorkerOptions WorkerOptions::fromCliOptions(const CliOptions &cliOptions) {
10721073
RootKind::Project};
10731074
WorkerMode mode;
10741075
IpcOptions ipcOptions;
1075-
StdPath compdbPath;
1076+
StdPath compdbPath{};
10761077
StdPath indexOutputPath{};
1078+
StdPath statsFilePath{};
10771079
if (cliOptions.workerMode == "ipc") {
10781080
mode = WorkerMode::Ipc;
10791081
ipcOptions = cliOptions.ipcOptions();
10801082
} else if (cliOptions.workerMode == "compdb") {
10811083
mode = WorkerMode::Compdb;
10821084
compdbPath = StdPath(cliOptions.compdbPath);
10831085
indexOutputPath = StdPath(cliOptions.indexOutputPath);
1086+
statsFilePath = StdPath(cliOptions.statsFilePath);
10841087
} else {
10851088
ENFORCE(cliOptions.workerMode == "testing");
10861089
mode = WorkerMode::Testing;
@@ -1090,8 +1093,10 @@ WorkerOptions WorkerOptions::fromCliOptions(const CliOptions &cliOptions) {
10901093
ipcOptions,
10911094
compdbPath,
10921095
indexOutputPath,
1096+
statsFilePath,
10931097
cliOptions.logLevel,
10941098
cliOptions.deterministic,
1099+
cliOptions.measureStatistics,
10951100
PreprocessorHistoryRecordingOptions{
10961101
cliOptions.preprocessorRecordHistoryFilterRegex,
10971102
cliOptions.preprocessorHistoryLogPath, false, ""},
@@ -1101,7 +1106,7 @@ WorkerOptions WorkerOptions::fromCliOptions(const CliOptions &cliOptions) {
11011106

11021107
Worker::Worker(WorkerOptions &&options)
11031108
: options(std::move(options)), messageQueues(), compileCommands(),
1104-
commandIndex(0), recorder() {
1109+
commandIndex(0), recorder(), statistics() {
11051110
switch (this->options.mode) {
11061111
case WorkerMode::Ipc:
11071112
this->messageQueues = std::make_unique<MessageQueuePair>(
@@ -1121,6 +1126,9 @@ Worker::Worker(WorkerOptions &&options)
11211126
break;
11221127
}
11231128

1129+
// All initialization unrelated to preprocessor recording should be
1130+
// completed here.
1131+
11241132
auto &recordingOptions = this->options.recordingOptions;
11251133
HeaderFilter filter(std::string(recordingOptions.filterRegex));
11261134
if (filter.isIdentity()) {
@@ -1222,6 +1230,9 @@ void Worker::sendResult(JobId requestId, IndexJobResult &&result) {
12221230

12231231
Worker::ReceiveStatus Worker::processTranslationUnitAndRespond(
12241232
IndexJobRequest &&semanticAnalysisRequest) {
1233+
ManualTimer indexingTimer{};
1234+
indexingTimer.start();
1235+
12251236
SemanticAnalysisJobResult semaResult{};
12261237
auto semaRequestId = semanticAnalysisRequest.id;
12271238
auto tuMainFilePath =
@@ -1281,12 +1292,21 @@ Worker::ReceiveStatus Worker::processTranslationUnitAndRespond(
12811292
emitIndexRequestId.taskId(),
12821293
this->ipcOptions().workerId));
12831294
this->emitIndex(std::move(scipIndex), outputPath);
1295+
indexingTimer.stop();
1296+
1297+
this->statistics.totalTimeMicros =
1298+
uint64_t(indexingTimer.value<std::chrono::microseconds>());
12841299

12851300
if (this->options.mode == WorkerMode::Compdb) {
1301+
if (!this->options.statsFilePath.empty()) {
1302+
StatsEntry::emitAll({StatsEntry{tuMainFilePath, this->statistics}},
1303+
this->options.statsFilePath.c_str());
1304+
}
12861305
return ReceiveStatus::OK;
12871306
}
12881307

1289-
EmitIndexJobResult emitIndexResult{AbsolutePath{outputPath.string()}};
1308+
EmitIndexJobResult emitIndexResult{this->statistics,
1309+
AbsolutePath{outputPath.string()}};
12901310

12911311
this->sendResult(emitIndexRequestId,
12921312
IndexJobResult{.kind = IndexJob::Kind::EmitIndex,

indexer/Worker.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,11 @@ struct WorkerOptions {
5757
IpcOptions ipcOptions; // only valid if mode == Ipc
5858
StdPath compdbPath; // only valid if mode == Compdb
5959
StdPath indexOutputPath; // only valid if mode == Compdb
60+
StdPath statsFilePath; // only valid if mode == Compdb
6061

6162
spdlog::level::level_enum logLevel;
6263
bool deterministic;
64+
bool measureStatistics;
6365
PreprocessorHistoryRecordingOptions recordingOptions;
6466
StdPath temporaryOutputDir;
6567
std::string workerFault;
@@ -97,6 +99,8 @@ class Worker final {
9799
PreprocessorHistoryRecorder>>
98100
recorder;
99101

102+
IndexingStatistics statistics;
103+
100104
public:
101105
Worker(WorkerOptions &&options);
102106
void run();

indexer/main.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,13 @@ static scip_clang::CliOptions parseArguments(int argc, char *argv[]) {
5050
cxxopts::value<std::string>(cliOptions.temporaryOutputDir));
5151
parser.add_options("")("h,help", "Show help text", cxxopts::value<bool>());
5252
// TODO(def: add-version): Add a --version flag
53+
parser.add_options("Advanced")(
54+
"print-statistics-path",
55+
"Print indexing related statistics in JSON format."
56+
" Caution: Timing information for individual TUs should not be compared"
57+
" directly across runs, as non-determinism may affect the number of files"
58+
" skipped by individual indexing jobs.",
59+
cxxopts::value<std::string>(cliOptions.statsFilePath));
5360
parser.add_options("Advanced")(
5461
"receive-timeout-seconds",
5562
"How long should the driver wait for a worker before marking it as timed out?",
@@ -75,6 +82,10 @@ static scip_clang::CliOptions parseArguments(int argc, char *argv[]) {
7582
"help-all",
7683
"Show all command-line flags, including internal ones and ones for testing.",
7784
cxxopts::value<bool>());
85+
parser.add_options("Advanced")(
86+
"measure-statistics",
87+
"[worker-only] Measure various statistics related to indexing",
88+
cxxopts::value<bool>(cliOptions.measureStatistics));
7889
parser.add_options("Internal")(
7990
"preprocessor-history-log-path",
8091
"[worker-only] Path to log preprocessor history, if applicable.",

0 commit comments

Comments
 (0)