Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit bac0864

Browse files
author
Theodoros Theodoridis
committed
[Autotuner] Create jobs only for new candidates
Candidates that survive across generations do not need to be benchmarked again, so no compilation or GPU jobs have to be created for them.
1 parent 6cba414 commit bac0864

File tree

3 files changed

+63
-43
lines changed

3 files changed

+63
-43
lines changed

tc/autotuner/autotuner-inl.h

Lines changed: 57 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -248,52 +248,68 @@ void TuningHarness<Backend>::runOneIteration(
248248
CHECK(executors_.empty());
249249
CHECK(configurations_.empty());
250250
auto& candidates = searchStrategy.candidatesOfStep(step);
251-
// Initialize for this round
252-
currentCompilationJob_.store(0);
253-
numEvaluations_.store(0);
254-
Printer printer(
255-
iteration,
256-
step,
257-
candidates.size(),
258-
currentCompilationJob_,
259-
numEvaluations_);
260-
auto logIterations = FLAGS_tuner_gen_log_generations;
261-
ScopeGuard sgPrinter([logIterations, &printer]() {
262-
printer.stop();
263-
if (logIterations) {
264-
printer.printAll();
265-
}
251+
auto firstNew = std::partition(
252+
candidates.begin(),
253+
candidates.end(),
254+
[](const std::unique_ptr<CandidateConfiguration>& c) {
255+
return c->runtime != Duration::zero();
256+
});
257+
GeneticSearch::Population newCandidates(
258+
std::distance(firstNew, candidates.end()));
259+
std::move(firstNew, candidates.end(), newCandidates.begin());
260+
ScopeGuard candidatesSG([&]() {
261+
std::move(newCandidates.begin(), newCandidates.end(), firstNew);
266262
});
267263

268-
// Just spawn and join new threads for each iteration
269-
std::vector<std::thread> cpuCompilationThreads;
270-
cpuCompilationThreads.reserve(FLAGS_tuner_threads);
271-
ScopeGuard sgCompilationThreads([&cpuCompilationThreads]() {
272-
for (auto& cpuCompilationThread : cpuCompilationThreads) {
273-
cpuCompilationThread.join();
274-
}
275-
});
276-
for (size_t i = 0; i < FLAGS_tuner_threads; ++i) {
277-
cpuCompilationThreads.emplace_back(
278-
[this, &candidates]() { this->doCompile(candidates); });
279-
}
264+
if (not newCandidates.empty()) {
265+
auto populationSize = newCandidates.size();
266+
// Initialize for this round
267+
currentCompilationJob_.store(0);
268+
numEvaluations_.store(0);
269+
Printer printer(
270+
iteration,
271+
step,
272+
populationSize,
273+
currentCompilationJob_,
274+
numEvaluations_);
275+
auto logIterations = FLAGS_tuner_gen_log_generations;
276+
ScopeGuard sgPrinter([logIterations, &printer]() {
277+
printer.stop();
278+
if (logIterations) {
279+
printer.printAll();
280+
}
281+
});
280282

281-
// Just spawn and join new threads for each device
282-
std::vector<std::thread> workerThreads;
283-
workerThreads.reserve(devices.size());
284-
LOG_IF(INFO, tc::FLAGS_debug_tuner)
285-
<< "Start evaluation: " << devices.size() << " " << executors_.size()
286-
<< " " << configurations_.size();
287-
ScopeGuard sgDeviceWorkerThreads([&workerThreads]() {
288-
for (auto& workerThread : workerThreads) {
289-
workerThread.join();
283+
// Just spawn and join new threads for each iteration
284+
std::vector<std::thread> cpuCompilationThreads;
285+
cpuCompilationThreads.reserve(FLAGS_tuner_threads);
286+
ScopeGuard sgCompilationThreads([&cpuCompilationThreads]() {
287+
for (auto& cpuCompilationThread : cpuCompilationThreads) {
288+
cpuCompilationThread.join();
289+
}
290+
});
291+
for (size_t i = 0; i < FLAGS_tuner_threads; ++i) {
292+
cpuCompilationThreads.emplace_back(
293+
[this, &newCandidates]() { this->doCompile(newCandidates); });
290294
}
291-
});
292-
auto populationSize = candidates.size();
293-
for (auto device : devices) {
294-
workerThreads.emplace_back([this, device, populationSize, &printer]() {
295-
this->doEvaluate(device, populationSize, printer);
295+
296+
// Just spawn and join new threads for each device
297+
std::vector<std::thread> workerThreads;
298+
workerThreads.reserve(devices.size());
299+
LOG_IF(INFO, tc::FLAGS_debug_tuner)
300+
<< "Start evaluation: " << devices.size() << " "
301+
<< executors_.size() << " " << configurations_.size();
302+
ScopeGuard sgDeviceWorkerThreads([&workerThreads]() {
303+
for (auto& workerThread : workerThreads) {
304+
workerThread.join();
305+
}
296306
});
307+
for (auto device : devices) {
308+
workerThreads.emplace_back(
309+
[this, device, populationSize, &printer]() {
310+
this->doEvaluate(device, populationSize, printer);
311+
});
312+
}
297313
}
298314
}
299315
searchStrategy.finishStep(step);

tc/autotuner/genetic_search.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,6 @@ void GeneticSearch::generateSelectionPool() {
348348
if (resetPopulationIfNotEnoughCandidates()) {
349349
return;
350350
}
351-
breed();
352351
selectionPool.clear();
353352
selectionPool.emplace_back(make_unique<CandidateConfiguration>(lastBestConf));
354353
breed();
@@ -366,7 +365,8 @@ void GeneticSearch::selectSurvivors() {
366365
selectionPool.begin() + std::min(selectionPool.size(), maxPopulationSize),
367366
std::back_inserter(population),
368367
[](const std::unique_ptr<CandidateConfiguration>& c) {
369-
return make_unique<CandidateConfiguration>(c->configuration);
368+
CHECK(c);
369+
return make_unique<CandidateConfiguration>(*c);
370370
});
371371

372372
if (selectionPool.size() < maxPopulationSize) {

tc/core/utils/time.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ struct Duration {
7070
return lhs.val_ == rhs.val_;
7171
}
7272

73+
friend inline bool operator!=(const Duration& lhs, const Duration& rhs) {
74+
return lhs.val_ != rhs.val_;
75+
}
76+
7377
private:
7478
std::chrono::microseconds val_;
7579
};

0 commit comments

Comments
 (0)