Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 0336ede

Browse files
author
Theodoros Theodoridis
committed
[genetic tuning] Create jobs only for new candidates
Candidates that survive across generations do not need to be benchmarked again, so no compilation or GPU jobs have to be created for them.
1 parent f21e7c3 commit 0336ede

File tree

2 files changed

+68
-48
lines changed

2 files changed

+68
-48
lines changed

src/autotuner/genetic_search.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,8 @@ void GeneticSearch::selectSurvivors() {
402402
std::min(selectionPool.size(), kMaxPopulationSize),
403403
std::back_inserter(population),
404404
[](const std::unique_ptr<CandidateConfiguration>& c) {
405-
return make_unique<CandidateConfiguration>(c->configuration);
405+
CHECK(c);
406+
return make_unique<CandidateConfiguration>(*c);
406407
});
407408

408409
if (selectionPool.size() < kMaxPopulationSize) {

src/autotuner/genetic_tuning_harness.cc

Lines changed: 66 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ void GeneticTunerHarness::doCompile(
313313
if (current >= population.size()) {
314314
break;
315315
}
316+
316317
auto& pConf = population.at(current);
317318
auto options = makeOptions(*pConf);
318319
try {
@@ -433,7 +434,7 @@ void GeneticTunerHarness::doGpuWork(
433434
LOG_LINE_BY_LINE(INFO, ssInfo);
434435
}
435436

436-
auto runtimes =
437+
std::vector<Duration> runtimes =
437438
retrieveCachedRuntimes(engine, kKernelName_, inputs, outputs, options);
438439
if (runtimes.empty()) {
439440
try {
@@ -527,59 +528,77 @@ void GeneticTunerHarness::runOneGeneration(size_t generation) {
527528

528529
auto setUpJobsAndRun = [&](GeneticSearch::Population& population,
529530
const std::string& printerText) {
530-
// Initialize for this round
531-
currentCompilationJob_.store(0);
532-
numEvaluations_.store(0);
533-
readyToEvaluate_.resize(0);
534-
for (size_t i = 0; i < population.size(); ++i) {
535-
readyToEvaluate_.emplace_back();
536-
readyToEvaluate_[i].store(false);
537-
}
538-
Printer printer(
539-
printerText,
540-
readyToEvaluate_.size(),
541-
currentCompilationJob_,
542-
numEvaluations_);
543-
auto logGenerations = FLAGS_tuner_gen_log_generations;
544-
ScopeGuard sgPrinter([logGenerations, &printer]() {
545-
printer.stop();
546-
if (logGenerations) {
547-
printer.printAll();
548-
}
549-
});
550-
551-
// Just spawn and join new threads for each generation
552-
std::vector<std::thread> cpuCompilationThreads;
553-
cpuCompilationThreads.reserve(FLAGS_tuner_threads);
554-
ScopeGuard sgCompilationThreads([&cpuCompilationThreads]() {
555-
for (auto& cpuCompilationThread : cpuCompilationThreads) {
556-
cpuCompilationThread.join();
531+
// Most candidates should have been evaluated during the previous
532+
// generation's selection phase.
533+
// There are two exceptions:
534+
// 1) the 1st generation
535+
// 2) too many invalid configurations were previously encountered and the
536+
// valid ones were not enough to form a new generation.
537+
auto firstNew = std::partition(
538+
population.begin(),
539+
population.end(),
540+
[](const std::unique_ptr<CandidateConfiguration>& c) {
541+
return c->runtime != Duration::zero();
542+
});
543+
GeneticSearch::Population newCandidates(
544+
std::distance(firstNew, population.end()));
545+
std::move(firstNew, population.end(), newCandidates.begin());
546+
{
547+
// Initialize for this round
548+
currentCompilationJob_.store(0);
549+
numEvaluations_.store(0);
550+
readyToEvaluate_.resize(0);
551+
for (size_t i = 0; i < newCandidates.size(); ++i) {
552+
readyToEvaluate_.emplace_back();
553+
readyToEvaluate_[i].store(false);
557554
}
558-
});
559-
for (int i = 0; i < FLAGS_tuner_threads; ++i) {
560-
cpuCompilationThreads.emplace_back([this, &engine, &population]() {
561-
this->doCompile(engine, population);
555+
Printer printer(
556+
printerText,
557+
readyToEvaluate_.size(),
558+
currentCompilationJob_,
559+
numEvaluations_);
560+
auto logGenerations = FLAGS_tuner_gen_log_generations;
561+
ScopeGuard sgPrinter([logGenerations, &printer]() {
562+
printer.stop();
563+
if (logGenerations) {
564+
printer.printAll();
565+
}
562566
});
563-
}
564567

565-
// Just spawn and join new threads for each generation
566-
std::vector<std::thread> gpuWorkerThreads;
567-
gpuWorkerThreads.reserve(gpus.size());
568-
ScopeGuard sgGpuWorkerThreads([&gpuWorkerThreads]() {
569-
for (auto& gpuWorkerThread : gpuWorkerThreads) {
570-
gpuWorkerThread.join();
568+
// Just spawn and join new threads for each generation
569+
std::vector<std::thread> cpuCompilationThreads;
570+
cpuCompilationThreads.reserve(FLAGS_tuner_threads);
571+
ScopeGuard sgCompilationThreads([&cpuCompilationThreads]() {
572+
for (auto& cpuCompilationThread : cpuCompilationThreads) {
573+
cpuCompilationThread.join();
574+
}
575+
});
576+
for (int i = 0; i < FLAGS_tuner_threads; ++i) {
577+
cpuCompilationThreads.emplace_back([this, &engine, &newCandidates]() {
578+
this->doCompile(engine, newCandidates);
579+
});
580+
}
581+
582+
// Just spawn and join new threads for each generation
583+
std::vector<std::thread> gpuWorkerThreads;
584+
gpuWorkerThreads.reserve(gpus.size());
585+
ScopeGuard sgGpuWorkerThreads([&gpuWorkerThreads]() {
586+
for (auto& gpuWorkerThread : gpuWorkerThreads) {
587+
gpuWorkerThread.join();
588+
}
589+
});
590+
for (auto gpu : gpus) {
591+
gpuWorkerThreads.emplace_back(
592+
[this, gpu, &engine, &newCandidates, &printer]() {
593+
this->doGpuWork(gpu, engine, newCandidates, printer);
594+
});
571595
}
572-
});
573-
for (auto gpu : gpus) {
574-
gpuWorkerThreads.emplace_back(
575-
[this, gpu, &engine, &population, &printer]() {
576-
this->doGpuWork(gpu, engine, population, printer);
577-
});
578596
}
579597
// At this point everything is synchronized because the scope guards above have gone out of scope
598+
std::move(newCandidates.begin(), newCandidates.end(), firstNew);
580599
};
581-
std::cout << "Generation " << generation << std::endl;
582-
setUpJobsAndRun(tuner_->population, "Population");
600+
std::cout << "Generation " << generation << ':' << std::endl;
601+
setUpJobsAndRun(tuner_->population, "New Candidates");
583602
tuner_->generateSelectionPool();
584603
setUpJobsAndRun(tuner_->selectionPool, "Selection Pool");
585604
tuner_->selectSurvivors();

0 commit comments

Comments
 (0)