Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit b6bcef2

Browse files
author
Theodoros Theodoridis
committed
[genetic tuning] Create jobs only for new candidates
Candidates that survive across generations need not be benchmarked again, so no compilation or GPU jobs have to be created for them.
1 parent 02a7aaa commit b6bcef2

File tree

2 files changed

+68
-48
lines changed

2 files changed

+68
-48
lines changed

src/autotuner/genetic_search.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,8 @@ void GeneticSearch::selectSurvivors() {
402402
std::min(selectionPool.size(), kMaxPopulationSize),
403403
std::back_inserter(population),
404404
[](const std::unique_ptr<CandidateConfiguration>& c) {
405-
return make_unique<CandidateConfiguration>(c->configuration);
405+
CHECK(c);
406+
return make_unique<CandidateConfiguration>(*c);
406407
});
407408

408409
if (selectionPool.size() < kMaxPopulationSize) {

src/autotuner/genetic_tuning_harness.cc

Lines changed: 66 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ void GeneticTunerHarness::doCompile(
313313
if (current >= population.size()) {
314314
break;
315315
}
316+
316317
auto& pConf = population.at(current);
317318
auto options = makeOptions(*pConf);
318319
try {
@@ -431,7 +432,7 @@ void GeneticTunerHarness::doGpuWork(
431432
LOG_LINE_BY_LINE(INFO, ssInfo);
432433
}
433434

434-
auto runtimes =
435+
std::vector<Duration> runtimes =
435436
retrieveCachedRuntimes(engine, kKernelName_, inputs, outputs, options);
436437
if (runtimes.empty()) {
437438
try {
@@ -525,59 +526,77 @@ void GeneticTunerHarness::runOneGeneration(size_t generation) {
525526

526527
auto setUpJobsAndRun = [&](GeneticSearch::Population& population,
527528
const std::string& printerText) {
528-
// Initialize for this round
529-
currentCompilationJob_.store(0);
530-
numEvaluations_.store(0);
531-
readyToEvaluate_.resize(0);
532-
for (size_t i = 0; i < population.size(); ++i) {
533-
readyToEvaluate_.emplace_back();
534-
readyToEvaluate_[i].store(false);
535-
}
536-
Printer printer(
537-
printerText,
538-
readyToEvaluate_.size(),
539-
currentCompilationJob_,
540-
numEvaluations_);
541-
auto logGenerations = FLAGS_tuner_gen_log_generations;
542-
ScopeGuard sgPrinter([logGenerations, &printer]() {
543-
printer.stop();
544-
if (logGenerations) {
545-
printer.printAll();
546-
}
547-
});
548-
549-
// Just spawn and join new threads for each generation
550-
std::vector<std::thread> cpuCompilationThreads;
551-
cpuCompilationThreads.reserve(FLAGS_tuner_threads);
552-
ScopeGuard sgCompilationThreads([&cpuCompilationThreads]() {
553-
for (auto& cpuCompilationThread : cpuCompilationThreads) {
554-
cpuCompilationThread.join();
529+
// Most candidates should have been evaluated during the previous
530+
// generation's selection phase.
531+
// There are two exceptions:
532+
// 1) the 1st generation
533+
// 2) too many invalid configurations were previously encountered and the
534+
// valid ones were not enough to form a new generation.
535+
auto firstNew = std::partition(
536+
population.begin(),
537+
population.end(),
538+
[](const std::unique_ptr<CandidateConfiguration>& c) {
539+
return c->runtime != Duration::zero();
540+
});
541+
GeneticSearch::Population newCandidates(
542+
std::distance(firstNew, population.end()));
543+
std::move(firstNew, population.end(), newCandidates.begin());
544+
{
545+
// Initialize for this round
546+
currentCompilationJob_.store(0);
547+
numEvaluations_.store(0);
548+
readyToEvaluate_.resize(0);
549+
for (size_t i = 0; i < newCandidates.size(); ++i) {
550+
readyToEvaluate_.emplace_back();
551+
readyToEvaluate_[i].store(false);
555552
}
556-
});
557-
for (int i = 0; i < FLAGS_tuner_threads; ++i) {
558-
cpuCompilationThreads.emplace_back([this, &engine, &population]() {
559-
this->doCompile(engine, population);
553+
Printer printer(
554+
printerText,
555+
readyToEvaluate_.size(),
556+
currentCompilationJob_,
557+
numEvaluations_);
558+
auto logGenerations = FLAGS_tuner_gen_log_generations;
559+
ScopeGuard sgPrinter([logGenerations, &printer]() {
560+
printer.stop();
561+
if (logGenerations) {
562+
printer.printAll();
563+
}
560564
});
561-
}
562565

563-
// Just spawn and join new threads for each generation
564-
std::vector<std::thread> gpuWorkerThreads;
565-
gpuWorkerThreads.reserve(gpus.size());
566-
ScopeGuard sgGpuWorkerThreads([&gpuWorkerThreads]() {
567-
for (auto& gpuWorkerThread : gpuWorkerThreads) {
568-
gpuWorkerThread.join();
566+
// Just spawn and join new threads for each generation
567+
std::vector<std::thread> cpuCompilationThreads;
568+
cpuCompilationThreads.reserve(FLAGS_tuner_threads);
569+
ScopeGuard sgCompilationThreads([&cpuCompilationThreads]() {
570+
for (auto& cpuCompilationThread : cpuCompilationThreads) {
571+
cpuCompilationThread.join();
572+
}
573+
});
574+
for (int i = 0; i < FLAGS_tuner_threads; ++i) {
575+
cpuCompilationThreads.emplace_back([this, &engine, &newCandidates]() {
576+
this->doCompile(engine, newCandidates);
577+
});
578+
}
579+
580+
// Just spawn and join new threads for each generation
581+
std::vector<std::thread> gpuWorkerThreads;
582+
gpuWorkerThreads.reserve(gpus.size());
583+
ScopeGuard sgGpuWorkerThreads([&gpuWorkerThreads]() {
584+
for (auto& gpuWorkerThread : gpuWorkerThreads) {
585+
gpuWorkerThread.join();
586+
}
587+
});
588+
for (auto gpu : gpus) {
589+
gpuWorkerThreads.emplace_back(
590+
[this, gpu, &engine, &newCandidates, &printer]() {
591+
this->doGpuWork(gpu, engine, newCandidates, printer);
592+
});
569593
}
570-
});
571-
for (auto gpu : gpus) {
572-
gpuWorkerThreads.emplace_back(
573-
[this, gpu, &engine, &population, &printer]() {
574-
this->doGpuWork(gpu, engine, population, printer);
575-
});
576594
}
577595
// At this point everything is synchronized because out of scope, done
596+
std::move(newCandidates.begin(), newCandidates.end(), firstNew);
578597
};
579-
std::cout << "Generation " << generation << std::endl;
580-
setUpJobsAndRun(tuner_->population, "Population");
598+
std::cout << "Generation " << generation << ':' << std::endl;
599+
setUpJobsAndRun(tuner_->population, "New Candidates");
581600
tuner_->generateSelectionPool();
582601
setUpJobsAndRun(tuner_->selectionPool, "Selection Pool");
583602
tuner_->selectSurvivors();

0 commit comments

Comments
 (0)