Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit aa13b92

Browse files
author
Theodoros Theodoridis
committed
[genetic tuning] Create jobs only for new candidates
Candidates that survive across generations need not be benchmarked again, so no compilation or GPU jobs have to be created for them.
1 parent 44641c9 commit aa13b92

File tree

2 files changed

+68
-48
lines changed

2 files changed

+68
-48
lines changed

src/autotuner/genetic_search.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,8 @@ void GeneticSearch::selectSurvivors() {
403403
std::min(selectionPool.size(), kMaxPopulationSize),
404404
std::back_inserter(population),
405405
[](const std::unique_ptr<CandidateConfiguration>& c) {
406-
return make_unique<CandidateConfiguration>(c->configuration);
406+
CHECK(c);
407+
return make_unique<CandidateConfiguration>(*c);
407408
});
408409

409410
if (selectionPool.size() < kMaxPopulationSize) {

src/autotuner/genetic_tuning_harness.cc

Lines changed: 66 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ void GeneticTunerHarness::doCompile(
314314
if (current >= population.size()) {
315315
break;
316316
}
317+
317318
auto& pConf = population.at(current);
318319
auto options = makeOptions(*pConf);
319320
try {
@@ -432,7 +433,7 @@ void GeneticTunerHarness::doGpuWork(
432433
LOG_LINE_BY_LINE(INFO, ssInfo);
433434
}
434435

435-
auto runtimes =
436+
std::vector<Duration> runtimes =
436437
retrieveCachedRuntimes(engine, kKernelName_, inputs, outputs, options);
437438
if (runtimes.empty()) {
438439
try {
@@ -526,59 +527,77 @@ void GeneticTunerHarness::runOneGeneration(size_t generation) {
526527

527528
auto setUpJobsAndRun = [&](GeneticSearch::Population& population,
528529
const std::string& printerText) {
529-
// Initialize for this round
530-
currentCompilationJob_.store(0);
531-
numEvaluations_.store(0);
532-
readyToEvaluate_.resize(0);
533-
for (size_t i = 0; i < population.size(); ++i) {
534-
readyToEvaluate_.emplace_back();
535-
readyToEvaluate_[i].store(false);
536-
}
537-
Printer printer(
538-
printerText,
539-
readyToEvaluate_.size(),
540-
currentCompilationJob_,
541-
numEvaluations_);
542-
auto logGenerations = FLAGS_tuner_gen_log_generations;
543-
ScopeGuard sgPrinter([logGenerations, &printer]() {
544-
printer.stop();
545-
if (logGenerations) {
546-
printer.printAll();
547-
}
548-
});
549-
550-
// Just spawn and join new threads for each generation
551-
std::vector<std::thread> cpuCompilationThreads;
552-
cpuCompilationThreads.reserve(FLAGS_tuner_threads);
553-
ScopeGuard sgCompilationThreads([&cpuCompilationThreads]() {
554-
for (auto& cpuCompilationThread : cpuCompilationThreads) {
555-
cpuCompilationThread.join();
530+
// Most candidates should have been evaluated during the previous
531+
// generation's selection phase.
532+
// There are two exceptions:
533+
// 1) the 1st generation
534+
// 2) too many invalid configurations were previously encountered and the
535+
// valid ones were not enough to form a new generation.
536+
auto firstNew = std::partition(
537+
population.begin(),
538+
population.end(),
539+
[](const std::unique_ptr<CandidateConfiguration>& c) {
540+
return c->runtime != Duration::zero();
541+
});
542+
GeneticSearch::Population newCandidates(
543+
std::distance(firstNew, population.end()));
544+
std::move(firstNew, population.end(), newCandidates.begin());
545+
{
546+
// Initialize for this round
547+
currentCompilationJob_.store(0);
548+
numEvaluations_.store(0);
549+
readyToEvaluate_.resize(0);
550+
for (size_t i = 0; i < newCandidates.size(); ++i) {
551+
readyToEvaluate_.emplace_back();
552+
readyToEvaluate_[i].store(false);
556553
}
557-
});
558-
for (int i = 0; i < FLAGS_tuner_threads; ++i) {
559-
cpuCompilationThreads.emplace_back([this, &engine, &population]() {
560-
this->doCompile(engine, population);
554+
Printer printer(
555+
printerText,
556+
readyToEvaluate_.size(),
557+
currentCompilationJob_,
558+
numEvaluations_);
559+
auto logGenerations = FLAGS_tuner_gen_log_generations;
560+
ScopeGuard sgPrinter([logGenerations, &printer]() {
561+
printer.stop();
562+
if (logGenerations) {
563+
printer.printAll();
564+
}
561565
});
562-
}
563566

564-
// Just spawn and join new threads for each generation
565-
std::vector<std::thread> gpuWorkerThreads;
566-
gpuWorkerThreads.reserve(gpus.size());
567-
ScopeGuard sgGpuWorkerThreads([&gpuWorkerThreads]() {
568-
for (auto& gpuWorkerThread : gpuWorkerThreads) {
569-
gpuWorkerThread.join();
567+
// Just spawn and join new threads for each generation
568+
std::vector<std::thread> cpuCompilationThreads;
569+
cpuCompilationThreads.reserve(FLAGS_tuner_threads);
570+
ScopeGuard sgCompilationThreads([&cpuCompilationThreads]() {
571+
for (auto& cpuCompilationThread : cpuCompilationThreads) {
572+
cpuCompilationThread.join();
573+
}
574+
});
575+
for (int i = 0; i < FLAGS_tuner_threads; ++i) {
576+
cpuCompilationThreads.emplace_back([this, &engine, &newCandidates]() {
577+
this->doCompile(engine, newCandidates);
578+
});
579+
}
580+
581+
// Just spawn and join new threads for each generation
582+
std::vector<std::thread> gpuWorkerThreads;
583+
gpuWorkerThreads.reserve(gpus.size());
584+
ScopeGuard sgGpuWorkerThreads([&gpuWorkerThreads]() {
585+
for (auto& gpuWorkerThread : gpuWorkerThreads) {
586+
gpuWorkerThread.join();
587+
}
588+
});
589+
for (auto gpu : gpus) {
590+
gpuWorkerThreads.emplace_back(
591+
[this, gpu, &engine, &newCandidates, &printer]() {
592+
this->doGpuWork(gpu, engine, newCandidates, printer);
593+
});
570594
}
571-
});
572-
for (auto gpu : gpus) {
573-
gpuWorkerThreads.emplace_back(
574-
[this, gpu, &engine, &population, &printer]() {
575-
this->doGpuWork(gpu, engine, population, printer);
576-
});
577595
}
578596
// At this point everything is synchronized: the scope guards that join the worker threads have gone out of scope.
597+
std::move(newCandidates.begin(), newCandidates.end(), firstNew);
579598
};
580-
std::cout << "Generation " << generation << std::endl;
581-
setUpJobsAndRun(tuner_->population, "Population");
599+
std::cout << "Generation " << generation << ':' << std::endl;
600+
setUpJobsAndRun(tuner_->population, "New Candidates");
582601
tuner_->generateSelectionPool();
583602
setUpJobsAndRun(tuner_->selectionPool, "Selection Pool");
584603
tuner_->selectSurvivors();

0 commit comments

Comments
 (0)