Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 88b4f1a

Browse files
author
Theodoros Theodoridis
committed
[genetic search] Switch to (mu,lambda) selection
Previously each generation had mu candidates and generated mu children, all of which survived. This meant that really bad candidates that were randomly generated would survive across generations. With this change, lambda (typically larger than mu) children are generated and the best mu survive. The previous behaviour is a special case in which lambda = mu.
1 parent 36db8ea commit 88b4f1a

File tree

12 files changed

+168
-92
lines changed

12 files changed

+168
-92
lines changed

examples/tensordot.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ int main(int argc, char** argv) {
9696
::gflags::ParseCommandLineFlags(&argc, &argv, true);
9797
::google::InitGoogleLogging(argv[0]);
9898
setAtenSeed(tc::initRandomSeed(), at::Backend::CUDA);
99-
tc::FLAGS_tuner_gen_number_elites = FLAGS_number_elites;
10099
tc::FLAGS_tuner_gen_generations = FLAGS_generations;
101100
tc::FLAGS_tuner_gen_pop_size = FLAGS_pop_size;
102101
tc::FLAGS_tuner_threads = FLAGS_threads;

include/tc/autotuner/genetic_search.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ class GeneticSearch {
7070
size_t n,
7171
uint8_t crossOverRate,
7272
uint8_t mutationRate,
73-
size_t numberElites);
73+
size_t matingPoolSize,
74+
size_t selectionPoolSize);
7475

7576
/**
7677
* confs are used to seed the first generation, the rest of the population is
@@ -92,15 +93,22 @@ class GeneticSearch {
9293
size_t n,
9394
uint8_t crossOverRate,
9495
uint8_t mutationRate,
95-
size_t numberElites);
96+
size_t matingPoolSize,
97+
size_t selectionPoolSize);
9698

97-
void updateParameters();
99+
void generateSelectionPool();
100+
void selectSurvivors();
98101

99102
private:
100103
std::vector<TuningConfiguration> stochasticUniversalSampling(
101104
const std::vector<double>& fitness) const;
105+
102106
void breed();
103107

108+
void updateBestCandidate(const TuningConfiguration& c);
109+
110+
void resetPopulationIfNotEnoughCandidates();
111+
104112
TuningConfiguration crossover(
105113
TuningConfiguration&,
106114
TuningConfiguration&,
@@ -113,12 +121,13 @@ class GeneticSearch {
113121
using Population = std::vector<std::unique_ptr<CandidateConfiguration>>;
114122

115123
Population population;
124+
Population selectionPool;
116125
TuningConfiguration lastBestConf;
117126
const size_t kMaxPopulationSize;
118127
const size_t kMatingPoolSize;
128+
const size_t kSelectionPoolSize;
119129
const uint8_t kCrossOverRate;
120130
const uint8_t kMutationRate;
121-
const size_t kNumberElites;
122131

123132
/*
124133
* c++11 seeding is (apparently) not of the highest quality:

include/tc/autotuner/genetic_tuning_harness.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ class GeneticTunerHarness {
3838
size_t n,
3939
uint8_t crossoverRate,
4040
uint8_t mutationRate,
41-
size_t numberElites,
41+
size_t matingPoolSize,
42+
size_t selectionPoolSize,
4243
lang::TreeRef tc,
4344
std::string kernelName,
4445
const std::unordered_map<size_t, std::vector<const DLTensor*>>& inputs,
@@ -66,12 +67,16 @@ class GeneticTunerHarness {
6667
size_t bestTimeSoFar);
6768

6869
/// Helper function to delegate compiling on the cpu to different threads
69-
template <typename ExecutorType>
70-
void doCompile(ExecutorType& engine);
70+
template <typename ExecutorType, typename Population>
71+
void doCompile(ExecutorType& engine, Population& population);
7172

7273
/// Helper function to delegate running on the gpu to different threads
73-
template <typename ExecutorType>
74-
void doGpuWork(size_t gpu, ExecutorType& engine, Printer& printer);
74+
template <typename ExecutorType, typename Population>
75+
void doGpuWork(
76+
size_t gpu,
77+
ExecutorType& engine,
78+
Population& population,
79+
Printer& printer);
7580

7681
/// Make options from conf
7782
tc::CudaMappingOptions makeOptions(const CandidateConfiguration& conf);
@@ -90,7 +95,8 @@ class GeneticTunerHarness {
9095
const size_t kMaxPopulationSize;
9196
const uint8_t kCrossOverRate;
9297
const uint8_t kMutationRate;
93-
const size_t kNumberElites;
98+
const size_t kMatingPoolSize;
99+
const size_t kSelectionPoolSize;
94100

95101
TuningConfiguration configuration;
96102

include/tc/autotuner/utils/printer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ namespace autotune {
3333
class Printer {
3434
public:
3535
Printer(
36-
size_t generation,
36+
std::string prefix,
3737
size_t total,
3838
const std::atomic_size_t& currentCompilationJob,
3939
const std::atomic_size_t& numEvaluations);
@@ -47,7 +47,7 @@ class Printer {
4747
private:
4848
void printLoop();
4949

50-
size_t generation_;
50+
std::string prefix_;
5151
std::vector<Duration> runtimes_;
5252
mutable std::mutex runtimesMtx_;
5353

include/tc/core/flags.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,11 @@ DECLARE_uint32(benchmark_iterations);
4040

4141
// Used in autotuning
4242
DECLARE_uint32(tuner_gen_pop_size);
43+
DECLARE_uint32(tuner_gen_mating_pool_size);
44+
DECLARE_uint32(tuner_gen_selection_pool_size);
4345
DECLARE_uint32(tuner_gen_crossover_rate);
4446
DECLARE_uint32(tuner_gen_mutation_rate);
4547
DECLARE_uint32(tuner_gen_generations);
46-
DECLARE_uint32(tuner_gen_number_elites);
4748
DECLARE_uint32(tuner_threads);
4849
DECLARE_string(tuner_gpus);
4950
DECLARE_bool(tuner_print_best);

src/autotuner/genetic_autotuner.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ llvm::Optional<CudaMappingOptions> GeneticAutotuner::tune(
118118
FLAGS_tuner_gen_pop_size,
119119
FLAGS_tuner_gen_crossover_rate,
120120
FLAGS_tuner_gen_mutation_rate,
121-
FLAGS_tuner_gen_number_elites,
121+
FLAGS_tuner_gen_mating_pool_size,
122+
FLAGS_tuner_gen_selection_pool_size,
122123
tcNameMap_.at(tcName),
123124
tcName,
124125
inputs,

src/autotuner/genetic_search.cc

Lines changed: 75 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,8 @@ void dropInvalidConfigurations(GeneticSearch::Population& population) {
162162
} // namespace
163163

164164
#define VALIDATE() \
165-
CHECK_LT(kNumberElites, kMaxPopulationSize); \
165+
CHECK_LT(kMaxPopulationSize, kMatingPoolSize); \
166+
CHECK_LT(kMaxPopulationSize, kSelectionPoolSize); \
166167
CHECK(kMutationRate >= 0 and kMutationRate <= 100) \
167168
<< "the mutation rate (" << kMutationRate \
168169
<< ") should be in the [0,100] interval"; \
@@ -189,14 +190,15 @@ GeneticSearch::GeneticSearch(
189190
size_t n,
190191
uint8_t crossOverRate,
191192
uint8_t mutationRate,
192-
size_t numberElites)
193+
size_t matingPoolSize,
194+
size_t selectionPoolSize)
193195
: population(),
194196
lastBestConf(confs[0]),
195197
kMaxPopulationSize(n),
196-
kMatingPoolSize(n * 3),
198+
kMatingPoolSize(matingPoolSize),
199+
kSelectionPoolSize(selectionPoolSize),
197200
kCrossOverRate(crossOverRate),
198201
kMutationRate(mutationRate),
199-
kNumberElites(numberElites),
200202
rng{std::random_device{}()} {
201203
restoreRngState(rng);
202204
VALIDATE();
@@ -222,14 +224,15 @@ GeneticSearch::GeneticSearch(
222224
size_t n,
223225
uint8_t crossOverRate,
224226
uint8_t mutationRate,
225-
size_t numberElites)
227+
size_t matingPoolSize,
228+
size_t selectionPoolSize)
226229
: population(),
227230
lastBestConf(conf),
228231
kMaxPopulationSize(n),
229-
kMatingPoolSize(n * 3),
232+
kMatingPoolSize(matingPoolSize),
233+
kSelectionPoolSize(selectionPoolSize),
230234
kCrossOverRate(crossOverRate),
231235
kMutationRate(mutationRate),
232-
kNumberElites(numberElites),
233236
rng{std::random_device{}()} {
234237
restoreRngState(rng);
235238
VALIDATE();
@@ -301,13 +304,6 @@ void GeneticSearch::breed() {
301304
auto matingPool =
302305
stochasticUniversalSampling(computeAccumulatedFitness(population));
303306

304-
Population new_population;
305-
new_population.reserve(kMatingPoolSize);
306-
for (size_t c = 0; c < kNumberElites; ++c) {
307-
new_population.push_back(
308-
make_unique<CandidateConfiguration>(population.at(c)->configuration));
309-
}
310-
311307
auto select = [&]() -> TuningConfiguration& {
312308
auto idx = std::uniform_int_distribution<size_t>{
313309
size_t(0), matingPool.size() - 1}(rng);
@@ -323,45 +319,20 @@ void GeneticSearch::breed() {
323319
return dist(rng);
324320
};
325321

326-
while (new_population.size() < kMaxPopulationSize) {
322+
while (selectionPool.size() < kSelectionPoolSize) {
327323
if (shouldCrossOver()) {
328324
auto parent1 = select();
329325
auto parent2 = select();
330326
auto parent3 = select();
331-
new_population.emplace_back(make_unique<CandidateConfiguration>(
327+
selectionPool.emplace_back(make_unique<CandidateConfiguration>(
332328
crossover(parent1, parent2, parent3)));
333329
} else {
334-
new_population.emplace_back(
335-
make_unique<CandidateConfiguration>(select()));
330+
selectionPool.emplace_back(make_unique<CandidateConfiguration>(select()));
336331
}
337332
}
338-
population = std::move(new_population);
339333
}
340334

341-
void GeneticSearch::updateParameters() {
342-
dropInvalidConfigurations(population);
343-
344-
// Sort population before taking any decision
345-
std::sort(
346-
population.begin(),
347-
population.end(),
348-
[](const std::unique_ptr<CandidateConfiguration>& a,
349-
const std::unique_ptr<CandidateConfiguration>& b) {
350-
checkRuntimeRecorded(a->runtime);
351-
checkRuntimeRecorded(b->runtime);
352-
return a->runtime < b->runtime;
353-
});
354-
355-
// Update failsafe lastBestConf
356-
lastBestConf =
357-
population.size() > 0 ? population.front()->configuration : lastBestConf;
358-
if (FLAGS_tuner_print_best) {
359-
CudaMappingOptions options(
360-
CudaMappingOptions::makeSingleThreadCudaMappingOptions());
361-
lastBestConf.applyToCudaMappingOptions(options);
362-
LOG(INFO) << "Best so far:\n" << options;
363-
}
364-
335+
void GeneticSearch::resetPopulationIfNotEnoughCandidates() {
365336
if (population.size() < kMinCandidatesForBreeding) {
366337
LOG_IF(ERROR, FLAGS_debug_tuner)
367338
<< population.size() << " out of " << kMaxPopulationSize
@@ -380,12 +351,70 @@ void GeneticSearch::updateParameters() {
380351
// Don't lose the first one which was the best from before
381352
CHECK_LT(0, population.size());
382353
randomizePopulation(population.begin() + 1, population.end(), rng);
383-
return;
384354
}
355+
}
385356

357+
namespace {
358+
void sortByRuntime(GeneticSearch::Population& population) {
359+
std::sort(
360+
population.begin(),
361+
population.end(),
362+
[](const std::unique_ptr<CandidateConfiguration>& a,
363+
const std::unique_ptr<CandidateConfiguration>& b) {
364+
checkRuntimeRecorded(a->runtime);
365+
checkRuntimeRecorded(b->runtime);
366+
return a->runtime < b->runtime;
367+
});
368+
}
369+
} // namespace
370+
371+
void GeneticSearch::updateBestCandidate(const TuningConfiguration& c) {
372+
lastBestConf = c;
373+
if (FLAGS_tuner_print_best) {
374+
CudaMappingOptions options(
375+
CudaMappingOptions::makeSingleThreadCudaMappingOptions());
376+
lastBestConf.applyToCudaMappingOptions(options);
377+
LOG(INFO) << "Best so far:\n" << options;
378+
}
379+
}
380+
381+
void GeneticSearch::generateSelectionPool() {
382+
dropInvalidConfigurations(population);
383+
sortByRuntime(population);
384+
updateBestCandidate(
385+
population.size() > 0 ? population.front()->configuration : lastBestConf);
386+
resetPopulationIfNotEnoughCandidates();
386387
breed();
387-
for (int i = kNumberElites; i < population.size(); ++i) {
388-
mutate(*population[i], kMutationRate, kMutateIterations, rng);
388+
selectionPool.clear();
389+
selectionPool.emplace_back(make_unique<CandidateConfiguration>(lastBestConf));
390+
breed();
391+
for (size_t i = 1; i < selectionPool.size(); ++i) {
392+
mutate(*selectionPool[i], kMutationRate, kMutateIterations, rng);
393+
}
394+
}
395+
396+
void GeneticSearch::selectSurvivors() {
397+
dropInvalidConfigurations(selectionPool);
398+
sortByRuntime(selectionPool);
399+
population.clear();
400+
std::transform(
401+
selectionPool.begin(),
402+
selectionPool.begin() +
403+
std::min(selectionPool.size(), kMaxPopulationSize),
404+
std::back_inserter(population),
405+
[](const std::unique_ptr<CandidateConfiguration>& c) {
406+
return make_unique<CandidateConfiguration>(c->configuration);
407+
});
408+
409+
if (selectionPool.size() < kMaxPopulationSize) {
410+
auto numberMissing = kMaxPopulationSize - selectionPool.size();
411+
412+
for (size_t i = 0; i < numberMissing; ++i) {
413+
selectionPool.emplace_back(
414+
make_unique<CandidateConfiguration>(lastBestConf));
415+
}
416+
randomizePopulation(
417+
selectionPool.end() - numberMissing, selectionPool.end(), rng);
389418
}
390419
}
391420

0 commit comments

Comments
 (0)