Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 43697eb

Browse files
author
Theodoros Theodoridis
committed
[Autotuner] Add support for multi-step search
The previous implementation assumed that a search strategy's iteration consists of only one step; however, it is possible that multiple steps that depend on each other (and are not full "iterations") are necessary.
1 parent 34542a5 commit 43697eb

File tree

5 files changed

+27
-10
lines changed

5 files changed

+27
-10
lines changed

tc/autotuner/autotuner-inl.h

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,16 @@ TuningHarness<Backend>::bestMappingOptions() const {
7979
}
8080

8181
template <typename Backend>
82-
template <typename SearchStrategy>
83-
void TuningHarness<Backend>::doCompile(SearchStrategy& searchStrategy) {
82+
template <typename Candidates>
83+
void TuningHarness<Backend>::doCompile(Candidates& candidates) {
8484
// Atomically fetch and add the next job until there are no jobs left
8585
while (true) {
8686
auto current = currentCompilationJob_.fetch_add(1);
87-
if (current >= searchStrategy.population.size()) {
87+
if (current >= candidates.size()) {
8888
break;
8989
}
9090
std::unique_ptr<typename Backend::ExecutorType> pExecutor(nullptr);
91-
auto pConf = searchStrategy.population.at(current).get();
91+
auto pConf = candidates.at(current).get();
9292
auto options = makeOptions<Backend>(baseMapping_, *pConf);
9393
try {
9494
if (FLAGS_debug_tuner) {
@@ -245,14 +245,15 @@ void TuningHarness<Backend>::runOneIteration(
245245
auto devices = detail::parseDevices<Backend>(FLAGS_tuner_devices);
246246
CHECK(executors_.empty());
247247
CHECK(configurations_.empty());
248-
249-
{
248+
for (uint64_t step = 0; step < searchStrategy.stepsPerIteration; ++step) {
249+
auto& candidates = searchStrategy.candidatesOfStep(step);
250250
// Initialize for this round
251251
currentCompilationJob_.store(0);
252252
numEvaluations_.store(0);
253253
Printer printer(
254254
iteration,
255-
searchStrategy.population.size(),
255+
step,
256+
candidates.size(),
256257
currentCompilationJob_,
257258
numEvaluations_);
258259
auto logIterations = FLAGS_tuner_gen_log_generations;
@@ -273,7 +274,7 @@ void TuningHarness<Backend>::runOneIteration(
273274
});
274275
for (size_t i = 0; i < FLAGS_tuner_threads; ++i) {
275276
cpuCompilationThreads.emplace_back(
276-
[this, &searchStrategy]() { this->doCompile(searchStrategy); });
277+
[this, &candidates]() { this->doCompile(candidates); });
277278
}
278279

279280
// Just spawn and join new threads for each device
@@ -287,12 +288,13 @@ void TuningHarness<Backend>::runOneIteration(
287288
workerThread.join();
288289
}
289290
});
290-
auto populationSize = searchStrategy.population.size();
291+
auto populationSize = candidates.size();
291292
for (auto device : devices) {
292293
workerThreads.emplace_back([this, device, populationSize, &printer]() {
293294
this->doEvaluate(device, populationSize, printer);
294295
});
295296
}
297+
searchStrategy.finishStep(step);
296298
}
297299

298300
// At this point everything is synchronized because out of scope, done

tc/autotuner/genetic_search.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,13 @@ void GeneticSearch::updateParameters() {
307307
}
308308
}
309309

310+
GeneticSearch::Population& GeneticSearch::candidatesOfStep(uint64_t step) {
311+
if (step != 0) {
312+
throw std::invalid_argument("GeneticSearch has only one step");
313+
}
314+
return population;
315+
}
316+
310317
} // namespace autotune
311318
} // namespace tc
312319

tc/autotuner/genetic_search.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,17 @@ class GeneticSearch {
9292

9393
using Population = std::vector<std::unique_ptr<CandidateConfiguration>>;
9494

95+
Population& candidatesOfStep(uint64_t);
96+
void finishStep(uint64_t) {}
97+
9598
Population population;
9699
TuningConfiguration lastBestConf;
97100
const size_t numGenerations;
98101
const size_t maxPopulationSize;
99102
const uint8_t crossOverRate;
100103
const uint8_t mutationRate;
101104
const size_t numberElites;
105+
const size_t stepsPerIteration = 1;
102106

103107
/*
104108
* c++11 seeding is (apparently) not of the highest quality:

tc/autotuner/utils.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ void Printer::printLoop() {
6464
std::this_thread::sleep_for(std::chrono::seconds(1));
6565

6666
std::stringstream ss;
67-
ss << "Iteration " << iteration_;
67+
ss << "Iteration.Step " << iteration_ << '.' << step_;
6868
ss << "\tJobs(Compiled, Evaluated)/total ("
6969
<< std::min(total_, currentCompilationJob_.load()) << ", "
7070
<< std::min(total_, numEvaluations_.load()) << ")/" << total_;
@@ -100,10 +100,12 @@ void Printer::printLoop() {
100100

101101
Printer::Printer(
102102
size_t iteration,
103+
size_t step,
103104
size_t total,
104105
const std::atomic_size_t& currentCompilationJob,
105106
const std::atomic_size_t& numEvaluations)
106107
: iteration_(iteration),
108+
step_(step),
107109
printerThread_([this]() { printLoop(); }),
108110
total_(total),
109111
currentCompilationJob_(currentCompilationJob),

tc/autotuner/utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ class Printer {
4747
public:
4848
Printer(
4949
size_t iteration,
50+
size_t step,
5051
size_t total,
5152
const std::atomic_size_t& currentCompilationJob,
5253
const std::atomic_size_t& numEvaluations);
@@ -61,6 +62,7 @@ class Printer {
6162
void printLoop();
6263

6364
size_t iteration_;
65+
size_t step_;
6466
std::vector<Duration> runtimes_;
6567
mutable std::mutex runtimesMtx_;
6668

0 commit comments

Comments
 (0)