
Commit 627be38

Author: Theodoros Theodoridis
[genetic tuning] Use cached runtimes if available
Some candidates survive across generations. Their runtimes are stored in the options cache. Previously those candidates would be benchmarked every time they were encountered; now their runtimes are restored from the cache instead.
1 parent 629b232 commit 627be38
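
The change amounts to a lookup-or-benchmark pattern: consult the cache first, and only fall back to a fresh benchmark on a miss. Below is a minimal, self-contained sketch of that pattern. The names used here (RuntimeCache, benchmarkCandidate, Candidate) are illustrative placeholders, not the actual Tensor Comprehensions API; as the diff below shows, the real lookup goes through OptionsCache and is keyed on the kernel id, the input/output tensors, and the MappingOptions.

#include <chrono>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using Duration = std::chrono::microseconds;

struct Candidate {
  std::string optionsKey;  // serialized mapping options identifying the candidate
};

// Stub standing in for the real benchmarking loop (engine.run(...) repeated
// kReducedBenchmarkIterations times in the diff below).
std::vector<Duration> benchmarkCandidate(const Candidate&) {
  return {Duration(100), Duration(105), Duration(98)};
}

class RuntimeCache {
 public:
  // Returns the recorded runtimes for the key, or an empty vector on a miss.
  std::vector<Duration> retrieve(const std::string& key) const {
    auto it = cache_.find(key);
    return it == cache_.end() ? std::vector<Duration>{} : it->second;
  }
  void store(const std::string& key, std::vector<Duration> runtimes) {
    cache_[key] = std::move(runtimes);
  }

 private:
  std::unordered_map<std::string, std::vector<Duration>> cache_;
};

std::vector<Duration> evaluate(const Candidate& c, RuntimeCache& cache) {
  // Cache hit: the candidate survived from an earlier generation; reuse its
  // recorded runtimes instead of benchmarking it again.
  auto runtimes = cache.retrieve(c.optionsKey);
  if (runtimes.empty()) {
    // Cache miss: benchmark as before and record the result for later
    // generations.
    runtimes = benchmarkCandidate(c);
    cache.store(c.optionsKey, runtimes);
  }
  return runtimes;
}

Returning an empty vector on a miss lets the caller use runtimes.empty() as the hit/miss test, which is exactly how the new doGpuWork code below decides whether to benchmark.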

File tree

1 file changed: src/autotuner/genetic_tuning_harness.cc (+80 -44 lines)
@@ -347,6 +347,39 @@ void GeneticTunerHarness::doCompile(
   }
 }
 
+namespace {
+std::vector<const DLTensor*> toConstDlpackTensors(
+    const std::vector<DLTensor*>& v) {
+  std::vector<const DLTensor*> out(v.begin(), v.end());
+  return out;
+}
+} // namespace
+
+template <typename ExecutorType>
+std::vector<Duration> retrieveCachedRuntimes(
+    ExecutorType& engine,
+    const std::string& id,
+    const std::vector<const DLTensor*>& inputs,
+    const std::vector<DLTensor*>& outputs,
+    const MappingOptions& options) {
+  if (not OptionsCache::cacheEnabled()) {
+    return {};
+  }
+  auto cache = OptionsCache::getCache();
+  auto allResults = cache->retrieveOptionsAndRuntimes(
+      id, inputs, toConstDlpackTensors(outputs));
+  auto wantedResult = std::find_if(
+      allResults.begin(),
+      allResults.end(),
+      [&options](const OptionsCache::RetrievalResult& r) {
+        return r.options == options;
+      });
+  if (wantedResult == allResults.end()) {
+    return {};
+  }
+  return wantedResult->recordedRuntimes;
+}
+
 template <typename ExecutorType, typename Population>
 void GeneticTunerHarness::doGpuWork(
     size_t gpu,
@@ -400,53 +433,56 @@ void GeneticTunerHarness::doGpuWork(
       LOG_LINE_BY_LINE(INFO, ssInfo);
     }
 
-    std::vector<Duration> runtimes;
-    try {
-      size_t bestTimeSoFar;
-      {
-        std::lock_guard<std::mutex> lock(bestTimeMtx_);
-        bestTimeSoFar = bestTime_;
-      }
-      auto prune =
-          warmupOrPrune(engine, outputs, inputs, handle, bestTimeSoFar);
-      if (prune) {
+    auto runtimes =
+        retrieveCachedRuntimes(engine, kKernelName_, inputs, outputs, options);
+    if (runtimes.empty()) {
+      try {
+        size_t bestTimeSoFar;
+        {
+          std::lock_guard<std::mutex> lock(bestTimeMtx_);
+          bestTimeSoFar = bestTime_;
+        }
+        auto prune =
+            warmupOrPrune(engine, outputs, inputs, handle, bestTimeSoFar);
+        if (prune) {
+          pConf->invalid = true;
+          continue;
+        } else {
+          runtimes.reserve(kReducedBenchmarkIterations);
+          for (size_t i = 0; i < kReducedBenchmarkIterations; ++i) {
+            runtimes.push_back(engine.run(handle, inputs, outputs, true));
+          }
+          engine.clear(handle);
+        }
+      } catch (std::exception& e) {
+        if (FLAGS_debug_tuner) {
+          LOG(WARNING) << "Runtime error gpu " << gpu << ": " << e.what();
+          std::stringstream ssWarning;
+          MappingOptionsCppPrinter warningPrinter(ssWarning);
+          warningPrinter << options;
+          LOG(WARNING) << "Aborted execution on gpu " << gpu;
+          LOG_LINE_BY_LINE(WARNING, ssWarning);
+        }
+        while (cudaGetLastError() != cudaSuccess) {
+          // In case of errors in the generated code, we cannot rely on
+          // deviceReset to set the GPU in a clean state. So instead we just
+          // pop and discard all the errors accumulated on the GPU until we
+          // get to a clean slate (i.e. cudaSuccess).
+          ;
+        }
+        try {
+          // Some errors, such as illegal memory access, cannot be recovered
+          // from without a cudaDeviceReset (i.e. because of user protection).
+          // In those cases we have no choice but to fail hard.
+          TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
+        } catch (const std::exception& e) {
+          LOG(FATAL) << "[CUDA][FATAL] cuda error on gpu " << gpu << ": "
+                     << e.what() << "\n"
+                     << MappingOptionsAsCpp(options);
+        }
         pConf->invalid = true;
         continue;
-      } else {
-        runtimes.reserve(kReducedBenchmarkIterations);
-        for (size_t i = 0; i < kReducedBenchmarkIterations; ++i) {
-          runtimes.push_back(engine.run(handle, inputs, outputs, true));
-        }
-        engine.clear(handle);
-      }
-    } catch (std::exception& e) {
-      if (FLAGS_debug_tuner) {
-        LOG(WARNING) << "Runtime error gpu " << gpu << ": " << e.what();
-        std::stringstream ssWarning;
-        MappingOptionsCppPrinter warningPrinter(ssWarning);
-        warningPrinter << options;
-        LOG(WARNING) << "Aborted execution on gpu " << gpu;
-        LOG_LINE_BY_LINE(WARNING, ssWarning);
-      }
-      while (cudaGetLastError() != cudaSuccess) {
-        // In case of errors in the generated, we cannot rely on deviceReset to
-        // set the GPU in a clean state. So instead we just pop and discard all
-        // the errors accumulated on the GPU until we get to a clean slate
-        // (i.e. cudaSuccess).
-        ;
       }
-    try {
-      // Some errors, such as illegal memory access, cannot be recovered from
-      // without a cudaDeviceReset (i.e. because user protection)
-      // In those cases we have no choice than to fail hard.
-      TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
-    } catch (const std::exception& e) {
-      LOG(FATAL) << "[CUDA][FATAL] cuda error on gpu " << gpu << ": "
-                 << e.what() << "\n"
-                 << MappingOptionsAsCpp(options);
-    }
-    pConf->invalid = true;
-    continue;
     }
 
     auto prof = median(runtimes);
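
Whether the runtimes were freshly measured or restored from the cache, the harness aggregates them with median(runtimes), so surviving candidates keep the same fitness signal they earned when first benchmarked. For reference, a median over recorded runtimes can be sketched as below; this is a generic illustration, not necessarily the median implementation the harness actually uses.

#include <algorithm>
#include <cassert>
#include <vector>

// Generic median over recorded runtimes (sketch; not necessarily the
// implementation the tuner harness uses). Takes the vector by value so the
// caller's recorded runtimes are left untouched by the partial sort.
template <typename Duration>
Duration median(std::vector<Duration> v) {
  assert(!v.empty());
  auto mid = v.begin() + v.size() / 2;
  std::nth_element(v.begin(), mid, v.end());
  return *mid;  // for an even count this is the upper of the two middle values
}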
