This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 02a7aaa

Author: Theodoros Theodoridis
[genetic tuning] Use cached runtimes if available
Some candidates survive across generations, and their runtimes are stored in the options cache. Previously those candidates were re-benchmarked every time they were encountered; now their runtimes are restored from the cache instead.
1 parent 5007ea4 commit 02a7aaa
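
The change implements a lookup-before-benchmark pattern: consult the options cache first, and only benchmark a candidate when nothing is recorded for it. As a minimal, self-contained sketch of the idea (the names BenchmarkCache, measureOrRestore, and runOnce are hypothetical stand-ins, not the actual Tensor Comprehensions API):

#include <cstddef>
#include <functional>
#include <map>
#include <string>
#include <vector>

using Duration = double; // stand-in for the real Duration type

struct BenchmarkCache {
  // Recorded runtimes, keyed by a serialized candidate (its mapping options).
  std::map<std::string, std::vector<Duration>> runtimes;
};

std::vector<Duration> measureOrRestore(
    BenchmarkCache& cache,
    const std::string& optionsKey,
    const std::function<Duration()>& runOnce,
    std::size_t iterations) {
  auto hit = cache.runtimes.find(optionsKey);
  if (hit != cache.runtimes.end()) {
    return hit->second; // candidate survived from an earlier generation
  }
  std::vector<Duration> measured;
  measured.reserve(iterations);
  for (std::size_t i = 0; i < iterations; ++i) {
    measured.push_back(runOnce()); // cache miss: benchmark for real
  }
  cache.runtimes[optionsKey] = measured;
  return measured;
}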

File tree: 1 file changed

src/autotuner/genetic_tuning_harness.cc

Lines changed: 80 additions & 42 deletions
@@ -345,6 +345,39 @@ void GeneticTunerHarness::doCompile(
   }
 }
 
+namespace {
+std::vector<const DLTensor*> toConstDlpackTensors(
+    const std::vector<DLTensor*>& v) {
+  std::vector<const DLTensor*> out(v.begin(), v.end());
+  return out;
+}
+} // namespace
+
+template <typename ExecutorType>
+std::vector<Duration> retrieveCachedRuntimes(
+    ExecutorType& engine,
+    const std::string& id,
+    const std::vector<const DLTensor*>& inputs,
+    const std::vector<DLTensor*>& outputs,
+    const MappingOptions& options) {
+  if (not OptionsCache::cacheEnabled()) {
+    return {};
+  }
+  auto cache = OptionsCache::getCache();
+  auto allResults = cache->retrieveOptionsAndRuntimes(
+      id, inputs, toConstDlpackTensors(outputs));
+  auto wantedResult = std::find_if(
+      allResults.begin(),
+      allResults.end(),
+      [&options](const OptionsCache::RetrievalResult& r) {
+        return r.options == options;
+      });
+  if (wantedResult == allResults.end()) {
+    return {};
+  }
+  return wantedResult->recordedRuntimes;
+}
+
 template <typename ExecutorType, typename Population>
 void GeneticTunerHarness::doGpuWork(
     size_t gpu,
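
Note: retrieveCachedRuntimes returns an empty vector both when the options cache is disabled and when no cached entry matches the candidate's options, so callers can treat an empty result uniformly as "benchmark this candidate". A hypothetical call site (the kernel id and variable names are illustrative, not from this commit):

  auto cached = retrieveCachedRuntimes(
      engine, /*id=*/"my_kernel", inputs, outputs, candidateOptions);
  if (cached.empty()) {
    // Cache miss: fall back to real benchmarking, as doGpuWork does below.
  }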
@@ -398,51 +431,56 @@ void GeneticTunerHarness::doGpuWork(
       LOG_LINE_BY_LINE(INFO, ssInfo);
     }
 
-    std::vector<Duration> runtimes;
-    try {
-      size_t bestTimeSoFar;
-      {
-        std::lock_guard<std::mutex> lock(bestTimeMtx_);
-        bestTimeSoFar = bestTime_;
-      }
-      auto prune =
-          warmupOrPrune(engine, outputs, inputs, handle, bestTimeSoFar);
-      if (prune) {
+    auto runtimes =
+        retrieveCachedRuntimes(engine, kKernelName_, inputs, outputs, options);
+    if (runtimes.empty()) {
+      try {
+        size_t bestTimeSoFar;
+        {
+          std::lock_guard<std::mutex> lock(bestTimeMtx_);
+          bestTimeSoFar = bestTime_;
+        }
+        auto prune =
+            warmupOrPrune(engine, outputs, inputs, handle, bestTimeSoFar);
+        if (prune) {
+          pConf->invalid = true;
+          continue;
+        } else {
+          runtimes.reserve(kReducedBenchmarkIterations);
+          for (size_t i = 0; i < kReducedBenchmarkIterations; ++i) {
+            runtimes.push_back(engine.run(handle, inputs, outputs, true));
+          }
+          engine.clear(handle);
+        }
+      } catch (std::exception& e) {
+        if (FLAGS_debug_tuner) {
+          LOG(WARNING) << "Runtime error gpu " << gpu << ": " << e.what();
+          std::stringstream ssWarning;
+          MappingOptionsCppPrinter warningPrinter(ssWarning);
+          warningPrinter << options;
+          LOG(WARNING) << "Aborted execution on gpu " << gpu;
+          LOG_LINE_BY_LINE(WARNING, ssWarning);
+        }
+        while (cudaGetLastError() != cudaSuccess) {
+          // In case of errors in the generated code, we cannot rely on
+          // deviceReset to set the GPU in a clean state. So instead we just
+          // pop and discard all the errors accumulated on the GPU until we
+          // get to a clean slate (i.e. cudaSuccess).
+          ;
+        }
+        try {
+          // Some errors, such as illegal memory access, cannot be recovered
+          // from without a cudaDeviceReset (i.e. because of user protection).
+          // In those cases we have no choice but to fail hard.
+          TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
+        } catch (const std::exception& e) {
+          LOG(FATAL) << "[CUDA][FATAL] cuda error on gpu " << gpu << ": "
+                     << e.what() << "\n"
+                     << MappingOptionsAsCpp(options);
+        }
         pConf->invalid = true;
         continue;
-      } else {
-        runtimes.reserve(kReducedBenchmarkIterations);
-        for (size_t i = 0; i < kReducedBenchmarkIterations; ++i) {
-          runtimes.push_back(engine.run(handle, inputs, outputs, true));
-        }
-        engine.clear(handle);
       }
-    } catch (std::exception& e) {
-      LOG(WARNING) << "Runtime error gpu " << gpu << ": " << e.what();
-      std::stringstream ssWarning;
-      MappingOptionsCppPrinter warningPrinter(ssWarning);
-      warningPrinter << options;
-      LOG(WARNING) << "Aborted execution on gpu " << gpu;
-      LOG_LINE_BY_LINE(WARNING, ssWarning);
-      while (cudaGetLastError() != cudaSuccess) {
-        // In case of errors in the generated code, we cannot rely on
-        // deviceReset to set the GPU in a clean state. So instead we just pop
-        // and discard all the errors accumulated on the GPU until we get to a
-        // clean slate (i.e. cudaSuccess).
-        ;
-      }
-      try {
-        // Some errors, such as illegal memory access, cannot be recovered
-        // from without a cudaDeviceReset (i.e. because of user protection).
-        // In those cases we have no choice but to fail hard.
-        TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
-      } catch (const std::exception& e) {
-        LOG(FATAL) << "[CUDA][FATAL] cuda error on gpu " << gpu << ": "
-                   << e.what() << "\n"
-                   << MappingOptionsAsCpp(options);
-      }
-      pConf->invalid = true;
-      continue;
     }
 
     auto prof = median(runtimes);
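
The error recovery in the catch block above leans on a CUDA runtime detail: cudaGetLastError returns the last error and resets it to cudaSuccess, so calling it in a loop drains every accumulated error without touching the device. A standalone sketch of the same idiom, using only the public CUDA runtime API (drainCudaErrors is our name for it, not a function in this codebase):

#include <cstdio>
#include <cuda_runtime.h>

// Pop and discard accumulated CUDA errors until the state is clean again.
void drainCudaErrors() {
  cudaError_t err;
  while ((err = cudaGetLastError()) != cudaSuccess) {
    std::fprintf(stderr, "discarding CUDA error: %s\n",
                 cudaGetErrorString(err));
  }
  // "Sticky" errors such as an illegal memory access corrupt the context and
  // resurface on later API calls; a synchronize exposes them, after which the
  // only options are cudaDeviceReset() or failing hard, as the tuner does.
  if (cudaDeviceSynchronize() != cudaSuccess) {
    std::fprintf(stderr, "unrecoverable CUDA error; device reset required\n");
  }
}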
