
Commit 44641c9
Author: Theodoros Theodoridis
[genetic tuning] Use cached runtimes if available
Some candidates survive across generations. Their runtimes are stored in the options cache. Previously those candidates would be benchmarked every time they were encountered; now their runtimes are restored from the cache instead.
1 parent a202c48 commit 44641c9
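
For readers skimming the diff, here is a minimal self-contained sketch of the lookup-before-benchmark pattern this commit introduces. The names (RuntimeCache, benchmark, runtimesFor) and the string key are hypothetical stand-ins for illustration only; the real code queries OptionsCache::retrieveOptionsAndRuntimes with the kernel id, input/output tensors, and CudaMappingOptions, as shown in the diff below.

#include <map>
#include <string>
#include <vector>

using Duration = double; // stand-in; the tuner uses a chrono-based Duration

struct RuntimeCache {
  // Keyed on a serialized candidate in this sketch; the real cache keys on
  // (kernel id, inputs, outputs) and then matches on mapping options.
  std::map<std::string, std::vector<Duration>> recorded;

  // Returns the cached runtimes for `key`, or an empty vector on a miss.
  std::vector<Duration> retrieve(const std::string& key) const {
    auto it = recorded.find(key);
    return it == recorded.end() ? std::vector<Duration>{} : it->second;
  }
};

// Placeholder: in the tuner this runs the compiled kernel
// kReducedBenchmarkIterations times and records each duration.
std::vector<Duration> benchmark(const std::string& /*key*/) {
  return {1.0, 1.1, 0.9};
}

std::vector<Duration> runtimesFor(RuntimeCache& cache, const std::string& key) {
  auto runtimes = cache.retrieve(key); // hit: candidate survived a generation
  if (runtimes.empty()) {              // miss: benchmark and record as before
    runtimes = benchmark(key);
    cache.recorded[key] = runtimes;
  }
  return runtimes;
}

An empty vector doubles as the miss sentinel; this mirrors how retrieveCachedRuntimes in the diff signals both "caching disabled" and "no matching options" to its caller.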

File tree

1 file changed: +80 −42 lines changed


src/autotuner/genetic_tuning_harness.cc

Lines changed: 80 additions & 42 deletions
@@ -346,6 +346,39 @@ void GeneticTunerHarness::doCompile(
   }
 }
 
+namespace {
+std::vector<const DLTensor*> toConstDlpackTensors(
+    const std::vector<DLTensor*>& v) {
+  std::vector<const DLTensor*> out(v.begin(), v.end());
+  return out;
+}
+} // namespace
+
+template <typename ExecutorType>
+std::vector<Duration> retrieveCachedRuntimes(
+    ExecutorType& engine,
+    const std::string& id,
+    const std::vector<const DLTensor*>& inputs,
+    const std::vector<DLTensor*>& outputs,
+    const CudaMappingOptions& options) {
+  if (not OptionsCache::cacheEnabled()) {
+    return {};
+  }
+  auto cache = OptionsCache::getCache();
+  auto allResults = cache->retrieveOptionsAndRuntimes(
+      id, inputs, toConstDlpackTensors(outputs));
+  auto wantedResult = std::find_if(
+      allResults.begin(),
+      allResults.end(),
+      [&options](const OptionsCache::RetrievalResult& r) {
+        return r.options == options;
+      });
+  if (wantedResult == allResults.end()) {
+    return {};
+  }
+  return wantedResult->recordedRuntimes;
+}
+
 template <typename ExecutorType, typename Population>
 void GeneticTunerHarness::doGpuWork(
     size_t gpu,
@@ -399,51 +432,56 @@ void GeneticTunerHarness::doGpuWork(
       LOG_LINE_BY_LINE(INFO, ssInfo);
     }
 
-    std::vector<Duration> runtimes;
-    try {
-      size_t bestTimeSoFar;
-      {
-        std::lock_guard<std::mutex> lock(bestTimeMtx_);
-        bestTimeSoFar = bestTime_;
-      }
-      auto prune =
-          warmupOrPrune(engine, outputs, inputs, handle, bestTimeSoFar);
-      if (prune) {
+    auto runtimes =
+        retrieveCachedRuntimes(engine, kKernelName_, inputs, outputs, options);
+    if (runtimes.empty()) {
+      try {
+        size_t bestTimeSoFar;
+        {
+          std::lock_guard<std::mutex> lock(bestTimeMtx_);
+          bestTimeSoFar = bestTime_;
+        }
+        auto prune =
+            warmupOrPrune(engine, outputs, inputs, handle, bestTimeSoFar);
+        if (prune) {
+          pConf->invalid = true;
+          continue;
+        } else {
+          runtimes.reserve(kReducedBenchmarkIterations);
+          for (size_t i = 0; i < kReducedBenchmarkIterations; ++i) {
+            runtimes.push_back(engine.run(handle, inputs, outputs, true));
+          }
+          engine.clear(handle);
+        }
+      } catch (std::exception& e) {
+        if (FLAGS_debug_tuner) {
+          LOG(WARNING) << "Runtime error gpu " << gpu << ": " << e.what();
+          std::stringstream ssWarning;
+          CudaMappingOptionsCppPrinter warningPrinter(ssWarning);
+          warningPrinter << options;
+          LOG(WARNING) << "Aborted execution on gpu " << gpu;
+          LOG_LINE_BY_LINE(WARNING, ssWarning);
+        }
+        while (cudaGetLastError() != cudaSuccess) {
+          // In case of errors in the generated code, we cannot rely on
+          // deviceReset to set the GPU in a clean state. So instead we just
+          // pop and discard all the errors accumulated on the GPU until we
+          // get to a clean slate (i.e. cudaSuccess).
+          ;
+        }
+        try {
+          // Some errors, such as illegal memory access, cannot be recovered
+          // from without a cudaDeviceReset (i.e. because of user protection).
+          // In those cases we have no choice but to fail hard.
+          TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
+        } catch (const std::exception& e) {
+          LOG(FATAL) << "[CUDA][FATAL] cuda error on gpu " << gpu << ": "
+                     << e.what() << "\n"
+                     << CudaMappingOptionsAsCpp(options);
+        }
         pConf->invalid = true;
         continue;
-      } else {
-        runtimes.reserve(kReducedBenchmarkIterations);
-        for (size_t i = 0; i < kReducedBenchmarkIterations; ++i) {
-          runtimes.push_back(engine.run(handle, inputs, outputs, true));
-        }
-        engine.clear(handle);
       }
-    } catch (std::exception& e) {
-      LOG(WARNING) << "Runtime error gpu " << gpu << ": " << e.what();
-      std::stringstream ssWarning;
-      CudaMappingOptionsCppPrinter warningPrinter(ssWarning);
-      warningPrinter << options;
-      LOG(WARNING) << "Aborted execution on gpu " << gpu;
-      LOG_LINE_BY_LINE(WARNING, ssWarning);
-      while (cudaGetLastError() != cudaSuccess) {
-        // In case of errors in the generated code, we cannot rely on
-        // deviceReset to set the GPU in a clean state. So instead we just pop
-        // and discard all the errors accumulated on the GPU until we get to a
-        // clean slate (i.e. cudaSuccess).
-        ;
-      }
-      try {
-        // Some errors, such as illegal memory access, cannot be recovered
-        // from without a cudaDeviceReset (i.e. because of user protection).
-        // In those cases we have no choice but to fail hard.
-        TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
-      } catch (const std::exception& e) {
-        LOG(FATAL) << "[CUDA][FATAL] cuda error on gpu " << gpu << ": "
-                   << e.what() << "\n"
-                   << CudaMappingOptionsAsCpp(options);
-      }
-      pConf->invalid = true;
-      continue;
     }
 
     auto prof = median(runtimes);
