@@ -347,6 +347,39 @@ void GeneticTunerHarness::doCompile(
347
347
}
348
348
}
349
349
350
+ namespace {
351
+ std::vector<const DLTensor*> toConstDlpackTensors (
352
+ const std::vector<DLTensor*>& v) {
353
+ std::vector<const DLTensor*> out (v.begin (), v.end ());
354
+ return out;
355
+ }
356
+ } // namespace
357
+
358
+ template <typename ExecutorType>
359
+ std::vector<Duration> retrieveCachedRuntimes (
360
+ ExecutorType& engine,
361
+ const std::string& id,
362
+ const std::vector<const DLTensor*>& inputs,
363
+ const std::vector<DLTensor*>& outputs,
364
+ const MappingOptions& options) {
365
+ if (not OptionsCache::cacheEnabled ()) {
366
+ return {};
367
+ }
368
+ auto cache = OptionsCache::getCache ();
369
+ auto allResults = cache->retrieveOptionsAndRuntimes (
370
+ id, inputs, toConstDlpackTensors (outputs));
371
+ auto wantedResult = std::find_if (
372
+ allResults.begin (),
373
+ allResults.end (),
374
+ [&options](const OptionsCache::RetrievalResult& r) {
375
+ return r.options == options;
376
+ });
377
+ if (wantedResult == allResults.end ()) {
378
+ return {};
379
+ }
380
+ return wantedResult->recordedRuntimes ;
381
+ }
382
+
350
383
template <typename ExecutorType, typename Population>
351
384
void GeneticTunerHarness::doGpuWork (
352
385
size_t gpu,
@@ -400,53 +433,56 @@ void GeneticTunerHarness::doGpuWork(
400
433
LOG_LINE_BY_LINE (INFO, ssInfo);
401
434
}
402
435
403
- std::vector<Duration> runtimes;
404
- try {
405
- size_t bestTimeSoFar;
406
- {
407
- std::lock_guard<std::mutex> lock (bestTimeMtx_);
408
- bestTimeSoFar = bestTime_;
409
- }
410
- auto prune =
411
- warmupOrPrune (engine, outputs, inputs, handle, bestTimeSoFar);
412
- if (prune) {
436
+ auto runtimes =
437
+ retrieveCachedRuntimes (engine, kKernelName_ , inputs, outputs, options);
438
+ if (runtimes.empty ()) {
439
+ try {
440
+ size_t bestTimeSoFar;
441
+ {
442
+ std::lock_guard<std::mutex> lock (bestTimeMtx_);
443
+ bestTimeSoFar = bestTime_;
444
+ }
445
+ auto prune =
446
+ warmupOrPrune (engine, outputs, inputs, handle, bestTimeSoFar);
447
+ if (prune) {
448
+ pConf->invalid = true ;
449
+ continue ;
450
+ } else {
451
+ runtimes.reserve (kReducedBenchmarkIterations );
452
+ for (size_t i = 0 ; i < kReducedBenchmarkIterations ; ++i) {
453
+ runtimes.push_back (engine.run (handle, inputs, outputs, true ));
454
+ }
455
+ engine.clear (handle);
456
+ }
457
+ } catch (std::exception& e) {
458
+ if (FLAGS_debug_tuner) {
459
+ LOG (WARNING) << " Runtime error gpu " << gpu << " : " << e.what ();
460
+ std::stringstream ssWarning;
461
+ MappingOptionsCppPrinter warningPrinter (ssWarning);
462
+ warningPrinter << options;
463
+ LOG (WARNING) << " Aborted execution on gpu " << gpu;
464
+ LOG_LINE_BY_LINE (WARNING, ssWarning);
465
+ }
466
+ while (cudaGetLastError () != cudaSuccess) {
467
+ // In case of errors in the generated, we cannot rely on deviceReset
468
+ // to set the GPU in a clean state. So instead we just pop and discard
469
+ // all the errors accumulated on the GPU until we get to a clean slate
470
+ // (i.e. cudaSuccess).
471
+ ;
472
+ }
473
+ try {
474
+ // Some errors, such as illegal memory access, cannot be recovered
475
+ // from without a cudaDeviceReset (i.e. because user protection) In
476
+ // those cases we have no choice than to fail hard.
477
+ TC_CUDA_RUNTIMEAPI_ENFORCE (cudaDeviceSynchronize ());
478
+ } catch (const std::exception& e) {
479
+ LOG (FATAL) << " [CUDA][FATAL] cuda error on gpu " << gpu << " : "
480
+ << e.what () << " \n "
481
+ << MappingOptionsAsCpp (options);
482
+ }
413
483
pConf->invalid = true ;
414
484
continue ;
415
- } else {
416
- runtimes.reserve (kReducedBenchmarkIterations );
417
- for (size_t i = 0 ; i < kReducedBenchmarkIterations ; ++i) {
418
- runtimes.push_back (engine.run (handle, inputs, outputs, true ));
419
- }
420
- engine.clear (handle);
421
- }
422
- } catch (std::exception& e) {
423
- if (FLAGS_debug_tuner) {
424
- LOG (WARNING) << " Runtime error gpu " << gpu << " : " << e.what ();
425
- std::stringstream ssWarning;
426
- MappingOptionsCppPrinter warningPrinter (ssWarning);
427
- warningPrinter << options;
428
- LOG (WARNING) << " Aborted execution on gpu " << gpu;
429
- LOG_LINE_BY_LINE (WARNING, ssWarning);
430
- }
431
- while (cudaGetLastError () != cudaSuccess) {
432
- // In case of errors in the generated, we cannot rely on deviceReset to
433
- // set the GPU in a clean state. So instead we just pop and discard all
434
- // the errors accumulated on the GPU until we get to a clean slate
435
- // (i.e. cudaSuccess).
436
- ;
437
485
}
438
- try {
439
- // Some errors, such as illegal memory access, cannot be recovered from
440
- // without a cudaDeviceReset (i.e. because user protection)
441
- // In those cases we have no choice than to fail hard.
442
- TC_CUDA_RUNTIMEAPI_ENFORCE (cudaDeviceSynchronize ());
443
- } catch (const std::exception& e) {
444
- LOG (FATAL) << " [CUDA][FATAL] cuda error on gpu " << gpu << " : "
445
- << e.what () << " \n "
446
- << MappingOptionsAsCpp (options);
447
- }
448
- pConf->invalid = true ;
449
- continue ;
450
486
}
451
487
452
488
auto prof = median (runtimes);
0 commit comments