@@ -345,6 +345,39 @@ void GeneticTunerHarness::doCompile(
   }
 }

+namespace {
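+// Builds a const-qualified copy of a vector of tensor pointers so that the
+// mutable output tensors can be passed to the cache retrieval call below.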
+std::vector<const DLTensor*> toConstDlpackTensors(
+    const std::vector<DLTensor*>& v) {
+  std::vector<const DLTensor*> out(v.begin(), v.end());
+  return out;
+}
+} // namespace
+
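+// Retrieves the runtimes previously recorded in the options cache for the
+// given kernel id, inputs/outputs and mapping options. Returns an empty
+// vector when the cache is disabled or no matching entry exists.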
+template <typename ExecutorType>
+std::vector<Duration> retrieveCachedRuntimes(
+    ExecutorType& engine,
+    const std::string& id,
+    const std::vector<const DLTensor*>& inputs,
+    const std::vector<DLTensor*>& outputs,
+    const MappingOptions& options) {
+  if (not OptionsCache::cacheEnabled()) {
+    return {};
+  }
+  auto cache = OptionsCache::getCache();
+  auto allResults = cache->retrieveOptionsAndRuntimes(
+      id, inputs, toConstDlpackTensors(outputs));
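+  // Find the cached result whose mapping options match the candidate
+  // currently being evaluated.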
+  auto wantedResult = std::find_if(
+      allResults.begin(),
+      allResults.end(),
+      [&options](const OptionsCache::RetrievalResult& r) {
+        return r.options == options;
+      });
+  if (wantedResult == allResults.end()) {
+    return {};
+  }
+  return wantedResult->recordedRuntimes;
+}
+
 template <typename ExecutorType, typename Population>
 void GeneticTunerHarness::doGpuWork(
     size_t gpu,
@@ -398,51 +431,56 @@ void GeneticTunerHarness::doGpuWork(
       LOG_LINE_BY_LINE(INFO, ssInfo);
     }

-    std::vector<Duration> runtimes;
-    try {
-      size_t bestTimeSoFar;
-      {
-        std::lock_guard<std::mutex> lock(bestTimeMtx_);
-        bestTimeSoFar = bestTime_;
-      }
-      auto prune =
-          warmupOrPrune(engine, outputs, inputs, handle, bestTimeSoFar);
-      if (prune) {
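+    // Reuse any runtimes already recorded in the options cache for these
+    // mapping options; only benchmark a candidate when nothing is cached.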
+    auto runtimes =
+        retrieveCachedRuntimes(engine, kKernelName_, inputs, outputs, options);
+    if (runtimes.empty()) {
+      try {
+        size_t bestTimeSoFar;
+        {
+          std::lock_guard<std::mutex> lock(bestTimeMtx_);
+          bestTimeSoFar = bestTime_;
+        }
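+        // Warm up the compiled kernel and check whether this candidate can
+        // already be pruned against the best runtime found so far.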
+        auto prune =
+            warmupOrPrune(engine, outputs, inputs, handle, bestTimeSoFar);
+        if (prune) {
+          pConf->invalid = true;
+          continue;
+        } else {
+          runtimes.reserve(kReducedBenchmarkIterations);
+          for (size_t i = 0; i < kReducedBenchmarkIterations; ++i) {
+            runtimes.push_back(engine.run(handle, inputs, outputs, true));
+          }
+          engine.clear(handle);
+        }
+      } catch (std::exception& e) {
+        if (FLAGS_debug_tuner) {
+          LOG(WARNING) << "Runtime error gpu " << gpu << ": " << e.what();
+          std::stringstream ssWarning;
+          MappingOptionsCppPrinter warningPrinter(ssWarning);
+          warningPrinter << options;
+          LOG(WARNING) << "Aborted execution on gpu " << gpu;
+          LOG_LINE_BY_LINE(WARNING, ssWarning);
+        }
+        while (cudaGetLastError() != cudaSuccess) {
+          // In case of errors in the generated, we cannot rely on deviceReset
+          // to set the GPU in a clean state. So instead we just pop and discard
+          // all the errors accumulated on the GPU until we get to a clean slate
+          // (i.e. cudaSuccess).
+          ;
+        }
+        try {
+          // Some errors, such as illegal memory access, cannot be recovered
+          // from without a cudaDeviceReset (i.e. because user protection) In
+          // those cases we have no choice than to fail hard.
+          TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
+        } catch (const std::exception& e) {
+          LOG(FATAL) << "[CUDA][FATAL] cuda error on gpu " << gpu << ": "
+                     << e.what() << "\n"
+                     << MappingOptionsAsCpp(options);
+        }
         pConf->invalid = true;
         continue;
-      } else {
-        runtimes.reserve(kReducedBenchmarkIterations);
-        for (size_t i = 0; i < kReducedBenchmarkIterations; ++i) {
-          runtimes.push_back(engine.run(handle, inputs, outputs, true));
-        }
-        engine.clear(handle);
       }
-    } catch (std::exception& e) {
-      LOG(WARNING) << "Runtime error gpu " << gpu << ": " << e.what();
-      std::stringstream ssWarning;
-      MappingOptionsCppPrinter warningPrinter(ssWarning);
-      warningPrinter << options;
-      LOG(WARNING) << "Aborted execution on gpu " << gpu;
-      LOG_LINE_BY_LINE(WARNING, ssWarning);
-      while (cudaGetLastError() != cudaSuccess) {
-        // In case of errors in the generated, we cannot rely on deviceReset to
-        // set the GPU in a clean state. So instead we just pop and discard all
-        // the errors accumulated on the GPU until we get to a clean slate
-        // (i.e. cudaSuccess).
-        ;
-      }
-      try {
-        // Some errors, such as illegal memory access, cannot be recovered from
-        // without a cudaDeviceReset (i.e. because user protection)
-        // In those cases we have no choice than to fail hard.
-        TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
-      } catch (const std::exception& e) {
-        LOG(FATAL) << "[CUDA][FATAL] cuda error on gpu " << gpu << ": "
-                   << e.what() << "\n"
-                   << MappingOptionsAsCpp(options);
-      }
-      pConf->invalid = true;
-      continue;
     }

     auto prof = median(runtimes);