Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 8b31fcc

Browse files
Theodoros Theodoridis authored and Sven Verdoolaege committed
GeneticTunerHarness: do not convert durations to size_t
1 parent ce8d2f2 commit 8b31fcc

File tree

2 files changed

+15
-13
lines changed

2 files changed

+15
-13
lines changed

tc/autotuner/genetic_tuning_harness.cc

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -208,6 +208,9 @@ std::vector<size_t> parseGpus() {
208208
LOG(GSTREAM) << line; \
209209
}
210210

211+
constexpr size_t GeneticTunerHarness::kEarlyPruneFactor;
212+
constexpr size_t GeneticTunerHarness::kCatastrophicPerfFactor;
213+
211214
// This function is run on a single pre-determined GPU, in a single thread
212215
// It takes the input/output DLTensor objects that reside on that GPU
213216
//
@@ -222,7 +225,7 @@ bool GeneticTunerHarness::warmupOrPrune(
222225
const std::vector<DLTensor*>& outputs,
223226
const std::vector<const DLTensor*>& inputs,
224227
size_t handle,
225-
size_t bestTimeSoFar) {
228+
Duration bestTimeSoFar) {
226229
// Pruning based on number of threads: if you don't hit at least k warps
227230
// (default k = 8; 256 total threads, controlled by
228231
// FLAGS_tuner_min_launch_total_threads) then it's likely the kernel is not
@@ -276,10 +279,8 @@ bool GeneticTunerHarness::warmupOrPrune(
276279
}
277280

278281
// 1.b.
279-
constexpr size_t kCatastrophicPerfFactor = 100;
280-
if (bestTimeSoFar < std::numeric_limits<size_t>::max() and
281-
prof >= std::chrono::microseconds(
282-
(kCatastrophicPerfFactor * bestTimeSoFar))) {
282+
if (bestTimeSoFar < Duration::max() and
283+
prof >= kCatastrophicPerfFactor * bestTimeSoFar) {
283284
return true;
284285
}
285286

@@ -291,8 +292,8 @@ bool GeneticTunerHarness::warmupOrPrune(
291292
// 2. After reasonable warmup, look at the performance and prune with
292293
// kEarlyPruneFactor
293294
prof = engine.run(handle, inputs, outputs, true);
294-
if (bestTimeSoFar < std::numeric_limits<size_t>::max() and
295-
prof >= std::chrono::microseconds((kEarlyPruneFactor * bestTimeSoFar))) {
295+
if (bestTimeSoFar < Duration::max() and
296+
prof >= kEarlyPruneFactor * bestTimeSoFar) {
296297
return true;
297298
}
298299

@@ -394,7 +395,7 @@ void GeneticTunerHarness::doGpuWork(
394395

395396
std::vector<Duration> runtimes;
396397
try {
397-
size_t bestTimeSoFar;
398+
Duration bestTimeSoFar;
398399
{
399400
std::lock_guard<std::mutex> lock(bestTimeMtx_);
400401
bestTimeSoFar = bestTime_;
@@ -451,8 +452,8 @@ void GeneticTunerHarness::doGpuWork(
451452
// Save best time under lock
452453
{
453454
std::lock_guard<std::mutex> lock(bestTimeMtx_);
454-
if (prof_us < bestTime_) {
455-
bestTime_ = prof_us;
455+
if (prof < bestTime_) {
456+
bestTime_ = prof;
456457
bestCudaMappingOptions_ = options;
457458
}
458459
}

tc/autotuner/genetic_tuning_harness.h

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ class GeneticTunerHarness {
6363
const std::vector<DLTensor*>& outputs,
6464
const std::vector<const DLTensor*>& inputs,
6565
size_t handle,
66-
size_t bestTimeSoFar);
66+
Duration bestTimeSoFar);
6767

6868
/// Helper function to delegate compiling on the cpu to different threads
6969
template <typename ExecutorType>
@@ -85,7 +85,8 @@ class GeneticTunerHarness {
8585
public:
8686
static constexpr int kReducedWarmupIterations = 2;
8787
static constexpr int kReducedBenchmarkIterations = 10;
88-
static constexpr int kEarlyPruneFactor = 5;
88+
static constexpr size_t kEarlyPruneFactor = 5;
89+
static constexpr size_t kCatastrophicPerfFactor = 100;
8990

9091
const size_t kMaxPopulationSize;
9192
const uint8_t kCrossOverRate;
@@ -96,7 +97,7 @@ class GeneticTunerHarness {
9697

9798
private:
9899
std::mutex bestTimeMtx_;
99-
size_t bestTime_ = std::numeric_limits<size_t>::max();
100+
Duration bestTime_ = Duration::max();
100101
CudaMappingOptions bestCudaMappingOptions_;
101102

102103
const lang::TreeRef kTc_;

0 commit comments

Comments
 (0)