Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 54a5b8c

Browse files
nicolasvasilache and ftynse
authored and committed
Drop useless proto validation in benchmarks
1 parent a9057d9 commit 54a5b8c

File tree

1 file changed

+0
-114
lines changed

1 file changed

+0
-114
lines changed

tc/benchmarks/benchmark_fixture.h

Lines changed: 0 additions & 114 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,6 @@ DEFINE_bool(
5959
"Test on other platforms than we claim perf results for");
6060
DEFINE_bool(autotune, false, "Enable autotuning");
6161
DEFINE_string(save_tuner_proto_prefix, "/tmp", "Enable autotuning");
62-
DEFINE_bool(validate_proto, false, "whether to load options from proto");
6362

6463
struct Benchmark : public ::testing::Test {
6564
void SetUp() {
@@ -230,119 +229,6 @@ struct Benchmark : public ::testing::Test {
230229
std::cout << "\n---------------------------------------------------------";
231230
std::cout << "\n\n";
232231

233-
#undef GET_US
234-
}
235-
236-
// Will disappear soon
237-
public:
238-
void validateProto(
239-
std::string cacheFilename,
240-
const std::string& tc,
241-
const std::string& name,
242-
const std::vector<at::Tensor>& inputs,
243-
CheckFunction check_fun = [](const std::vector<at::Tensor>&,
244-
const std::vector<at::Tensor>&) {
245-
return true;
246-
}) {
247-
std::cout << "Validating proto from: "
248-
<< tc::makeOptionsFilename(cacheFilename) << std::endl;
249-
250-
using CudaOptionsCache =
251-
tc::autotune::Autotuner<tc::CudaBackend, tc::autotune::GeneticSearch>::
252-
OptionsCacheType;
253-
CudaOptionsCache optionsCache;
254-
optionsCache.loadCacheFromFile(cacheFilename + ".options");
255-
tc::FLAGS_tuner_gen_restore_number = 1;
256-
257-
auto mappingOptions = [&]() {
258-
auto inputDLTensors = tc::aten::makeDLConstTensors(inputs);
259-
auto outputDLTensors = tc::aten::inferOutputTensorInfo(tc, name, inputs);
260-
return optionsCache.getTopKOptions(
261-
lang::canonicalTc(tc),
262-
tc::makeTensorInfoVector(tc::extractRawPtrs(inputDLTensors)),
263-
tc::makeTensorInfoVector(tc::extractRawPtrs(outputDLTensors)),
264-
tc::CudaGPUInfo::GPUInfo().getCudaDeviceStr(),
265-
1);
266-
}();
267-
268-
CHECK_GT(mappingOptions.size(), 0)
269-
<< "No mapping options for " << tc << " in loaded cache";
270-
auto pExecutor =
271-
tc::aten::compile<tc::CudaBackend>(tc, name, inputs, mappingOptions[0]);
272-
auto outputs = tc::aten::prepareOutputs(tc, name, inputs);
273-
tc::aten::run(*pExecutor, inputs, outputs);
274-
EXPECT_TRUE(check_fun(inputs, outputs));
275-
for (size_t i = 1; i < tc::FLAGS_benchmark_warmup; ++i) {
276-
tc::aten::run(*pExecutor, inputs, outputs);
277-
}
278-
std::vector<tc::Duration> kernelTimes;
279-
kernelTimes.reserve(tc::FLAGS_benchmark_iterations);
280-
std::vector<tc::Duration> totalTimes;
281-
totalTimes.reserve(tc::FLAGS_benchmark_iterations);
282-
for (size_t i = 0; i < tc::FLAGS_benchmark_iterations; ++i) {
283-
auto timings = tc::aten::profile(*pExecutor, inputs, outputs);
284-
kernelTimes.push_back(timings.kernelRuntime);
285-
TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
286-
auto start(std::chrono::system_clock::now());
287-
tc::aten::uncheckedRun(*pExecutor, inputs, outputs);
288-
TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
289-
totalTimes.push_back(tc::Duration::since(start));
290-
}
291-
292-
auto p50idx = static_cast<int>(std::ceil(0.5 * kernelTimes.size()));
293-
auto p90idx = static_cast<int>(std::ceil(0.9 * kernelTimes.size()));
294-
auto p99idx = static_cast<int>(std::ceil(0.99 * kernelTimes.size()));
295-
296-
std::sort(kernelTimes.begin(), kernelTimes.end());
297-
#define GET_US(X) ((X)).toMicroSeconds()
298-
299-
std::cout << "\n---------------------------------------------------------";
300-
std::cout << "\n------------- AUTOTUNED VALIDATED KERNEL STATS ----------";
301-
std::cout << "\n------------------ " << tc::FLAGS_benchmark_iterations
302-
<< " ITERATIONS ----------------";
303-
std::cout << "\n---------------------------------------------------------";
304-
std::cout << "\n";
305-
std::cout
306-
<< "Min: " << GET_US(kernelTimes.front()) << "us, "
307-
<< "p50: "
308-
<< GET_US(kernelTimes.at(std::min(p50idx, (int)kernelTimes.size() - 1)))
309-
<< "us, "
310-
<< "p90: "
311-
<< GET_US(kernelTimes.at(std::min(p90idx, (int)kernelTimes.size() - 1)))
312-
<< "us, "
313-
<< "p99: "
314-
<< GET_US(kernelTimes.at(std::min(p99idx, (int)kernelTimes.size() - 1)))
315-
<< "us, "
316-
<< "Max: " << GET_US(kernelTimes.back()) << "us";
317-
std::cout << "\n---------------------------------------------------------";
318-
std::cout << "\n\n";
319-
320-
#undef GET_US
321-
322-
std::sort(totalTimes.begin(), totalTimes.end());
323-
#define GET_US(X) ((X)).toMicroSeconds()
324-
325-
std::cout << "\n---------------------------------------------------------";
326-
std::cout << "\n-------------- AUTOTUNED VALIDATED TOTAL STATS ----------";
327-
std::cout << "\n------------------ " << tc::FLAGS_benchmark_iterations
328-
<< " ITERATIONS ----------------";
329-
std::cout << "\n---------------------------------------------------------";
330-
std::cout << "\n";
331-
std::cout
332-
<< "Min: " << GET_US(totalTimes.front()) << "us, "
333-
<< "p50: "
334-
<< GET_US(totalTimes.at(std::min(p50idx, (int)totalTimes.size() - 1)))
335-
<< "us, "
336-
<< "p90: "
337-
<< GET_US(totalTimes.at(std::min(p90idx, (int)totalTimes.size() - 1)))
338-
<< "us, "
339-
<< "p99: "
340-
<< GET_US(totalTimes.at(std::min(p99idx, (int)totalTimes.size() - 1)))
341-
<< "us, "
342-
<< "Max: " << GET_US(totalTimes.back()) << "us";
343-
std::cout << "\n---------------------------------------------------------";
344-
std::cout << "\n\n";
345-
346232
#undef GET_US
347233
}
348234
};

0 commit comments

Comments
 (0)