
Commit 67b06ae

Add the simplest possible C++ autotuning example
1 parent: 9469abb · commit: 67b06ae

File tree: 2 files changed, +123 -0 lines changed


examples/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
@@ -19,6 +19,24 @@ find_library(CUDA_CUDNN_LIBRARIES cudnn
 include_directories(${PROJECT_SOURCE_DIR}/third-party/googletest/googletest/include)
 set(GTEST_LIBS gtest gtest_main)
 
+################################################################################
+# Simple Example
+################################################################################
+add_executable(example_simple example_simple.cc)
+add_test(example_simple example_simple)
+target_link_libraries(
+  example_simple
+
+  tc_autotuner
+  tc_core
+
+  ${GTEST_LIBS}
+  ${GFLAGS_LIBRARIES}
+  ${GLOG_LIBRARIES}
+
+  ${ATEN_LIBRARIES}
+)
+
 ################################################################################
 # Examples
 ################################################################################
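
With this target in place, a minimal way to build and run the example would be the standard CMake/CTest commands below; this is a sketch only, and the exact build directory, generator, and configure options depend on the local setup rather than on this commit.

  # from an already configured CMake build directory
  cmake --build . --target example_simple
  # run the registered test through CTest
  ctest -R example_simple -V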

examples/example_simple.cc

Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
/**
 * Copyright (c) 2017-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <chrono>
#include <iostream>
#include <string>
#include <vector>

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>

#include <ATen/ATen.h>

#include "tc/aten/aten_compiler.h"
#include "tc/autotuner/genetic_autotuner_aten.h"
#include "tc/core/cuda/cuda_mapping_options.h"
#include "tc/core/flags.h"

#include "../test/test_harness_aten_cuda.h"

DEFINE_uint32(number_elites, 2, "Number of elites per generation");
DEFINE_uint32(generations, 3, "Number of generations to tune for");
DEFINE_uint32(pop_size, 10, "Population size to tune for");
DEFINE_uint32(threads, 16, "Number of threads to tune with");
DEFINE_string(gpus, "0", "List of gpus to evaluate on");

TEST(TensorDot, SimpleAutotune) {
  // 1. Define and setup the TC compilation unit with CUDA memory
  // management backed by ATen tensors.
  std::string tc = R"TC(
def tensordot(float(N, C1, C2, H, W) I0,
              float(N, C2, C3, H, W) I1) -> (O)
{
    O(n, c1, c3, h, w) +=! I0(n, c1, r_c2, h, w) * I1(n, r_c2, c3, h, w)
}
  )TC";
  tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
  atCompl.define(tc);

  // 2. Allocate tensors with random data.
  at::Tensor I0 = at::CUDA(at::kFloat).rand({16, 8, 16, 17, 25});
  at::Tensor I1 = at::CUDA(at::kFloat).rand({16, 16, 2, 17, 25});

  // 3. Run autotuning with evolutionary search starting from a naive option.
  auto naiveOptions = tc::CudaMappingOptions::makeNaiveCudaMappingOptions();
  tc::autotune::GeneticAutotunerATen geneticAutotuneATen(tc);
  auto bestOption = geneticAutotuneATen.tune(
      "/tmp/save_results", "tensordot", {I0, I1}, naiveOptions);

  // 4. Compile and run the TC with the best option.
  // Outputs get allocated; could also be pre-allocated and passed.
  auto handle = atCompl.compile("tensordot", {I0, I1}, bestOption.getValue());
  std::vector<at::Tensor> outputs;
  auto duration = atCompl.run("tensordot", {I0, I1}, outputs, handle, true);
  std::cout
      << "tensordot size I0: " << I0.sizes() << ", "
      << "size I1: " << I1.sizes() << " ran in: "
      << std::chrono::duration_cast<std::chrono::microseconds>(duration).count()
      << "us\n";

  // 5. Optionally, perform precision checks against a ref. implementation.
  // TODO.

  // 6. Reuse bestOptions from autotuning on another kernel
  for (auto sizes : std::vector<std::pair<at::IntList, at::IntList>>{
           {{4, 9, 7, 16, 14}, {4, 7, 3, 16, 14}},
           {{8, 5, 11, 10, 10}, {8, 11, 16, 10, 10}},
       }) {
    at::Tensor I0 = at::CUDA(at::kFloat).rand(sizes.first);
    at::Tensor I1 = at::CUDA(at::kFloat).rand(sizes.second);
    auto handle = atCompl.compile("tensordot", {I0, I1}, bestOption.getValue());
    std::vector<at::Tensor> outputs;
    auto duration = atCompl.run("tensordot", {I0, I1}, outputs, handle, true);
    std::cout << "tensordot size I0: " << I0.sizes() << ", "
              << "size I1: " << I1.sizes() << " ran in: "
              << std::chrono::duration_cast<std::chrono::microseconds>(duration)
                     .count()
              << "us\n";
  }
}

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  ::gflags::ParseCommandLineFlags(&argc, &argv, true);
  ::google::InitGoogleLogging(argv[0]);
  setAtenSeed(tc::initRandomSeed(), at::Backend::CUDA);
  tc::FLAGS_tuner_gen_number_elites = FLAGS_number_elites;
  tc::FLAGS_tuner_gen_generations = FLAGS_generations;
  tc::FLAGS_tuner_gen_pop_size = FLAGS_pop_size;
  tc::FLAGS_tuner_threads = FLAGS_threads;
  tc::FLAGS_tuner_gpus = FLAGS_gpus;
  return RUN_ALL_TESTS();
}
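
Since main forwards the gflags defined at the top of the file into the tuner's flags, the search budget can be adjusted from the command line. A possible invocation is sketched below; the binary path and the flag values are illustrative assumptions, not part of this commit.

  # run from wherever the build placed the binary; values shown are examples only
  ./example_simple --generations=5 --pop_size=20 --threads=16 --gpus=0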
