Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 674ed01

Browse files
[C++ API] Step 11: Swap in the new TC C++ API
This replaces the old implementation with the new refactored API, updates tests and removes dead code. Dead files will be removed in the next commit.

This also introduces a stub CPU example that can be run as follows:

build$ make -j 16 && tc/examples/tensordot --tuner_threads=1 --tuner_gen_pop_size=1 --tuner_gen_generations=1 --tuner_gen_number_elites=0 --debug_tuner=1 --gtest_filter="*CPU*"

and prints:

Note: Google Test filter = *CPU*
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from TensorDotCPU
[ RUN ] TensorDotCPU.SimpleAutotune
NYI: CpuBackend::compileWithTcMapper
NYI: CpuTcExecutor::CpuTcExecutor setup RTC
NYI: CpuTuningHarness::warmupOrPrune
NYI: CpuTcExecutor::profileUnchecked
NYI: CpuTcExecutor::profileUnchecked
NYI: CpuTcExecutor::profileUnchecked
NYI: CpuTcExecutor::profileUnchecked
NYI: CpuTcExecutor::profileUnchecked
NYI: CpuTcExecutor::profileUnchecked
NYI: CpuTcExecutor::profileUnchecked
NYI: CpuTcExecutor::profileUnchecked
NYI: CpuTcExecutor::profileUnchecked
NYI: CpuTcExecutor::profileUnchecked
Iteration 0 Jobs(Compiled, Evaluated)/total (1, 1)/1 (best/median/worst)us: 999999999999/999999999999/999999999999
NYI: CpuBackend::compileWithTcMapper
NYI: CpuTcExecutor::CpuTcExecutor setup RTC
NYI: CpuTcExecutor::profileUnchecked
tensordot size I0: [16, 8, 16, 17, 25], size I1: [16, 16, 2, 17, 25] ran in: 999999999999us
NYI: CpuBackend::compileWithTcMapper
NYI: CpuTcExecutor::CpuTcExecutor setup RTC
NYI: CpuTcExecutor::profileUnchecked
tensordot size I0: [4, 9, 7, 16, 14], size I1: [4, 7, 3, 16, 14] ran in: 999999999999us
NYI: CpuBackend::compileWithTcMapper
NYI: CpuTcExecutor::CpuTcExecutor setup RTC
NYI: CpuTcExecutor::profileUnchecked
tensordot size I0: [8, 5, 11, 10, 10], size I1: [8, 11, 16, 10, 10] ran in: 999999999999us
[ OK ] TensorDotCPU.SimpleAutotune (1269 ms)
[----------] 1 test from TensorDotCPU (1269 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (1269 ms total)
[ PASSED ] 1 test.
1 parent e763a0e commit 674ed01

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+1670
-1513
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def tensordot(float(N, C1, C2, H, W) I0,
4141
O(n, c1, c3, h, w) +=! I0(n, c1, r_c2, h, w) * I1(n, r_c2, c3, h, w)
4242
}
4343
)TC";
44-
tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
44+
tc::ATenCompilationUnit<tc::CudaBackend> atCompl;
4545
atCompl.define(tc);
4646

4747
// 2. Allocate tensors with random data.

tc/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ add_subdirectory(proto)
55
add_subdirectory(version)
66
add_subdirectory(core)
77
add_subdirectory(autotuner)
8+
add_subdirectory(aten)
89

910
if (WITH_CAFFE2 AND WITH_CUDA)
1011
add_subdirectory(c2)

tc/aten/CMakeLists.txt

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
################################################################################
2+
# tc_aten
3+
#
4+
# Core CPU library with cross-compilation capabilities linked from
5+
# tc_aten
6+
################################################################################
7+
add_library(
8+
tc_aten
9+
10+
SHARED
11+
12+
aten_compiler_new_api.cc
13+
)
14+
target_link_libraries(
15+
tc_aten
16+
17+
${HALIDE_LIBRARIES}
18+
19+
tc_core
20+
)
21+
install(
22+
TARGETS
23+
tc_aten
24+
25+
DESTINATION lib
26+
)

tc/autotuner/CMakeLists.txt

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,24 @@
1-
if (WITH_CUDA)
2-
add_library(
3-
tc_autotuner
1+
set(AUTOTUNER_FILES
2+
genetic_search.cc
3+
parameters.cc
4+
utils.cc
5+
cpu/autotuner.cc
6+
)
47

5-
SHARED
8+
set(TC_LIBRARIES ${TC_LIBRARIES} tc_core_cpu)
69

7-
genetic_autotuner.cc
8-
genetic_autotuner_aten.cc
9-
genetic_search.cc
10-
genetic_tuning_harness.cc
11-
parameters.cc
12-
utils.cc)
10+
if (WITH_CUDA)
11+
set(AUTOTUNER_FILES ${AUTOTUNER_FILES} cuda/autotuner.cc)
12+
set(TC_LIBRARIES ${TC_LIBRARIES} tc_cuda)
13+
endif()
1314

14-
target_include_directories(tc_autotuner PUBLIC ${PROJECT_SOURCE_DIR}/include)
15-
target_link_libraries(tc_autotuner PUBLIC ${ATEN_LIBRARIES} tc_cuda tc_proto)
15+
add_library(
16+
tc_autotuner
17+
SHARED
18+
${AUTOTUNER_FILES}
19+
)
1620

17-
install(TARGETS tc_autotuner DESTINATION lib)
18-
endif()
21+
target_include_directories(tc_autotuner PUBLIC ${PROJECT_SOURCE_DIR}/include)
22+
target_link_libraries(tc_autotuner PUBLIC ${ATEN_LIBRARIES} ${TC_LIBRARIES})
23+
24+
install(TARGETS tc_autotuner DESTINATION lib)

tc/autotuner/autotuner-inl.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,7 @@ TuningHarness<Backend>::TuningHarness(
5454
template <typename Backend>
5555
template <typename SearchStrategy>
5656
void TuningHarness<Backend>::run(SearchStrategy& searchStrategy) {
57-
// TODO: kNumGenerations -> iterations
58-
for (size_t i = 0; i < searchStrategy.kNumGenerations; ++i) {
57+
for (size_t i = 0; i < searchStrategy.numGenerations; ++i) {
5958
if (not stopRequested_) {
6059
runOneIteration(searchStrategy, i);
6160
}

tc/autotuner/genetic_search.cc

Lines changed: 35 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -132,13 +132,13 @@ void dropInvalidConfigurations(GeneticSearch::Population& population) {
132132

133133
} // namespace
134134

135-
#define VALIDATE() \
136-
CHECK_LT(kNumberElites, kMaxPopulationSize); \
137-
CHECK(kMutationRate >= 0 and kMutationRate <= 100) \
138-
<< "the mutation rate (" << kMutationRate \
139-
<< ") should be in the [0,100] interval"; \
140-
CHECK(kCrossOverRate >= 0 and kCrossOverRate <= 100) \
141-
<< "the crossover (" << kCrossOverRate \
135+
#define VALIDATE() \
136+
CHECK_LT(numberElites, maxPopulationSize); \
137+
CHECK(mutationRate >= 0 and mutationRate <= 100) \
138+
<< "the mutation rate (" << mutationRate \
139+
<< ") should be in the [0,100] interval"; \
140+
CHECK(crossOverRate >= 0 and crossOverRate <= 100) \
141+
<< "the crossover (" << crossOverRate \
142142
<< ") rate should be in the [0,100] interval";
143143

144144
namespace {
@@ -157,55 +157,34 @@ void restoreRngState(RNG& rng) {
157157

158158
GeneticSearch::GeneticSearch(
159159
const std::vector<TuningConfiguration>& confs,
160-
size_t n,
160+
size_t numGenerations,
161+
size_t populationSize,
161162
uint8_t crossOverRate,
162163
uint8_t mutationRate,
163164
size_t numberElites)
164165
: population(),
165166
lastBestConf(confs[0]),
166-
kMaxPopulationSize(n),
167-
kCrossOverRate(crossOverRate),
168-
kMutationRate(mutationRate),
169-
kNumberElites(numberElites),
167+
numGenerations(numGenerations),
168+
maxPopulationSize(populationSize),
169+
crossOverRate(crossOverRate),
170+
mutationRate(mutationRate),
171+
numberElites(numberElites),
170172
rng{std::random_device{}()} {
171173
restoreRngState(rng);
172174
VALIDATE();
173175
CHECK(not confs.empty()) << "empty set of predefined configurations";
174-
CHECK_LE(confs.size(), n) << "too many predefined configurations";
175176

176-
population.reserve(confs.size());
177-
for (auto& c : confs) {
178-
population.push_back(make_unique<CandidateConfiguration>(c));
177+
population.reserve(populationSize);
178+
size_t size = 0;
179+
for (; size < confs.size() && size < maxPopulationSize; ++size) {
180+
population.push_back(make_unique<CandidateConfiguration>(confs[size]));
179181
}
180-
if (kMaxPopulationSize - population.size() > 0) {
181-
auto oldSize = population.size();
182-
for (size_t i = oldSize; i < kMaxPopulationSize; ++i) {
183-
population.emplace_back(
184-
make_unique<CandidateConfiguration>(*population.front()));
185-
}
186-
randomizePopulation(population.begin() + oldSize, population.end(), rng);
187-
}
188-
}
189-
190-
GeneticSearch::GeneticSearch(
191-
const TuningConfiguration& conf,
192-
size_t n,
193-
uint8_t crossOverRate,
194-
uint8_t mutationRate,
195-
size_t numberElites)
196-
: population(),
197-
lastBestConf(conf),
198-
kMaxPopulationSize(n),
199-
kCrossOverRate(crossOverRate),
200-
kMutationRate(mutationRate),
201-
kNumberElites(numberElites),
202-
rng{std::random_device{}()} {
203-
restoreRngState(rng);
204-
VALIDATE();
205-
for (size_t i = 0; i < kMaxPopulationSize; ++i) {
206-
population.emplace_back(make_unique<CandidateConfiguration>(conf));
182+
size_t oldSize = size;
183+
for (; size < maxPopulationSize; ++size) {
184+
population.emplace_back(
185+
make_unique<CandidateConfiguration>(*population.front()));
207186
}
208-
randomizePopulation(population.begin(), population.end(), rng);
187+
randomizePopulation(population.begin() + oldSize, population.end(), rng);
209188
}
210189

211190
TuningConfiguration GeneticSearch::crossover(
@@ -230,7 +209,7 @@ TuningConfiguration GeneticSearch::crossover(
230209
}
231210
};
232211

233-
for (size_t iter = 0; iter < kMutateIterations; ++iter) {
212+
for (size_t iter = 0; iter < mutateIterations; ++iter) {
234213
TuningConfiguration child{a};
235214
auto params = child.collectParameters();
236215
for (size_t i = 0; i < params.size(); ++i) {
@@ -249,28 +228,28 @@ TuningConfiguration GeneticSearch::crossover(
249228
void GeneticSearch::breed() {
250229
auto accFitness = computeAccumulatedFitness(population);
251230
Population new_population;
252-
new_population.reserve(kMaxPopulationSize);
231+
new_population.reserve(maxPopulationSize);
253232
for (auto& p : population) {
254233
new_population.push_back(
255234
make_unique<CandidateConfiguration>(p->configuration));
256235
}
257236

258-
auto select = [&]() -> TuningConfiguration& {
237+
auto select = [&]() -> const TuningConfiguration& {
259238
auto limit = std::uniform_real_distribution<double>{}(rng);
260239
auto lb = std::lower_bound(accFitness.begin(), accFitness.end(), limit);
261240
return population.at(std::distance(accFitness.begin(), lb))->configuration;
262241
};
263242
auto shouldCrossOver = [&]() -> bool {
264243
/*
265-
*Crossover should occur with probability (kCrossOverRate)%
244+
* Crossover should occur with probability (crossOverRate)%
266245
*/
267246
auto dist = std::discrete_distribution<int>{
268-
static_cast<double>(100 - kCrossOverRate),
269-
static_cast<double>(kCrossOverRate)};
247+
static_cast<double>(100 - crossOverRate),
248+
static_cast<double>(crossOverRate)};
270249
return dist(rng);
271250
};
272251

273-
while (new_population.size() < kMaxPopulationSize) {
252+
while (new_population.size() < maxPopulationSize) {
274253
if (shouldCrossOver()) {
275254
auto parent1 = select();
276255
auto parent2 = select();
@@ -303,9 +282,9 @@ void GeneticSearch::updateParameters() {
303282
lastBestConf =
304283
population.size() > 0 ? population.front()->configuration : lastBestConf;
305284

306-
if (population.size() < kMinCandidatesForBreeding) {
285+
if (population.size() < minCandidatesForBreeding) {
307286
LOG_IF(ERROR, FLAGS_debug_tuner)
308-
<< population.size() << " out of " << kMaxPopulationSize
287+
<< population.size() << " out of " << maxPopulationSize
309288
<< " candidates were valid and are not enough to form a new "
310289
"generation. Likely, most of the tuning runs during this "
311290
"generation were pruned for lack of parallelism in the "
@@ -314,7 +293,7 @@ void GeneticSearch::updateParameters() {
314293
"when autotuning a TC operating on small tensors. The next "
315294
"generation will be randomly initialized.";
316295
population.resize(0);
317-
for (size_t i = 0; i < kMaxPopulationSize; ++i) {
296+
for (size_t i = 0; i < maxPopulationSize; ++i) {
318297
population.emplace_back(
319298
make_unique<CandidateConfiguration>(lastBestConf));
320299
}
@@ -325,8 +304,8 @@ void GeneticSearch::updateParameters() {
325304
}
326305

327306
breed();
328-
for (size_t i = kNumberElites; i < population.size(); ++i) {
329-
mutate(*population[i], kMutationRate, kMutateIterations, rng);
307+
for (size_t i = numberElites; i < population.size(); ++i) {
308+
mutate(*population[i], mutationRate, mutateIterations, rng);
330309
}
331310
}
332311

tc/autotuner/genetic_search.h

Lines changed: 14 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -50,33 +50,13 @@ namespace autotune {
5050

5151
class GeneticSearch {
5252
public:
53-
/**
54-
* conf is used to determine which are the tunable parameters, the selected
55-
* values in conf are ignored and the population is randomized
56-
*
57-
* n is the population size
58-
*
59-
* crossoverRate is the probability ([0,100]) that a new candidate is produced
60-
* through reproduction
61-
*
62-
* mutationRate is the probability ([0,100]) that parameters are mutated
63-
* (randomly changed) whenever a new generation is created
64-
*
65-
* numberElites best candidates are preserved
66-
* across generations (elitism), number Elites must be less than n
67-
*/
68-
GeneticSearch(
69-
const TuningConfiguration& conf,
70-
size_t n,
71-
uint8_t crossOverRate,
72-
uint8_t mutationRate,
73-
size_t numberElites);
74-
7553
/**
7654
* confs are used to seed the first generation, the rest of the population is
7755
* randomly initialized
7856
*
79-
* n is the population size
57+
* numGenerations is the number of generations
58+
*
59+
* populationSize is the population size
8060
*
8161
* crossOverRate is the probability ([0,100]) that a new candidate is produced
8262
* through reproduction
@@ -85,11 +65,13 @@ class GeneticSearch {
8565
* (randomly changed) whenever a new generation is created
8666
*
8767
* numberElites best candidates are preserved
88-
* across generations (elitism), number Elites must be less than n
68+
* across generations (elitism), numberElites must be less than
69+
* populationSize
8970
*/
9071
GeneticSearch(
9172
const std::vector<TuningConfiguration>& confs,
92-
size_t n,
73+
size_t numGenerations,
74+
size_t populationSize,
9375
uint8_t crossOverRate,
9476
uint8_t mutationRate,
9577
size_t numberElites);
@@ -105,17 +87,18 @@ class GeneticSearch {
10587
TuningConfiguration&) const;
10688

10789
public:
108-
static constexpr int kMutateIterations = 1000;
109-
static constexpr int kMinCandidatesForBreeding = 3;
90+
static constexpr int mutateIterations = 1000;
91+
static constexpr int minCandidatesForBreeding = 3;
11092

11193
using Population = std::vector<std::unique_ptr<CandidateConfiguration>>;
11294

11395
Population population;
11496
TuningConfiguration lastBestConf;
115-
const size_t kMaxPopulationSize;
116-
const uint8_t kCrossOverRate;
117-
const uint8_t kMutationRate;
118-
const size_t kNumberElites;
97+
const size_t numGenerations;
98+
const size_t maxPopulationSize;
99+
const uint8_t crossOverRate;
100+
const uint8_t mutationRate;
101+
const size_t numberElites;
119102

120103
/*
121104
* c++11 seeding is (apparently) not of the highest quality:

tc/autotuner/genetic_tuning_harness.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ TuningConfiguration GeneticTunerHarness::makeTuningConfiguration(
176176
}
177177

178178
std::vector<size_t> parseGpus() {
179-
std::stringstream ss(FLAGS_tuner_gpus);
179+
std::stringstream ss(FLAGS_tuner_devices);
180180
size_t gpu;
181181
std::vector<size_t> res;
182182
while (ss >> gpu) {

0 commit comments

Comments
 (0)