Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit ac1871a

Browse files
Add a flag to control max unrolling during autotuning
This commit introduces a flag to limit the amount of polyhedral unrolling during autotuning. Polyhedral unrolling can be a significant source of overhead, so we err on the conservative side for now. The flag only affects autotuning and can be controlled from the command line. It does not affect the construction of MappingOptions: one can still unroll manually by whatever factor is deemed appropriate.
1 parent 6b319f5 commit ac1871a

File tree

6 files changed

+22
-5
lines changed

6 files changed

+22
-5
lines changed

tc/autotuner/autotuner-inl.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,8 @@ void setupTuningParameters(
399399
configuration.tilingParams.setRange(nTilesDim, tileRange);
400400
configuration.blockParams.setRange(range, "b");
401401
configuration.gridParams.setRange(range, "g");
402-
configuration.unrollFactor = RangeParameter({1, 2, 4, 8, 16, 32}, "unroll");
402+
configuration.unrollFactor =
403+
RangeParameter(powers2(FLAGS_tuner_max_unroll_size), "unroll");
403404
}
404405
} // namespace
405406

tc/autotuner/parameters.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,11 @@ void TuningConfiguration::fromMappingOptions(
281281
options.proto.fix_parameters_before_scheduling());
282282
tilingParams.fromMappingOptions(options.tiling);
283283
unrollFactor.selectFromValue(
284-
(options.proto.has_unroll() ? options.proto.unroll() : 1));
284+
(options.proto.has_unroll()
285+
? std::min(
286+
static_cast<uint32_t>(options.proto.unroll()),
287+
FLAGS_tuner_max_unroll_size)
288+
: 1));
285289
tileImperfectlyNested.selectValue(options.proto.tile_imperfectly_nested());
286290
matchLibraryCalls.selectValue(options.proto.match_library_calls());
287291
}

tc/autotuner/utils.cc

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,21 @@
2727
namespace tc {
2828
namespace autotune {
2929

30-
std::vector<std::size_t> powers2andCeilDivisors(std::size_t val) {
30+
/// Returns the powers of 2 in increasing order, starting at 1 and ending with
/// the first power that is greater than or equal to `val`.
///
/// Examples: powers2(32) == {1, 2, 4, 8, 16, 32};
///           powers2(33) == {1, 2, 4, 8, 16, 32, 64};
///           powers2(0) == powers2(1) == {1}.
///
/// If no power of 2 representable in std::size_t reaches `val`, the sequence
/// stops at the largest representable power instead of wrapping around to 0.
std::vector<std::size_t> powers2(std::size_t val) {
  // Integer arithmetic only: std::log2(0) is -infinity, whose conversion to an
  // unsigned integer is undefined, and a double cannot represent every large
  // std::size_t exactly, which could make a log2-based count off by one.
  std::vector<std::size_t> res(1, 1);
  std::size_t p = 1;
  while (p < val) {
    const std::size_t next = p * 2;
    if (next < p) {
      // Unsigned multiplication wrapped around: p is already the largest
      // power of 2 that fits in std::size_t.
      break;
    }
    p = next;
    res.push_back(p);
  }
  return res;
}
41+
42+
std::vector<std::size_t> powers2andCeilDivisors(std::size_t val) {
43+
std::vector<std::size_t> res = powers2(val);
44+
// Additionally insert ceil(val / powers2)
4145
res.reserve(res.size() * 2);
4246
for (std::size_t i = 0, s = res.size(); i < s; ++i) {
4347
if (res[i] > val) {

tc/autotuner/utils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@
2929
namespace tc {
3030
namespace autotune {
3131

32+
/// Returns all the powers of 2 up to the first one that is larger than val
33+
std::vector<std::size_t> powers2(std::size_t val);
34+
3235
/// Returns all the powers of 2 up to the first one that is larger than val
3336
/// and the result of ceil(val/pow2) for each of those powers of 2 (except for
3437
/// the larger one)

tc/core/flags.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ DEFINE_bool(
5555
"Print debug spew for experimental schedule_tree");
5656

5757
// Autotuner flags
58+
// Upper bound on the unroll factor used during autotuning: the tuner builds
// its candidate unroll factors from powers2(FLAGS_tuner_max_unroll_size), and
// TuningConfiguration::fromMappingOptions clamps an existing unroll value to
// this flag.
DEFINE_uint32(
59+
tuner_max_unroll_size,
60+
32,
61+
"Polyhedral unrolling is expensive, limit to 32 by default");
5862
DEFINE_uint32(
5963
tuner_gen_pop_size,
6064
100,

tc/core/flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ DECLARE_uint32(benchmark_warmup);
3939
DECLARE_uint32(benchmark_iterations);
4040

4141
// Used in autotuning
42+
DECLARE_uint32(tuner_max_unroll_size);
4243
DECLARE_uint32(tuner_gen_pop_size);
4344
DECLARE_uint32(tuner_gen_crossover_rate);
4445
DECLARE_uint32(tuner_gen_mutation_rate);

0 commit comments

Comments
 (0)