Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 45136d9

Browse files
authored
Merge pull request #434 from nicolasvasilache/pr/flag-unroll-factor
Add a flag to control max unrolling during autotuning
2 parents 6b319f5 + ac1871a commit 45136d9

File tree

6 files changed

+22
-5
lines changed

6 files changed

+22
-5
lines changed

tc/autotuner/autotuner-inl.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,8 @@ void setupTuningParameters(
399399
configuration.tilingParams.setRange(nTilesDim, tileRange);
400400
configuration.blockParams.setRange(range, "b");
401401
configuration.gridParams.setRange(range, "g");
402-
configuration.unrollFactor = RangeParameter({1, 2, 4, 8, 16, 32}, "unroll");
402+
configuration.unrollFactor =
403+
RangeParameter(powers2(FLAGS_tuner_max_unroll_size), "unroll");
403404
}
404405
} // namespace
405406

tc/autotuner/parameters.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,11 @@ void TuningConfiguration::fromMappingOptions(
281281
options.proto.fix_parameters_before_scheduling());
282282
tilingParams.fromMappingOptions(options.tiling);
283283
unrollFactor.selectFromValue(
284-
(options.proto.has_unroll() ? options.proto.unroll() : 1));
284+
(options.proto.has_unroll()
285+
? std::min(
286+
static_cast<uint32_t>(options.proto.unroll()),
287+
FLAGS_tuner_max_unroll_size)
288+
: 1));
285289
tileImperfectlyNested.selectValue(options.proto.tile_imperfectly_nested());
286290
matchLibraryCalls.selectValue(options.proto.match_library_calls());
287291
}

tc/autotuner/utils.cc

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,21 @@
2727
namespace tc {
2828
namespace autotune {
2929

30-
std::vector<std::size_t> powers2andCeilDivisors(std::size_t val) {
30+
std::vector<std::size_t> powers2(std::size_t val) {
3131
auto numPowers = static_cast<std::size_t>(std::ceil(std::log2(val)));
32-
// 1. generate `numPowers' powers of 2
3332
std::vector<std::size_t> res(numPowers + 1);
3433
std::size_t p = 1;
3534
std::generate(res.begin(), res.end(), [p]() mutable {
3635
auto old_p = p;
3736
p *= 2;
3837
return old_p;
3938
});
40-
// 2. additionally insert ceil(val / powers2)
39+
return res;
40+
}
41+
42+
std::vector<std::size_t> powers2andCeilDivisors(std::size_t val) {
43+
std::vector<std::size_t> res = powers2(val);
44+
// Additionally insert ceil(val / powers2)
4145
res.reserve(res.size() * 2);
4246
for (std::size_t i = 0, s = res.size(); i < s; ++i) {
4347
if (res[i] > val) {

tc/autotuner/utils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@
2929
namespace tc {
3030
namespace autotune {
3131

32+
/// Returns all the powers of 2 up to the first one that is >= val
33+
std::vector<std::size_t> powers2(std::size_t val);
34+
3235
/// Returns all the powers of 2 up to the first one that is larger than val
3336
/// and the result of ceil(val/pow2) for each of those powers of 2 (except for
3437
/// the larger one)

tc/core/flags.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ DEFINE_bool(
5555
"Print debug spew for experimental schedule_tree");
5656

5757
// Autotuner flags
58+
DEFINE_uint32(
59+
tuner_max_unroll_size,
60+
32,
61+
"Polyhedral unrolling is expensive, limit to 32 by default");
5862
DEFINE_uint32(
5963
tuner_gen_pop_size,
6064
100,

tc/core/flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ DECLARE_uint32(benchmark_warmup);
3939
DECLARE_uint32(benchmark_iterations);
4040

4141
// Used in autotuning
42+
DECLARE_uint32(tuner_max_unroll_size);
4243
DECLARE_uint32(tuner_gen_pop_size);
4344
DECLARE_uint32(tuner_gen_crossover_rate);
4445
DECLARE_uint32(tuner_gen_mutation_rate);

0 commit comments

Comments
 (0)