Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 981c27a

Browse files
Typesafe autotuner cache interaction
This PR introduces a simple CanonicalTcString type that publicly derives from std::string and has an explicit constructor. All functions in tc::autotune::utils use this type. Tests and benchmarks are updated to comply with the modified API.
1 parent 5d51a62 commit 981c27a

File tree

10 files changed

+50
-66
lines changed

10 files changed

+50
-66
lines changed

benchmarks/benchmark_fixture.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "tc/core/cuda/cuda_tc_executor.h"
3737
#include "tc/core/flags.h"
3838
#include "tc/core/scope_guard.h"
39+
#include "tc/lang/canonicalize.h"
3940

4041
#include <cublas_v2.h> // Must be the same as Caffe2
4142
#include <cuda_runtime_api.h>
@@ -270,7 +271,8 @@ struct Benchmark : public ::testing::Test {
270271
auto inputsPair = tc::toConstDlpackTensors(inputs);
271272
auto outputs = atCompl.inferOutputTensorInfo(name, inputs);
272273
tc::ScopeGuard g([&]() { tc::deleteDlmTensors(inputsPair.second); });
273-
return tc::autotune::restoreCandidates(name, inputsPair.first, outputs);
274+
return tc::autotune::restoreCandidates(
275+
lang::canonicalTc(tc), inputsPair.first, outputs);
274276
}();
275277
auto handle = atCompl.compile(name, inputs, mappingOptions[0]);
276278
std::vector<at::Tensor> outputs;

include/tc/autotuner/utils/utils.h

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,14 @@
2121
#include "tc/core/cuda/cuda.h"
2222
#include "tc/core/cuda/cuda_mapping_options.h"
2323
#include "tc/core/utils/dlpack.h"
24+
#include "tc/lang/canonicalize.h"
2425
#include "tc/lang/tree.h"
2526

2627
#include <llvm/ADT/Optional.h>
2728

2829
namespace tc {
2930
namespace autotune {
3031

31-
struct OptionsWithMedianTime {
32-
CudaMappingOptions options;
33-
Duration medianRuntime;
34-
};
35-
3632
/// Returns all the powers of 2 up to the first one that is larger than val
3733
/// and the result of ceil(val/pow2) for each of those powers of 2 (except for
3834
/// the larger one)
@@ -41,26 +37,28 @@ std::vector<std::size_t> powers2andCeilDivisors(std::size_t val);
4137
template <typename Vector, typename... Vectors>
4238
Vector mergeVectors(Vector&& v, Vectors&&... vs);
4339

44-
std::vector<OptionsWithMedianTime> getOptionsAndMedianRuntimes(
45-
const std::string& id,
46-
const std::vector<const DLTensor*>& inputs);
47-
40+
/// The following API allows interacting with the autotuner caches.
41+
/// Caches generally take arbitrary strings for keys.
42+
/// The autotuner uses a canonicalized TC expression to load / store into
43+
/// caches. Add a layer of type safety to interact with these.
4844
std::vector<CudaMappingOptions> restoreCandidates(
49-
const std::string& id,
50-
const std::vector<const DLTensor*>& inputs,
51-
const std::vector<const DLTensor*>& outputs);
52-
53-
std::vector<CudaMappingOptions> restoreCandidates(
54-
const lang::TreeRef& tc,
45+
const lang::CanonicalTcString& tc,
5546
const std::vector<const DLTensor*>& inputs,
5647
const std::vector<const DLTensor*>& outputs);
5748

5849
llvm::Optional<CudaMappingOptions> getBestOptions(
59-
const std::string& id,
50+
const lang::CanonicalTcString& id,
6051
const std::vector<const DLTensor*>& inputs,
6152
const std::vector<const DLTensor*>& outputs);
6253

63-
std::string canonicalTC(const lang::TreeRef& tc);
54+
struct OptionsWithMedianTime {
55+
CudaMappingOptions options;
56+
Duration medianRuntime;
57+
};
58+
59+
std::vector<OptionsWithMedianTime> getOptionsAndMedianRuntimes(
60+
const lang::CanonicalTcString& id,
61+
const std::vector<const DLTensor*>& inputs);
6462

6563
} // namespace autotune
6664
} // namespace tc

include/tc/core/tc_executor.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#include "tc/core/utils/dlpack.h"
2626
#include "tc/core/utils/time.h"
2727

28-
#include "tc/lang/parser.h"
28+
#include "tc/lang/canonicalize.h"
2929

3030
namespace tc {
3131

@@ -124,7 +124,7 @@ class TcExecutor {
124124

125125
tc2halide::HalideComponents halideComponents_;
126126
lang::TreeRef tcTree_;
127-
std::string cacheKeyId;
127+
lang::CanonicalTcString cacheKeyId;
128128
};
129129

130130
// templating to match both const and non-const DLTensor pointers

include/tc/lang/canonicalize.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,16 @@
1717

1818
#include <string>
1919

20+
#include "tc/lang/parser.h"
21+
#include "tc/lang/sema.h"
2022
#include "tc/lang/tree.h"
2123
#include "tc/lang/tree_views.h"
2224

2325
namespace lang {
2426

2527
// takes a tree after semantic analysis and creates
2628
// a canonicalized version that is agnostic to the choice of identifiers
27-
TreeRef canonicalize(TreeRef tree) {
29+
inline TreeRef canonicalize(TreeRef tree) {
2830
struct Context {
2931
std::unordered_map<std::string, std::string> identMap;
3032
std::string rename(const std::string& name) {
@@ -53,4 +55,19 @@ TreeRef canonicalize(TreeRef tree) {
5355
Context ctx;
5456
return ctx.apply(tree);
5557
}
58+
59+
struct CanonicalTcString : public std::string {
60+
explicit CanonicalTcString(const std::string& s) : std::string(s) {}
61+
};
62+
63+
inline CanonicalTcString canonicalTc(const lang::TreeRef& tc) {
64+
std::stringstream ss;
65+
// TODO: use tcFormat when more robust
66+
ss << lang::canonicalize(lang::Sema().checkFunction(tc));
67+
return CanonicalTcString(ss.str());
68+
}
69+
70+
inline CanonicalTcString canonicalTc(const std::string& tc) {
71+
return canonicalTc(lang::Parser(tc).parseFunction());
72+
}
5673
} // namespace lang

src/autotuner/genetic_autotuner.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ std::vector<CudaMappingOptions> GeneticAutotuner::load(
8181
ee.define(tc_);
8282
auto outputs = ee.inferOutputTensorInfo(tcName, inputs);
8383
return tc::autotune::restoreCandidates(
84-
tcNameMap_.at(tcName), inputs, outputs);
84+
canonicalTc(tcNameMap_.at(tcName)), inputs, outputs);
8585
}
8686

8787
namespace {
@@ -186,7 +186,7 @@ llvm::Optional<CudaMappingOptions> GeneticAutotuner::tune(
186186

187187
CHECK_GT(inputs.size(), 0);
188188
return tc::autotune::getBestOptions(
189-
canonicalTC(tcNameMap_.at(tcName)), inputs.begin()->second, outputPtrs);
189+
canonicalTc(tcNameMap_.at(tcName)), inputs.begin()->second, outputPtrs);
190190
}
191191

192192
} // namespace detail

src/autotuner/utils/utils.cc

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121
#include "tc/core/cuda/cuda_compilation_cache.h"
2222
#include "tc/core/utils/math.h"
2323
#include "tc/lang/canonicalize.h"
24-
#include "tc/lang/parser.h"
25-
#include "tc/lang/sema.h"
2624

2725
namespace tc {
2826
namespace autotune {
@@ -55,7 +53,7 @@ std::vector<std::size_t> powers2andCeilDivisors(std::size_t val) {
5553
}
5654

5755
std::vector<OptionsWithMedianTime> getOptionsAndMedianRuntimes(
58-
const std::string& id,
56+
const lang::CanonicalTcString& id,
5957
const std::vector<const DLTensor*>& inputs,
6058
const std::vector<const DLTensor*>& outputs) {
6159
auto candidates =
@@ -73,18 +71,11 @@ std::vector<OptionsWithMedianTime> getOptionsAndMedianRuntimes(
7371
return c;
7472
}
7573

76-
std::string canonicalTC(const lang::TreeRef& tc) {
77-
std::stringstream ss;
78-
ss << lang::canonicalize(lang::Sema().checkFunction(tc));
79-
return ss.str();
80-
}
81-
8274
std::vector<CudaMappingOptions> restoreCandidates(
83-
const lang::TreeRef& tc,
75+
const lang::CanonicalTcString& tc,
8476
const std::vector<const DLTensor*>& inputs,
8577
const std::vector<const DLTensor*>& outputs) {
86-
auto candidates =
87-
getOptionsAndMedianRuntimes(canonicalTC(tc), inputs, outputs);
78+
auto candidates = getOptionsAndMedianRuntimes(tc, inputs, outputs);
8879
LOG_IF(INFO, candidates.size() < FLAGS_tuner_gen_restore_number)
8980
<< "Requested " << FLAGS_tuner_gen_restore_number
9081
<< " candidates but there are only " << candidates.size() << " in cache.";
@@ -106,15 +97,8 @@ std::vector<CudaMappingOptions> restoreCandidates(
10697
return res;
10798
}
10899

109-
std::vector<CudaMappingOptions> restoreCandidates(
110-
const std::string& tc,
111-
const std::vector<const DLTensor*>& inputs,
112-
const std::vector<const DLTensor*>& outputs) {
113-
return restoreCandidates(lang::Parser(tc).parseFunction(), inputs, outputs);
114-
}
115-
116100
llvm::Optional<CudaMappingOptions> getBestOptions(
117-
const std::string& id,
101+
const lang::CanonicalTcString& id,
118102
const std::vector<const DLTensor*>& inputs,
119103
const std::vector<const DLTensor*>& outputs) {
120104
auto bestOptions =

src/core/tc_executor.cc

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020

2121
#include "tc/core/utils/dlpack.h"
2222
#include "tc/lang/canonicalize.h"
23-
#include "tc/lang/parser.h"
24-
#include "tc/lang/sema.h"
2523

2624
namespace tc {
2725

@@ -32,13 +30,6 @@ int toTypeToken(DLDataType dtype) {
3230
return lang::TypeInfo(lang::TypeInfo::Code(dtype.code), dtype.bits)
3331
.toScalarToken();
3432
}
35-
36-
std::string canonicalizedTc(const lang::TreeRef tcDefinition) {
37-
std::stringstream ss;
38-
ss << canonicalize(lang::Sema().checkFunction(tcDefinition));
39-
return ss.str();
40-
}
41-
4233
} // namespace
4334

4435
TcExecutor::TcExecutor(
@@ -49,7 +40,8 @@ TcExecutor::TcExecutor(
4940
: identifier(id),
5041
inputsInfo(dlutils::makeDLTensorVector(inputsInfo)),
5142
options(options),
52-
tcTree_(tcDefinition) {
43+
tcTree_(tcDefinition),
44+
cacheKeyId(lang::canonicalTc(tcDefinition)) {
5345
executionInfo_.kernelName = lang::Def(tcTree_).name().name();
5446
halideComponents_ =
5547
tc2halide::translate(isl::with_exceptions::globalIslCtx(), tcTree_);
@@ -58,7 +50,6 @@ TcExecutor::TcExecutor(
5850
// TODO: check if this is wrong, packed tensors may have 0 strides stored
5951
executionInfo_.outputsInfo =
6052
tc::inferOutputTensorInfo(halideComponents_, inputsInfo);
61-
cacheKeyId = canonicalizedTc(tcDefinition);
6253
}
6354

6455
TcExecutor::~TcExecutor() {}

test/cuda/test_autotuner_utility.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "tc/core/cuda/cuda_compilation_cache.h"
2121
#include "tc/core/cuda/cuda_tc_executor.h"
2222
#include "tc/core/scope_guard.h"
23+
#include "tc/lang/canonicalize.h"
2324

2425
using namespace tc;
2526
using namespace autotune;
@@ -54,7 +55,7 @@ std::vector<CudaMappingOptions> restoreCandidates(
5455
});
5556

5657
return tc::autotune::restoreCandidates(
57-
tc, inputsPair.first, outputsPair.first);
58+
lang::canonicalTc(tc), inputsPair.first, outputsPair.first);
5859
}
5960

6061
TEST(RestoreCandidates, NoCache) {

test/test_lang.cc

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,6 @@ TreeRef loadText(const std::string& text) {
147147
return Sema().checkFunction(Parser(text).parseFunction());
148148
}
149149

150-
std::string canonicalText(const std::string& text) {
151-
std::stringstream ss;
152-
ss << canonicalize(loadText(text));
153-
return ss.str();
154-
}
155-
156150
void testTcFormat() {
157151
static std::ios_base::Init initIostreams;
158152
auto source = R"(def fun2(float(B, N, M) X, float(B, M, K) Y) -> (Q) {
@@ -334,7 +328,7 @@ int main(int argc, char** argv) {
334328
Q(b, ii, j) += X(b, ii, k) * Y(b, k, j)
335329
}
336330
)";
337-
ASSERT(canonicalText(option_one) == canonicalText(option_two));
331+
ASSERT(lang::canonicalTc(option_one) == lang::canonicalTc(option_two));
338332

339333
testTcFormat();
340334

test/test_tc_mapper_harness-inl.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,8 @@ struct TcMapperTest : public ::testing::Test {
5959
tc::deleteDlmTensors(outputDLTensorsPair.second);
6060
});
6161
// Check that cache insertion worked properly (with canonicalized TC)
62-
std::stringstream ss;
63-
ss << lang::canonicalize(
64-
lang::Sema().checkFunction(lang::Parser(tc).parseFunction()));
6562
auto cached = CacheType::getCache()->retrieveKernel(
66-
ss.str(),
63+
lang::canonicalTc(tc),
6764
mappingOptions,
6865
inputDLTensorsPair.first,
6966
outputDLTensorsPair.first);

0 commit comments

Comments
 (0)