Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit fa0936f

Browse files
Merge pull request #379 from nicolasvasilache/pr/c++api-refactor
[c++api] Refactor
2 parents b8efdaf + fd57aae commit fa0936f

File tree

118 files changed

+4851
-5304
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

118 files changed

+4851
-5304
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,5 @@ tensor_comprehensions/version.py
2020
tensor_comprehensions/*.proto
2121
slurm-*
2222
examples/results*
23+
*.pyc
24+
test_python/tc_test/*

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def tensordot(float(N, C1, C2, H, W) I0,
4141
O(n, c1, c3, h, w) +=! I0(n, c1, r_c2, h, w) * I1(n, r_c2, c3, h, w)
4242
}
4343
)TC";
44-
tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
44+
tc::ATenCompilationUnit<tc::CudaBackend> atCompl;
4545
atCompl.define(tc);
4646

4747
// 2. Allocate tensors with random data.

tc/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ add_subdirectory(proto)
55
add_subdirectory(version)
66
add_subdirectory(core)
77
add_subdirectory(autotuner)
8+
add_subdirectory(aten)
89

910
if (WITH_CAFFE2 AND WITH_CUDA)
1011
add_subdirectory(c2)

tc/aten/CMakeLists.txt

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
################################################################################
2+
# tc_aten
3+
#
4+
# Core CPU library with cross-compilation capabilities linked from
5+
# tc_aten
6+
################################################################################
7+
add_library(
8+
tc_aten
9+
10+
SHARED
11+
12+
aten_compiler.cc
13+
)
14+
target_link_libraries(
15+
tc_aten
16+
17+
${HALIDE_LIBRARIES}
18+
19+
tc_core
20+
)
21+
install(
22+
TARGETS
23+
tc_aten
24+
25+
DESTINATION lib
26+
)

tc/aten/utils-inl.h renamed to tc/aten/aten-inl.h

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,36 +20,41 @@
2020

2121
#include <ATen/ATen.h>
2222
#include <ATen/DLConvertor.h>
23+
24+
#include "tc/core/tensor.h"
25+
2326
namespace tc {
24-
namespace {
25-
inline std::pair<std::vector<DLTensor*>, std::vector<DLManagedTensor*>>
26-
toDlpackTensors(const std::vector<at::Tensor>& tensors) {
27-
std::vector<DLTensor*> dlTensors;
28-
std::vector<DLManagedTensor*> dlMTensors;
27+
namespace aten {
28+
inline std::vector<DLTensorUPtr> makeDLTensors(
29+
const std::vector<at::Tensor>& tensors) {
30+
std::vector<DLTensorUPtr> dlTensors;
2931
for (auto tensor : tensors) {
3032
auto dlMTensor = at::toDLPack(tensor);
31-
dlTensors.push_back(&(dlMTensor->dl_tensor));
32-
dlMTensors.push_back(dlMTensor);
33+
dlTensors.push_back(makeDLTensor(&(dlMTensor->dl_tensor)));
34+
dlMTensor->deleter(dlMTensor);
3335
}
34-
return make_pair(dlTensors, dlMTensors);
36+
return dlTensors;
3537
}
3638

37-
inline std::pair<std::vector<const DLTensor*>, std::vector<DLManagedTensor*>>
38-
toConstDlpackTensors(const std::vector<at::Tensor>& tensors) {
39-
std::vector<const DLTensor*> dlTensors;
40-
std::vector<DLManagedTensor*> dlMTensors;
39+
inline std::vector<DLConstTensorUPtr> makeDLConstTensors(
40+
const std::vector<at::Tensor>& tensors) {
41+
std::vector<DLConstTensorUPtr> dlTensors;
4142
for (auto tensor : tensors) {
4243
auto dlMTensor = at::toDLPack(tensor);
43-
dlTensors.push_back(&(dlMTensor->dl_tensor));
44-
dlMTensors.push_back(dlMTensor);
44+
dlTensors.push_back(makeDLConstTensor(&(dlMTensor->dl_tensor)));
45+
dlMTensor->deleter(dlMTensor);
4546
}
46-
return make_pair(dlTensors, dlMTensors);
47+
return dlTensors;
4748
}
4849

49-
inline void deleteDlmTensors(std::vector<DLManagedTensor*>& tensors) {
50-
for (auto& tensor : tensors) {
51-
tensor->deleter(tensor);
52-
}
50+
inline void setAtenSeed(uint64_t seed, at::Backend backend) {
51+
at::Generator& gen = at::globalContext().defaultGenerator(backend);
52+
gen.manualSeed(seed);
53+
}
54+
55+
inline uint64_t getAtenSeed(at::Backend backend) {
56+
at::Generator& gen = at::globalContext().defaultGenerator(backend);
57+
return gen.seed();
5358
}
54-
} // namespace
59+
} // namespace aten
5560
} // namespace tc

tc/aten/utils.h renamed to tc/aten/aten.h

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,22 @@
1919
#include <vector>
2020

2121
#include <ATen/ATen.h>
22-
#include <ATen/DLConvertor.h>
22+
23+
#include "tc/core/tensor.h"
2324

2425
namespace tc {
25-
namespace {
26-
std::pair<std::vector<DLTensor*>, std::vector<DLManagedTensor*>>
27-
toDlpackTensors(const std::vector<at::Tensor>& tensors);
26+
namespace aten {
27+
28+
inline std::vector<DLTensorUPtr> makeDLTensors(
29+
const std::vector<at::Tensor>& tensors);
30+
31+
inline std::vector<DLConstTensorUPtr> makeDLConstTensors(
32+
const std::vector<at::Tensor>& tensors);
2833

29-
std::pair<std::vector<const DLTensor*>, std::vector<DLManagedTensor*>>
30-
toConstDlpackTensors(const std::vector<at::Tensor>& tensors);
34+
inline void setAtenSeed(uint64_t seed, at::Backend backend);
35+
inline uint64_t getAtenSeed(at::Backend backend);
3136

32-
void deleteDlmTensors(std::vector<DLManagedTensor*>& tensors);
33-
} // namespace
37+
} // namespace aten
3438
} // namespace tc
3539

36-
#include "tc/aten/utils-inl.h"
40+
#include "tc/aten/aten-inl.h"

tc/aten/aten_autotuner-inl.h

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/**
2+
* Copyright (c) 2017-present, Facebook, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#include "tc/autotuner/autotuner.h"
17+
18+
#include <atomic>
19+
#include <chrono>
20+
#include <numeric>
21+
#include <thread>
22+
23+
#include <glog/stl_logging.h>
24+
25+
#include "tc/aten/aten.h"
26+
#include "tc/aten/aten_compiler.h"
27+
#include "tc/autotuner/utils.h"
28+
#include "tc/core/compiler.h"
29+
#include "tc/core/flags.h"
30+
#include "tc/core/scope_guard.h"
31+
#include "tc/core/tensor.h"
32+
#include "tc/core/utils/math.h"
33+
#include "tc/lang/canonicalize.h"
34+
35+
namespace tc {
36+
namespace aten {
37+
template <typename Backend, typename Search>
38+
ATenAutotuner<Backend, Search>::ATenAutotuner(const std::string& tc)
39+
: tc_(tc) {}
40+
41+
std::vector<at::Tensor> cloneTensors(const std::vector<at::Tensor>& inputs) {
42+
std::vector<at::Tensor> copies;
43+
copies.reserve(inputs.size());
44+
for (const auto& t : inputs) {
45+
copies.push_back(t.clone());
46+
}
47+
return copies;
48+
}
49+
50+
template <typename Backend, typename Search>
51+
std::vector<typename Backend::MappingOptionsType>
52+
ATenAutotuner<Backend, Search>::tune(
53+
const std::string& tcName,
54+
const std::vector<at::Tensor>& inputs,
55+
const typename Backend::MappingOptionsType& baseMapping,
56+
const std::string& cacheFileName,
57+
const tc::autotune::TuningParameterFixer& fixedParams) {
58+
// TODO: some checks that inputs memory lives on the proper Backend device
59+
60+
// prepare outputs of the proper shape
61+
auto outputs = tc::aten::prepareOutputs(tc_, tcName, inputs);
62+
63+
// first parse the devices
64+
auto devices =
65+
tc::autotune::detail::parseDevices<Backend>(FLAGS_tuner_devices);
66+
// clone the inputs/outputs on each device
67+
// TODO: this takes twice the space it should, alternatives are:
68+
// 1. enforce inputs and outputs live on the CPU in the first place so we
69+
// don't spuriously run out of device memory (assuming CPU memory is
70+
// infinite for now);
71+
// 2. if 1. is not reasonable, detect the device on which each tensor lives
72+
// and point to the raw data for that (device, tensor) pair.
73+
std::unordered_map<size_t, std::vector<DLConstTensorUPtr>> inputsPerDevice;
74+
std::unordered_map<size_t, std::vector<const DLConstTensor*>>
75+
rawInputsPerDevice;
76+
std::unordered_map<size_t, std::vector<DLTensorUPtr>> outputsPerDevice;
77+
std::unordered_map<size_t, std::vector<const DLTensor*>> rawOutputsPerDevice;
78+
for (auto device : devices) {
79+
typename Backend::WithDevice wd(device);
80+
auto deviceInputs = cloneTensors(inputs);
81+
inputsPerDevice.emplace(device, makeDLConstTensors(deviceInputs));
82+
rawInputsPerDevice.emplace(
83+
device, extractRawPtrs(inputsPerDevice.at(device)));
84+
auto deviceOutputs = cloneTensors(outputs);
85+
outputsPerDevice.emplace(device, makeDLTensors(deviceOutputs));
86+
rawOutputsPerDevice.emplace(
87+
device, extractRawPtrs(outputsPerDevice.at(device)));
88+
}
89+
return tc::autotune::Autotuner<Backend, Search>::tune(
90+
tc_,
91+
tcName,
92+
rawInputsPerDevice,
93+
rawOutputsPerDevice,
94+
baseMapping,
95+
cacheFileName,
96+
fixedParams);
97+
}
98+
} // namespace aten
99+
} // namespace tc

tc/aten/aten_autotuner.h

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/**
2+
* Copyright (c) 2017-present, Facebook, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include <string>
19+
#include <vector>
20+
21+
#include "tc/aten/aten.h"
22+
#include "tc/autotuner/autotuner.h"
23+
24+
namespace tc {
25+
namespace aten {
26+
/**
27+
* An Autotuner provides the basic interface to run a SearchStrategy over a
28+
* particular Backend.
29+
*
30+
* Possible usage:
31+
* using namespace tc::aten;
32+
* std::string tc("...");
33+
* ATenAutotuner<tc::CudaBackend, tc::autotune::GeneticSearch> tuner(tc);
34+
* std::string cacheFn("/tmp/some_file");
35+
* auto best = tuner.tune("tc_function_name", inputs, baseOption, cacheFn)
36+
*
37+
* The best options may then be used to compile an executor and run.
38+
* CHECK_GT(best.size(), 0);
39+
* auto pExecutor = compile(tc, "tc_function_name", inputs, best[0]);
40+
* auto outputs = prepareOutputs(tc, "tc_function_name", inputs);
41+
* // memoize the executor and outputs if needed
42+
* run(*pExecutor, inputs, outputs);
43+
*/
44+
template <typename Backend, typename SearchStrategy>
45+
class ATenAutotuner : public tc::autotune::Autotuner<Backend, SearchStrategy> {
46+
public:
47+
using BackendType = Backend;
48+
using MappingOptionsType = typename BackendType::MappingOptionsType;
49+
50+
/// An ATenAutotuner is built from a TC string which contains multiple TC
51+
/// functions on which tuning can be run independently.
52+
ATenAutotuner(const std::string& tc);
53+
54+
/// Runs autotuning on the TC function tcEntryPoint.
55+
/// Proper output shapes are inferred automatically from the input shapes.
56+
///
57+
/// Optionally an OptionsCache cacheFileName serialized path
58+
/// can be specified to which the tuner will save the best options found for
59+
/// later offline reuse, in the proper protobuf format.
60+
///
61+
/// Additionally, if such a cacheFileName is specified and if it contains a
62+
/// previously saved protobuf then the autotuner will load it. In that case
63+
/// the tuner recovers multiple starting points and appends them to the
64+
/// baseMapping. This can be useful in a reinforcement situation where short
65+
/// tunings are run and their results cached iteratively. The best options
66+
/// are still saved at the end of tuning, possibly overwriting that
67+
/// previously saved protobuf file.
68+
///
69+
/// Lastly a TuningParameterFixer function can be specified to limit the
70+
/// search space (i.e. when certain parameters are known to be good/bad
71+
/// independently on a particular TC).
72+
///
73+
/// \return a vector MappingOptions, if it is empty then tuning did not find
74+
/// a single good configuration. This should be a very rare occurrence but
75+
/// it is possible in particular if the skipExecutionOrWarmup function is too
76+
/// aggressive and the problem size is too small. If the vector is not empty
77+
/// it contains the best performing options for the particular Backend,
78+
/// ranked by execution speed, where result[0] is the fastest.
79+
std::vector<MappingOptionsType> tune(
80+
const std::string& tcEntryPoint,
81+
const std::vector<at::Tensor>& inputs,
82+
const MappingOptionsType& baseMapping,
83+
const std::string& cacheFileName = "",
84+
const tc::autotune::TuningParameterFixer& fixedParams = {});
85+
86+
protected:
87+
/// The TC string is stored internally so we can tune independent TC
88+
/// functions on demand.
89+
const std::string tc_;
90+
};
91+
} // namespace aten
92+
} // namespace tc
93+
94+
#include "tc/aten/aten_autotuner-inl.h"

0 commit comments

Comments (0)