Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 0258d2d

Browse files
CpuExecutor skeleton
1 parent 06ba78a commit 0258d2d

File tree

3 files changed

+176
-1
lines changed

3 files changed

+176
-1
lines changed

include/tc/core/cpu/cpu_tc_executor.h

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/**
2+
* Copyright (c) 2017-present, Facebook, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include <string>
19+
#include <vector>
20+
21+
#include <dlpack/dlpack.h>
22+
23+
#include "tc/core/halide_utils.h"
24+
#include "tc/core/mapping_options.h"
25+
#include "tc/core/polyhedral/scop.h"
26+
#include "tc/core/tc_executor.h"
27+
#include "tc/core/utils/dlpack.h"
28+
#include "tc/lang/parser.h"
29+
30+
namespace tc {
31+
32+
// Handle type for a runtime-compiled CPU function.
// It currently carries no state, so there is nothing to release.
struct CpuRTCFunction {
  // Releases whatever the handle owns; a no-op for now.
  void clear() {
  }
};
35+
36+
class CpuTcExecutor : public ::tc::TcExecutor {
37+
public:
38+
CpuTcExecutor(
39+
std::string id,
40+
const std::vector<const DLTensor*>& inputsInfo,
41+
const std::string& options,
42+
lang::TreeRef tcDefinition)
43+
: TcExecutor(id, inputsInfo, options, tcDefinition) {}
44+
45+
~CpuTcExecutor() {}
46+
47+
CpuTcExecutor(CpuTcExecutor&&) = delete;
48+
CpuTcExecutor& operator=(CpuTcExecutor&&) = delete;
49+
CpuTcExecutor(const CpuTcExecutor&) = delete;
50+
CpuTcExecutor& operator=(const CpuTcExecutor&) = delete;
51+
52+
// Can only be called once with specific kernel options. Input sizes are
53+
// set up as constructor argument and output sizes are inferred.
54+
//
55+
// If you need another kernel for another Tc or another inputs, outputs,
56+
// options then just instantiate another CpuTcExecutor.
57+
// This is because for the time being we fully specialize all the sizes and
58+
// strides at runtime.
59+
// @{
60+
void compile(const std::string& options) override {
61+
compile(MappingOptions(options));
62+
}
63+
void compile(const tc::MappingOptions& options);
64+
// @}
65+
66+
// Run can be called multiple times given a compilation, inputs are allowed
67+
// to change in that their data pointer is allowed to change.
68+
// Sizes and strides must remain constant otherwise this is an error
69+
// The only thing that is allowed to change across runs is the input
70+
// and output pointers base address.
71+
// It is the caller's responsibility to ensure proper non-aliasing (or
72+
// advanced aliasing) properties of the input and output tensors.
73+
// if profile is set the kernel runtime (nanoseconds) is returned
74+
Duration run(
75+
const std::vector<const DLTensor*>& inputs,
76+
const std::vector<DLTensor*>& outputs,
77+
bool profile = false) const;
78+
79+
// This is the "low-latency" mode in which we just propagate raw pointers to
80+
// data in GPU address space.
81+
// No tensor-related information can be checked so it is the user's
82+
// responsibility to ensure that shapes and strides match. If the user
83+
// doesn't then segfault will likely occur.
84+
void uncheckedRun(
85+
const std::vector<const void*>& inputs,
86+
const std::vector<void*>& outputs) const;
87+
88+
bool hasRuntimeCompiledFunction() override {
89+
return rtcFun.get() != nullptr;
90+
}
91+
92+
// It is necessary to clear the RTC manually because it can throw and we
93+
// can't have that in the destructor.
94+
void clearRuntimeCompiledFunction() override {
95+
if (!hasRuntimeCompiledFunction()) {
96+
return;
97+
}
98+
rtcFun->clear();
99+
}
100+
101+
std::string kernelName() const {
102+
return executionInfo_.kernelName;
103+
}
104+
105+
private:
106+
void compileWithTcMapper();
107+
108+
public:
109+
std::string kernelSpecializedName;
110+
std::string cpuSource;
111+
112+
protected:
113+
std::shared_ptr<CpuRTCFunction> rtcFun;
114+
};
115+
116+
} // namespace tc

src/core/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ add_library(
1414
halide2isl.cc
1515
halide_utils.cc
1616

17-
polyhedral/codegen.cc
17+
cpu/cpu_tc_executor.cc
18+
1819
polyhedral/codegen_cuda.cc
1920
polyhedral/codegen_llvm.cc
2021
polyhedral/llvm_jit.cc

src/core/cpu/cpu_tc_executor.cc

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/**
2+
* Copyright (c) 2017-present, Facebook, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#include "tc/core/cpu/cpu_tc_executor.h"
17+
18+
#include "tc/core/halide_utils.h"
19+
#include "tc/core/mapping_options_cpp_printer.h"
20+
#include "tc/core/polyhedral/mapped_scop.h"
21+
#include "tc/core/tc2halide.h"
22+
#include "tc/core/utils/dlpack.h"
23+
24+
#include "tc/lang/parser.h"
25+
#include "tc/lang/sema.h"
26+
27+
#include <version.h>
28+
#include <utility>
29+
30+
namespace tc {
31+
32+
using namespace dlutils;
33+
34+
void CpuTcExecutor::compile(const tc::MappingOptions& options) {}
35+
36+
// Private compilation helper; the body is empty for now.
void CpuTcExecutor::compileWithTcMapper() {
}
37+
38+
// Checked execution entry point.
//
// Aborts (via CHECK) when no runtime-compiled function is attached or when
// the stored options string is empty. It then validates the sizes and strides
// of `inputs` and `outputs` against the tensor metadata recorded in
// executionInfo_ and the params/returns of the TC definition.
//
// Nothing is launched yet: `profile` is not consulted and a
// default-constructed Duration is always returned.
Duration CpuTcExecutor::run(
    const std::vector<const DLTensor*>& inputs,
    const std::vector<DLTensor*>& outputs,
    bool profile) const {
  CHECK(rtcFun) << "Can't launch uncompiled: " << executionInfo_.kernelName;
  CHECK_NE(executionInfo_.options, "");

  // Fetch the TC definition once; inputs are checked against its params and
  // outputs against its returns.
  auto&& tcDef = halideComponents_.getDef();
  checkSizesAndStridesAreCompliant(
      inputs, executionInfo_.inputsInfo, tcDef.params());
  checkSizesAndStridesAreCompliant(
      outputs, executionInfo_.outputsInfo, tcDef.returns());

  return Duration();
}
53+
54+
void CpuTcExecutor::uncheckedRun(
55+
const std::vector<const void*>& inputs,
56+
const std::vector<void*>& outputs) const {}
57+
58+
} // namespace tc

0 commit comments

Comments
 (0)