Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 87caef1

Browse files
Merge pull request #429 from mingzhe09088/add-param
Add argument RuntimeInformation to uncheckedRun and run, so CUDA stre…
2 parents 0dddfce + 44aa049 commit 87caef1

File tree

8 files changed

+27
-12
lines changed

8 files changed

+27
-12
lines changed

tc/c2/tc_op.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,15 @@ class TcOp : public Operator<Context> {
119119
compiled_ = true;
120120
}
121121

122+
// Get the CUDA stream from the C2 operator context
123+
tc::CudaBackend::RuntimeInformation info;
124+
info.stream = context_.cuda_stream();
125+
122126
// run
123127
if (!check_sizes_) {
124-
executor_->uncheckedRun(input_void_ptrs_, output_void_ptrs_);
128+
executor_->uncheckedRun(input_void_ptrs_, output_void_ptrs_, info);
125129
} else {
126-
executor_->run(raw_input_dl_tensors_, raw_output_dl_tensors_);
130+
executor_->run(raw_input_dl_tensors_, raw_output_dl_tensors_, info);
127131
}
128132
return true;
129133
}

tc/core/cpu/cpu_tc_executor.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ CpuCompilationResult CpuBackend::compileWithTcMapper(
5252

5353
void CpuTcExecutor::uncheckedRun(
5454
const std::vector<const void*>& inputs,
55-
const std::vector<void*>& outputs) const {
55+
const std::vector<void*>& outputs,
56+
typename CpuBackend::RuntimeInformation info) const {
5657
LOG(ERROR) << "NYI: CpuTcExecutor::uncheckedRun";
5758
}
5859

tc/core/cpu/cpu_tc_executor.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ class CpuTcExecutor : public TcExecutor<CpuBackend> {
3939
/// doesn't then segfault will likely occur.
4040
void uncheckedRun(
4141
const std::vector<const void*>& inputs,
42-
const std::vector<void*>& outputs) const;
42+
const std::vector<void*>& outputs,
43+
typename CpuBackend::RuntimeInformation info) const;
4344

4445
/// Calls uncheckedRun and profiles the cpu overhead and kernel runtime
4546
/// (microseconds).

tc/core/cuda/cuda_backend.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,11 @@ struct CudaCompilationResult {
4444
* synchronization information of a kernel.
4545
*/
4646
struct CudaRuntimeInformation {
47-
cudaStream_t stream{0};
47+
public:
48+
CudaRuntimeInformation() : stream(0) {}
49+
50+
public:
51+
cudaStream_t stream;
4852
};
4953

5054
struct CudaTcExecutor;

tc/core/cuda/cuda_tc_executor.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,16 +111,16 @@ CudaCompilationResult CudaBackend::compileWithTcMapper(
111111

112112
void CudaTcExecutor::uncheckedRun(
113113
const std::vector<const void*>& inputs,
114-
const std::vector<void*>& outputs) const {
114+
const std::vector<void*>& outputs,
115+
typename CudaBackend::RuntimeInformation info) const {
115116
CHECK(rtcFun_) << "No rtcFun_ attached, cannot launch";
116-
cudaStream_t stream = 0;
117117
CHECK_NE(grid_.view[0], 0u) << "Grid dims are not set up";
118118
CHECK_NE(block_.view[0], 0u) << "Block dims are not set up";
119119
rtcFun_->Launch(
120120
grid_.view.extractDefaultedArray(),
121121
block_.view.extractDefaultedArray(),
122122
0,
123-
stream,
123+
info.stream,
124124
parameters_,
125125
outputs,
126126
inputs);

tc/core/cuda/cuda_tc_executor.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,12 @@ class CudaTcExecutor : public TcExecutor<CudaBackend> {
3737
/// No tensor-related information can be checked so it is the user's
3838
/// responsibility to ensure that shapes and strides match. If the user
3939
/// doesn't then segfault will likely occur.
40+
/// TODO: reconsider whether `info` should be passed by value or by reference.
4041
void uncheckedRun(
4142
const std::vector<const void*>& inputs,
42-
const std::vector<void*>& outputs) const;
43+
const std::vector<void*>& outputs,
44+
typename CudaBackend::RuntimeInformation info =
45+
CudaBackend::RuntimeInformation()) const;
4346

4447
/// Calls uncheckedRun and profiles the cpu overhead and kernel runtime
4548
/// (microseconds).

tc/core/tc_executor-inl.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,15 +109,16 @@ inline std::pair<std::vector<const void*>, std::vector<void*>> prepareRun(
109109
template <typename Backend>
110110
void TcExecutor<Backend>::run(
111111
const std::vector<const DLConstTensor*>& inputs,
112-
const std::vector<const DLTensor*>& outputs) const {
112+
const std::vector<const DLTensor*>& outputs,
113+
typename Backend::RuntimeInformation info) const {
113114
std::vector<const void*> rawInputs;
114115
std::vector<void*> rawOutputs;
115116
std::tie(rawInputs, rawOutputs) = detail::prepareRun(
116117
inputs, outputs, inputsInfo_, outputsInfo_, halideComponents_);
117118

118119
// Static dispatch instead of virtual functions requires this cast.
119120
static_cast<const typename Backend::ExecutorType&>(*this).uncheckedRun(
120-
rawInputs, rawOutputs);
121+
rawInputs, rawOutputs, info);
121122
}
122123

123124
template <typename Backend>

tc/core/tc_executor.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ class TcExecutor {
8888
/// advanced aliasing) properties of the input and output tensors.
8989
void run(
9090
const std::vector<const DLConstTensor*>& inputs,
91-
const std::vector<const DLTensor*>& outputs) const;
91+
const std::vector<const DLTensor*>& outputs,
92+
typename Backend::RuntimeInformation info = {}) const;
9293

9394
/// Calls run and profiles the cpu overhead and kernel runtime (microseconds).
9495
/// \returns profiling information

0 commit comments

Comments
 (0)