facebookresearch
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
Lines changed: 2 additions & 1 deletion
diff --git a/examples/example_fixture.h b/examples/example_fixture.h
Lines changed: 5 additions & 4 deletions
diff --git a/src/aten/aten_compiler.cc b/include/tc/aten/aten_compiler-inl.h
rename from src/aten/aten_compiler.cc
rename to include/tc/aten/aten_compiler-inl.h
Lines changed: 15 additions & 8 deletions
diff --git a/include/tc/aten/aten_compiler.h b/include/tc/aten/aten_compiler.h
Lines changed: 4 additions & 3 deletions
diff --git a/include/tc/aten/utils-inl.h b/include/tc/aten/utils-inl.h
Lines changed: 2 additions & 0 deletions
diff --git a/include/tc/aten/utils.h b/include/tc/aten/utils.h
Lines changed: 2 additions & 0 deletions
diff --git a/include/tc/core/execution_engine-inl.h b/include/tc/core/execution_engine-inl.h
Lines changed: 12 additions & 12 deletions
diff --git a/include/tc/core/execution_engine.h b/include/tc/core/execution_engine.h
Lines changed: 2 additions & 2 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
Lines changed: 0 additions & 4 deletions
diff --git a/src/aten/CMakeLists.txt b/src/aten/CMakeLists.txt
Lines changed: 0 additions & 28 deletions
@@ -33,7 +33,6 @@ foreach(i ${EXAMPLES_FILES})
   add_test(${i} ${i})
   target_link_libraries(
      ${i}
-     tc_aten
      tc_autotuner
      tc_core
      tc_c2
@@ -43,5 +42,7 @@ foreach(i ${EXAMPLES_FILES})
      ${GTEST_LIBS}
      ${GFLAGS_LIBRARIES}
      ${GLOG_LIBRARIES}
+
+     ${ATEN_LIBRARIES}
   )
 endforeach()
@@ -32,6 +32,7 @@
 #include "tc/core/cuda/cuda.h"
 #include "tc/core/cuda/cuda_compilation_cache.h"
 #include "tc/core/cuda/cuda_rtc.h"
+#include "tc/core/cuda/cuda_tc_executor.h"
 #include "tc/core/flags.h"
 #include "tc/core/mapping_options.h"
 #include "tc/core/scope_guard.h"
@@ -63,7 +64,7 @@ std::vector<const DLTensor*> inferOutputTensorInfo(
     const std::string& tc,
     const std::string& name,
     const std::vector<at::Tensor>& inputs) {
-  tc::ATenCompilationUnit atCompl;
+  tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
   atCompl.define(tc);
   return atCompl.inferOutputTensorInfo(name, inputs);
 }
@@ -133,7 +134,7 @@ struct Benchmark : public ::testing::Test {
                                   std::vector<at::Tensor>& outputs) {
         return true;
       }) {
-    tc::ATenCompilationUnit atCompl;
+    tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
     atCompl.define(tc);
     auto handle = atCompl.compile(name, inputs, mappingOptions);
     atCompl.run(name, inputs, outputs, handle);
@@ -281,7 +282,7 @@ struct Benchmark : public ::testing::Test {
     tc::CudaCache::loadCacheFromProtobuf(tc::makeCudaFilename(cacheFilename));
     tc::FLAGS_tuner_gen_restore_number = 1;
 
-    tc::ATenCompilationUnit atCompl;
+    tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
     atCompl.define(tc);
 
     auto mappingOptions = [&]() {
@@ -399,7 +400,7 @@ struct Benchmark : public ::testing::Test {
         return *options;
       }();
 
-      tc::ATenCompilationUnit atCompl;
+      tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
       atCompl.define(TC);
       auto handle = atCompl.compile(kernelName, inputs, bestOptions);
       std::vector<at::Tensor> outputs;
 
@@ -23,12 +23,14 @@
 
 namespace tc {
 
-ATenCompilationUnit::ATenCompilationUnit() {
-  executionEngine_ = std::unique_ptr<ExecutionEngine<CudaTcExecutor>>(
-      new ExecutionEngine<CudaTcExecutor>());
+template <typename ExecutorType>
+ATenCompilationUnit<ExecutorType>::ATenCompilationUnit() {
+  executionEngine_ = std::unique_ptr<ExecutionEngine<ExecutorType>>(
+      new ExecutionEngine<ExecutorType>());
 }
 
-void ATenCompilationUnit::define(const std::string& language) {
+template <typename ExecutorType>
+void ATenCompilationUnit<ExecutorType>::define(const std::string& language) {
   executionEngine_->define(language);
 }
 
@@ -67,7 +69,8 @@ void prepareOutputs(
 
 } // namespace
 
-size_t ATenCompilationUnit::compile(
+template <typename ExecutorType>
+size_t ATenCompilationUnit<ExecutorType>::compile(
     const std::string& name,
     const std::vector<at::Tensor>& inputs,
     const MappingOptions& options) {
@@ -77,7 +80,9 @@ size_t ATenCompilationUnit::compile(
       name, inputDLTensorsPair.first, options.toProtobufSerializedString());
 }
 
-std::vector<const DLTensor*> ATenCompilationUnit::inferOutputTensorInfo(
+template <typename ExecutorType>
+std::vector<const DLTensor*>
+ATenCompilationUnit<ExecutorType>::inferOutputTensorInfo(
     const std::string& name,
     const std::vector<at::Tensor>& inputs) {
   auto inputDLTensorsPair = toConstDlpackTensors(inputs);
@@ -86,7 +91,8 @@ std::vector<const DLTensor*> ATenCompilationUnit::inferOutputTensorInfo(
       name, inputDLTensorsPair.first);
 }
 
-Duration ATenCompilationUnit::run(
+template <typename ExecutorType>
+Duration ATenCompilationUnit<ExecutorType>::run(
     const std::string& name,
     const std::vector<at::Tensor>& inputs,
     std::vector<at::Tensor>& outputs,
@@ -105,7 +111,8 @@ Duration ATenCompilationUnit::run(
       handle, inputDLTensorsPair.first, outputDLTensorsPair.first, profile);
 }
 
-void ATenCompilationUnit::uncheckedRun(
+template <typename ExecutorType>
+void ATenCompilationUnit<ExecutorType>::uncheckedRun(
     const std::vector<at::Tensor>& inputs,
     std::vector<at::Tensor>& outputs,
     size_t handle) {
 
@@ -23,15 +23,14 @@
 #include <ATen/DLConvertor.h>
 
 #include "tc/aten/utils.h"
-#include "tc/core/cuda/cuda.h"
-#include "tc/core/cuda/cuda_tc_executor.h"
 #include "tc/core/execution_engine.h"
 #include "tc/lang/parser.h"
 
 namespace tc {
 /// This provides the basic interface for writing ATen style tensor operations
 /// based on Tensor Comprehensions.
 
+template <typename ExecutorType>
 class ATenCompilationUnit {
  public:
   explicit ATenCompilationUnit();
@@ -72,6 +71,8 @@ class ATenCompilationUnit {
       size_t handle);
 
  private:
-  std::unique_ptr<ExecutionEngine<CudaTcExecutor>> executionEngine_;
+  std::unique_ptr<ExecutionEngine<ExecutorType>> executionEngine_;
 };
 } // namespace tc
+
+#include "tc/aten/aten_compiler-inl.h"
@@ -21,6 +21,7 @@
 #include <ATen/ATen.h>
 #include <ATen/DLConvertor.h>
 namespace tc {
+namespace {
 std::pair<std::vector<DLTensor*>, std::vector<DLManagedTensor*>>
 toDlpackTensors(const std::vector<at::Tensor>& tensors) {
   std::vector<DLTensor*> dlTensors;
@@ -50,4 +51,5 @@ void deleteDlmTensors(std::vector<DLManagedTensor*>& tensors) {
     tensor->deleter(tensor);
   }
 }
+} // namespace
 } // namespace tc
@@ -22,13 +22,15 @@
 #include <ATen/DLConvertor.h>
 
 namespace tc {
+namespace {
 std::pair<std::vector<DLTensor*>, std::vector<DLManagedTensor*>>
 toDlpackTensors(const std::vector<at::Tensor>& tensors);
 
 std::pair<std::vector<const DLTensor*>, std::vector<DLManagedTensor*>>
 toConstDlpackTensors(const std::vector<at::Tensor>& tensors);
 
 void deleteDlmTensors(std::vector<DLManagedTensor*>& tensors);
+} // namespace
 } // namespace tc
 
 #include "tc/aten/utils-inl.h"
@@ -67,11 +67,11 @@ ExecutionEngine<ExecutorType>::inferOutputTensorInfo(
     CHECK_EQ(1, tcNameMap_.count(name))
         << "attempting to access undefined function " << name;
     // If we have already compiled for the given inputs, regardless of
-    // the options, we can get sizes from a corresponding TcExecutor.
+    // the options, we can get sizes from a corresponding ExecutorType.
     auto e = std::find_if(
         executors_.begin(),
         executors_.end(),
-        [&](const std::unique_ptr<TcExecutor>& e) {
+        [&](const std::unique_ptr<ExecutorType>& e) {
           return e && name == e->identifier &&
               compareDLTensorVectorMetadata(
                      extractRawPtrs(e->inputsInfo), inputs);
@@ -85,7 +85,7 @@ ExecutionEngine<ExecutorType>::inferOutputTensorInfo(
   // null options. It will be used for further size queries but
   // will fail if somebody attempts to run it.
   auto executor =
-      tc::make_unique<TcExecutor>(name, inputs, "", tcNameMap_.at(name));
+      tc::make_unique<ExecutorType>(name, inputs, "", tcNameMap_.at(name));
   auto outputsInfo = executor->inferOutputTensorInfo();
   emplaceExecutor(std::move(executor));
   return outputsInfo;
@@ -114,16 +114,16 @@ size_t ExecutionEngine<ExecutorType>::compile(
   return handle;
 }
 
-// Steal TcExecutor and give it back under lock
-// Run outside of lock on owning TcExecutor.
+// Steal ExecutorType and give it back under lock
+// Run outside of lock on owning ExecutorType.
 template <typename ExecutorType>
 Duration ExecutionEngine<ExecutorType>::run(
     size_t handle,
     const std::vector<const DLTensor*>& inputs,
     const std::vector<DLTensor*>& outputs,
     bool profile,
     std::function<bool(const ExecutorType*)> pruningFunction) {
-  std::unique_ptr<TcExecutor> executorUPtr(nullptr);
+  std::unique_ptr<ExecutorType> executorUPtr(nullptr);
   {
     std::lock_guard<std::mutex> lg(tcExecutorMutex_);
     std::swap(executorUPtr, executors_[handle]);
@@ -155,14 +155,14 @@ Duration ExecutionEngine<ExecutorType>::run(
   return res;
 }
 
-// Steal TcExecutor and give it back under lock
-// Run outside of lock on owning TcExecutor.
+// Steal ExecutorType and give it back under lock
+// Run outside of lock on owning ExecutorType.
 template <typename ExecutorType>
 void ExecutionEngine<ExecutorType>::uncheckedRun(
     size_t handle,
     const std::vector<const void*>& inputs,
     const std::vector<void*>& outputs) {
-  std::unique_ptr<TcExecutor> executorUPtr(nullptr);
+  std::unique_ptr<ExecutorType> executorUPtr(nullptr);
   {
     std::lock_guard<std::mutex> lg(tcExecutorMutex_);
     std::swap(executorUPtr, executors_[handle]);
@@ -193,12 +193,12 @@ template <typename ExecutorType>
 void ExecutionEngine<ExecutorType>::clear(size_t handle) {
   std::lock_guard<std::mutex> lg(tcExecutorMutex_);
   executors_[handle]->clearRuntimeCompiledFunction();
-  executors_[handle] = std::unique_ptr<TcExecutor>(nullptr);
+  executors_[handle] = std::unique_ptr<ExecutorType>(nullptr);
 }
 
 template <typename ExecutorType>
 size_t ExecutionEngine<ExecutorType>::emplaceExecutor(
-    std::unique_ptr<TcExecutor> executorUPtr) {
+    std::unique_ptr<ExecutorType> executorUPtr) {
   // Insert in vector under lock
   std::lock_guard<std::mutex> lg(tcExecutorMutex_);
   size_t handle = uidCounter++;
@@ -219,7 +219,7 @@ size_t ExecutionEngine<ExecutorType>::getHandle(
   auto it = std::find_if(
       executors_.begin(),
       executors_.end(),
-      [&](const std::unique_ptr<TcExecutor>& e) {
+      [&](const std::unique_ptr<ExecutorType>& e) {
         return e && // UPtrs get stolen by run to avoid underlying vector
                     // realloc issues, guard against that
             name == e->identifier &&
 
@@ -85,7 +85,7 @@ class ExecutionEngine {
   void clear(size_t handle);
 
  protected:
-  size_t emplaceExecutor(std::unique_ptr<TcExecutor> p);
+  size_t emplaceExecutor(std::unique_ptr<ExecutorType> p);
 
   size_t getHandle(
       const std::string& name,
@@ -100,7 +100,7 @@ class ExecutionEngine {
 
   /// List of executors, indexed by handle.  Derived ExecutionEngines can also
   /// derive TcExecutor.
-  std::vector<std::unique_ptr<TcExecutor>> executors_;
+  std::vector<std::unique_ptr<ExecutorType>> executors_;
 
   size_t uidCounter = 0;
 };
 
@@ -7,10 +7,6 @@ add_subdirectory(version)
 add_subdirectory(core)
 add_subdirectory(autotuner)
 
-if (WITH_CUDA)
-  add_subdirectory(aten)
-endif()
-
 if (WITH_CAFFE2 AND WITH_CUDA)
   add_subdirectory(c2)
 endif()
Original file line number	Diff line number	Diff line change
`@@ -21,6 +21,7 @@`
`21`	`21`	`#include <ATen/ATen.h>`
`22`	`22`	`#include <ATen/DLConvertor.h>`
`23`	`23`	`namespace tc {`
	`24`	`+namespace {`
`24`	`25`	`std::pair<std::vector<DLTensor*>, std::vector<DLManagedTensor*>>`
`25`	`26`	`toDlpackTensors(const std::vector<at::Tensor>& tensors) {`
`26`	`27`	`std::vector<DLTensor*> dlTensors;`
`@@ -50,4 +51,5 @@ void deleteDlmTensors(std::vector<DLManagedTensor*>& tensors) {`
`50`	`51`	`tensor->deleter(tensor);`
`51`	`52`	`}`
`52`	`53`	`}`
	`54`	`+} // namespace`
`53`	`55`	`} // namespace tc`