Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit f99634c

Browse files
[RFC] Forward functions from views with macros + templates, drop initializer_list
1 parent 241a753 commit f99634c

21 files changed

+141
-257
lines changed

examples/example_MLP_model.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ TEST_F(ProductionModel, 1LUT) {
521521
auto L1 = FLAGS_L1;
522522
auto E1 = FLAGS_E1;
523523
auto options = tc::CudaMappingOptions::makeNaiveCudaMappingOptions()
524-
.tile({1, 32})
524+
.tile(1, 32)
525525
.mapToThreads({1, 32})
526526
.mapToBlocks({128, 128})
527527
.unroll(256);
@@ -598,7 +598,7 @@ TEST_F(ProductionModel, 2LUT) {
598598
auto E1 = FLAGS_E1;
599599
auto E2 = FLAGS_E2;
600600
auto options = tc::CudaMappingOptions::makeNaiveCudaMappingOptions()
601-
.tile({1, 32})
601+
.tile(1, 32)
602602
.mapToThreads({1, 32})
603603
.mapToBlocks({128, 128})
604604
.unroll(256);
@@ -688,7 +688,7 @@ TEST_F(ProductionModel, C3) {
688688
auto WY = FLAGS_WY;
689689
auto options = tc::CudaMappingOptions::makeNaiveCudaMappingOptions()
690690
.fixParametersBeforeScheduling(true)
691-
.tile({32, 32, 32})
691+
.tile(32, 32, 32)
692692
.mapToThreads({4, 32})
693693
.mapToBlocks({128, 128})
694694
.useSharedMemory(true)
@@ -783,7 +783,7 @@ TEST_F(ProductionModel, MLP1) {
783783
auto M = FLAGS_M;
784784
auto options = tc::CudaMappingOptions::makeNaiveCudaMappingOptions()
785785
.fixParametersBeforeScheduling(true)
786-
.tile({16, 16, 128})
786+
.tile(16, 16, 128)
787787
.mapToThreads({16, 16})
788788
.mapToBlocks({32, 32})
789789
.useSharedMemory(true)
@@ -882,7 +882,7 @@ TEST_F(ProductionModel, MLP3) {
882882
auto Q = FLAGS_Q;
883883
auto options = tc::CudaMappingOptions::makeNaiveCudaMappingOptions()
884884
.fixParametersBeforeScheduling(true)
885-
.tile({16, 16, 128})
885+
.tile(16, 16, 128)
886886
.mapToThreads({16, 16})
887887
.mapToBlocks({32, 32})
888888
.useSharedMemory(true)

examples/example_batchmatmul.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ TEST_F(BatchMatMul, TransposedBatchMatMul) {
117117
auto M = FLAGS_M;
118118
auto K = FLAGS_K;
119119
auto options = tc::CudaMappingOptions::makeNaiveCudaMappingOptions()
120-
.tile({1})
120+
.tile(1)
121121
.mapToThreads({128})
122122
.mapToBlocks({B})
123123
.useSharedMemory(true)

examples/example_group_convolution.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ TEST_F(GroupConvolution, GroupConvolution) {
177177
auto threads = (W >= 10) ? std::vector<size_t>{W / 4, H / 2}
178178
: std::vector<size_t>{4, 8, 4};
179179
auto options = tc::CudaMappingOptions::makeNaiveCudaMappingOptions()
180-
.tile({1, 1, 1})
180+
.tile(1, 1, 1)
181181
.mapToThreads(threads)
182182
.mapToBlocks({32, 32})
183183
.useSharedMemory(true)

examples/example_tmm.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ TEST_F(TransposedMatMul, TransposedMatMul) {
111111
auto K = FLAGS_K;
112112
auto options = tc::CudaMappingOptions::makeNaiveCudaMappingOptions()
113113
.fixParametersBeforeScheduling(true)
114-
.tile({32, 32, 32})
114+
.tile(32, 32, 32)
115115
.mapToThreads({32, 32})
116116
.mapToBlocks({M / 32, N / 32})
117117
.useSharedMemory(true)

include/tc/c2/2lut_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class Tc2LUTOp : public TcOp<T, Context, Engine> {
4444
void setupNaiveCudaMappingOptions() override {
4545
this->cudaMappingOptions_.mapToBlocks(256)
4646
.mapToThreads(64)
47-
.tile({1})
47+
.tile(1)
4848
.unroll(1)
4949
.useSharedMemory(false)
5050
.usePrivateMemory(false);

include/tc/c2/copy_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class TcCopyOp : public TcOp<T, Context, Engine> {
4545
void setupNaiveCudaMappingOptions() override {
4646
this->cudaMappingOptions_ =
4747
tc::CudaMappingOptions::makePointwiseCudaMappingOptions()
48-
.tile({4, 8, 8})
48+
.tile(4, 8, 8)
4949
.mapToThreads({32, 4, 4})
5050
.mapToBlocks({100, 100, 100})
5151
.unroll(128);

include/tc/c2/dper_lut_concat_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class TcDperLutConcatOp : public TcOp<T, Context, Engine> {
4040

4141
protected:
4242
void setupNaiveCudaMappingOptions() override {
43-
this->cudaMappingOptions_.tile({1})
43+
this->cudaMappingOptions_.tile(1)
4444
.mapToThreads(128)
4545
.mapToBlocks(32)
4646
.unroll(1)

include/tc/c2/lut_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class TcLUTOp : public TcOp<T, Context, Engine> {
4242

4343
protected:
4444
void setupNaiveCudaMappingOptions() override {
45-
this->cudaMappingOptions_.tile({1})
45+
this->cudaMappingOptions_.tile(1)
4646
.mapToThreads(64)
4747
.mapToBlocks(256)
4848
.unroll(1)

include/tc/c2/matmul_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class TcMatMulOp : public TcOp<T, Context, Engine> {
4040

4141
protected:
4242
void setupNaiveCudaMappingOptions() override {
43-
this->cudaMappingOptions_.tile({16, 16, 32})
43+
this->cudaMappingOptions_.tile(16, 16, 32)
4444
.mapToThreads(4, 32)
4545
.mapToBlocks(32, 32, 32)
4646
.unroll(1);

include/tc/core/cuda/cuda_mapping_options-inl.h

Lines changed: 0 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -178,112 +178,6 @@ bool CudaMappingOptions::operator!=(const CudaMappingOptions& options) const {
178178
options.ownedProto_.SerializeAsString();
179179
}
180180

181-
CudaMappingOptions& CudaMappingOptions::tile(
182-
const std::vector<uint64_t>& sizes) {
183-
generic.tile(sizes);
184-
return *this;
185-
}
186-
187-
CudaMappingOptions& CudaMappingOptions::tile(
188-
std::initializer_list<uint64_t> sizes) {
189-
generic.tile(sizes);
190-
return *this;
191-
}
192-
193-
CudaMappingOptions& CudaMappingOptions::tile(
194-
const std::string& commaSeparatedSizes) {
195-
generic.tile(commaSeparatedSizes);
196-
return *this;
197-
}
198-
199-
CudaMappingOptions& CudaMappingOptions::tile(const char* commaSeparatedSizes) {
200-
generic.tile(commaSeparatedSizes);
201-
return *this;
202-
}
203-
204-
template <typename... Args>
205-
CudaMappingOptions& CudaMappingOptions::tile(Args... args) {
206-
generic.tile(args...);
207-
return *this;
208-
}
209-
210-
CudaMappingOptions& CudaMappingOptions::unroll(uint64_t size) {
211-
generic.unroll(size);
212-
return *this;
213-
}
214-
215-
CudaMappingOptions& CudaMappingOptions::fixParametersBeforeScheduling(bool b) {
216-
generic.fixParametersBeforeScheduling(b);
217-
return *this;
218-
}
219-
220-
CudaMappingOptions& CudaMappingOptions::tileImperfectlyNested(bool b) {
221-
generic.tileImperfectlyNested(b);
222-
return *this;
223-
}
224-
225-
CudaMappingOptions& CudaMappingOptions::matchLibraryCalls(bool b) {
226-
generic.matchLibraryCalls(b);
227-
return *this;
228-
}
229-
230-
CudaMappingOptions& CudaMappingOptions::scheduleFusionStrategy(
231-
FusionStrategy fs) {
232-
generic.scheduleFusionStrategy(fs);
233-
return *this;
234-
}
235-
236-
CudaMappingOptions& CudaMappingOptions::scheduleFusionStrategy(
237-
const std::string& str) {
238-
generic.scheduleFusionStrategy(str);
239-
return *this;
240-
}
241-
242-
CudaMappingOptions& CudaMappingOptions::outerScheduleFusionStrategy(
243-
FusionStrategy fs) {
244-
generic.outerScheduleFusionStrategy(fs);
245-
return *this;
246-
}
247-
248-
CudaMappingOptions& CudaMappingOptions::outerScheduleFusionStrategy(
249-
const std::string& str) {
250-
generic.outerScheduleFusionStrategy(str);
251-
return *this;
252-
}
253-
254-
CudaMappingOptions& CudaMappingOptions::outerScheduleAllowSkewing(bool b) {
255-
generic.outerScheduleAllowSkewing(b);
256-
return *this;
257-
}
258-
259-
CudaMappingOptions& CudaMappingOptions::outerSchedulePositiveOrthant(bool b) {
260-
generic.outerSchedulePositiveOrthant(b);
261-
return *this;
262-
}
263-
264-
CudaMappingOptions& CudaMappingOptions::intraTileScheduleFusionStrategy(
265-
FusionStrategy fs) {
266-
generic.intraTileScheduleFusionStrategy(fs);
267-
return *this;
268-
}
269-
270-
CudaMappingOptions& CudaMappingOptions::intraTileScheduleFusionStrategy(
271-
const std::string& str) {
272-
generic.intraTileScheduleFusionStrategy(str);
273-
return *this;
274-
}
275-
276-
CudaMappingOptions& CudaMappingOptions::intraTileScheduleAllowSkewing(bool b) {
277-
generic.intraTileScheduleAllowSkewing(b);
278-
return *this;
279-
}
280-
281-
CudaMappingOptions& CudaMappingOptions::intraTileSchedulePositiveOrthant(
282-
bool b) {
283-
generic.intraTileSchedulePositiveOrthant(b);
284-
return *this;
285-
}
286-
287181
CudaMappingOptions& CudaMappingOptions::mapToThreads(
288182
std::initializer_list<uint64_t> threads) {
289183
block = CudaDim(threads).view; // tmp CudaDim, copy, delete

0 commit comments

Comments (0)