This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 72e0703

Merge pull request #408 from nicolasvasilache/pr/fix-fbcode-issues
Fix fbcode issues
2 parents 9d11b7b + fa8ad51 commit 72e0703

File tree: 9 files changed (+58 -52 lines)

benchmarks_python/caffe2_benchmark.py

Lines changed: 2 additions & 2 deletions

@@ -51,7 +51,7 @@ def GetArgumentParser():
     parser.add_argument("--tuner_cache_file", type=str,
                         default="tuner_cache",
                         help="File to store tuned mapping options")
-    parser.add_argument("--tuner_gpus", type=str,
+    parser.add_argument("--tuner_devices", type=str,
                         default="0",
                         help="String representation of gpus to use for tuning (e.g. \"0,1\")")
     parser.add_argument("--tuner_threads", type=int, default=10,
@@ -70,7 +70,7 @@ def main():
     core.GlobalInit([
         'tc_bench',
         '--caffe2_logging_operator_dyno_sampling_rate=0',
-        '--tuner_gpus=' + args.tuner_gpus,
+        '--tuner_devices=' + args.tuner_devices,
         '--caffe2_simple_net_benchmark_run_whole_net=0',
     ] + extra_args)
     mapping_options = tune(args)
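Note: caffe2.python.core.GlobalInit forwards these strings to the C++ flag parser, so the argparse option and the forwarded '--tuner_devices=' string have to be renamed together; the change presumably tracks a matching gpus-to-devices rename of the underlying C++ flag. The help text still says "gpus".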

tc/core/polyhedral/codegen_llvm.cc

Lines changed: 11 additions & 10 deletions

@@ -614,31 +614,32 @@ IslCodegenRes codegenISL(const Scop& scop) {
   auto collectIteratorMaps =
       [](isl::ast_node node,
          isl::ast_build build,
-         IteratorMapsType& iteratorMaps,
-         const Scop& scop,
-         StmtSubscriptExprMapType& stmtSubscripts) -> isl::ast_node {
+         IteratorMapsType& iteratorMapsInFun,
+         const Scop& scopInFun,
+         StmtSubscriptExprMapType& stmtSubscriptsInFun) -> isl::ast_node {
     auto user = node.as<isl::ast_node_user>();
     CHECK(user);
     auto expr = user.get_expr().as<isl::ast_expr_op>();
     auto schedule = build.get_schedule();
     auto scheduleMap = isl::map::from_union_map(schedule);
 
     auto stmtId = expr.get_arg(0).as<isl::ast_expr_id>().get_id();
-    CHECK_EQ(0u, iteratorMaps.count(stmtId)) << "entry exists: " << stmtId;
+    CHECK_EQ(0u, iteratorMapsInFun.count(stmtId))
+        << "entry exists: " << stmtId;
     auto iteratorMap = isl::pw_multi_aff(scheduleMap.reverse());
-    auto iterators = scop.halide.iterators.at(stmtId);
-    auto& stmtIteratorMap = iteratorMaps[stmtId];
+    auto iterators = scopInFun.halide.iterators.at(stmtId);
+    auto& stmtIteratorMap = iteratorMapsInFun[stmtId];
     for (size_t i = 0; i < iterators.size(); ++i) {
       auto expr = build.expr_from(iteratorMap.get_pw_aff(i));
       stmtIteratorMap.emplace(iterators[i], expr);
     }
-    auto& subscripts = stmtSubscripts[stmtId];
-    auto provide =
-        scop.halide.statements.at(stmtId).as<Halide::Internal::Provide>();
+    auto& subscripts = stmtSubscriptsInFun[stmtId];
+    auto provide = scopInFun.halide.statements.at(stmtId)
+                       .as<Halide::Internal::Provide>();
     for (auto e : provide->args) {
       const auto& map = iteratorMap;
       auto space = map.get_space().params();
-      auto aff = scop.makeIslAffFromStmtExpr(stmtId, space, e);
+      auto aff = scopInFun.makeIslAffFromStmtExpr(stmtId, space, e);
       auto pulled = isl::pw_aff(aff).pullback(map);
       CHECK_EQ(pulled.n_piece(), 1);
       subscripts.push_back(build.expr_from(pulled));
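These renames (and the matching ones in mapped_scop.cc below) give the lambda parameters names distinct from the enclosing function's variables they previously shadowed, the kind of -Wshadow violation a stricter fbcode build would reject. A minimal sketch of the pattern, with hypothetical names:

// Minimal sketch of the shadowing fix (hypothetical names);
// compile with -Wshadow -Werror to reproduce the original failure.
#include <vector>

int sumWithBias(const std::vector<int>& values, int bias) {
  int total = 0;
  // Before: the lambda parameter reused the enclosing name,
  // triggering -Wshadow:
  //   auto add = [&total](int value, int bias) { total += value + bias; };
  // After: rename the parameter so both names stay distinct.
  auto add = [&total](int value, int biasInFun) {
    total += value + biasInFun;
  };
  for (int v : values) {
    add(v, bias);
  }
  return total;
}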

tc/core/polyhedral/cuda/mapped_scop.cc

Lines changed: 6 additions & 4 deletions

@@ -463,14 +463,16 @@ isl::union_set modifyMappingNames(
     space = space.set_dim_name(isl::dim_type::param, dim, name + suffix);
   }
   auto newSet = isl::union_set::empty(space);
-  set.foreach_set([&newSet, &identifiers, &suffix](isl::set set) {
+  set.foreach_set([&newSet, &identifiers, &suffix](isl::set setInFun) {
     for (auto id : identifiers) {
       auto name = id.get_name();
-      auto dim = set.get_space().find_dim_by_name(isl::dim_type::param, name);
+      auto dim =
+          setInFun.get_space().find_dim_by_name(isl::dim_type::param, name);
       CHECK_LE(0, dim);
-      set = set.set_dim_name(isl::dim_type::param, dim, name + suffix);
+      setInFun =
+          setInFun.set_dim_name(isl::dim_type::param, dim, name + suffix);
     }
-    newSet = newSet.unite(set);
+    newSet = newSet.unite(setInFun);
   });
   return newSet;
 }

tc/library/common.h

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@
 
 namespace tc {
 
-std::string replaceString(
+inline std::string replaceString(
     std::string str,
    const std::string& search,
    const std::string& replace) {
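replaceString is defined, not merely declared, in this header, so without the inline keyword every translation unit that includes tc/library/common.h emits its own external definition and the link fails with duplicate symbols. inline permits identical definitions across translation units under the one-definition rule. A minimal sketch with hypothetical names:

// util.h -- a header included from several .cc files (hypothetical).
#pragma once
#include <string>

// Without `inline`, each including translation unit emits its own
// external definition of greet() and linking fails with duplicate
// symbols; `inline` permits identical definitions across TUs.
inline std::string greet(const std::string& name) {
  return "hello, " + name;
}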

test/caffe2/test_harness-inl.h

Lines changed: 4 additions & 7 deletions

@@ -19,10 +19,7 @@ namespace caffe2 {
 
 namespace detail {
 
-std::mutex& RNGMutex() {
-  static std::mutex rng_mutex;
-  return rng_mutex;
-}
+std::mutex& RNGMutex();
 
 template <typename T>
 T* NewTensor(
@@ -97,9 +94,9 @@ at::Tensor MakeAtenTensor(
 
 template <
     typename Backend,
-    class IterableInputs = std::initializer_list<string>,
-    class IterableOutputs = std::initializer_list<string>,
-    class IterableArgs = std::initializer_list<Argument>>
+    class IterableInputs,
+    class IterableOutputs,
+    class IterableArgs>
 OperatorDef MakeOperatorDef(
     std::string type,
     IterableInputs ins,
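The second hunk drops the default template arguments from what the -inl.h layout suggests is a redeclaration of MakeOperatorDef whose first declaration, with the defaults, lives in test_harness.h. C++ allows a default template argument to be specified only once per scope, so repeating it here is ill-formed and stricter builds diagnose it. A minimal sketch, hypothetical names:

// widget.h -- the first declaration carries the default (hypothetical).
template <class T = int>
T makeWidget();

// widget-inl.h -- the definition must not repeat `= int`;
// specifying a default template argument twice in one scope
// is ill-formed.
template <class T>
T makeWidget() {
  return T{};
}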

test/caffe2/test_harness.cc

Lines changed: 14 additions & 0 deletions

@@ -16,6 +16,20 @@
 #include "test_harness.h"
 
 namespace caffe2 {
+namespace detail {
+
+std::mutex& RNGMutex() {
+  static std::mutex rng_mutex;
+  return rng_mutex;
+}
+
+} // namespace detail
+
+ReferenceImplementationBuilder MakeDefaultReferenceImplementationBuilder() {
+  return [](const OperatorDef& op_def, NetDef* net_def) {
+    caffe2::ReferenceImplementationRegistry::Append(net_def, op_def);
+  };
+}
 
 void CheckEqual(
     const caffe2::Tensor<caffe2::CPUContext>& Texpected,

test/caffe2/test_harness.h

Lines changed: 1 addition & 5 deletions

@@ -87,11 +87,7 @@ at::Tensor MakeAtenTensor(
 using ReferenceImplementationBuilder =
     std::function<void(const OperatorDef& op_def, NetDef* net_def)>;
 
-ReferenceImplementationBuilder MakeDefaultReferenceImplementationBuilder() {
-  return [](const OperatorDef& op_def, NetDef* net_def) {
-    caffe2::ReferenceImplementationRegistry::Append(net_def, op_def);
-  };
-}
+ReferenceImplementationBuilder MakeDefaultReferenceImplementationBuilder();
 
 /// Creates an OperatorDef for a particular Backend
 /// op_name is the name of the operator (e.g. TcOp)
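Together with the test_harness.cc hunk above, this is the complementary fix to common.h's: instead of marking header definitions inline, RNGMutex and MakeDefaultReferenceImplementationBuilder become declarations in the headers while test_harness.cc provides the single definitions, so exactly one translation unit emits each symbol. A minimal sketch of the split, hypothetical names:

// counters.h -- declaration only, safe to include everywhere
// (hypothetical names).
#include <mutex>

std::mutex& CounterMutex();

// counters.cc -- the one and only definition; the function-local
// static is initialized thread-safely on first use.
std::mutex& CounterMutex() {
  static std::mutex counter_mutex;
  return counter_mutex;
}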

test/cuda/test_tc_mapper.cc

Lines changed: 18 additions & 22 deletions

@@ -253,18 +253,17 @@ TEST_F(TcCudaMapperTest, BatchTripleHadamard) {
   at::Tensor V = at::CUDA(at::kFloat).rand({B, D});
   at::Tensor W = at::CUDA(at::kFloat).rand({B, D});
   std::vector<at::Tensor> inputs = {U, V, W};
-  std::vector<at::Tensor> outputs;
 
   static constexpr auto TC = R"TC(
 def batch_triple_hadamard(float(B, D) U, float(B, D) V, float(B, D) W) -> (Z) {
   Z(b, d) = U(b, d) * V(b, d) * W(b, d)
 }
 )TC";
 
-  auto checkFun = [=](const std::vector<at::Tensor>& inputs,
-                      std::vector<at::Tensor>& outputs) {
-    at::Tensor diff = outputs[0].sub(inputs[0] * inputs[1] * inputs[2]);
-    checkRtol(diff, inputs, D);
+  auto checkFun = [=](const std::vector<at::Tensor>& ins,
+                      std::vector<at::Tensor>& outs) {
+    at::Tensor diff = outs[0].sub(ins[0] * ins[1] * ins[2]);
+    checkRtol(diff, ins, D);
   };
   Check(
       TC,
@@ -283,16 +282,15 @@ TEST_F(TcCudaMapperTest, TensorDot) {
   at::Tensor I0 = at::CUDA(at::kFloat).rand({N, C1, C2, H, W});
   at::Tensor I1 = at::CUDA(at::kFloat).rand({N, C2, C3, H, W});
   std::vector<at::Tensor> inputs = {I0, I1};
-  std::vector<at::Tensor> outputs;
 
   static constexpr auto TC = R"TC(
 def tensordot(float(N, C1, C2, H, W) I0, float(N, C2, C3, H, W) I1) -> (O) {
   O(n, c1, c3, h, w) +=! I0(n, c1, r_c2, h, w) * I1(n, r_c2, c3, h, w)
 }
 )TC";
   // No defaults for this case
-  auto checkFun = [](const std::vector<at::Tensor>& inputs,
-                     std::vector<at::Tensor>& outputs) { return true; };
+  auto checkFun = [](const std::vector<at::Tensor>& ins,
+                     std::vector<at::Tensor>& outs) { return true; };
   auto options = tc::CudaMappingOptions::makeNaiveMappingOptions();
   auto name = "tensordot";
   Check(TC, name, options, inputs, checkFun);
@@ -309,21 +307,20 @@ TEST_F(TcCudaMapperTest, LUT) {
   at::Tensor I =
       at::CUDA(at::kFloat).rand({B, N}).mul_(B).floor_().toType(at::kInt);
   std::vector<at::Tensor> inputs = {LUT, I};
-  std::vector<at::Tensor> outputs;
 
   static constexpr auto TC = R"TC(
 def fun(float(B, R) LUT, int32(B, N) I) -> (O) {
   O(b, n) +=! LUT(I(b, n), r_r)
 }
 )TC";
 
-  auto checkFun = [=](const std::vector<at::Tensor>& inputs,
-                      std::vector<at::Tensor>& outputs) {
-    at::Tensor LUT = inputs[0].toBackend(at::kCPU);
-    at::Tensor I = inputs[1].toBackend(at::kCPU);
-    at::Tensor O = outputs[0].toBackend(at::kCPU);
-    auto LUTAccessor = LUT.accessor<float, 2>();
-    auto IAccessor = I.accessor<int, 2>();
+  auto checkFun = [=](const std::vector<at::Tensor>& ins,
+                      std::vector<at::Tensor>& outs) {
+    at::Tensor lut = ins[0].toBackend(at::kCPU);
+    at::Tensor in = ins[1].toBackend(at::kCPU);
+    at::Tensor O = outs[0].toBackend(at::kCPU);
+    auto LUTAccessor = lut.accessor<float, 2>();
+    auto IAccessor = in.accessor<int, 2>();
     auto OAccessor = O.accessor<float, 2>();
     for (int b = 0; b < B; b++) {
       for (int n = 0; n < N; n++) {
@@ -337,7 +334,7 @@ def fun(float(B, R) LUT, int32(B, N) I) -> (O) {
       }
     }
 
-    checkRtol(O, inputs, 5e-7);
+    checkRtol(O, ins, 5e-7);
   };
   Check(
       TC,
@@ -361,7 +358,6 @@ TEST_F(TcCudaMapperTest, DISABLED_SpatialBatchNormalization) {
   at::Tensor rMeanIn = at::CUDA(at::kFloat).rand({C2});
   at::Tensor rVarIn = at::CUDA(at::kFloat).rand({C2});
   std::vector<at::Tensor> inputs = {momentum, eps, I, rMeanIn, rVarIn};
-  std::vector<at::Tensor> outputs;
 
   static constexpr auto TC = R"TC(
 def spatial_batch_norm(
@@ -382,8 +378,8 @@ def spatial_batch_norm(
   normalizedOut(n, c, h, w) = O(n, c, h, w)
 })TC";
 
-  auto checkFun = [=](const std::vector<at::Tensor>& inputs,
-                      std::vector<at::Tensor>& outputs) {
+  auto checkFun = [=](const std::vector<at::Tensor>& ins,
+                      std::vector<at::Tensor>& outs) {
     TC_CUDA_RUNTIMEAPI_ENFORCE(cudaDeviceSynchronize());
     double prec = 3e-7;
     std::cout << "Checking expected output relative precision @" << prec;
@@ -400,8 +396,8 @@ def spatial_batch_norm(
         at::Scalar(momentum[0]).toFloat(),
         at::Scalar(eps[0]).toFloat(),
         true);
-    auto diff = O.sub(outputs[0]);
-    checkRtol(diff, inputs, N * H * W, prec);
+    auto diff = O.sub(outs[0]);
+    checkRtol(diff, ins, N * H * W, prec);
   };
 
   auto name = "spatial_batch_norm";
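Two recurring fixes run through this file: the never-used local outputs vectors are deleted, which silences unused-variable diagnostics under stricter flags, and each test's check lambda renames its inputs/outputs parameters to ins/outs so they no longer shadow the enclosing locals of the same names.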

test_python/test_c2.py

Lines changed: 1 addition & 1 deletion

@@ -31,7 +31,7 @@
 
 MATMUL_LANG = """
 def matmul(float(M,N) A, float(N,K) B) -> (output) {
-    output(m, n) +=! A(m, r_n) * B(r_n, k)
+    output(m, k) +=! A(m, r_n) * B(r_n, k)
 }
 """
 
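This also fixes a genuine index bug in the TC definition: the right-hand side's only free indices are m and k (r_n is a reduction index), yet the left-hand side wrote output(m, n). Indexing the output as output(m, k) yields the expected M-by-K matmul result.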
