Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit c60eff3

Browse files
Merge pull request #268 from facebookresearch/pr/rename
rename *Idxx* to *IdxX*
2 parents aa0ec13 + b6391cd commit c60eff3

File tree

5 files changed

+30
-30
lines changed

5 files changed

+30
-30
lines changed

include/tc/core/polyhedral/cuda/mapped_scop.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ class MappedScop {
179179
// XXX: this is a partially redundant state as this information can
180180
// potentially be extracted from the schedule tree; however, until we get a
181181
// first-class MappingNode, it requires some dirty hacks.
182-
ThreadIdxxScheduleDepthState threadIdxxScheduleDepthState;
182+
ThreadIdxXScheduleDepthState threadIdxXScheduleDepthState;
183183

184184
private:
185185
// Information about a detected reduction that can potentially

include/tc/core/polyhedral/cuda/memory_promotion_heuristic.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
namespace tc {
2424
namespace polyhedral {
25-
using ThreadIdxxScheduleDepthState =
25+
using ThreadIdxXScheduleDepthState =
2626
std::vector<std::pair<isl::union_set, size_t>>;
2727

2828
class MappedScop;
@@ -32,19 +32,19 @@ class Scop;
3232
// promote to shared memory at "depth" until "sharedMemorySize" is used.
3333
// Map copies between global and shared memory to threads and unroll those
3434
// copies if "unrollCopies" is set, using the options in "mscop".
35-
// "threadIdxxScheduleDepthState" contains the schedule depth at which the
35+
// "threadIdxXScheduleDepthState" contains the schedule depth at which the
3636
// computation was mapped to thread x and is used to check whether the global
3737
// memory is accessed in a coalesced way.
3838
void promoteGreedilyAtDepth(
3939
MappedScop& scop,
40-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
40+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
4141
std::size_t depth,
4242
std::size_t sharedMemorySize,
4343
bool unrollCopies);
4444

4545
void promoteToRegistersBelowThreads(
4646
Scop& scop,
47-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
47+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
4848
std::size_t nRegisters);
4949
} // namespace polyhedral
5050
} // namespace tc

src/core/polyhedral/cuda/mapped_scop.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ void MappedScop::mapRemaining(
113113

114114
for (size_t i = nMapped; i < nToMap; ++i) {
115115
if (MappingTypeId::makeId(i) == mapping::ThreadId::x()) {
116-
threadIdxxScheduleDepthState.emplace_back(std::make_pair(
116+
threadIdxXScheduleDepthState.emplace_back(std::make_pair(
117117
activeDomainPoints(schedule(), tree),
118118
tree->scheduleDepth(schedule())));
119119
}
@@ -179,7 +179,7 @@ void fixThreadsBelowFilter(
179179
// Mapping happened below filterTree, so we need points active for its
180180
// children. After insertion, filterTree is guaranteed to have at least
181181
// one child.
182-
mscop.threadIdxxScheduleDepthState.emplace_back(std::make_pair(
182+
mscop.threadIdxXScheduleDepthState.emplace_back(std::make_pair(
183183
activeDomainPoints(mscop.schedule(), filterTree->child({0})),
184184
filterTree->scheduleDepth(mscop.schedule())));
185185
}
@@ -339,7 +339,7 @@ size_t MappedScop::mapToThreads(detail::ScheduleTree* band, size_t nInner) {
339339
return nInner;
340340
}
341341
CHECK(reductionBandUpdates_.at(band).separated);
342-
threadIdxxScheduleDepthState.emplace_back(std::make_pair(
342+
threadIdxXScheduleDepthState.emplace_back(std::make_pair(
343343
activeDomainPoints(schedule(), band),
344344
band->scheduleDepth(schedule()) + 0));
345345
band = map(band, 0, mapping::ThreadId::x());
@@ -380,7 +380,7 @@ size_t MappedScop::mapToThreads(detail::ScheduleTree* band, size_t nInner) {
380380
++i, --dim) {
381381
auto id = mapping::ThreadId::makeId(nInner + i);
382382
if (id == mapping::ThreadId::x()) {
383-
threadIdxxScheduleDepthState.emplace_back(std::make_pair(
383+
threadIdxXScheduleDepthState.emplace_back(std::make_pair(
384384
activeDomainPoints(schedule(), band),
385385
band->scheduleDepth(schedule()) + dim));
386386
}
@@ -677,7 +677,7 @@ std::unique_ptr<MappedScop> MappedScop::makeWithOuterBlockInnerThreadStrategy(
677677

678678
promoteGreedilyAtDepth(
679679
*mappedScop,
680-
mappedScop->threadIdxxScheduleDepthState,
680+
mappedScop->threadIdxXScheduleDepthState,
681681
std::min(band->nOuterCoincident(), mappedScop->numBlocks.view.size()),
682682
sharedMemorySize,
683683
cudaOptions.proto().unroll_copy_shared() &&
@@ -695,7 +695,7 @@ std::unique_ptr<MappedScop> MappedScop::makeWithOuterBlockInnerThreadStrategy(
695695
// 8. Promote to registers below the loops mapped to threads.
696696
if (cudaOptions.proto().use_private_memory()) {
697697
promoteToRegistersBelowThreads(
698-
mappedScop->scop(), mappedScop->threadIdxxScheduleDepthState, -1ull);
698+
mappedScop->scop(), mappedScop->threadIdxXScheduleDepthState, -1ull);
699699
}
700700

701701
// 9. Insert mapping context

src/core/polyhedral/cuda/memory_promotion_heuristic.cc

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -253,11 +253,11 @@ isl::map makeNextElementMap(isl::space setSpace, int dim) {
253253
// Obtain the depth of the schedule dimension that was mapped to threadIdx.x
254254
// for the domain elements identified by "s". Assumes the depth is the same
255255
// for all these elements.
256-
size_t computeThreadIdxxScheduleDepth(
257-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
256+
size_t computeThreadIdxXScheduleDepth(
257+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
258258
isl::union_set s) {
259259
std::unordered_set<size_t> depths;
260-
for (auto p : threadIdxxScheduleDepthState) {
260+
for (auto p : threadIdxXScheduleDepthState) {
261261
if (!p.first.intersect(s).is_empty()) {
262262
depths.insert(p.second);
263263
}
@@ -284,7 +284,7 @@ size_t computeThreadIdxxScheduleDepth(
284284
* a coalesced way.
285285
*/
286286
bool isCoalesced(
287-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
287+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
288288
const TensorReferenceGroup& group,
289289
isl::union_map schedule,
290290
isl::union_set activePoints) {
@@ -296,16 +296,16 @@ bool isCoalesced(
296296
auto elementToNext = makeNextElementMap(
297297
tensorSpace, tensorSpace.dim(isl::dim_type::set) - 1);
298298
auto domainUMap = isl::union_set(isl::set(access.domain()));
299-
int threadIdxxDepth = computeThreadIdxxScheduleDepth(
300-
threadIdxxScheduleDepthState, domainUMap.intersect(activePoints));
299+
int threadIdxXDepth = computeThreadIdxXScheduleDepth(
300+
threadIdxXScheduleDepthState, domainUMap.intersect(activePoints));
301301
auto partialScheduleUMap =
302302
schedule.intersect_domain(domainUMap.universe());
303303
if (partialScheduleUMap.n_map() != 1) {
304304
throw promotion::PromotionLogicError("expected single schedule space");
305305
}
306306
auto partialSchedule = isl::map::from_union_map(partialScheduleUMap);
307307
auto scheduleToNextX = makeNextElementMap(
308-
partialSchedule.get_space().range(), threadIdxxDepth);
308+
partialSchedule.get_space().range(), threadIdxXDepth);
309309
auto scheduledAccess = isl::map(access).apply_domain(partialSchedule);
310310
auto accessedByAdjacentX = scheduleToNextX.apply_domain(scheduledAccess)
311311
.apply_range(scheduledAccess);
@@ -322,13 +322,13 @@ bool isCoalesced(
322322
* Check if the given "group" can be promoted to registers for the given active
323323
* domain points under full "schedule" where "nThreads" consecutive dimensions
324324
* are mapped to threads (the innermost of them being mapped to thread x) and
325-
* the depth of this mapping can be obtained from threadIdxxScheduleDepthState.
325+
* the depth of this mapping can be obtained from threadIdxXScheduleDepthState.
326326
*
327327
* In parciular, the group's footprint must contain only one element and the
328328
* same tensor element should never be accessed by two different threads.
329329
*/
330330
bool isPromotableToRegisterBelowThreads(
331-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
331+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
332332
const TensorReferenceGroup& group,
333333
isl::union_map schedule,
334334
size_t nThreads,
@@ -349,8 +349,8 @@ bool isPromotableToRegisterBelowThreads(
349349
// thread mapping, all refs in the group must all have the same thread-x
350350
// depth.
351351
auto depth = 1 +
352-
computeThreadIdxxScheduleDepth(
353-
threadIdxxScheduleDepthState,
352+
computeThreadIdxXScheduleDepth(
353+
threadIdxXScheduleDepthState,
354354
originalAccesses.domain().intersect(activePoints));
355355

356356
auto scheduledAccesses = originalAccesses.apply_domain(schedule);
@@ -431,7 +431,7 @@ std::vector<detail::ScheduleTree*> bandsSplitAfterDepth(
431431
*/
432432
void promoteToSharedGreedy(
433433
Scop& scop,
434-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
434+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
435435
const Block& block,
436436
size_t depth,
437437
size_t maxMemory) {
@@ -524,7 +524,7 @@ void promoteToSharedGreedy(
524524
// coalesced way.
525525
if (!hasReuse(*group, fullSched, depth) &&
526526
isCoalesced(
527-
threadIdxxScheduleDepthState,
527+
threadIdxXScheduleDepthState,
528528
*group,
529529
fullSched,
530530
activePoints)) {
@@ -548,14 +548,14 @@ void promoteToSharedGreedy(
548548

549549
void promoteGreedilyAtDepth(
550550
MappedScop& mscop,
551-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
551+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
552552
size_t depth,
553553
size_t sharedMemorySize,
554554
bool unrollCopies) {
555555
// 1. Promote using heuristic.
556556
promoteToSharedGreedy(
557557
mscop.scop(),
558-
threadIdxxScheduleDepthState,
558+
threadIdxXScheduleDepthState,
559559
mscop.numThreads,
560560
depth,
561561
sharedMemorySize);
@@ -568,14 +568,14 @@ void promoteGreedilyAtDepth(
568568
// loop is mapped to thread x, promote below that depth.
569569
void promoteToRegistersBelowThreads(
570570
Scop& scop,
571-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
571+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
572572
size_t nRegisters) {
573573
using namespace tc::polyhedral::detail;
574574

575575
auto root = scop.scheduleRoot();
576576

577577
auto fullSched = fullSchedule(root);
578-
for (const auto& kvp : threadIdxxScheduleDepthState) {
578+
for (const auto& kvp : threadIdxXScheduleDepthState) {
579579
auto depth = kvp.second + 1;
580580
auto subdomain = kvp.first;
581581

@@ -636,7 +636,7 @@ void promoteToRegistersBelowThreads(
636636
continue;
637637
}
638638
if (!isPromotableToRegisterBelowThreads(
639-
threadIdxxScheduleDepthState,
639+
threadIdxXScheduleDepthState,
640640
*group,
641641
fullSched,
642642
nMappedThreads,

test/test_mapper_memory_promotion.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ def fun(float(N, M) A) -> (B, C) {
397397
tc, {{"N", problemSize1}, {"M", problemSize2}}, {tileSize1, tileSize2});
398398
promoteGreedilyAtDepth(
399399
*mscop,
400-
mscop->threadIdxxScheduleDepthState,
400+
mscop->threadIdxXScheduleDepthState,
401401
depth,
402402
maxSharedMemory,
403403
false);

0 commit comments

Comments
 (0)