Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit c60eff3

Browse files
Merge pull request #268 from facebookresearch/pr/rename
rename *Idxx* to *IdxX*
2 parents aa0ec13 + b6391cd commit c60eff3

File tree

5 files changed

+30
-30
lines changed

5 files changed

+30
-30
lines changed

include/tc/core/polyhedral/cuda/mapped_scop.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ class MappedScop {
179179
// XXX: this is a partially redundant state as this information can
180180
// potentially be extracted from the schedule tree; however, until we get a
181181
// first-class MappingNode, it requires some dirty hacks.
182-
ThreadIdxxScheduleDepthState threadIdxxScheduleDepthState;
182+
ThreadIdxXScheduleDepthState threadIdxXScheduleDepthState;
183183

184184
private:
185185
// Information about a detected reduction that can potentially

include/tc/core/polyhedral/cuda/memory_promotion_heuristic.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
namespace tc {
2424
namespace polyhedral {
25-
using ThreadIdxxScheduleDepthState =
25+
using ThreadIdxXScheduleDepthState =
2626
std::vector<std::pair<isl::union_set, size_t>>;
2727

2828
class MappedScop;
@@ -32,19 +32,19 @@ class Scop;
3232
// promote to shared memory at "depth" until "sharedMemorySize" is used.
3333
// Map copies between global and shared memory to threads and unroll those
3434
// copies if "unrollCopies" is set, using the options in "mscop".
35-
// "threadIdxxScheduleDepthState" contains the schedule depth at which the
35+
// "threadIdxXScheduleDepthState" contains the schedule depth at which the
3636
// computation was mapped to thread x and is used to check whether the global
3737
// memory is accessed in a coalesced way.
3838
void promoteGreedilyAtDepth(
3939
MappedScop& scop,
40-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
40+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
4141
std::size_t depth,
4242
std::size_t sharedMemorySize,
4343
bool unrollCopies);
4444

4545
void promoteToRegistersBelowThreads(
4646
Scop& scop,
47-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
47+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
4848
std::size_t nRegisters);
4949
} // namespace polyhedral
5050
} // namespace tc

src/core/polyhedral/cuda/mapped_scop.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ void MappedScop::mapRemaining(
113113

114114
for (size_t i = nMapped; i < nToMap; ++i) {
115115
if (MappingTypeId::makeId(i) == mapping::ThreadId::x()) {
116-
threadIdxxScheduleDepthState.emplace_back(std::make_pair(
116+
threadIdxXScheduleDepthState.emplace_back(std::make_pair(
117117
activeDomainPoints(schedule(), tree),
118118
tree->scheduleDepth(schedule())));
119119
}
@@ -179,7 +179,7 @@ void fixThreadsBelowFilter(
179179
// Mapping happened below filterTree, so we need points active for its
180180
// children. After insertion, filterTree is guaranteed to have at least
181181
// one child.
182-
mscop.threadIdxxScheduleDepthState.emplace_back(std::make_pair(
182+
mscop.threadIdxXScheduleDepthState.emplace_back(std::make_pair(
183183
activeDomainPoints(mscop.schedule(), filterTree->child({0})),
184184
filterTree->scheduleDepth(mscop.schedule())));
185185
}
@@ -339,7 +339,7 @@ size_t MappedScop::mapToThreads(detail::ScheduleTree* band, size_t nInner) {
339339
return nInner;
340340
}
341341
CHECK(reductionBandUpdates_.at(band).separated);
342-
threadIdxxScheduleDepthState.emplace_back(std::make_pair(
342+
threadIdxXScheduleDepthState.emplace_back(std::make_pair(
343343
activeDomainPoints(schedule(), band),
344344
band->scheduleDepth(schedule()) + 0));
345345
band = map(band, 0, mapping::ThreadId::x());
@@ -380,7 +380,7 @@ size_t MappedScop::mapToThreads(detail::ScheduleTree* band, size_t nInner) {
380380
++i, --dim) {
381381
auto id = mapping::ThreadId::makeId(nInner + i);
382382
if (id == mapping::ThreadId::x()) {
383-
threadIdxxScheduleDepthState.emplace_back(std::make_pair(
383+
threadIdxXScheduleDepthState.emplace_back(std::make_pair(
384384
activeDomainPoints(schedule(), band),
385385
band->scheduleDepth(schedule()) + dim));
386386
}
@@ -677,7 +677,7 @@ std::unique_ptr<MappedScop> MappedScop::makeWithOuterBlockInnerThreadStrategy(
677677

678678
promoteGreedilyAtDepth(
679679
*mappedScop,
680-
mappedScop->threadIdxxScheduleDepthState,
680+
mappedScop->threadIdxXScheduleDepthState,
681681
std::min(band->nOuterCoincident(), mappedScop->numBlocks.view.size()),
682682
sharedMemorySize,
683683
cudaOptions.proto().unroll_copy_shared() &&
@@ -695,7 +695,7 @@ std::unique_ptr<MappedScop> MappedScop::makeWithOuterBlockInnerThreadStrategy(
695695
// 8. Promote to registers below the loops mapped to threads.
696696
if (cudaOptions.proto().use_private_memory()) {
697697
promoteToRegistersBelowThreads(
698-
mappedScop->scop(), mappedScop->threadIdxxScheduleDepthState, -1ull);
698+
mappedScop->scop(), mappedScop->threadIdxXScheduleDepthState, -1ull);
699699
}
700700

701701
// 9. Insert mapping context

src/core/polyhedral/cuda/memory_promotion_heuristic.cc

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -253,11 +253,11 @@ isl::map makeNextElementMap(isl::space setSpace, int dim) {
253253
// Obtain the depth of the schedule dimension that was mapped to threadIdx.x
254254
// for the domain elements identified by "s". Assumes the depth is the same
255255
// for all these elements.
256-
size_t computeThreadIdxxScheduleDepth(
257-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
256+
size_t computeThreadIdxXScheduleDepth(
257+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
258258
isl::union_set s) {
259259
std::unordered_set<size_t> depths;
260-
for (auto p : threadIdxxScheduleDepthState) {
260+
for (auto p : threadIdxXScheduleDepthState) {
261261
if (!p.first.intersect(s).is_empty()) {
262262
depths.insert(p.second);
263263
}
@@ -284,7 +284,7 @@ size_t computeThreadIdxxScheduleDepth(
284284
* a coalesced way.
285285
*/
286286
bool isCoalesced(
287-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
287+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
288288
const TensorReferenceGroup& group,
289289
isl::union_map schedule,
290290
isl::union_set activePoints) {
@@ -296,16 +296,16 @@ bool isCoalesced(
296296
auto elementToNext = makeNextElementMap(
297297
tensorSpace, tensorSpace.dim(isl::dim_type::set) - 1);
298298
auto domainUMap = isl::union_set(isl::set(access.domain()));
299-
int threadIdxxDepth = computeThreadIdxxScheduleDepth(
300-
threadIdxxScheduleDepthState, domainUMap.intersect(activePoints));
299+
int threadIdxXDepth = computeThreadIdxXScheduleDepth(
300+
threadIdxXScheduleDepthState, domainUMap.intersect(activePoints));
301301
auto partialScheduleUMap =
302302
schedule.intersect_domain(domainUMap.universe());
303303
if (partialScheduleUMap.n_map() != 1) {
304304
throw promotion::PromotionLogicError("expected single schedule space");
305305
}
306306
auto partialSchedule = isl::map::from_union_map(partialScheduleUMap);
307307
auto scheduleToNextX = makeNextElementMap(
308-
partialSchedule.get_space().range(), threadIdxxDepth);
308+
partialSchedule.get_space().range(), threadIdxXDepth);
309309
auto scheduledAccess = isl::map(access).apply_domain(partialSchedule);
310310
auto accessedByAdjacentX = scheduleToNextX.apply_domain(scheduledAccess)
311311
.apply_range(scheduledAccess);
@@ -322,13 +322,13 @@ bool isCoalesced(
322322
* Check if the given "group" can be promoted to registers for the given active
323323
* domain points under full "schedule" where "nThreads" consecutive dimensions
324324
* are mapped to threads (the innermost of them being mapped to thread x) and
325-
* the depth of this mapping can be obtained from threadIdxxScheduleDepthState.
325+
* the depth of this mapping can be obtained from threadIdxXScheduleDepthState.
326326
*
327327
* In parciular, the group's footprint must contain only one element and the
328328
* same tensor element should never be accessed by two different threads.
329329
*/
330330
bool isPromotableToRegisterBelowThreads(
331-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
331+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
332332
const TensorReferenceGroup& group,
333333
isl::union_map schedule,
334334
size_t nThreads,
@@ -349,8 +349,8 @@ bool isPromotableToRegisterBelowThreads(
349349
// thread mapping, all refs in the group must all have the same thread-x
350350
// depth.
351351
auto depth = 1 +
352-
computeThreadIdxxScheduleDepth(
353-
threadIdxxScheduleDepthState,
352+
computeThreadIdxXScheduleDepth(
353+
threadIdxXScheduleDepthState,
354354
originalAccesses.domain().intersect(activePoints));
355355

356356
auto scheduledAccesses = originalAccesses.apply_domain(schedule);
@@ -431,7 +431,7 @@ std::vector<detail::ScheduleTree*> bandsSplitAfterDepth(
431431
*/
432432
void promoteToSharedGreedy(
433433
Scop& scop,
434-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
434+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
435435
const Block& block,
436436
size_t depth,
437437
size_t maxMemory) {
@@ -524,7 +524,7 @@ void promoteToSharedGreedy(
524524
// coalesced way.
525525
if (!hasReuse(*group, fullSched, depth) &&
526526
isCoalesced(
527-
threadIdxxScheduleDepthState,
527+
threadIdxXScheduleDepthState,
528528
*group,
529529
fullSched,
530530
activePoints)) {
@@ -548,14 +548,14 @@ void promoteToSharedGreedy(
548548

549549
void promoteGreedilyAtDepth(
550550
MappedScop& mscop,
551-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
551+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
552552
size_t depth,
553553
size_t sharedMemorySize,
554554
bool unrollCopies) {
555555
// 1. Promote using heuristic.
556556
promoteToSharedGreedy(
557557
mscop.scop(),
558-
threadIdxxScheduleDepthState,
558+
threadIdxXScheduleDepthState,
559559
mscop.numThreads,
560560
depth,
561561
sharedMemorySize);
@@ -568,14 +568,14 @@ void promoteGreedilyAtDepth(
568568
// loop is mapped to thread x, promote below that depth.
569569
void promoteToRegistersBelowThreads(
570570
Scop& scop,
571-
const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
571+
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
572572
size_t nRegisters) {
573573
using namespace tc::polyhedral::detail;
574574

575575
auto root = scop.scheduleRoot();
576576

577577
auto fullSched = fullSchedule(root);
578-
for (const auto& kvp : threadIdxxScheduleDepthState) {
578+
for (const auto& kvp : threadIdxXScheduleDepthState) {
579579
auto depth = kvp.second + 1;
580580
auto subdomain = kvp.first;
581581

@@ -636,7 +636,7 @@ void promoteToRegistersBelowThreads(
636636
continue;
637637
}
638638
if (!isPromotableToRegisterBelowThreads(
639-
threadIdxxScheduleDepthState,
639+
threadIdxXScheduleDepthState,
640640
*group,
641641
fullSched,
642642
nMappedThreads,

test/test_mapper_memory_promotion.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ def fun(float(N, M) A) -> (B, C) {
397397
tc, {{"N", problemSize1}, {"M", problemSize2}}, {tileSize1, tileSize2});
398398
promoteGreedilyAtDepth(
399399
*mscop,
400-
mscop->threadIdxxScheduleDepthState,
400+
mscop->threadIdxXScheduleDepthState,
401401
depth,
402402
maxSharedMemory,
403403
false);

0 commit comments

Comments
 (0)