Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 343a2d5

Browse files
author
Sven Verdoolaege
committed
introduce single entry point for mapping to threads
This is a step towards creating a single mapping filter node for an entire thread mapping within a branch, rather than having separate nodes for each mapped thread identifier. In particular, this commit consolidates the introduction of thread mapping nodes to a single function.
1 parent 308281f commit 343a2d5

File tree

4 files changed

+31
-48
lines changed

4 files changed

+31
-48
lines changed

tc/core/polyhedral/cuda/mapped_scop.cc

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,7 @@ void fixThreadsBelow(
175175

176176
auto band = detail::ScheduleTree::makeEmptyBand(mscop.scop().scheduleRoot());
177177
auto bandTree = insertNodeBelow(tree, std::move(band));
178-
auto ctx = tree->ctx_;
179-
insertNodeBelow(
180-
bandTree, detail::ScheduleTree::makeThreadSpecificMarker(ctx));
181-
mscop.mapRemaining<mapping::ThreadId>(bandTree, begin);
178+
mscop.mapThreadsBackward(bandTree);
182179
}
183180

184181
bool MappedScop::detectReductions(detail::ScheduleTree* tree) {
@@ -314,6 +311,27 @@ detail::ScheduleTree* MappedScop::separateReduction(detail::ScheduleTree* st) {
314311
return st->ancestor(root, 2);
315312
}
316313

// Map the trailing members of the band node "band" to thread identifiers.
//
// The number of members mapped is the smaller of the band's member count
// and numThreads.view.size(), and is checked to be at most 3.
// Thread identifier i is paired with band member (nMember - 1 - i), so
// identifier 0 is assigned to the last band member, identifier 1 to the
// one before it, and so on.
//
// Before mapping, a thread-specific marker node is inserted directly below
// "band".  After mapping, mapRemaining<mapping::ThreadId> is invoked with
// the number of identifiers that were mapped.
//
// "band" must hold a band element (enforced with CHECK).
// Returns the tree pointer produced by the last mapToParameterWithExtent
// call, or "band" itself when no member was mapped.
314+
detail::ScheduleTree* MappedScop::mapThreadsBackward(
315+
detail::ScheduleTree* band) {
316+
auto bandNode = band->elemAs<detail::ScheduleTreeElemBand>();
317+
CHECK(bandNode);
318+
auto nMember = bandNode->nMember();
// Never map more members than the band has or than numThreads.view holds.
319+
auto nToMap = std::min(nMember, numThreads.view.size());
320+
CHECK_LE(nToMap, 3) << "mapping to too many threads";
321+
322+
auto ctx = band->ctx_;
// The marker goes in first, so it sits below "band" before any mapping is
// applied to the band.
323+
insertNodeBelow(band, detail::ScheduleTree::makeThreadSpecificMarker(ctx));
324+
325+
auto root = scop_->scheduleRoot();
326+
for (size_t i = 0; i < nToMap; ++i) {
327+
auto id = mapping::ThreadId::makeId(i);
// Walk the band members from last to first; i < nToMap <= nMember, so the
// subtraction cannot wrap around.
328+
auto pos = nMember - 1 - i;
// The call returns a tree pointer that replaces "band" for the following
// iterations and for the final return.
329+
band = mapToParameterWithExtent(root, band, pos, id, numThreads.view[i]);
330+
}
// NOTE(review): mapRemaining is not defined in this view; presumably it
// fixes the thread identifiers beyond the first nToMap -- confirm against
// its declaration in mapped_scop.h.
331+
mapRemaining<mapping::ThreadId>(band, nToMap);
332+
return band;
333+
}
334+
317335
size_t MappedScop::mapToThreads(detail::ScheduleTree* band) {
318336
using namespace tc::polyhedral::detail;
319337

@@ -364,20 +382,9 @@ size_t MappedScop::mapToThreads(detail::ScheduleTree* band) {
364382
bandSplit(scop_->scheduleRoot(), band, nMappedThreads);
365383
}
366384

367-
auto ctx = band->ctx_;
368-
insertNodeBelow(band, detail::ScheduleTree::makeThreadSpecificMarker(ctx));
369-
370385
CHECK_GT(nMappedThreads, 0) << "not mapping to threads";
371-
CHECK_LE(nMappedThreads, 3) << "mapping to too many threads";
372386

373-
// Map the coincident dimensions to threads starting from the innermost and
374-
// from thread x.
375-
for (size_t i = 0; i < nMappedThreads; ++i) {
376-
auto id = mapping::ThreadId::makeId(i);
377-
auto dim = nMappedThreads - 1 - i;
378-
band = map(band, dim, id);
379-
}
380-
mapRemaining<mapping::ThreadId>(band, nMappedThreads);
387+
mapThreadsBackward(band);
381388

382389
if (isReduction) {
383390
splitOutReductionAndInsertSyncs(band, nMappedThreads - 1);

tc/core/polyhedral/cuda/mapped_scop.h

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -93,14 +93,11 @@ class MappedScop {
9393
detail::ScheduleTree* mapBlocksForward(
9494
detail::ScheduleTree* band,
9595
size_t nToMap);
96-
// Map a particular "pos"-th dimension in a _band_ node identified by "tree"
97-
// to the thread dimension. Ancestors or descendants of "tree" must
98-
// not have a dimension already mapped to the same thread.
99-
inline detail::ScheduleTree*
100-
map(detail::ScheduleTree* tree, int pos, const mapping::ThreadId& id) {
101-
return mapToParameterWithExtent(
102-
scop_->scheduleRoot(), tree, pos, id, id.mappingSize(numThreads));
103-
}
96+
// Map the final band members of "band"
97+
// to successive thread identifiers, with the last member mapped
98+
// to thread identifier X.
99+
// This function can only be called once in any branch of the tree.
// At most min(number of band members, numThreads.view.size()) members are
// mapped, and that count is checked not to exceed 3.
// A thread-specific marker node is inserted below "band" as part of
// the mapping.
// Return a pointer to the resulting mapped tree ("band" itself if no
// member was mapped).
100+
detail::ScheduleTree* mapThreadsBackward(detail::ScheduleTree* band);
104101

105102
// Given that "nMapped" identifiers of type "MappingTypeId" have already
106103
// been mapped, map the remaining ones to zero

tc/core/polyhedral/cuda/memory_promotion_heuristic.cc

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,6 @@ void mapCopiesToThreads(MappedScop& mscop, bool unroll) {
8383
throw promotion::PromotionLogicError("no copy band");
8484
}
8585

86-
auto ctx = node->ctx_;
87-
insertNodeBelow(
88-
bandNode, detail::ScheduleTree::makeThreadSpecificMarker(ctx));
89-
9086
// Check that we are not mapping to threads below other thread mappings.
9187
std::unordered_set<mapping::ThreadId, mapping::ThreadId::Hash> usedThreads;
9288
for (auto n : node->ancestors(root)) {
@@ -97,20 +93,7 @@ void mapCopiesToThreads(MappedScop& mscop, bool unroll) {
9793
}
9894
}
9995

100-
// Map band dimensions to threads, in inverse order since the last member
101-
// iterates over the last subscript and is likely to result in coalescing.
102-
// If not all available thread ids are used, fix remaining to 1 thread.
103-
auto nToMap = std::min(band->nMember(), mscop.numThreads.view.size());
104-
for (size_t t = 0; t < nToMap; ++t) {
105-
auto pos = band->nMember() - 1 - t;
106-
mapToParameterWithExtent(
107-
root,
108-
bandNode,
109-
pos,
110-
mapping::ThreadId::makeId(t),
111-
mscop.numThreads.view[t]);
112-
}
113-
mscop.mapRemaining<mapping::ThreadId>(bandNode, nToMap);
96+
mscop.mapThreadsBackward(bandNode);
11497

11598
// Unroll if requested.
11699
if (unroll) {

test/test_cuda_mapper.cc

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,9 @@ struct PolyhedralMapperTest : public ::testing::Test {
8686
auto band = mscop->mapBlocksForward(root->child({0}), 1);
8787
bandScale(band, tileSizes);
8888

89-
USING_MAPPING_SHORT_NAMES(BX, BY, BZ, TX, TY, TZ);
9089
auto ns = detail::ScheduleTree::collectDFSPostorder(
9190
root, detail::ScheduleTreeType::Band);
92-
mscop->map(ns[1], 1, TX);
93-
mscop->map(ns[1], 0, TY);
91+
mscop->mapThreadsBackward(ns[1]);
9492
mscop->insertMappingContext();
9593
return mscop;
9694
}
@@ -113,9 +111,7 @@ struct PolyhedralMapperTest : public ::testing::Test {
113111
auto band = mscop->mapBlocksForward(root->child({0}), 2);
114112
bandScale(band, tileSizes);
115113

116-
USING_MAPPING_SHORT_NAMES(BX, BY, BZ, TX, TY, TZ);
117-
band = mscop->map(band->child({0}), 1, TX);
118-
band = mscop->map(band, 0, TY);
114+
band = mscop->mapThreadsBackward(band->child({0}));
119115
mscop->insertMappingContext();
120116
return mscop;
121117
}

0 commit comments

Comments
 (0)