Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit a7ae6b8

Browse files
author
Sven Verdoolaege
committed
MappedScop::mapToThreads: treat coincident and reduction dimensions the same
Now that an entire band is mapped as a whole, rather than having the reduction dimensions and coincident dimensions mapped by separate calls to MappedScop::mapToThreads, there is no longer any need to treat them separately. The reduction dimension still needs to be mapped to the X thread identifier, but since it appears in the innermost position of the dimensions mapped to thread identifiers, it will get mapped to the X thread identifier automatically. This significantly simplifies the code and is a useful step in moving towards generating a single mapping node for the entire thread mapping.
1 parent 74feae5 commit a7ae6b8

File tree

1 file changed

+21
-30
lines changed

1 file changed

+21
-30
lines changed

tc/core/polyhedral/cuda/mapped_scop.cc

Lines changed: 21 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -332,45 +332,36 @@ size_t MappedScop::mapToThreads(detail::ScheduleTree* band) {
332332
return 0;
333333
}
334334

335-
size_t nMappedReductionThreads = 0;
336-
if (reductionBandUpdates_.count(band) == 1) {
337-
// A reduction is assumed to get mapped to threadIdx.x
338-
CHECK(reductionBandUpdates_.at(band).separated);
339-
auto reductionDim = reductionBandUpdates_.at(band).reductionDim;
340-
threadIdxXScheduleDepthState.emplace_back(std::make_pair(
341-
activeDomainPoints(schedule(), band),
342-
band->scheduleDepth(schedule()) + reductionDim));
343-
band = map(band, reductionDim, mapping::ThreadId::x());
344-
nMappedReductionThreads = 1;
345-
}
346-
347335
// With current isl scheduler, if coincident dimensions exist in a band,
348336
// they are outermost.
349337
// If a band has more than 3 coincident dimensions,
350338
// then the innermost of those will be used.
351-
auto nOuterCoincident = bandNode->nOuterCoincident();
352-
if (nOuterCoincident < 1) {
353-
return nMappedReductionThreads;
354-
}
339+
auto nCanMap = bandNode->nOuterCoincident();
355340

356-
auto nMappedThreads = std::min(
357-
numThreads.view.size() - nMappedReductionThreads,
358-
static_cast<size_t>(nOuterCoincident));
341+
// If the band has a detected reduction, then the first member
342+
// after the coincident members is the reduction member and
343+
// this member has to be mapped as well.
344+
// In particular, it will get mapped to threadIdx.x
345+
if (reductionBandUpdates_.count(band) == 1) {
346+
CHECK(reductionBandUpdates_.at(band).separated);
347+
nCanMap++;
348+
}
359349

360-
// Immediately return if mapping to one thread dimension only was requested
361-
// and a reduction was already mapped. (Note that reduction is detected only
362-
// if there are not enough outer coincident members, 0 in this case).
363-
if (nMappedThreads == 0) {
364-
return nMappedReductionThreads;
350+
if (nCanMap < 1) {
351+
return 0;
365352
}
366-
CHECK_LE(nMappedThreads, 3 - nMappedReductionThreads)
367-
<< "mapping to too many threads";
353+
354+
auto nMappedThreads =
355+
std::min(numThreads.view.size(), static_cast<size_t>(nCanMap));
356+
357+
CHECK_GT(nMappedThreads, 0) << "not mapping to threads";
358+
CHECK_LE(nMappedThreads, 3) << "mapping to too many threads";
368359

369360
// Map the coincident dimensions to threads starting from the innermost and
370-
// from thread x unless it was already mapped to a reduction.
361+
// from thread x.
371362
for (size_t i = 0; i < nMappedThreads; ++i) {
372-
auto id = mapping::ThreadId::makeId(nMappedReductionThreads + i);
373-
auto dim = nOuterCoincident - 1 - i;
363+
auto id = mapping::ThreadId::makeId(i);
364+
auto dim = nCanMap - 1 - i;
374365
if (id == mapping::ThreadId::x()) {
375366
threadIdxXScheduleDepthState.emplace_back(std::make_pair(
376367
activeDomainPoints(schedule(), band),
@@ -379,7 +370,7 @@ size_t MappedScop::mapToThreads(detail::ScheduleTree* band) {
379370
band = map(band, dim, id);
380371
}
381372

382-
return nMappedReductionThreads + nMappedThreads;
373+
return nMappedThreads;
383374
}
384375

385376
namespace {

0 commit comments

Comments
 (0)