Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit b6ac9cc

Browse files
author
Sven Verdoolaege
committed
insert mapping filters right above band members mapped to threads
This allows the band members mapped to threads to be derived from the position of the mapping filters (and the marker underneath).
1 parent e11308d commit b6ac9cc

File tree

2 files changed

+22
-8
lines changed

2 files changed

+22
-8
lines changed

tc/core/polyhedral/cuda/mapped_scop.cc

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -342,11 +342,25 @@ size_t MappedScop::mapToThreads(detail::ScheduleTree* band) {
342342
return 0;
343343
}
344344

345-
auto nMappedThreads =
346-
std::min(numThreads.view.size(), static_cast<size_t>(nCanMap));
345+
auto nMappedThreads = nCanMap;
346+
if (nMappedThreads > numThreads.view.size()) {
347+
// Split band such that mapping filters get inserted
348+
// right above the first member mapped to a thread identifier.
349+
nMappedThreads = numThreads.view.size();
350+
bandSplit(scop_->scheduleRoot(), band, nCanMap - nMappedThreads);
351+
auto child = band->child({0});
352+
if (isReduction) {
353+
// Update reductionBandUpdates_ such that splitOutReductionAndInsertSyncs
354+
// can find the information it needs.
355+
reductionBandUpdates_.emplace(child, reductionBandUpdates_.at(band));
356+
reductionBandUpdates_.erase(band);
357+
}
358+
band = child;
359+
bandNode = band->elemAs<ScheduleTreeElemBand>();
360+
}
347361

348-
if (nCanMap < bandNode->nMember()) {
349-
bandSplit(scop_->scheduleRoot(), band, nCanMap);
362+
if (nMappedThreads < bandNode->nMember()) {
363+
bandSplit(scop_->scheduleRoot(), band, nMappedThreads);
350364
}
351365

352366
auto ctx = band->ctx_;
@@ -359,7 +373,7 @@ size_t MappedScop::mapToThreads(detail::ScheduleTree* band) {
359373
// from thread x.
360374
for (size_t i = 0; i < nMappedThreads; ++i) {
361375
auto id = mapping::ThreadId::makeId(i);
362-
auto dim = nCanMap - 1 - i;
376+
auto dim = nMappedThreads - 1 - i;
363377
if (id == mapping::ThreadId::x()) {
364378
threadIdxXScheduleDepthState.emplace_back(std::make_pair(
365379
activeDomainPoints(schedule(), band),
@@ -369,7 +383,7 @@ size_t MappedScop::mapToThreads(detail::ScheduleTree* band) {
369383
}
370384

371385
if (isReduction) {
372-
splitOutReductionAndInsertSyncs(band, nCanMap - 1);
386+
splitOutReductionAndInsertSyncs(band, nMappedThreads - 1);
373387
}
374388

375389
return nMappedThreads;

test/test_cuda_mapper_memory_promotion.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,8 @@ TEST_F(Sum4D, CodeInnerBand) {
204204
"C[16 * b0 + c4][16 * b1 + c5][c2 + c6][t0 + c3] = _C_0[0][0][0][0];";
205205
auto sync = "__syncthreads()";
206206

207-
auto code =
208-
emitCode({256, 128, 192, 224}, {16, 16, 16, 16}, {0, 0, 0, 0, 0, 0, 0});
207+
auto code = emitCode(
208+
{256, 128, 192, 224}, {16, 16, 16, 16}, {0, 0, 0, 0, 0, 0, 0, 0});
209209
// Order of copies may be arbitrary, but syncs must be inserted before and
210210
// after
211211
for (auto d : declarations) {

0 commit comments

Comments
 (0)