Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit e1db3a9

Browse files
author
Sven Verdoolaege
committed
insert thread specific markers
These markers are placed right underneath the innermost band member mapped to a thread identifier and allow the memory promotion to recover these positions. They form an alternative to ThreadIdxXScheduleDepthState, which will gradually be removed. ThreadIdxXScheduleDepthState maintains duplicate state and does so in an inconsistent way. In particular, when at least one member is mapped to a thread identifier, it holds the depth of the member mapped to the x thread identifier (which is one less than the schedule depth of the newly introduced marker), but if no member is mapped, then it points to the depth of where the marker is introduced. For consistency, a marker is also introduced in mapCopiesToThreads, even though ThreadIdxXScheduleDepthState is not being set there.
1 parent 6644cb2 commit e1db3a9

9 files changed

+66
-0
lines changed

tc/core/polyhedral/cuda/mapped_scop.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ void MappedScop::mapToBlocksAndScaleBand(
161161
* Given a node in the schedule tree of a mapped scop,
162162
* insert a mapping filter underneath (if needed) that fixes
163163
* the remaining thread identifiers starting at "begin" to zero.
164+
* Add a marker underneath that marks the subtree that is thread specific.
164165
*/
165166
void fixThreadsBelow(
166167
MappedScop& mscop,
@@ -173,6 +174,9 @@ void fixThreadsBelow(
173174

174175
auto band = detail::ScheduleTree::makeEmptyBand(mscop.scop().scheduleRoot());
175176
auto bandTree = insertNodeBelow(tree, std::move(band));
177+
auto ctx = tree->ctx_;
178+
insertNodeBelow(
179+
bandTree, detail::ScheduleTree::makeThreadSpecificMarker(ctx));
176180
mscop.mapRemaining<mapping::ThreadId>(bandTree, begin);
177181
}
178182

@@ -345,6 +349,9 @@ size_t MappedScop::mapToThreads(detail::ScheduleTree* band) {
345349
bandSplit(scop_->scheduleRoot(), band, nCanMap);
346350
}
347351

352+
auto ctx = band->ctx_;
353+
insertNodeBelow(band, detail::ScheduleTree::makeThreadSpecificMarker(ctx));
354+
348355
CHECK_GT(nMappedThreads, 0) << "not mapping to threads";
349356
CHECK_LE(nMappedThreads, 3) << "mapping to too many threads";
350357

tc/core/polyhedral/cuda/mapped_scop.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ class MappedScop {
162162
// coincident dimensions (plus reduction dimension, if any),
163163
// insert synchronization in case of a reduction, and
164164
// return the number of mapped thread identifiers.
165+
// A marker is added to mark the part of the tree that is thread specific
166+
// (right underneath the innermost band member mapped to a thread identifier).
165167
size_t mapToThreads(detail::ScheduleTree* band);
166168
// Map innermost bands to thread identifiers,
167169
// inserting synchronization in case of a reduction, and

tc/core/polyhedral/cuda/memory_promotion_heuristic.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ void mapCopiesToThreads(MappedScop& mscop, bool unroll) {
6565
throw promotion::PromotionLogicError("no copy band");
6666
}
6767

68+
auto ctx = node->ctx_;
69+
insertNodeBelow(
70+
bandNode, detail::ScheduleTree::makeThreadSpecificMarker(ctx));
71+
6872
// Check that we are not mapping to threads below other thread mappings.
6973
std::unordered_set<mapping::ThreadId, mapping::ThreadId::Hash> usedThreads;
7074
for (auto n : node->ancestors(root)) {

tc/core/polyhedral/schedule_isl_conversion.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ isl::schedule_node insert(isl::schedule_node node, const ScheduleTree* st) {
203203
return insertBranch(node, st);
204204
} else if (st->elemAs<ScheduleTreeElemExtension>()) {
205205
return insertExtension(node, st);
206+
} else if (st->elemAs<ScheduleTreeElemThreadSpecificMarker>()) {
207+
return insertChild(node, st);
206208
} else {
207209
LOG(FATAL) << "NYI: insert type: " << *st;
208210
}
@@ -329,6 +331,7 @@ isl::space definitionParamSpace(const ScheduleTree* node) {
329331
case detail::ScheduleTreeType::None:
330332
case detail::ScheduleTreeType::Set:
331333
case detail::ScheduleTreeType::Sequence:
334+
case detail::ScheduleTreeType::ThreadSpecificMarker:
332335
break;
333336
}
334337
return space;

tc/core/polyhedral/schedule_print.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ std::ostream& operator<<(std::ostream& os, detail::ScheduleTreeType nt) {
127127
os << "sequence";
128128
} else if (nt == detail::ScheduleTreeType::Set) {
129129
os << "seq";
130+
} else if (nt == detail::ScheduleTreeType::ThreadSpecificMarker) {
131+
os << "thread_specific";
130132
} else {
131133
LOG(FATAL) << "NYI: print type: " << static_cast<int>(nt);
132134
}
@@ -224,6 +226,13 @@ std::ostream& ScheduleTreeElemSet::write(std::ostream& os) const {
224226
return os;
225227
}
226228

229+
std::ostream& ScheduleTreeElemThreadSpecificMarker::write(
230+
std::ostream& os) const {
231+
WS w;
232+
os << w.tab() << "thread_specific()";
233+
return os;
234+
}
235+
227236
std::ostream& operator<<(
228237
std::ostream& os,
229238
const std::vector<ScheduleTreeUPtr>& vst) {

tc/core/polyhedral/schedule_tree.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,17 @@ std::unique_ptr<ScheduleTree> ScheduleTree::makeExtension(
264264
return res;
265265
}
266266

267+
// Build a schedule tree node of type ThreadSpecificMarker in context "ctx".
// The node gets a fresh ScheduleTreeElemThreadSpecificMarker element and
// takes over the trees in "children" as its children.
std::unique_ptr<ScheduleTree> ScheduleTree::makeThreadSpecificMarker(
    isl::ctx ctx,
    std::vector<ScheduleTreeUPtr>&& children) {
  ScheduleTreeUPtr marker(new ScheduleTree(ctx));
  marker->type_ = detail::ScheduleTreeType::ThreadSpecificMarker;
  marker->elem_ = std::unique_ptr<ScheduleTreeElemThreadSpecificMarker>(
      new ScheduleTreeElemThreadSpecificMarker());
  marker->appendChildren(std::move(children));
  return marker;
}
277+
267278
////////////////////////////////////////////////////////////////////////////////
268279
// Collector member functions
269280
////////////////////////////////////////////////////////////////////////////////

tc/core/polyhedral/schedule_tree.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,10 @@ struct ScheduleTree {
311311
isl::union_map extension,
312312
std::vector<ScheduleTreeUPtr>&& children = {});
313313

314+
// Create a node of type ThreadSpecificMarker in context "ctx"
// with the given children (none by default).
static ScheduleTreeUPtr makeThreadSpecificMarker(
315+
isl::ctx ctx,
316+
std::vector<ScheduleTreeUPtr>&& children = {});
317+
314318
template <typename... Args>
315319
static ScheduleTreeUPtr makeBand(
316320
isl::multi_union_pw_aff mupa,

tc/core/polyhedral/schedule_tree_elem.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ std::unique_ptr<ScheduleTreeElemBase> ScheduleTreeElemBase::make(
113113
ELEM_MAKE_CASE(ScheduleTreeElemMappingFilter)
114114
ELEM_MAKE_CASE(ScheduleTreeElemSequence)
115115
ELEM_MAKE_CASE(ScheduleTreeElemSet)
116+
ELEM_MAKE_CASE(ScheduleTreeElemThreadSpecificMarker)
116117

117118
#undef ELEM_MAKE_CASE
118119

tc/core/polyhedral/schedule_tree_elem.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ enum class ScheduleTreeType {
3838
Sequence,
3939
Set,
4040
MappingFilter,
41+
ThreadSpecificMarker,
4142
Any,
4243
};
4344

@@ -279,6 +280,30 @@ struct ScheduleTreeElemBand : public ScheduleTreeElemBase {
279280
std::vector<bool> unroll_;
280281
};
281282

283+
/*
284+
* A node of type ThreadSpecificMarker marks part of a schedule tree
285+
* that is specific to a thread. That is, the marker appears right
286+
* underneath the innermost band member mapped to threads.
287+
*/
288+
struct ScheduleTreeElemThreadSpecificMarker : public ScheduleTreeElemBase {
289+
static constexpr std::initializer_list<detail::ScheduleTreeType>
290+
NodeDerivedTypes{detail::ScheduleTreeType::None};
291+
static constexpr detail::ScheduleTreeType NodeType =
292+
detail::ScheduleTreeType::ThreadSpecificMarker;
293+
explicit ScheduleTreeElemThreadSpecificMarker() {}
294+
virtual ~ScheduleTreeElemThreadSpecificMarker() override {}
295+
bool operator==(const ScheduleTreeElemThreadSpecificMarker& other) const {
296+
return true;
297+
}
298+
bool operator!=(const ScheduleTreeElemThreadSpecificMarker& other) const {
299+
return !(*this == other);
300+
}
301+
virtual std::ostream& write(std::ostream& os) const override;
302+
virtual detail::ScheduleTreeType type() const override {
303+
return NodeType;
304+
}
305+
};
306+
282307
bool elemEquals(
283308
const ScheduleTreeElemBase* e1,
284309
const ScheduleTreeElemBase* e2,

0 commit comments

Comments
 (0)