Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 74feae5

Browse files
author
Sven Verdoolaege
committed
isPromotableToRegisterBelowThreads: pass in depth from caller
The caller knows the depth, so there is no point in recomputing it.
1 parent 45e32d7 commit 74feae5

File tree

1 file changed

+5
-18
lines changed

1 file changed

+5
-18
lines changed

tc/core/polyhedral/cuda/memory_promotion_heuristic.cc

Lines changed: 5 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -316,18 +316,17 @@ bool isCoalesced(
316316
/*
317317
* Check if the given "group" can be promoted to registers for the given active
318318
* domain points under full "schedule" where "nThreads" consecutive dimensions
319-
* are mapped to threads (the innermost of them being mapped to thread x) and
320-
* the depth of this mapping can be obtained from threadIdxXScheduleDepthState.
319+
* at "depth"
320+
* are mapped to threads (the innermost of them being mapped to thread x).
321321
*
322322
* In particular, the group's footprint must contain only one element and the
323323
* same tensor element should never be accessed by two different threads.
324324
*/
325325
bool isPromotableToRegisterBelowThreads(
326-
const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
327326
const TensorReferenceGroup& group,
328327
isl::union_map schedule,
329-
size_t nThreads,
330-
isl::union_set activePoints) {
328+
size_t depth,
329+
size_t nThreads) {
331330
auto originalAccesses = group.originalAccesses();
332331

333332
// Return early if more than one element needs to be stored in registers.
@@ -340,14 +339,6 @@ bool isPromotableToRegisterBelowThreads(
340339
return false;
341340
}
342341

343-
// Since this function is only supposed to be called on groups seen _below_
344-
// thread mapping, all refs in the group must all have the same thread-x
345-
// depth.
346-
auto depth = 1 +
347-
computeThreadIdxXScheduleDepth(
348-
threadIdxXScheduleDepthState,
349-
originalAccesses.domain().intersect(activePoints));
350-
351342
auto scheduledAccesses = originalAccesses.apply_domain(schedule);
352343

353344
// Scheduled accesses contain maps from schedule dimensions to tensor
@@ -635,11 +626,7 @@ void promoteToRegistersBelowThreads(
635626
continue;
636627
}
637628
if (!isPromotableToRegisterBelowThreads(
638-
threadIdxXScheduleDepthState,
639-
*group,
640-
fullSched,
641-
nMappedThreads,
642-
points)) {
629+
*group, fullSched, depth, nMappedThreads)) {
643630
continue;
644631
}
645632
if (!hasReuseWithin(*group, partialSchedMupa)) {

0 commit comments

Comments
 (0)