@@ -253,11 +253,11 @@ isl::map makeNextElementMap(isl::space setSpace, int dim) {
 // Obtain the depth of the schedule dimension that was mapped to threadIdx.x
 // for the domain elements identified by "s". Assumes the depth is the same
 // for all these elements.
-size_t computeThreadIdxxScheduleDepth(
-    const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
+size_t computeThreadIdxXScheduleDepth(
+    const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
     isl::union_set s) {
   std::unordered_set<size_t> depths;
-  for (auto p : threadIdxxScheduleDepthState) {
+  for (auto p : threadIdxXScheduleDepthState) {
     if (!p.first.intersect(s).is_empty()) {
       depths.insert(p.second);
     }
@@ -284,7 +284,7 @@ size_t computeThreadIdxxScheduleDepth(
  * a coalesced way.
  */
 bool isCoalesced(
-    const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
+    const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
     const TensorReferenceGroup& group,
     isl::union_map schedule,
     isl::union_set activePoints) {
@@ -296,16 +296,16 @@ bool isCoalesced(
     auto elementToNext = makeNextElementMap(
         tensorSpace, tensorSpace.dim(isl::dim_type::set) - 1);
     auto domainUMap = isl::union_set(isl::set(access.domain()));
-    int threadIdxxDepth = computeThreadIdxxScheduleDepth(
-        threadIdxxScheduleDepthState, domainUMap.intersect(activePoints));
+    int threadIdxXDepth = computeThreadIdxXScheduleDepth(
+        threadIdxXScheduleDepthState, domainUMap.intersect(activePoints));
     auto partialScheduleUMap =
         schedule.intersect_domain(domainUMap.universe());
     if (partialScheduleUMap.n_map() != 1) {
       throw promotion::PromotionLogicError("expected single schedule space");
     }
     auto partialSchedule = isl::map::from_union_map(partialScheduleUMap);
     auto scheduleToNextX = makeNextElementMap(
-        partialSchedule.get_space().range(), threadIdxxDepth);
+        partialSchedule.get_space().range(), threadIdxXDepth);
     auto scheduledAccess = isl::map(access).apply_domain(partialSchedule);
     auto accessedByAdjacentX = scheduleToNextX.apply_domain(scheduledAccess)
                                    .apply_range(scheduledAccess);
@@ -322,13 +322,13 @@ bool isCoalesced(
  * Check if the given "group" can be promoted to registers for the given active
  * domain points under full "schedule" where "nThreads" consecutive dimensions
  * are mapped to threads (the innermost of them being mapped to thread x) and
- * the depth of this mapping can be obtained from threadIdxxScheduleDepthState.
+ * the depth of this mapping can be obtained from threadIdxXScheduleDepthState.
  *
  * In parciular, the group's footprint must contain only one element and the
  * same tensor element should never be accessed by two different threads.
  */
 bool isPromotableToRegisterBelowThreads(
-    const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
+    const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
     const TensorReferenceGroup& group,
     isl::union_map schedule,
     size_t nThreads,
@@ -349,8 +349,8 @@ bool isPromotableToRegisterBelowThreads(
   // thread mapping, all refs in the group must all have the same thread-x
   // depth.
   auto depth = 1 +
-      computeThreadIdxxScheduleDepth(
-          threadIdxxScheduleDepthState,
+      computeThreadIdxXScheduleDepth(
+          threadIdxXScheduleDepthState,
           originalAccesses.domain().intersect(activePoints));
 
   auto scheduledAccesses = originalAccesses.apply_domain(schedule);
@@ -431,7 +431,7 @@ std::vector<detail::ScheduleTree*> bandsSplitAfterDepth(
  */
 void promoteToSharedGreedy(
     Scop& scop,
-    const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
+    const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
     const Block& block,
     size_t depth,
     size_t maxMemory) {
@@ -524,7 +524,7 @@ void promoteToSharedGreedy(
       // coalesced way.
       if (!hasReuse(*group, fullSched, depth) &&
           isCoalesced(
-              threadIdxxScheduleDepthState,
+              threadIdxXScheduleDepthState,
               *group,
               fullSched,
               activePoints)) {
@@ -548,14 +548,14 @@ void promoteToSharedGreedy(
 
 void promoteGreedilyAtDepth(
     MappedScop& mscop,
-    const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
+    const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
     size_t depth,
     size_t sharedMemorySize,
     bool unrollCopies) {
   // 1. Promote using heuristic.
   promoteToSharedGreedy(
       mscop.scop(),
-      threadIdxxScheduleDepthState,
+      threadIdxXScheduleDepthState,
       mscop.numThreads,
       depth,
       sharedMemorySize);
@@ -568,14 +568,14 @@ void promoteGreedilyAtDepth(
 // loop is mapped to thread x, promote below that depth.
 void promoteToRegistersBelowThreads(
     Scop& scop,
-    const ThreadIdxxScheduleDepthState& threadIdxxScheduleDepthState,
+    const ThreadIdxXScheduleDepthState& threadIdxXScheduleDepthState,
     size_t nRegisters) {
   using namespace tc::polyhedral::detail;
 
   auto root = scop.scheduleRoot();
 
   auto fullSched = fullSchedule(root);
-  for (const auto& kvp : threadIdxxScheduleDepthState) {
+  for (const auto& kvp : threadIdxXScheduleDepthState) {
     auto depth = kvp.second + 1;
     auto subdomain = kvp.first;
 
@@ -636,7 +636,7 @@ void promoteToRegistersBelowThreads(
         continue;
       }
       if (!isPromotableToRegisterBelowThreads(
-              threadIdxxScheduleDepthState,
+              threadIdxXScheduleDepthState,
               *group,
               fullSched,
               nMappedThreads,
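
For context, the renamed helper computeThreadIdxXScheduleDepth looks up the schedule depth at which threadIdx.x was mapped for a given set of domain elements. Below is a minimal standalone sketch of that lookup, not the Tensor Comprehensions implementation: isl::union_set is replaced by a plain std::set<int> of statement-instance ids, and ThreadIdxXScheduleDepthState is assumed to be a vector of (domain subset, depth) pairs, which is what the loop over p.first and p.second in the diff suggests. The real function relies on the documented assumption that all matching entries agree on the depth; the sketch turns that assumption into an explicit check.

// Sketch only: simplified stand-ins for the isl-based types used in the diff.
#include <algorithm>
#include <cstddef>
#include <set>
#include <stdexcept>
#include <unordered_set>
#include <utility>
#include <vector>

using DomainSubset = std::set<int>; // stand-in for isl::union_set
using ThreadIdxXScheduleDepthState =
    std::vector<std::pair<DomainSubset, std::size_t>>; // (domain, threadIdx.x depth)

// True if the two domain subsets share at least one element, mirroring
// `!p.first.intersect(s).is_empty()` in the diff.
bool intersects(const DomainSubset& a, const DomainSubset& b) {
  return std::any_of(
      a.begin(), a.end(), [&](int x) { return b.count(x) != 0; });
}

std::size_t computeThreadIdxXScheduleDepth(
    const ThreadIdxXScheduleDepthState& state,
    const DomainSubset& s) {
  std::unordered_set<std::size_t> depths;
  for (const auto& p : state) {
    if (intersects(p.first, s)) {
      depths.insert(p.second);
    }
  }
  // The comment in the diff assumes the depth is the same for all elements of
  // "s"; surface a violation of that assumption explicitly.
  if (depths.size() != 1) {
    throw std::logic_error("expected a single threadIdx.x schedule depth");
  }
  return *depths.begin();
}

int main() {
  ThreadIdxXScheduleDepthState state = {
      {{0, 1, 2}, 3}, // statements 0..2: threadIdx.x mapped at depth 3
      {{3, 4}, 5}};   // statements 3..4: threadIdx.x mapped at depth 5
  // Query a subset that only touches the first entry: expect depth 3.
  return computeThreadIdxXScheduleDepth(state, {1}) == 3 ? 0 : 1;
}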