@@ -210,25 +210,19 @@ isl::map fixOuterInputDimsAsParameters(isl::map map, int nDims) {
 }
 
 /*
- * Check if a reference group features reuse at "depth" after applying
- * "schedule". In particular, consider first depth schedule dimensions as fixed
- * by equating them to parameters and check if the resulting relation is not
- * injective.
+ * Check if a reference group features reuse within the "outer" schedule.
+ * In particular, check that for some given point in the outer schedule and
+ * some given group element, there is more than one statement instance
+ * accessing the element within the point in the outer schedule.
+ * In other words, check that the mapping from statement instances
+ * to pairs of outer schedule points and group elements is not injective.
  */
-bool hasReuse(
+bool hasReuseWithin(
     const TensorReferenceGroup& group,
-    isl::union_map schedule,
-    size_t depth) {
-  auto scheduledAccessesUMap = group.originalAccesses().apply_domain(schedule);
-  auto scheduledAccessMaps =
-      isl::UnionAsVector<isl::union_map>(scheduledAccessesUMap);
-  return std::any_of(
-      scheduledAccessMaps.begin(),
-      scheduledAccessMaps.end(),
-      [schedule, depth](isl::map access) {
-        access = fixOuterInputDimsAsParameters(access, static_cast<int>(depth));
-        return !access.is_injective();
-      });
+    isl::multi_union_pw_aff outer) {
+  auto map = isl::union_map::from(outer);
+  map = map.range_product(group.originalAccesses());
+  return !map.is_injective();
 }
 
 /*
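
For intuition, the new check can be exercised in isolation. The sketch below applies the same isl calls as hasReuseWithin to made-up toy inputs; it is not part of this patch, and the string constructors and isl_ctx_alloc usage assume isl's C++ bindings, where objects can be read from their string representation:

// Minimal sketch of the injectivity-based reuse test, on hypothetical
// toy inputs (the statement S, array A, and bounds are illustrative only).
#include <isl/cpp.h>

#include <iostream>

int main() {
  isl::ctx ctx(isl_ctx_alloc());

  // Outer schedule: only the i dimension is outer, so all j iterations
  // of S[i, j] share a single outer schedule point.
  isl::multi_union_pw_aff outer(ctx, "[{ S[i, j] -> [(i)] }]");

  // Accesses: every S[i, j] touches A[i], i.e. 8 instances per element.
  isl::union_map accesses(ctx, "{ S[i, j] -> A[i] : 0 <= i < 8 and 0 <= j < 8 }");

  // Same construction as hasReuseWithin: map each statement instance to
  // the pair (outer schedule point, accessed element), test injectivity.
  auto map = isl::union_map::from(outer);
  map = map.range_product(accesses);

  // Not injective here: for a fixed pair (i, A[i]), all 8 values of j
  // map to it, so the group features reuse within the outer schedule.
  std::cout << (map.is_injective() ? "no reuse" : "reuse") << "\n";
  return 0;
}
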
@@ -463,6 +457,8 @@ void promoteToSharedGreedy(
   for (auto bandNode : bands) {
     auto groupMap = TensorReferenceGroup::accessedBySubtree(bandNode, scop);
     auto partialSched = partialSchedule(root, bandNode);
+    // Pure affine schedule without (mapping) filters.
+    auto partialSchedMupa = partialScheduleMupa(root, bandNode);
     auto activePoints = activeDomainPoints(root, bandNode);
 
     // Prepare groups for sorting, to have specified order necessary for
@@ -522,7 +518,7 @@ void promoteToSharedGreedy(
       }
       // Do not promote if the group features no reuse and is accessed in a
       // coalesced way.
-      if (!hasReuse(*group, fullSched, depth) &&
+      if (!hasReuseWithin(*group, partialSchedMupa) &&
           isCoalesced(
               threadIdxXScheduleDepthState,
               *group,
@@ -606,6 +602,8 @@ void promoteToRegistersBelowThreads(
       // per-thread-group access relations.
       auto points = activeDomainPoints(root, band);
       auto partialSched = partialSchedule(root, band);
+      // Pure affine schedule without (mapping) filters.
+      auto partialSchedMupa = partialScheduleMupa(root, band);
 
       size_t nMappedThreads = 0;
       for (int j = 0; j < points.dim(isl::dim_type::param); ++j) {
@@ -643,7 +641,7 @@ void promoteToRegistersBelowThreads(
               points)) {
         continue;
       }
-      if (!hasReuse(*group, fullSched, depth)) {
+      if (!hasReuseWithin(*group, partialSchedMupa)) {
         continue;
       }
       // TODO: if something is already in shared, but reuse it within one
0 commit comments