Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 788dd94

Browse files
committed
register promotion: use promoteToRegistersAtDepth
Introduce a mapping option "privateDepth" to control the schedule depth at which register promotion is attempted. Pass this option to promoteToRegistersAtDepth instead of calling promoteToRegistersBelowThreads in the mapping strategy.
1 parent f716c7e commit 788dd94

8 files changed

+14
-13
lines changed

tc/core/cuda/cuda_mapping_options.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,11 @@ CudaMappingOptions& CudaMappingOptions::useReadOnlyCache(bool b) {
289289
return *this;
290290
}
291291

292+
CudaMappingOptions& CudaMappingOptions::privateDepth(uint32_t depth) {
293+
ownedProto_.set_private_depth(depth);
294+
return *this;
295+
}
296+
292297
CudaMappingOptions& CudaMappingOptions::mapToThreads(
293298
const std::string& commaSeparatedSizes) {
294299
auto sizes = parseCommaSeparatedIntegers<uint64_t>(commaSeparatedSizes);

tc/core/cuda/cuda_mapping_options.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ class CudaMappingOptions {
195195
CudaMappingOptions& maxSharedMemory(uint64_t size);
196196
CudaMappingOptions& unrollCopyShared(bool b);
197197
CudaMappingOptions& useReadOnlyCache(bool b);
198+
CudaMappingOptions& privateDepth(uint32_t depth);
198199
///@}
199200

200201
/// Static constructors for predefined strategies.

tc/core/cuda/cuda_mapping_options_cpp_printer.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ CudaMappingOptionsCppPrinter& operator<<(
3838
prn.printValueOption(
3939
"maxSharedMemory", cudaOptions.proto().max_shared_memory());
4040
}
41+
prn.printValueOption("privateDepth", cudaOptions.proto().private_depth());
4142
prn.endStmt();
4243
return prn;
4344
}

tc/core/polyhedral/cuda/mapped_scop.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1068,7 +1068,7 @@ std::unique_ptr<MappedScop> MappedScop::makeWithOuterBlockInnerThreadStrategy(
10681068

10691069
// 9. Promote to registers below the loops mapped to threads.
10701070
if (cudaOptions.proto().use_private_memory()) {
1071-
promoteToRegistersBelowThreads(*mappedScop, -1ull);
1071+
promoteToRegistersAtDepth(*mappedScop, cudaOptions.proto().private_depth());
10721072
}
10731073

10741074
LOG_IF(INFO, FLAGS_debug_tc_mapper)

tc/core/polyhedral/cuda/memory_promotion_heuristic.cc

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -779,16 +779,5 @@ void promoteToRegistersAtDepth(MappedScop& mscop, size_t depth) {
779779
}
780780
}
781781

782-
// Promote at the positions of the thread specific markers.
783-
void promoteToRegistersBelowThreads(MappedScop& mscop, size_t nRegisters) {
784-
auto& scop = mscop.scop();
785-
auto root = scop.scheduleRoot();
786-
auto markers = findThreadSpecificMarkers(root);
787-
788-
for (auto marker : markers) {
789-
promoteToRegistersBelow(mscop, marker);
790-
}
791-
}
792-
793782
} // namespace polyhedral
794783
} // namespace tc

tc/core/polyhedral/cuda/memory_promotion_heuristic.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ void promoteGreedilyAtDepth(
4444

4545
void promoteToRegistersBelow(MappedScop& mscop, detail::ScheduleTree* scope);
4646

47-
void promoteToRegistersBelowThreads(MappedScop& scop, std::size_t nRegisters);
4847
void promoteToRegistersAtDepth(MappedScop& scop, std::size_t depth);
4948

5049
} // namespace polyhedral

tc/proto/mapping_options.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ message CudaMappingOptionsProto {
7070
optional uint64 max_shared_memory = 7;
7171
// Use the readonly cache (i.e. emit __ldg loads)
7272
required bool use_readonly_cache = 8;
73+
// Depth of promotion to private memory, ignored if use_private_memory is false.
74+
optional uint32 private_depth = 9;
7375
}
7476

7577
message CpuMappingOptionsProto {

test/test_cuda_mapper_memory_promotion.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,8 +531,12 @@ def fun(float(N,K) A, float(K,M) B, float(N,M) C) -> (O) {
531531
};
532532

533533
TEST_F(MatMulBias, RegisterPromotion) {
534+
// Scheduled code has three loops, tile all, the top band gets mapped to 2
535+
// blocks, the bottom band gets mapped to 2 threads, test promotion before
536+
// thread-mapped loops, that is at depth 3 + 2 = 5.
534537
auto mappingOptions = CudaMappingOptions::makeNaiveMappingOptions()
535538
.tile(32, 32, 32)
539+
.privateDepth(5)
536540
.useSharedMemory(false)
537541
.usePrivateMemory(true);
538542

0 commit comments

Comments
 (0)