Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 18ace39

Browse files
committed
introduce shared_depth to mapping options
By analogy with private_depth, introduce a "shared_depth" mapping option to control the schedule depth at which promotion to shared memory is performed. This value is optional. If it is not provided but shared memory promotion is requested, the promotion is performed at a depth equal to the number of block-mapped loops. This is a pragmatic choice that preserves the original behavior when the new option is not provided, in particular for benchmarks with hardcoded options, until we re-run the autotuning.
1 parent 04f1062 commit 18ace39

File tree

5 files changed

+19
-7
lines changed

5 files changed

+19
-7
lines changed

tc/core/cuda/cuda_mapping_options.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,11 @@ CudaMappingOptions& CudaMappingOptions::privateDepth(uint32_t depth) {
294294
return *this;
295295
}
296296

297+
CudaMappingOptions& CudaMappingOptions::sharedDepth(uint32_t depth) {
298+
ownedProto_.set_shared_depth(depth);
299+
return *this;
300+
}
301+
297302
CudaMappingOptions& CudaMappingOptions::mapToThreads(
298303
const std::string& commaSeparatedSizes) {
299304
auto sizes = parseCommaSeparatedIntegers<uint64_t>(commaSeparatedSizes);

tc/core/cuda/cuda_mapping_options.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ class CudaMappingOptions {
196196
CudaMappingOptions& unrollCopyShared(bool b);
197197
CudaMappingOptions& useReadOnlyCache(bool b);
198198
CudaMappingOptions& privateDepth(uint32_t depth);
199+
CudaMappingOptions& sharedDepth(uint32_t depth);
199200
///@}
200201

201202
/// Static constructors for predefined strategies.

tc/core/cuda/cuda_mapping_options_cpp_printer.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ CudaMappingOptionsCppPrinter& operator<<(
3939
"maxSharedMemory", cudaOptions.proto().max_shared_memory());
4040
}
4141
prn.printValueOption("privateDepth", cudaOptions.proto().private_depth());
42+
prn.printValueOption("sharedDepth", cudaOptions.proto().shared_depth());
4243
prn.endStmt();
4344
return prn;
4445
}

tc/core/polyhedral/cuda/mapped_scop.cc

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,8 +1049,9 @@ std::unique_ptr<MappedScop> MappedScop::makeWithOuterBlockInnerThreadStrategy(
10491049
LOG_IF(INFO, FLAGS_debug_tc_mapper) << "After mapping to blocks:" << std::endl
10501050
<< *mappedScop->schedule();
10511051

1052-
// 8. Promote to shared memory below the loops mapped to blocks.
1053-
// This may split the outer band, so find the new outer band after promotion.
1052+
// 8. Promote to shared memory.
1053+
// If shared promotion depth is specified in the mapping options, use the
1054+
// specified value. Otherwise, promote below the loops mapped to blocks.
10541055
if (cudaOptions.proto().use_shared_memory()) {
10551056
size_t sharedMemorySize = cudaOptions.proto().has_max_shared_memory()
10561057
? cudaOptions.proto().max_shared_memory()
@@ -1069,19 +1070,21 @@ std::unique_ptr<MappedScop> MappedScop::makeWithOuterBlockInnerThreadStrategy(
10691070
sharedMemorySize -= reductionMemoryRequirement;
10701071
}
10711072

1072-
auto band = outerBand->as<ScheduleTreeBand>();
1073-
LOG_IF(WARNING, FLAGS_debug_tc_mapper && band->nMember() == 0)
1074-
<< "Aborting memory promotion because outer band has 0 members (NYI)";
1075-
if (band->nMember() > 0 && sharedMemorySize > 0) {
1073+
if (sharedMemorySize > 0) {
10761074
LOG_IF(
10771075
WARNING,
10781076
cudaOptions.proto().unroll_copy_shared() &&
10791077
!generic.proto.has_unroll())
10801078
<< "requested to unroll copies to shared memory without providing the unroll size";
10811079

1080+
auto depth = cudaOptions.proto().has_shared_depth()
1081+
? cudaOptions.proto().shared_depth()
1082+
: std::min(
1083+
outerBand->as<ScheduleTreeBand>()->nOuterCoincident(),
1084+
mappedScop->numBlocks.view.size());
10821085
promoteToSharedAtDepth(
10831086
*mappedScop,
1084-
std::min(band->nOuterCoincident(), mappedScop->numBlocks.view.size()),
1087+
depth,
10851088
sharedMemorySize,
10861089
cudaOptions.proto().unroll_copy_shared() &&
10871090
generic.proto.has_unroll());

tc/proto/mapping_options.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ message CudaMappingOptionsProto {
7272
required bool use_readonly_cache = 8;
7373
// Depth of promotion to private memory, ignored if use_private_memory is false.
7474
optional uint32 private_depth = 9;
75+
// Depth of promotion to shared memory, ignored if use_shared_memory is false.
76+
optional uint32 shared_depth = 10;
7577
}
7678

7779
message CpuMappingOptionsProto {

0 commit comments

Comments
 (0)