Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit e632eb8

Browse files
math-fehrftynse
authored and committed
Add more tests for reduction detection
Both cases test reduction detection for a non-parametric 2-dimensional mean. They differ only in the number of mapped threads and in the expected result. When mapping to more threads than the tiling size, some threads are considered to do nothing, which prevents the reduction detection from succeeding. Fixing this would require performing launch bounds tightening before reduction detection.
1 parent d443b59 commit e632eb8

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

test/test_cuda_mapper.cc

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,78 @@ def fun(float(N, K) I, float(N) O0) -> (O) {
926926
)TC");
927927
}
928928

929+
/*
930+
* Check that a 2D mean with these parameters does not produce a library call.
931+
* The call is not produced because the band is tiled by 32 and 512 threads are
932+
* mapped to the band.
933+
* In practice, check that the library call does not appear in the code.
934+
*/
935+
TEST_F(PolyhedralMapperTest, Mean2DNonParametric_512threads) { // 2D mean over a fixed-size 36864 x 1024 input, mapped to 512 threads
936+
string tc = R"TC(
937+
def fun(float(36864, 1024) I) -> (O) {
938+
O(n) +=! I(n, r_n)
939+
O(n) = O(n) / (1024)
940+
}
941+
)TC"; // the TC sums I over r_n into O(n), then divides O(n) by the constant 1024
942+
auto mappingOptions =
943+
DefaultOptions()
944+
.outerScheduleFusionStrategy(tc::FusionStrategy::Preserve3Coincident)
945+
.outerScheduleAllowSkewing(false)
946+
.outerSchedulePositiveOrthant(true)
947+
.intraTileScheduleFusionStrategy(tc::FusionStrategy::Min)
948+
.intraTileScheduleAllowSkewing(false)
949+
.intraTileSchedulePositiveOrthant(true)
950+
.fixParametersBeforeScheduling(false)
951+
.tile(18, 32) // the 32 here is the tiling that the comment preceding this test refers to
952+
.unroll(16)
953+
.tileImperfectlyNested(false)
954+
.matchLibraryCalls(true) // library-call matching is requested, yet the test expects no call to be emitted
955+
.mapToThreads({512}) // 512 threads against a tiling of 32: the stated reason the library call is not produced
956+
.mapToBlocks({16384})
957+
.useSharedMemory(true)
958+
.usePrivateMemory(false)
959+
.unrollCopyShared(true);
960+
961+
auto code = codegenMapped(tc, mappingOptions); // codegenMapped is defined elsewhere; its result is searched as a string below
962+
using tc::code::cuda::kCUBReductionName;
963+
EXPECT_TRUE(code.find(kCUBReductionName) == std::string::npos); // passes only if kCUBReductionName occurs nowhere in the generated code
964+
}
965+
966+
/*
967+
* Check that a 2D mean with these parameters produces a reduction library call.
968+
* In practice, check that the library call appears in the code.
969+
*/
970+
TEST_F(PolyhedralMapperTest, Mean2DNonParametric_32threads) { // 2D mean over a fixed-size 36864 x 1024 input, mapped to 32 threads
971+
string tc = R"TC(
972+
def fun(float(36864, 1024) I) -> (O) {
973+
O(n) +=! I(n, r_n)
974+
O(n) = O(n) / (1024)
975+
}
976+
)TC"; // the TC sums I over r_n into O(n), then divides O(n) by the constant 1024
977+
auto mappingOptions =
978+
DefaultOptions()
979+
.outerScheduleFusionStrategy(tc::FusionStrategy::Preserve3Coincident)
980+
.outerScheduleAllowSkewing(false)
981+
.outerSchedulePositiveOrthant(true)
982+
.intraTileScheduleFusionStrategy(tc::FusionStrategy::Min)
983+
.intraTileScheduleAllowSkewing(false)
984+
.intraTileSchedulePositiveOrthant(true)
985+
.fixParametersBeforeScheduling(false)
986+
.tile(18, 32)
987+
.unroll(16)
988+
.tileImperfectlyNested(false)
989+
.matchLibraryCalls(true) // library-call matching is requested and the test expects a call to be emitted
990+
.mapToThreads({32}) // 32 threads; the only option that differs from Mean2DNonParametric_512threads
991+
.mapToBlocks({16384})
992+
.useSharedMemory(true)
993+
.usePrivateMemory(false)
994+
.unrollCopyShared(true);
995+
996+
auto code = codegenMapped(tc, mappingOptions); // codegenMapped is defined elsewhere; its result is searched as a string below
997+
using tc::code::cuda::kCUBReductionName;
998+
EXPECT_TRUE(code.find(kCUBReductionName) != std::string::npos); // passes only if kCUBReductionName occurs somewhere in the generated code
999+
}
1000+
9291001
static const string kTcMM = R"TC(
9301002
def fun(float(M, K) A, float(K, N) B) -> (C) {
9311003
C(m, n) +=! A(m, r_k) * B(r_k, n)

0 commit comments

Comments
 (0)