Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit f084cfe

Browse files
authored
Merge pull request #442 from math-fehr/reduction-detection-bug
More reduction detection tests.
2 parents e1d5c6e + e632eb8 commit f084cfe

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

test/test_cuda_mapper.cc

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,78 @@ def fun(float(N, K) I, float(N) O0) -> (O) {
926926
)TC");
927927
}
928928

929+
/*
930+
* Check that a 2D mean with these parameters does not produce a library call.
931+
* The call is not produced because the band is tiled by 32 and 512 threads are
932+
* mapped to the band.
933+
* In practice, check that the library call does not appear in the code.
934+
*/
935+
TEST_F(PolyhedralMapperTest, Mean2DNonParametric_512threads) {
936+
string tc = R"TC(
937+
def fun(float(36864, 1024) I) -> (O) {
938+
O(n) +=! I(n, r_n)
939+
O(n) = O(n) / (1024)
940+
}
941+
)TC";
942+
auto mappingOptions =
943+
DefaultOptions()
944+
.outerScheduleFusionStrategy(tc::FusionStrategy::Preserve3Coincident)
945+
.outerScheduleAllowSkewing(false)
946+
.outerSchedulePositiveOrthant(true)
947+
.intraTileScheduleFusionStrategy(tc::FusionStrategy::Min)
948+
.intraTileScheduleAllowSkewing(false)
949+
.intraTileSchedulePositiveOrthant(true)
950+
.fixParametersBeforeScheduling(false)
951+
.tile(18, 32)
952+
.unroll(16)
953+
.tileImperfectlyNested(false)
954+
.matchLibraryCalls(true)
955+
.mapToThreads({512})
956+
.mapToBlocks({16384})
957+
.useSharedMemory(true)
958+
.usePrivateMemory(false)
959+
.unrollCopyShared(true);
960+
961+
auto code = codegenMapped(tc, mappingOptions);
962+
using tc::code::cuda::kCUBReductionName;
963+
EXPECT_TRUE(code.find(kCUBReductionName) == std::string::npos);
964+
}
965+
966+
/*
967+
* Check that a 2D mean with these parameters produce a reduction library call.
968+
* In practice, check that the library call appears in the code.
969+
*/
970+
TEST_F(PolyhedralMapperTest, Mean2DNonParametric_32threads) {
971+
string tc = R"TC(
972+
def fun(float(36864, 1024) I) -> (O) {
973+
O(n) +=! I(n, r_n)
974+
O(n) = O(n) / (1024)
975+
}
976+
)TC";
977+
auto mappingOptions =
978+
DefaultOptions()
979+
.outerScheduleFusionStrategy(tc::FusionStrategy::Preserve3Coincident)
980+
.outerScheduleAllowSkewing(false)
981+
.outerSchedulePositiveOrthant(true)
982+
.intraTileScheduleFusionStrategy(tc::FusionStrategy::Min)
983+
.intraTileScheduleAllowSkewing(false)
984+
.intraTileSchedulePositiveOrthant(true)
985+
.fixParametersBeforeScheduling(false)
986+
.tile(18, 32)
987+
.unroll(16)
988+
.tileImperfectlyNested(false)
989+
.matchLibraryCalls(true)
990+
.mapToThreads({32})
991+
.mapToBlocks({16384})
992+
.useSharedMemory(true)
993+
.usePrivateMemory(false)
994+
.unrollCopyShared(true);
995+
996+
auto code = codegenMapped(tc, mappingOptions);
997+
using tc::code::cuda::kCUBReductionName;
998+
EXPECT_TRUE(code.find(kCUBReductionName) != std::string::npos);
999+
}
1000+
9291001
static const string kTcMM = R"TC(
9301002
def fun(float(M, K) A, float(K, N) B) -> (C) {
9311003
C(m, n) +=! A(m, r_k) * B(r_k, n)

0 commit comments

Comments
 (0)