Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit e34fce7

Browse files
authored
Merge pull request #310 from facebookresearch/pr/Sum4D
Sum4D.CodeBeforeThreadMapping: perform promotion above thread mapping again
2 parents 858c1d8 + 7950b19 commit e34fce7

File tree

1 file changed

+14
-10
lines changed

1 file changed

+14
-10
lines changed

test/test_cuda_mapper_memory_promotion.cc

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -143,22 +143,26 @@ TEST_F(Sum4D, CodeOuterBand) {
143143
EXPECT_GT(posSync4, posC);
144144
}
145145

146-
TEST_F(Sum4D, CodeBeforeThreadMapping) {
147-
auto declarations = {"__shared__ float32 _A_0[16][16][16][1];",
148-
"__shared__ float32 _B_0[16][16][16][1];",
149-
"__shared__ float32 _C_0[16][16][16][1];"};
146+
/*
147+
* Check code when promotion is performed above the mapping to threads.
148+
* Note that the copying code is not mapped to threads because
149+
* promoteEverythingAt does not call mapCopiesToThreads.
150+
*/
151+
TEST_F(Sum4D, CodeAboveThreadMapping) {
152+
auto declarations = {"__shared__ float32 _A_0[16][16][16][16];",
153+
"__shared__ float32 _B_0[16][16][16][16];",
154+
"__shared__ float32 _C_0[16][16][16][16];"};
150155
auto copyA =
151-
"_A_0[c4][c5][c6][0] = A[16 * b0 + c4][16 * b1 + c5][c2 + c6][t0 + c3];";
156+
"_A_0[c4][c5][c6][c7] = A[16 * b0 + c4][16 * b1 + c5][c2 + c6][c3 + c7]";
152157
auto copyB =
153-
"_B_0[c4][c5][c6][0] = B[16 * b0 + c4][16 * b1 + c5][c2 + c6][t0 + c3];";
158+
"_B_0[c4][c5][c6][c7] = B[16 * b0 + c4][16 * b1 + c5][c2 + c6][c3 + c7]";
154159
auto compute =
155-
"_C_0[c4][c5][c6][0] = (_A_0[c4][c5][c6][0] + _B_0[c4][c5][c6][0]);";
160+
"_C_0[c4][c5][c6][t0] = (_A_0[c4][c5][c6][t0] + _B_0[c4][c5][c6][t0]);";
156161
auto copyC =
157-
"C[16 * b0 + c4][16 * b1 + c5][c2 + c6][t0 + c3] = _C_0[c4][c5][c6][0];";
162+
"C[16 * b0 + c4][16 * b1 + c5][c2 + c6][c3 + c7] = _C_0[c4][c5][c6][c7];";
158163
auto sync = "__syncthreads()";
159164

160-
auto code =
161-
emitCode({256, 128, 192, 224}, {16, 16, 16, 16}, {0, 0, 0, 0, 0, 0});
165+
auto code = emitCode({256, 128, 192, 224}, {16, 16, 16, 16}, {0, 0, 0, 0});
162166

163167
// Order of copies may be arbitrary, but syncs must be inserted before and
164168
// after

0 commit comments

Comments
 (0)