Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 7950b19

Browse files
author
Sven Verdoolaege
committed
Sum4D.CodeBeforeThreadMapping: perform promotion above thread mapping again
The original intent of this test appears to have been to evaluate the case where promotion is applied right above the mapping to threads. However, during the course of several changes in prehistory, the promotion ended up getting applied underneath this mapping to threads. Move the promotion back above the mapping to threads.
1 parent 1371777 commit 7950b19

File tree

1 file changed

+14
-10
lines changed

1 file changed

+14
-10
lines changed

test/test_cuda_mapper_memory_promotion.cc

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -143,22 +143,26 @@ TEST_F(Sum4D, CodeOuterBand) {
143143
EXPECT_GT(posSync4, posC);
144144
}
145145

146-
TEST_F(Sum4D, CodeBeforeThreadMapping) {
147-
auto declarations = {"__shared__ float32 _A_0[16][16][16][1];",
148-
"__shared__ float32 _B_0[16][16][16][1];",
149-
"__shared__ float32 _C_0[16][16][16][1];"};
146+
/*
147+
* Check code when promotion is performed above the mapping to threads.
148+
* Note that the copying code is not mapped to threads because
149+
* promoteEverythingAt does not call mapCopiesToThreads.
150+
*/
151+
TEST_F(Sum4D, CodeAboveThreadMapping) {
152+
auto declarations = {"__shared__ float32 _A_0[16][16][16][16];",
153+
"__shared__ float32 _B_0[16][16][16][16];",
154+
"__shared__ float32 _C_0[16][16][16][16];"};
150155
auto copyA =
151-
"_A_0[c4][c5][c6][0] = A[16 * b0 + c4][16 * b1 + c5][c2 + c6][t0 + c3];";
156+
"_A_0[c4][c5][c6][c7] = A[16 * b0 + c4][16 * b1 + c5][c2 + c6][c3 + c7]";
152157
auto copyB =
153-
"_B_0[c4][c5][c6][0] = B[16 * b0 + c4][16 * b1 + c5][c2 + c6][t0 + c3];";
158+
"_B_0[c4][c5][c6][c7] = B[16 * b0 + c4][16 * b1 + c5][c2 + c6][c3 + c7]";
154159
auto compute =
155-
"_C_0[c4][c5][c6][0] = (_A_0[c4][c5][c6][0] + _B_0[c4][c5][c6][0]);";
160+
"_C_0[c4][c5][c6][t0] = (_A_0[c4][c5][c6][t0] + _B_0[c4][c5][c6][t0]);";
156161
auto copyC =
157-
"C[16 * b0 + c4][16 * b1 + c5][c2 + c6][t0 + c3] = _C_0[c4][c5][c6][0];";
162+
"C[16 * b0 + c4][16 * b1 + c5][c2 + c6][c3 + c7] = _C_0[c4][c5][c6][c7];";
158163
auto sync = "__syncthreads()";
159164

160-
auto code =
161-
emitCode({256, 128, 192, 224}, {16, 16, 16, 16}, {0, 0, 0, 0, 0, 0});
165+
auto code = emitCode({256, 128, 192, 224}, {16, 16, 16, 16}, {0, 0, 0, 0});
162166

163167
// Order of copies may be arbitrary, but syncs must be inserted before and
164168
// after

0 commit comments

Comments
 (0)