@@ -459,25 +459,67 @@ class MatMulBias : public TestMapper {
459
459
public:
460
460
std::string emitCode (
461
461
const std::unordered_map<std::string, size_t >& parameters,
462
- const std::vector< size_t >& tileSizes ) {
462
+ const MappingOptions& mappingOptions ) {
463
463
std::string tc = R"TC(
464
464
def fun(float(N,K) A, float(K,M) B, float(N,M) C) -> (O) {
465
465
O(i,j) +=! A(i,k) * B(k,j)
466
466
O(i,j) = O(i,j) + C(i,j)
467
467
}
468
468
)TC" ;
469
469
470
- auto mappingOptions = MappingOptions::makeNaiveMappingOptions ()
471
- .tile (tileSizes)
472
- .useSharedMemory (false )
473
- .usePrivateMemory (true );
474
470
auto mscop = makeMappedScop (tc, mappingOptions, parameters);
475
471
return std::get<0 >(mscop->codegen (" fun" ));
476
472
}
477
473
};
478
474
479
475
// With shared memory disabled and private memory enabled, the generated code
// is expected to (per the messages below):
//   * declare a private copy `_O_0` of the output tensor O,
//   * copy O into `_O_0`, then copy `_O_0` back to O later on,
//   * not reference the global O element between those two copies,
//   * not declare private copies `_C_0` or `_A_0` for tensors C and A.
TEST_F(MatMulBias, RegisterPromotion) {
  auto mappingOptions = MappingOptions::makeNaiveMappingOptions()
                            .tile({32, 32, 32})
                            .useSharedMemory(false)
                            .usePrivateMemory(true);

  const std::string code =
      emitCode({{" N", 42}, {" M", 56}, {" K", 37}}, mappingOptions);

  const std::string globalRef = " O[32*b0 + c3][t0 + 32*b1]";
  const std::string copyTo = " _O_0[0][0] = O[32*b0 + c3][t0 + 32*b1]";
  const std::string copyFrom = " O[32*b0 + c3][t0 + 32*b1] = _O_0[0][0]";

  // These two checks are independent of the ordering checks below, so run
  // them first: a fatal failure further down must not hide them.
  const auto cDeclPos = code.find(" float32 _C_0");
  const auto aDeclPos = code.find(" float32 _A_0");
  EXPECT_TRUE(cDeclPos == std::string::npos)
      << " tensor C promoted to register but has no reuse";
  EXPECT_TRUE(aDeclPos == std::string::npos)
      << " tensor A promoted to register but has elements accessed by multiple threads";

  // Each search starts from the previous match. Stop at the first miss
  // instead of computing `npos + 1`, which wraps to 0 and would silently
  // restart the search from the beginning of the code.
  const auto declPos = code.find(" float32 _O_0");
  ASSERT_NE(declPos, std::string::npos) << " no declaration of the register";

  const auto copyToPos = code.find(copyTo, declPos + 1);
  ASSERT_NE(copyToPos, std::string::npos) << " expected copy to register";

  // Resume after the whole copy-in statement. Its right-hand side is itself
  // a global reference, so starting at `copyToPos + 1` would match inside
  // the copy-in and make the comparison below vacuous.
  const auto afterCopyTo = copyToPos + copyTo.size();
  const auto copyFromPos = code.find(copyFrom, afterCopyTo);
  ASSERT_NE(copyFromPos, std::string::npos)
      << " expected copy from register";

  // `copyFrom` begins with `globalRef`, so the first global reference after
  // the copy-in coincides with the copy-out exactly when nothing in between
  // touches the global array.
  const auto firstGlobalRefPos = code.find(globalRef, afterCopyTo);
  EXPECT_EQ(firstGlobalRefPos, copyFromPos)
      << " global array reference is used in main computation";
}
504
+
505
+ TEST_F (MatMulBias, RegisterPromotionSharedPreference) {
506
+ auto mappingOptions = MappingOptions::makeNaiveMappingOptions ()
507
+ .tile ({32 , 32 , 32 })
508
+ .maxSharedMemory (32768 )
509
+ .useSharedMemory (true )
510
+ .usePrivateMemory (true );
511
+
512
+ auto code = emitCode ({{" N" , 42 }, {" M" , 56 }, {" K" , 37 }}, mappingOptions);
513
+ auto declPos = code.find (" float32 _O_0[1][1]" );
514
+ auto cDeclPos = code.find (" float32 _C_0[1][1]" );
515
+ auto aDeclPos = code.find (" float32 _A_0[1][1]" );
516
+
517
+ EXPECT_TRUE (declPos == std::string::npos)
518
+ << " not expected promotion to register because promoted to shared" ;
519
+ EXPECT_TRUE (cDeclPos == std::string::npos)
520
+ << " tensor C promoted to register but has no reuse" ;
521
+ EXPECT_TRUE (aDeclPos == std::string::npos)
522
+ << " tensor A promoted to register but has elements accessed by multiple threads" ;
481
523
}
482
524
483
525
int main (int argc, char ** argv) {
0 commit comments