From 8e626e767750e68304aa3f3740edf757fc7efaca Mon Sep 17 00:00:00 2001
From: cyy
Date: Tue, 1 Jul 2025 10:39:23 -0700
Subject: [PATCH] Use static functions/variables if possible (#4423)

Summary:
X-link: https://github.com/facebookresearch/FBGEMM/pull/1494

There are two changes:
1. Mark in-file templates and other file-local functions as static. Giving
   these symbols internal linkage opens up more optimization opportunities,
   for example once link-time optimization is enabled. An unused function
   in test code is removed.
2. Enable the `misc-use-internal-linkage` clang-tidy check.
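As a minimal sketch of the pattern applied throughout the diff (illustrative
only; `scale_row` is a hypothetical name, not a function from this codebase):

    // Before: the helper has external linkage, so the compiler must assume
    // other translation units may reference it and keep it callable.
    void scale_row(float* row, int n, float s);

    // After: `static` gives it internal linkage; misc-use-internal-linkage
    // is satisfied, and the optimizer is free to inline every call site and
    // drop the out-of-line definition entirely.
    static void scale_row(float* row, int n, float s);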
Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/4423

Reviewed By: gchalump

Differential Revision: D77601071

Pulled By: q10
---
 .clang-tidy                                   |  2 +
 bench/BenchUtils.cc                           | 10 ++--
 bench/ConvUnifiedBenchmark.cc                 | 10 ++--
 bench/ConvertBenchmark.cc                     |  2 +-
 bench/EmbeddingIndexRemappingBenchmark.cc     |  2 +-
 bench/EmbeddingQuantizeBenchmark.cc           |  2 +-
 ...dingQuantizeFloatToFloatOrHalfBenchmark.cc |  2 +-
 bench/EmbeddingSpMDM8BitBenchmark.cc          | 15 +++---
 bench/EmbeddingSpMDMBenchmark.cc              |  2 +-
 bench/EmbeddingSpMDMNBit2Benchmark.cc         | 15 +----
 bench/EmbeddingSpMDMNBitBenchmark.cc          | 13 ++---
 ...mbeddingSpMDMNBitRowWiseSparseBenchmark.cc | 13 ++---
 bench/GEMMsBenchmark.cc                       |  7 +--
 bench/GEMMsTunableBenchmark.cc                |  2 +-
 bench/GroupwiseConvRequantizeBenchmark.cc     |  2 +-
 bench/Im2ColFusedRequantizeBenchmark.cc       |  2 +-
 bench/PackedFloatInOutBenchmark.cc            |  2 +-
 bench/PackedRequantizeAcc16Benchmark.cc       |  2 +-
 bench/PackedRequantizeAcc32Benchmark.cc       |  2 +-
 bench/RequantizeBenchmark.cc                  |  2 +-
 bench/RowOffsetBenchmark.cc                   |  2 +-
 bench/RowwiseAdagradBenchmark.cc              |  4 +-
 bench/RowwiseAdagradFusedBenchmark.cc         |  2 +-
 bench/SparseAdagradBenchmark.cc               |  2 +-
 bench/TransposeBenchmark.cc                   |  2 +-
 src/FbgemmConv.cc                             |  4 +-
 src/FbgemmI8DepthwisePerChannelQuantAvx2.cc   |  2 +-
 src/GroupwiseConv.cc                          | 10 ++--
 src/PackAWithIm2Col.cc                        |  2 +-
 src/PackWeightsForDirectConv.cc               |  2 +-
 src/RefImplementations.cc                     |  2 +-
 src/UtilsAvx512.cc                            |  4 +-
 test/EmbeddingSpMDM8BitTest.cc                |  2 +-
 test/EmbeddingSpMDMNBitTest.cc                |  2 +-
 test/EmbeddingSpMDMTest.cc                    |  2 +-
 test/GConvTest.cc                             |  8 +--
 test/I8DirectconvTest.cc                      | 53 +++----
 test/I8SpmdmTest.cc                           |  2 +-
 test/Im2ColFusedRequantizeTest.cc             |  4 +-
 test/PackedRequantizeAcc16Test.cc             |  4 +-
 test/PackedRequantizeTest.cc                  |  4 +-
 test/QuantUtilsTest.cc                        | 12 ++---
 test/RequantizeOnlyTest.cc                    |  2 +-
 test/RowWiseSparseAdagradFusedTest.cc         |  2 +-
 test/SparseAdagradTest.cc                     |  2 +-
 test/SparseDenseMMInt8Test.cc                 |  2 +-
 test/TransposeTest.cc                         |  2 +-
 test/TransposedRequantizeTest.cc              |  2 +-
 test/UniConvTest.cc                           |  6 +--
 49 files changed, 104 insertions(+), 154 deletions(-)

diff --git a/.clang-tidy b/.clang-tidy
index 559268a4c8..b6ca147324 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -4,8 +4,10 @@
 # Get options for config files in parent directories,
 # but override them if there's a conflict.
 InheritParentConfig: true
+# NOLINT(clang-tidy-config-check-not-enabled)
 Checks: '
 bugprone-argument-comment,
+misc-use-internal-linkage,
 '
 CheckOptions:
   - key: facebook-cuda-safe-api-call-check.HandlerName
diff --git a/bench/BenchUtils.cc b/bench/BenchUtils.cc
index 02fc6ba1bb..a2edf4e303 100644
--- a/bench/BenchUtils.cc
+++ b/bench/BenchUtils.cc
@@ -19,22 +19,22 @@
 namespace fbgemm {
 
-std::default_random_engine eng;
+static std::default_random_engine eng;
 
 template <typename T>
-void randFill(aligned_vector<T>& vec, T low, T high, std::true_type) {
+static void randFill(aligned_vector<T>& vec, T low, T high, std::true_type) {
   std::uniform_int_distribution dis(low, high);
   std::generate(vec.begin(), vec.end(), [&] { return dis(eng); });
 }
 
 template <typename T>
-void randFill(aligned_vector<T>& vec, T low, T high, std::false_type) {
+static void randFill(aligned_vector<T>& vec, T low, T high, std::false_type) {
   std::uniform_real_distribution dis(low, high);
   std::generate(vec.begin(), vec.end(), [&] { return dis(eng); });
 }
 
 template <typename T>
-void randFill(aligned_vector<T>& vec, T low, T high) {
+static void randFill(aligned_vector<T>& vec, T low, T high) {
   randFill(vec, low, high, std::is_integral<T>());
 }
 
@@ -165,7 +165,7 @@ aligned_vector<float> getRandomSparseVector(
 }
 
 template <typename T>
-aligned_vector<T> getRandomBlockSparseMatrix(
+static aligned_vector<T> getRandomBlockSparseMatrix(
     int Rows,
     int Cols,
     float fractionNonZerosBlocks,
diff --git a/bench/ConvUnifiedBenchmark.cc b/bench/ConvUnifiedBenchmark.cc
index f0d80dc725..958075592b 100644
--- a/bench/ConvUnifiedBenchmark.cc
+++ b/bench/ConvUnifiedBenchmark.cc
@@ -28,7 +28,7 @@ using namespace fbgemm;
 
 // clang-format off
 // 1D conv shapes
-vector<conv_param_t<1>> shapes_1d = {
+static vector<conv_param_t<1>> shapes_1d = {
   // MB, IC, OC, IW, G, KW, stride_w, pad_w_left, pad_w_right,
   // (dilation, output_padding_w, tranpose)
   // regular
@@ -46,7 +46,7 @@ vector<conv_param_t<1>> shapes_1d = {
 };
 
 // 2D conv shapes
-vector<conv_param_t<2>> shapes_2d = {
+static vector<conv_param_t<2>> shapes_2d = {
   // MB, IC, OC, IH, IW, G, KH, KW, stride_h, stride_w,
   // pad_h_top, pad_w_left, pad_h_bottom, pad_w_right,
   // (dilation_h, dilation_w, output_padding_h, output_padding_w, tranpose)
@@ -84,7 +84,7 @@ vector<conv_param_t<2>> shapes_2d = {
       {1, 1},
       {0, 0, 0, 0})
 };
 
-vector<conv_param_t<2>> shapes_2d_resnext_101 = {
+static vector<conv_param_t<2>> shapes_2d_resnext_101 = {
   // ResNext-101 (unique shapes only)
   // conv_param_t<>(N, C, M, H, W, groups, /* kern */ {KH, KW}, /* stride */
   // {stride_h, stride_w}, /* padding pad_l = pad_h */ {pad_l, pad_l, pad_l, pad_l}, /* dialation */
@@ -143,7 +143,7 @@ vector<conv_param_t<2>> shapes_2d_resnext_101 = {
 };
 
 // 3D conv shapes
-vector<conv_param_t<3>> shapes_3d = {
+static vector<conv_param_t<3>> shapes_3d = {
   // MB, IC, OC, {IT, IH, IW}, G, {KT, KH, KW}, {stride_t, stride_h,
   // stride_w},
   // {pad_prev, pad_h_top, pad_w_left, pad_next, pad_h_bottom, pad_w_right},
@@ -216,7 +216,7 @@ vector<conv_param_t<3>> shapes_3d = {
 // clang-format on
 
 template <int SPATIAL_DIM, typename Acc_t>
-void performance_test(
+static void performance_test(
     const vector<conv_param_t<SPATIAL_DIM>>& shapes,
     bool flush,
     int repetitions) {
diff --git a/bench/ConvertBenchmark.cc b/bench/ConvertBenchmark.cc
index e631788447..3287987e1a 100644
--- a/bench/ConvertBenchmark.cc
+++ b/bench/ConvertBenchmark.cc
@@ -21,7 +21,7 @@ using namespace std;
 using namespace fbgemm;
 
-void performance_test() {
+static void performance_test() {
   constexpr int NWARMUP = 4;
   constexpr int NITER = 256;
diff --git a/bench/EmbeddingIndexRemappingBenchmark.cc b/bench/EmbeddingIndexRemappingBenchmark.cc
index 82310f4620..a28adbd58b 100644
--- a/bench/EmbeddingIndexRemappingBenchmark.cc
+++ b/bench/EmbeddingIndexRemappingBenchmark.cc
@@ -37,7 +37,7 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-int run_benchmark(
+static int run_benchmark(
     int batch_size,
     int num_rows,
     int average_len,
diff --git a/bench/EmbeddingQuantizeBenchmark.cc b/bench/EmbeddingQuantizeBenchmark.cc
index cd255cbf78..6b03317fb8 100644
--- a/bench/EmbeddingQuantizeBenchmark.cc
+++ b/bench/EmbeddingQuantizeBenchmark.cc
@@ -25,7 +25,7 @@ using namespace fbgemm;
 
 // T is the type of scale and bias
 template <typename T>
-void performance_test() {
+static void performance_test() {
   constexpr int NWARMUP = 4;
   constexpr int NITER = 256;
diff --git a/bench/EmbeddingQuantizeFloatToFloatOrHalfBenchmark.cc b/bench/EmbeddingQuantizeFloatToFloatOrHalfBenchmark.cc
index 79c8f79eb1..6e18f608ad 100644
--- a/bench/EmbeddingQuantizeFloatToFloatOrHalfBenchmark.cc
+++ b/bench/EmbeddingQuantizeFloatToFloatOrHalfBenchmark.cc
@@ -25,7 +25,7 @@ using namespace fbgemm;
 
 // T is the type of scale and bias
 template <typename T>
-void performance_test() {
+static void performance_test() {
   constexpr int NWARMUP = 4;
   constexpr int NITER = 256;
diff --git a/bench/EmbeddingSpMDM8BitBenchmark.cc b/bench/EmbeddingSpMDM8BitBenchmark.cc
index 2e741188c1..f8774fef01 100644
--- a/bench/EmbeddingSpMDM8BitBenchmark.cc
+++ b/bench/EmbeddingSpMDM8BitBenchmark.cc
@@ -30,15 +30,16 @@ using namespace std;
 using namespace fbgemm;
 
-void print_fused_table(int rows, int embedding_dim, const uint8_t* table) {
-  for (int i = 0; i < rows; i++) {
-    cout << "row: " << i << " : " << endl;
-    for (int ii = 0; ii < embedding_dim; ii++) {
-      cout << (int)table[i * (embedding_dim + 2 * sizeof(float)) + ii] << ",";
+/*
+static void print_fused_table(int rows, int embedding_dim, const uint8_t* table)
+{ for (int i = 0; i < rows; i++) { cout << "row: " << i << " : " << endl; for
+(int ii = 0; ii < embedding_dim; ii++) { cout << (int)table[i * (embedding_dim +
+2 * sizeof(float)) + ii] << ",";
     }
     cout << endl;
   }
 }
+*/
 
 static vector<vector<int>> GetInputs_() {
   vector<vector<int>> input_dims = {
@@ -58,10 +59,10 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-vector benchmarkTimes;
+static vector benchmarkTimes;
 
 template
-int run_benchmark(
+static int run_benchmark(
     int batch_size,
     int num_rows,
     int embedding_dim,
diff --git a/bench/EmbeddingSpMDMBenchmark.cc b/bench/EmbeddingSpMDMBenchmark.cc
index f74f83030f..87d7c4ff21 100644
--- a/bench/EmbeddingSpMDMBenchmark.cc
+++ b/bench/EmbeddingSpMDMBenchmark.cc
@@ -49,7 +49,7 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-void run_benchmark(
+static void run_benchmark(
     int batch_size,
     int num_rows,
     int embedding_dim,
diff --git a/bench/EmbeddingSpMDMNBit2Benchmark.cc b/bench/EmbeddingSpMDMNBit2Benchmark.cc
index 6ef46d96bb..21de13cfa2 100644
--- a/bench/EmbeddingSpMDMNBit2Benchmark.cc
+++ b/bench/EmbeddingSpMDMNBit2Benchmark.cc
@@ -171,17 +171,6 @@ static void print_benchmark_results() {
   }
 }
 
-void print_fused_table(int rows, int embedding_dim, const uint8_t* table) {
-  for (int i = 0; i < rows; i++) {
-    std::cout << "row: " << i << " : " << std::endl;
-    for (int ii = 0; ii < embedding_dim; ii++) {
-      std::cout << (int)table[i * (embedding_dim + 2 * sizeof(float)) + ii]
-                << ",";
-    }
-    std::cout << std::endl;
-  }
-}
-
 static vector<vector<int>> GetInputs_() {
   vector<vector<int>> input_dims = {
       // batch size, number of rows of table, emb dim , avg lengthl
@@ -200,7 +189,7 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-int run_benchmark(
+static int run_benchmark(
     int bit_rate,
     int batch_size,
     int num_rows,
@@ -488,7 +477,7 @@ int run_benchmark(
   return 0;
 }
 
-void sweep_benchmark(KernelType kern_type) {
+static void sweep_benchmark(KernelType kern_type) {
   int batch_size;
   int num_rows;
   int embedding_dim;
diff --git a/bench/EmbeddingSpMDMNBitBenchmark.cc b/bench/EmbeddingSpMDMNBitBenchmark.cc
index ab99f7fba7..06610f3d57 100644
--- a/bench/EmbeddingSpMDMNBitBenchmark.cc
+++ b/bench/EmbeddingSpMDMNBitBenchmark.cc
@@ -32,16 +32,17 @@ using namespace std;
 using namespace fbgemm;
 
-void print_fused_table(int rows, int embedding_dim, const uint8_t* table) {
-  for (int i = 0; i < rows; i++) {
-    std::cout << "row: " << i << " : " << std::endl;
-    for (int ii = 0; ii < embedding_dim; ii++) {
-      std::cout << (int)table[i * (embedding_dim + 2 * sizeof(float)) + ii]
+/*
+static void print_fused_table(int rows, int embedding_dim, const uint8_t* table)
+{ for (int i = 0; i < rows; i++) { std::cout << "row: " << i << " : " <<
+std::endl; for (int ii = 0; ii < embedding_dim; ii++) { std::cout <<
+(int)table[i * (embedding_dim + 2 * sizeof(float)) + ii]
                 << ",";
     }
     std::cout << std::endl;
   }
 }
+*/
 
 static vector<vector<int>> GetInputs_() {
   vector<vector<int>> input_dims = {
@@ -62,7 +63,7 @@ static vector<vector<int>> GetInputs_() {
 }
 
 template
-int run_benchmark(
+static int run_benchmark(
     int bit_rate,
     int batch_size,
     int num_rows,
diff --git a/bench/EmbeddingSpMDMNBitRowWiseSparseBenchmark.cc b/bench/EmbeddingSpMDMNBitRowWiseSparseBenchmark.cc
index 6fac4c3f3c..1dd102f636 100644
--- a/bench/EmbeddingSpMDMNBitRowWiseSparseBenchmark.cc
+++ b/bench/EmbeddingSpMDMNBitRowWiseSparseBenchmark.cc
@@ -31,16 +31,17 @@ using namespace std;
 using namespace fbgemm;
 
-void print_fused_table(int rows, int embedding_dim, const uint8_t* table) {
-  for (int i = 0; i < rows; i++) {
-    std::cout << "row: " << i << " : " << std::endl;
-    for (int ii = 0; ii < embedding_dim; ii++) {
-      std::cout << (int)table[i * (embedding_dim + 2 * sizeof(float)) + ii]
+/*
+static void print_fused_table(int rows, int embedding_dim, const uint8_t* table)
+{ for (int i = 0; i < rows; i++) { std::cout << "row: " << i << " : " <<
+std::endl; for (int ii = 0; ii < embedding_dim; ii++) { std::cout <<
+(int)table[i * (embedding_dim + 2 * sizeof(float)) + ii]
                 << ",";
     }
     std::cout << std::endl;
   }
 }
+*/
 
 static vector<vector<int>> GetInputs_() {
   vector<vector<int>> input_dims = {
@@ -60,7 +61,7 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-int run_benchmark(
+static int run_benchmark(
     int bit_rate,
     int batch_size,
     int num_rows,
diff --git a/bench/GEMMsBenchmark.cc b/bench/GEMMsBenchmark.cc
index 19b7945e8f..3189b189fa 100644
--- a/bench/GEMMsBenchmark.cc
+++ b/bench/GEMMsBenchmark.cc
@@ -29,11 +29,8 @@
 using namespace std;
 using namespace fbgemm;
 
-void performance_test(
-    const int M,
-    const int N,
-    const int K,
-    const bool timebreak) {
+static void
+performance_test(const int M, const int N, const int K, const bool timebreak) {
   // clang-format off
   const vector<vector<int>> shapes = {
       // NOTE: clang-format wants to use a different formatting but the current
diff --git a/bench/GEMMsTunableBenchmark.cc b/bench/GEMMsTunableBenchmark.cc
index 079e943304..19e262efd1 100644
--- a/bench/GEMMsTunableBenchmark.cc
+++ b/bench/GEMMsTunableBenchmark.cc
@@ -27,7 +27,7 @@ using namespace std;
 using namespace fbgemm;
 
-void performance_test(
+static void performance_test(
    const BlockingFactors* tuning_params,
    set<vector<int>>& incorrect_configs,
    const vector<int>& shape,
diff --git a/bench/GroupwiseConvRequantizeBenchmark.cc b/bench/GroupwiseConvRequantizeBenchmark.cc
index dac4446ae2..7043bb5bdd 100644
--- a/bench/GroupwiseConvRequantizeBenchmark.cc
+++ b/bench/GroupwiseConvRequantizeBenchmark.cc
@@ -25,7 +25,7 @@ using namespace std;
 using namespace fbgemm;
 
-void performance_test() {
+static void performance_test() {
   // clang-format off
   const vector<conv_param_t<2>> shapes = {
       // MB, IC, OC, {IH, IW}, G, {KH, KW}, {stride_h, stride_w}, pad_t, pad_l,
diff --git a/bench/Im2ColFusedRequantizeBenchmark.cc b/bench/Im2ColFusedRequantizeBenchmark.cc
index 0b724efc5d..906126c106 100644
--- a/bench/Im2ColFusedRequantizeBenchmark.cc
+++ b/bench/Im2ColFusedRequantizeBenchmark.cc
@@ -26,7 +26,7 @@ using namespace std;
 using namespace fbgemm;
 
 template
-void performance_test() {
+static void performance_test() {
   vector<conv_param_t<2>> shapes = {
       // MB, IC, OC, IH, IW, G, KH, KW, stride_h, stride_w,
      // pad_h_top, pad_w_left, pad_h_bottom, pad_w_right
diff --git a/bench/PackedFloatInOutBenchmark.cc b/bench/PackedFloatInOutBenchmark.cc
index b4e031fc5e..805fc9b3e1 100644
--- a/bench/PackedFloatInOutBenchmark.cc
+++ b/bench/PackedFloatInOutBenchmark.cc
@@ -29,7 +29,7 @@ using namespace std;
 using namespace fbgemm;
 
-void performance_test() {
+static void performance_test() {
   // clang-format off
   const vector<vector<int>> shapes = {
       // NOTE: clang-format wants to use a different formatting but the current
diff --git a/bench/PackedRequantizeAcc16Benchmark.cc b/bench/PackedRequantizeAcc16Benchmark.cc
index 33904cd369..e8a91f499d 100644
--- a/bench/PackedRequantizeAcc16Benchmark.cc
+++ b/bench/PackedRequantizeAcc16Benchmark.cc
@@ -38,7 +38,7 @@ enum class BenchmarkType {
   EVERYTHING, // row-offset in input packing, and requantization + spmdm
 };
 
-void performance_test() {
+static void performance_test() {
   // clang-format off
   vector<vector<int>> shapes = {
       // NOTE: clang-format wants to use a different formatting but the current
diff --git a/bench/PackedRequantizeAcc32Benchmark.cc b/bench/PackedRequantizeAcc32Benchmark.cc
index 66b8849697..04c3ca530b 100644
--- a/bench/PackedRequantizeAcc32Benchmark.cc
+++ b/bench/PackedRequantizeAcc32Benchmark.cc
@@ -29,7 +29,7 @@ using namespace std;
 using namespace fbgemm;
 
-void performance_test() {
+static void performance_test() {
   // clang-format off
   vector<vector<int>> shapes = {
       // NOTE: clang-format wants to use a different formatting but the current
diff --git a/bench/RequantizeBenchmark.cc b/bench/RequantizeBenchmark.cc
index 9b2a209fcd..22d589f211 100644
--- a/bench/RequantizeBenchmark.cc
+++ b/bench/RequantizeBenchmark.cc
@@ -29,7 +29,7 @@ enum class BenchmarkType {
   PER_CHANNEL,
 };
 
-void performance_test() {
+static void performance_test() {
   constexpr int NWARMUP = 4;
   constexpr int NITER = 256;
diff --git a/bench/RowOffsetBenchmark.cc b/bench/RowOffsetBenchmark.cc
index 495c9b1271..d781b99362 100644
--- a/bench/RowOffsetBenchmark.cc
+++ b/bench/RowOffsetBenchmark.cc
@@ -22,7 +22,7 @@ using namespace std;
 using namespace fbgemm;
 
-void performance_test() {
+static void performance_test() {
   constexpr int NWARMUP = 4;
   constexpr int NITER = 256;
diff --git a/bench/RowwiseAdagradBenchmark.cc b/bench/RowwiseAdagradBenchmark.cc
index dec6a41319..ab389354d9 100644
--- a/bench/RowwiseAdagradBenchmark.cc
+++ b/bench/RowwiseAdagradBenchmark.cc
@@ -36,9 +36,9 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-vector<int> prefetch_distances{16};
+static vector<int> prefetch_distances{16};
 
-void run_benchmark(
+static void run_benchmark(
     const int num_rows, // number of rows reading
     const int block_size, // number of parameters per row
     const uint64_t param_size, // total number of parameters
diff --git a/bench/RowwiseAdagradFusedBenchmark.cc b/bench/RowwiseAdagradFusedBenchmark.cc
index a01d4f8cc3..033a74ca22 100644
--- a/bench/RowwiseAdagradFusedBenchmark.cc
+++ b/bench/RowwiseAdagradFusedBenchmark.cc
@@ -42,7 +42,7 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-void run_benchmark(
+static void run_benchmark(
     int batch_size,
     int num_rows,
     int embedding_dim,
diff --git a/bench/SparseAdagradBenchmark.cc b/bench/SparseAdagradBenchmark.cc
index cc75341e6a..cc2d6ab517 100644
--- a/bench/SparseAdagradBenchmark.cc
+++ b/bench/SparseAdagradBenchmark.cc
@@ -40,7 +40,7 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-void run_benchmark(
+static void run_benchmark(
     const int num_rows, // number of rows reading
     const int block_size, // number of parameters per row
     const uint64_t param_size, // total number of parameters
diff --git a/bench/TransposeBenchmark.cc b/bench/TransposeBenchmark.cc
index ceefd58aab..5a6ccb9257 100644
--- a/bench/TransposeBenchmark.cc
+++ b/bench/TransposeBenchmark.cc
@@ -21,7 +21,7 @@ using namespace std;
 using namespace fbgemm;
 
 template
-void performance_test() {
+static void performance_test() {
   constexpr int NWARMUP = 4;
   constexpr int NITER = 256;
diff --git a/src/FbgemmConv.cc b/src/FbgemmConv.cc
index f06d993747..bf80711258 100644
--- a/src/FbgemmConv.cc
+++ b/src/FbgemmConv.cc
@@ -64,12 +64,12 @@ bool takePointWiseFastPath(const conv_param_t<SPATIAL_DIM>& conv_p) {
 }
 
 template <int SPATIAL_DIM>
-bool take1DFastPath(const conv_param_t<SPATIAL_DIM>& conv_p) {
+static bool take1DFastPath(const conv_param_t<SPATIAL_DIM>& conv_p) {
   return false && !conv_p.transposed;
 }
 
 template <int SPATIAL_DIM>
-bool takeDirectConvPath(const conv_param_t<SPATIAL_DIM>& conv_p) {
+static bool takeDirectConvPath(const conv_param_t<SPATIAL_DIM>& conv_p) {
   // Note: Direct convolutions (2D) are optimized for
   // filter size: 2 x 1 to 2 x 6, transposed conv,
   // in_channel % 8 == 0, out_channel % 8 == 0
diff --git a/src/FbgemmI8DepthwisePerChannelQuantAvx2.cc b/src/FbgemmI8DepthwisePerChannelQuantAvx2.cc
index ad86d38cf9..fef4575762 100644
--- a/src/FbgemmI8DepthwisePerChannelQuantAvx2.cc
+++ b/src/FbgemmI8DepthwisePerChannelQuantAvx2.cc
@@ -15,7 +15,7 @@ namespace fbgemm {
 
 // Old interface
 template
-void depthwise_2d_per_channel_quantization_same_pad(
+static void depthwise_2d_per_channel_quantization_same_pad(
     int N,
     int H,
     int W,
diff --git a/src/GroupwiseConv.cc b/src/GroupwiseConv.cc
index 0ee4d42031..5f3f997156 100644
--- a/src/GroupwiseConv.cc
+++ b/src/GroupwiseConv.cc
@@ -27,7 +27,7 @@ namespace fbgemm {
 using namespace std;
 
 template <int SPATIAL_DIM>
-void calculateRowOffsets(
+static void calculateRowOffsets(
     const conv_param_t<SPATIAL_DIM>& conv_param,
     const uint8_t* activations,
     int32_t* rowOffsetBuf,
@@ -67,7 +67,7 @@ void calculateRowOffsets(
 }
 
 template <int SPATIAL_DIM>
-kernel_sig_t getKernelSig(
+static kernel_sig_t getKernelSig(
     const conv_param_t<SPATIAL_DIM>& conv_param,
     bool isAZeroPointZero,
     bool needRowOffset,
@@ -104,7 +104,7 @@ kernel_sig_t getKernelSig(
 }
 
 template <int SPATIAL_DIM>
-jit_conv_kernel_fp getOrCreateConvKernel(
+static jit_conv_kernel_fp getOrCreateConvKernel(
     const conv_param_t<SPATIAL_DIM>& conv_param,
     int a_zero_point,
     bool needRowOffset,
@@ -808,7 +808,7 @@ void fbgemmGroupwiseConv(
  * This function does exactly the same compute as the JIT'ed kernel
  */
 template <int SPATIAL_DIM>
-void kernel_compute(
+static void kernel_compute(
     const conv_param_t<SPATIAL_DIM>& conv_p,
     const uint8_t* in_acts,
     int8_t* wghts,
@@ -879,7 +879,7 @@ void kernel_compute(
 }
 
 template
-void dispatchOutputProcessing(
+static void dispatchOutputProcessing(
     const processOutputType& outProcess,
     int32_t* rowOffsetBuf,
     outT* out,
diff --git a/src/PackAWithIm2Col.cc b/src/PackAWithIm2Col.cc
index 76675ef431..25584fa454 100644
--- a/src/PackAWithIm2Col.cc
+++ b/src/PackAWithIm2Col.cc
@@ -123,7 +123,7 @@ PackAWithIm2Col<T, accT, SPATIAL_DIM>::PackAWithIm2Col(
 }
 
 template <typename T, typename accT, int SPATIAL_DIM>
-void pack_a_with_im2col_opt(
+static void pack_a_with_im2col_opt(
     const conv_param_t<SPATIAL_DIM>& conv_p,
     const block_type_t& block,
     const uint8_t* sdata,
diff --git a/src/PackWeightsForDirectConv.cc b/src/PackWeightsForDirectConv.cc
index 6709e1fdc0..592b70d6c0 100644
--- a/src/PackWeightsForDirectConv.cc
+++ b/src/PackWeightsForDirectConv.cc
@@ -161,7 +161,7 @@ PackedDirectConvMatrix::col_offsets_with_zero_pt_s8acc32_DirectConvT<3>(
     int ncols_per_quant_group);
 
 template <int SPATIAL_DIM>
-void directConvRowSum(
+static void directConvRowSum(
     const conv_param_t<SPATIAL_DIM>& conv_p,
     const uint8_t* A,
     int32_t* inSum,
diff --git a/src/RefImplementations.cc b/src/RefImplementations.cc
index 342572dd5a..92fb200645 100644
--- a/src/RefImplementations.cc
+++ b/src/RefImplementations.cc
@@ -33,7 +33,7 @@ using fint32 = union {
 //
 // Return a random 32bit integer using xoshiro128++
 // http://prng.di.unimi.it/xoshiro128plusplus.c
-inline uint32_t rnd128_next(int idx, int vlen) {
+static inline uint32_t rnd128_next(int idx, int vlen) {
   constexpr int VLEN_MAX = 16; // max vector size
   alignas(64) static thread_local uint32_t g_rnd128_buffer[4 * VLEN_MAX];
   static thread_local bool g_rnd128_initialized = false;
diff --git a/src/UtilsAvx512.cc b/src/UtilsAvx512.cc
index 022c39ecb6..a354ad200c 100644
--- a/src/UtilsAvx512.cc
+++ b/src/UtilsAvx512.cc
@@ -1485,7 +1485,7 @@ static inline void transpose_contiguous_32x2_block(
 }
 
 template
-void transpose_16x16_block(
+static void transpose_16x16_block(
     const uint16_t* src,
     int64_t ld_src,
     uint16_t* dst,
@@ -1611,7 +1611,7 @@ void transpose_16x16_block(
 }
 
 template
-void transpose_16x32_block(
+static void transpose_16x32_block(
     const uint8_t* src,
     int64_t ld_src,
     uint8_t* dst,
diff --git a/test/EmbeddingSpMDM8BitTest.cc b/test/EmbeddingSpMDM8BitTest.cc
index 2bab6e0a4c..a0fe570c87 100644
--- a/test/EmbeddingSpMDM8BitTest.cc
+++ b/test/EmbeddingSpMDM8BitTest.cc
@@ -46,7 +46,7 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-vector<int> prefetch_distances{0, 16, 1000000};
+static vector<int> prefetch_distances{0, 16, 1000000};
 
 namespace {
diff --git a/test/EmbeddingSpMDMNBitTest.cc b/test/EmbeddingSpMDMNBitTest.cc
index 200d919a27..7c38734c0f 100644
--- a/test/EmbeddingSpMDMNBitTest.cc
+++ b/test/EmbeddingSpMDMNBitTest.cc
@@ -49,7 +49,7 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-vector<int> prefetch_distances{0, 16, 1000000};
+static vector<int> prefetch_distances{0, 16, 1000000};
 
 namespace {
diff --git a/test/EmbeddingSpMDMTest.cc b/test/EmbeddingSpMDMTest.cc
index 0763f80612..818d3d6f1f 100644
--- a/test/EmbeddingSpMDMTest.cc
+++ b/test/EmbeddingSpMDMTest.cc
@@ -64,7 +64,7 @@ class IndexRemapTest : public testing::TestWithParam> {};
 } // namespace
 
-vector<int> prefetch_distances = {0, 16, 1000000};
+static vector<int> prefetch_distances = {0, 16, 1000000};
 
 INSTANTIATE_TEST_CASE_P(
     InstantiationName,
diff --git a/test/GConvTest.cc b/test/GConvTest.cc
index 1a5c01963f..36e3e5bbcb 100644
--- a/test/GConvTest.cc
+++ b/test/GConvTest.cc
@@ -27,11 +27,11 @@ using namespace std;
 using namespace fbgemm;
 
-vector<matrix_op_t> transposeVals{
+static vector<matrix_op_t> transposeVals{
     matrix_op_t::NoTranspose,
     matrix_op_t::Transpose};
 
-vector<QuantizationGranularity> qGranularityVals{
+static vector<QuantizationGranularity> qGranularityVals{
     QuantizationGranularity::TENSOR,
     QuantizationGranularity::GROUP,
     QuantizationGranularity::OUT_CHANNEL};
@@ -269,7 +269,7 @@ GetShapes_() {
 * accumulation. Output processing: requantization -> nothing
 */
 template
-void runRequantizeTest(matrix_op_t /* unused */,
+static void runRequantizeTest(matrix_op_t /* unused */,
     matrix_op_t btrans,
     QuantizationGranularity q_granularity,
     bool a_symmetric,
     bool b_symmetric) {
@@ -591,7 +591,7 @@ TEST_P(fbgemmGConvAcc32Test, NoRequantizeTest) {
 */
 
 template
-void runPackUnpackTest(matrix_op_t btrans) {
+static void runPackUnpackTest(matrix_op_t btrans) {
   vector shapes(GetShapes_());
 
   for (auto conv_p : shapes) {
diff --git a/test/I8DirectconvTest.cc b/test/I8DirectconvTest.cc
index e6a9f8b7ca..0b857b2316 100644
--- a/test/I8DirectconvTest.cc
+++ b/test/I8DirectconvTest.cc
@@ -36,7 +36,7 @@ namespace fbgemm {
   //padding */ {pad, pad, pad, pad},
   // /* dialation */ {1, 1}, /* otpt_pad */ {0,0}, /* trans */ transpose),
   // 2D conv shapes
-  vector<conv_param_t<2>> shapes = {
+  static vector<conv_param_t<2>> shapes = {
       // MB, IC, OC, IH, IW, G, KH, KW, stride_h, stride_w,
       // pad_h_top, pad_w_left, pad_h_bottom, pad_w_right,
      // (dilation_h, dilation_w, output_padding_h, output_padding_w, tranpose)
@@ -56,7 +56,7 @@ namespace fbgemm {
     conv_param_t<>(1, 64, 64, {2, 257}, 1, {2, 6}, {1, 2}, {0, 0, 0, 0},
         {1, 1}, {0, 0}, false),
 };
 
-vector<conv_param_t<2>> shapes_trans = {
+static vector<conv_param_t<2>> shapes_trans = {
     conv_param_t<>(1, 256, 176, {2, 4}, 1, {2, 6}, {1, 2}, {0, 0, 0, 0},
         {1, 1}, {0, 0}, true),
     conv_param_t<>(1, 128, 128, {4, 12}, 1, {2, 6}, {1, 1}, {0, 0, 0, 0},
@@ -80,7 +80,7 @@ class FBGemmDirectConvTransFbgemmTest
 } // namespace
 
 template <int SPATIAL_DIM>
-void transposeConvWeights_KwIchO8I4(
+static void transposeConvWeights_KwIchO8I4(
     const conv_param_t<SPATIAL_DIM>& conv_p,
     const std::int8_t* src,
     std::int8_t* dest) {
@@ -114,48 +114,7 @@ void transposeConvWeights_KwIchO8I4(
   }
 }
 
-void directConvRowSum(
-    const conv_param_t<2>& conv_p,
-    uint8_t* A,
-    int32_t* inSum,
-    int32_t* rowSum) {
-  int IN0 = conv_p.IN_DIM[0];
-  int IN1 = conv_p.IN_DIM[1];
-  int IC = conv_p.IC;
-  int K0 = conv_p.K[0];
-  int K1 = conv_p.K[1];
-  int OUT0 = conv_p.OUT_DIM[0];
-  int OUT1 = conv_p.OUT_DIM[1];
-  int stride = conv_p.stride[1];
-
-  memset(rowSum, 0, sizeof(int32_t) * OUT0 * OUT1);
-  for (int ih = 0; ih < IN0; ++ih)
-    for (int iw = 0; iw < IN1; ++iw) {
-      inSum[ih * IN1 + iw] = reduceAvx2(A + ih * IN1 * IC + iw * IC, IC);
-    }
-
-
-  for (int ih = 0; ih < IN0; ++ih)
-    for (int iw = 0; iw < IN1; iw++) {
-      for (int r = 0; r < K0; ++r) {
-        for (int s = 0; s < K1; ++s) {
-          rowSum[(ih + r) * OUT1 + iw * stride + s] += inSum[ih * IN1 + iw];
-        }
-      }
-    }
-  /*
-  compare_buffers(
-      rowSum,
-      rowoffsets,
-      OUT0,
-      OUT1,
-      OUT1,
-      5);
-  */
-}
-
-
-void col_offsets_with_zero_pt_s8acc32_DirectConvT_ref(
+static void col_offsets_with_zero_pt_s8acc32_DirectConvT_ref(
     const conv_param_t<2>& conv_p,
     const int8_t* Bint8,
     const int32_t* B_zero_point,
@@ -207,8 +166,9 @@ void col_offsets_with_zero_pt_s8acc32_DirectConvT_ref(
   }
 }
 
+/*
 
-void QuantizeDirectConv_ref(
+static void QuantizeDirectConv_ref(
     const conv_param_t<2>& conv_p,
     aligned_vector<uint8_t> Aint8,
     aligned_vector<int8_t> Bint8,
@@ -288,7 +248,6 @@ void QuantizeDirectConv_ref(
   }
 }
 
-/*
 INSTANTIATE_TEST_CASE_P(
     InstantiationName,
     FBGemmDirectConvTest,
diff --git a/test/I8SpmdmTest.cc b/test/I8SpmdmTest.cc
index 1014b543e1..0e650f23ff 100644
--- a/test/I8SpmdmTest.cc
+++ b/test/I8SpmdmTest.cc
@@ -27,7 +27,7 @@ using namespace std;
 using namespace fbgemm;
 
-std::vector<float> densities{0.0001f, 0.001f, 0.01f, 0.1f, 1.0f};
+static std::vector<float> densities{0.0001f, 0.001f, 0.01f, 0.1f, 1.0f};
 
 namespace {
 class fbgemmSPMDMTest
diff --git a/test/Im2ColFusedRequantizeTest.cc b/test/Im2ColFusedRequantizeTest.cc
index fd71d2a2f1..660219a824 100644
--- a/test/Im2ColFusedRequantizeTest.cc
+++ b/test/Im2ColFusedRequantizeTest.cc
@@ -26,7 +26,7 @@ using namespace std;
 using namespace fbgemm;
 
-vector<QuantizationGranularity> qGranularityVals{
+static vector<QuantizationGranularity> qGranularityVals{
     QuantizationGranularity::TENSOR,
     QuantizationGranularity::GROUP,
     QuantizationGranularity::OUT_CHANNEL};
@@ -262,7 +262,7 @@ TEST_P(fbgemmIm2colTest, Acc16Test) {
 }
 
 template
-void SConvTest() {
+static void SConvTest() {
   for (auto conv_p : shapes) {
     for (int groups : {1, 4}) {
       if (conv_p.IC % groups != 0 || conv_p.OC % groups != 0) {
diff --git a/test/PackedRequantizeAcc16Test.cc b/test/PackedRequantizeAcc16Test.cc
index 2809c0b45f..41fab88ddd 100644
--- a/test/PackedRequantizeAcc16Test.cc
+++ b/test/PackedRequantizeAcc16Test.cc
@@ -29,11 +29,11 @@ using namespace std;
 using namespace fbgemm;
 
-vector<matrix_op_t> transposeVals{
+static vector<matrix_op_t> transposeVals{
     matrix_op_t::NoTranspose,
     matrix_op_t::Transpose};
 
-vector<QuantizationGranularity> qGranularityVals{
+static vector<QuantizationGranularity> qGranularityVals{
     QuantizationGranularity::TENSOR,
     QuantizationGranularity::GROUP,
     QuantizationGranularity::OUT_CHANNEL};
diff --git a/test/PackedRequantizeTest.cc b/test/PackedRequantizeTest.cc
index d34b7cab2f..6e80576e68 100644
--- a/test/PackedRequantizeTest.cc
+++ b/test/PackedRequantizeTest.cc
@@ -28,11 +28,11 @@ using namespace std;
 using namespace fbgemm;
 
-vector<matrix_op_t> transposeVals{
+static vector<matrix_op_t> transposeVals{
     matrix_op_t::NoTranspose,
     matrix_op_t::Transpose};
 
-vector<QuantizationGranularity> qGranularityVals{
+static vector<QuantizationGranularity> qGranularityVals{
     QuantizationGranularity::TENSOR,
     QuantizationGranularity::GROUP,
     QuantizationGranularity::OUT_CHANNEL};
diff --git a/test/QuantUtilsTest.cc b/test/QuantUtilsTest.cc
index 6ea7dd12aa..dfcea6f5cc 100644
--- a/test/QuantUtilsTest.cc
+++ b/test/QuantUtilsTest.cc
@@ -76,7 +76,7 @@ INSTANTIATE_TEST_CASE_P(
     ::testing::ValuesIn({1, 2, 5, 8, 9, 16, 20, 28, 32, 33, 64, 65})));
 
 template
-void ref_impl(
+static void ref_impl(
     const vector<float>& src,
     int K,
     int C,
@@ -111,7 +111,7 @@ void ref_impl(
 }
 
 template
-void runTests(
+static void runTests(
     const vector<float>& src,
     int K,
     int C,
@@ -134,7 +134,7 @@ void runTests(
 * while comparing results.
 */
 template
-::testing::AssertionResult isNear(
+static ::testing::AssertionResult isNear(
     const vector& res,
     const vector& res_ref) {
   bool match = true;
@@ -154,7 +154,7 @@ ::testing::AssertionResult isNear(
 }
 
 template
-::testing::AssertionResult isQEmbeddingClose(
+static ::testing::AssertionResult isQEmbeddingClose(
     const vector& res_ref,
     const vector& res,
     int out_rows,
@@ -297,7 +297,7 @@ TEST_P(QuantizeGroupwiseTest, quantizeGTest) {
 }
 
 template
-void runQuantizeTests(
+static void runQuantizeTests(
     const vector<float>& src,
     float scale,
     int zero_point,
@@ -431,7 +431,7 @@ TEST(QuantizeTestQParams, chooseQParamsSymmetric) {
 }
 
 template
-void runFusedQuantizeDequantizeTests(
+static void runFusedQuantizeDequantizeTests(
     const vector<float>& src,
     float scale,
     int zero_point,
diff --git a/test/RequantizeOnlyTest.cc b/test/RequantizeOnlyTest.cc
index 94df42197b..99410cee2e 100644
--- a/test/RequantizeOnlyTest.cc
+++ b/test/RequantizeOnlyTest.cc
@@ -22,7 +22,7 @@ using namespace std;
 using namespace fbgemm;
 
-vector<QuantizationGranularity> qGranularityVals{
+static vector<QuantizationGranularity> qGranularityVals{
     QuantizationGranularity::TENSOR,
     QuantizationGranularity::OUT_CHANNEL};
diff --git a/test/RowWiseSparseAdagradFusedTest.cc b/test/RowWiseSparseAdagradFusedTest.cc
index 1f05e97ff1..8ab08c9838 100644
--- a/test/RowWiseSparseAdagradFusedTest.cc
+++ b/test/RowWiseSparseAdagradFusedTest.cc
@@ -51,7 +51,7 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-vector<int> prefetch_distances{0, 16, 1000000};
+static vector<int> prefetch_distances{0, 16, 1000000};
 
 namespace {
diff --git a/test/SparseAdagradTest.cc b/test/SparseAdagradTest.cc
index b1590af2ca..8b92675a77 100644
--- a/test/SparseAdagradTest.cc
+++ b/test/SparseAdagradTest.cc
@@ -42,7 +42,7 @@ static vector<vector<int>> GetInputs_() {
   return input_dims;
 }
 
-vector<int> prefetch_distances{0, 16, 1000000};
+static vector<int> prefetch_distances{0, 16, 1000000};
 
 namespace {
 class SparseAdagradTest
diff --git a/test/SparseDenseMMInt8Test.cc b/test/SparseDenseMMInt8Test.cc
index a5a6d37ec9..4f33a9b613 100644
--- a/test/SparseDenseMMInt8Test.cc
+++ b/test/SparseDenseMMInt8Test.cc
@@ -18,7 +18,7 @@ using namespace std;
 using namespace fbgemm;
 
-vector<QuantizationGranularity> qGranularityVals{
+static vector<QuantizationGranularity> qGranularityVals{
     QuantizationGranularity::TENSOR,
     QuantizationGranularity::OUT_CHANNEL};
diff --git a/test/TransposeTest.cc b/test/TransposeTest.cc
index ed46756803..c0cce4f665 100644
--- a/test/TransposeTest.cc
+++ b/test/TransposeTest.cc
@@ -18,7 +18,7 @@ using namespace std;
 using namespace fbgemm;
 
 template
-::testing::AssertionResult compare_tranpose_results(
+static ::testing::AssertionResult compare_tranpose_results(
     vector expected,
     vector acutal,
     int m,
diff --git a/test/TransposedRequantizeTest.cc b/test/TransposedRequantizeTest.cc
index 6a93d08476..24e5fda60d 100644
--- a/test/TransposedRequantizeTest.cc
+++ b/test/TransposedRequantizeTest.cc
@@ -22,7 +22,7 @@ using namespace std;
 using namespace fbgemm;
 
-vector<QuantizationGranularity> qGranularityVals{
+static vector<QuantizationGranularity> qGranularityVals{
     QuantizationGranularity::TENSOR,
     QuantizationGranularity::OUT_CHANNEL};
diff --git a/test/UniConvTest.cc b/test/UniConvTest.cc
index e767071c24..359222382c 100644
--- a/test/UniConvTest.cc
+++ b/test/UniConvTest.cc
@@ -22,7 +22,7 @@ using namespace std;
 using namespace fbgemm;
 
-vector<QuantizationGranularity> qGranularityVals{
+static vector<QuantizationGranularity> qGranularityVals{
     QuantizationGranularity::TENSOR,
     QuantizationGranularity::GROUP,
     QuantizationGranularity::OUT_CHANNEL};
@@ -606,7 +606,7 @@ TEST(uniConvTest, cornerCases) {
 }
 
 template <int SPATIAL_DIM>
-bool takeDirectConvPath(const conv_param_t<SPATIAL_DIM>& conv_p) {
+static bool takeDirectConvPath(const conv_param_t<SPATIAL_DIM>& conv_p) {
   // Note: Direct convolutions (2D) are optimized for
   // filter size: 2 x 1 to 2 x 6, transposed conv,
   // in_channel % 8 == 0, out_channel % 8 == 0
@@ -638,7 +638,7 @@ bool takeDirectConvPath(const conv_param_t<SPATIAL_DIM>& conv_p) {
 */
 
 template
-void runRequantizeTest(
+static void runRequantizeTest(
     QuantizationGranularity q_granularity,
     bool a_symmetric,
     bool b_symmetric,