diff --git a/.clang-tidy b/.clang-tidy
index c1ac04b76f..7c7f85ee9c 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -10,19 +10,60 @@ Checks: '
 clang-analyzer*,
 clang-diagnostic-missing-prototypes,
 cppcoreguidelines-init-variables,
-bugprone-argument-comment,
-misc-use-internal-linkage,
+bugprone*,
+-bugprone-crtp-constructor-accessibility,
+-bugprone-easily-swappable-parameters,
+-bugprone-exception-escape,
+-bugprone-implicit*,
+-bugprone-macro-parentheses,
+-bugprone-narrowing-conversions,
+-bugprone-reserved-identifier,
+-bugprone-signed-char-misuse,
+-bugprone-switch-missing-default-case,
+misc-*,
+-misc-confusable-identifiers,
+-misc-const-correctness,
+-misc-include-cleaner,
+-misc-no-recursion,
+-misc-non-private-member-variables-in-classes,
+-misc-use-anonymous-namespace,
+-misc-unused-parameters,
 modernize*,
--modernize-use-auto,
--modernize-use-constraints,
--modernize-use-trailing-return-type,
 -modernize-avoid-c-arrays,
 -modernize-avoid-bind,
+-modernize-return-braced-init-list,
+-modernize-use-auto,
+-modernize-use-constraints,
 -modernize-use-designated-initializers,
--modernize-use-ranges,
--modernize-use-integer-sign-comparison
+-modernize-use-integer-sign-comparison,
 -modernize-use-nodiscard,
+-modernize-use-ranges,
+-modernize-use-trailing-return-type,
+-modernize-use-transparent-functors,
 performance*,
+-performance-enum-size,
+readability*,
+-readability-avoid-nested-conditional-operator,
+-readability-avoid-const-params-in-decls,
+-readability-avoid-unconditional-preprocessor-if,
+-readability-braces-around-statements,
+-readability-container-contains,
+-readability-convert-member-functions-to-static,
+-readability-else-after-return,
+-readability-function-cognitive-complexity,
+-readability-identifier-length,
+-readability-implicit-bool-conversion,
+-readability-isolate-declaration,
+-readability-make-member-function-const,
+-readability-magic-numbers,
+-readability-math-missing-parentheses,
+-readability-non-const-parameter,
+-readability-qualified-auto,
+-readability-uppercase-literal-suffix,
+-readability-redundant-access-specifiers,
+-readability-redundant-control-flow,
+-readability-simplify-boolean-expr,
+-readability-suspicious-call-argument,
 '
 CheckOptions:
   - key: facebook-cuda-safe-api-call-check.HandlerName
diff --git a/bench/BenchUtils.cc b/bench/BenchUtils.cc
index f124744234..467879d7df 100644
--- a/bench/BenchUtils.cc
+++ b/bench/BenchUtils.cc
@@ -22,13 +22,13 @@ namespace fbgemm {
 static std::default_random_engine eng;
 
 template
-void randFill(aligned_vector& vec, T low, T high, std::true_type) {
+void randFill(aligned_vector& vec, T low, T high, std::true_type /*unused*/) {
   std::uniform_int_distribution dis(low, high);
   std::generate(vec.begin(), vec.end(), [&] { return dis(eng); });
 }
 
 template
-void randFill(aligned_vector& vec, T low, T high, std::false_type) {
+void randFill(aligned_vector& vec, T low, T high, std::false_type /*unused*/) {
   std::uniform_real_distribution dis(low, high);
   std::generate(vec.begin(), vec.end(), [&] { return dis(eng); });
 }
@@ -124,7 +124,7 @@ bool parseArgumentBool(
 }
 
 #if defined(USE_MKL)
-void test_xerbla(char* srname, const int* info, int) {
+void test_xerbla(char* srname, const int* info, int /*unused*/) {
   // srname - name of the function that called xerbla
   // info - position of the invalid parameter in the parameter list
   // len - length of the name in bytes
diff --git a/bench/BenchUtils.h b/bench/BenchUtils.h
index 9953f77295..54801c0ea5 100644
--- a/bench/BenchUtils.h
+++ b/bench/BenchUtils.h
@@ -9,6 +9,7 @@
 #pragma once
 #include
 #include
+#include
 #include
 
 #if defined(__x86_64__) || defined(__i386__) || \
diff --git a/bench/EmbeddingSpMDMBenchmark.cc b/bench/EmbeddingSpMDMBenchmark.cc
index 7e0ba8ebfe..d492961495 100644
--- a/bench/EmbeddingSpMDMBenchmark.cc
+++ b/bench/EmbeddingSpMDMBenchmark.cc
@@ -408,8 +408,8 @@ int main() {
     cout << "Mean ";
   }
   cout << input_dtype << " inputs";
-  bool use_fp16_inputs = input_dtype == "fp16" ? true : false;
-  bool use_bf16_inputs = input_dtype == "fp16" ? true : false;
+  bool use_fp16_inputs = input_dtype == "fp16";
+  bool use_bf16_inputs = input_dtype == "fp16";
   cout << (use_32_bit_indices ? " 32" : " 64") << " bit indices";
   if (prefetch) {
     cout << " with prefetching";
diff --git a/bench/GEMMsBenchmark.cc b/bench/GEMMsBenchmark.cc
index 23e82dc730..afa664ca67 100644
--- a/bench/GEMMsBenchmark.cc
+++ b/bench/GEMMsBenchmark.cc
@@ -224,7 +224,7 @@ performance_test(const int M, const int N, const int K, const bool timebreak) {
       }
     }
     if (flush) {
-      ((volatile char*)(llc.data()))[0] = llc.data()[0] + 1;
+      ((volatile char*)(llc.data()))[0] = llc[0] + 1;
     }
     // printMatrix(matrix_op_t::NoTranspose, Bint8.data(), k, n, n, "B
     // unpacked");
@@ -312,7 +312,7 @@ performance_test(const int M, const int N, const int K, const bool timebreak) {
       }
     }
     if (flush) {
-      ((volatile char*)(llc.data()))[0] = llc.data()[0] + 1;
+      ((volatile char*)(llc.data()))[0] = llc[0] + 1;
     }
     // printMatrix(matrix_op_t::NoTranspose, Bint8.data(), k, n, n, "B
     // unpacked");
diff --git a/bench/GEMMsTunableBenchmark.cc b/bench/GEMMsTunableBenchmark.cc
index 83ccfc3a0b..f4fa0f7295 100644
--- a/bench/GEMMsTunableBenchmark.cc
+++ b/bench/GEMMsTunableBenchmark.cc
@@ -160,7 +160,7 @@ static void performance_test(
     }
   }
   if (flush) {
-    ((volatile char*)(llc.data()))[0] = llc.data()[0] + 1;
+    ((volatile char*)(llc.data()))[0] = llc[0] + 1;
   }
 
 #ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
diff --git a/bench/GroupwiseConvRequantizeBenchmark.cc b/bench/GroupwiseConvRequantizeBenchmark.cc
index d0fb9d3cc5..4fc9953d31 100644
--- a/bench/GroupwiseConvRequantizeBenchmark.cc
+++ b/bench/GroupwiseConvRequantizeBenchmark.cc
@@ -453,7 +453,7 @@ static void performance_test() {
     }
 
     if (flush) {
-      ((volatile char*)(llc.data()))[0] = llc.data()[0] + 1;
+      ((volatile char*)(llc.data()))[0] = llc[0] + 1;
    }
 
     // packedB.printPackedMatrix("bench B Packed");
diff --git a/bench/Im2ColFusedRequantizeBenchmark.cc b/bench/Im2ColFusedRequantizeBenchmark.cc
index 62f7aed23d..ff25878790 100644
--- a/bench/Im2ColFusedRequantizeBenchmark.cc
+++ b/bench/Im2ColFusedRequantizeBenchmark.cc
@@ -289,7 +289,7 @@ static void performance_test() {
     }
 
     if (flush) {
-      ((volatile char*)(llc.data()))[0] = llc.data()[0] + 1;
+      ((volatile char*)(llc.data()))[0] = llc[0] + 1;
     }
 
     // packedB.printPackedMatrix("bench B Packed");
diff --git a/bench/PackedFloatInOutBenchmark.cc b/bench/PackedFloatInOutBenchmark.cc
index 4a96055024..9bad8b19d5 100644
--- a/bench/PackedFloatInOutBenchmark.cc
+++ b/bench/PackedFloatInOutBenchmark.cc
@@ -167,7 +167,7 @@ static void performance_test() {
     ttot *= 1e9; // convert to ns
 
     if (flush) {
-      ((volatile char*)(llc.data()))[0] = llc.data()[0] + 1;
+      ((volatile char*)(llc.data()))[0] = llc[0] + 1;
     }
 
     cout << setw(6) << m << ", " << setw(6) << n << ", " << setw(6) << k
@@ -269,7 +269,7 @@ static void performance_test() {
       }
     }
     if (flush) {
-      ((volatile char*)(llc.data()))[0] = llc.data()[0] + 1;
+      ((volatile char*)(llc.data()))[0] = llc[0] + 1;
     }
     // printMatrix(matrix_op_t::NoTranspose, Bint8.data(), k, n, n, "B
     // unpacked");
diff --git a/bench/PackedRequantizeAcc16Benchmark.cc b/bench/PackedRequantizeAcc16Benchmark.cc
index a32ae1b55c..359e1f0b30 100644
--- a/bench/PackedRequantizeAcc16Benchmark.cc
+++ b/bench/PackedRequantizeAcc16Benchmark.cc
@@ -160,7 +160,7 @@ static void performance_test() {
     ttot *= 1e9; // convert to ns
     if (flush) {
-      ((volatile char*)(llc.data()))[0] = llc.data()[0] + 1;
+      ((volatile char*)(llc.data()))[0] = llc[0] + 1;
     }
     cout << setw(16) << runType << ", " << fixed << setw(5) << setprecision(1)
          << nops / ttot << '\n';
@@ -414,7 +414,7 @@ static void performance_test() {
     }
 
     if (flush) {
-      ((volatile char*)(llc.data()))[0] = llc.data()[0] + 1;
+      ((volatile char*)(llc.data()))[0] = llc[0] + 1;
     }
     // printMatrix(matrix_op_t::NoTranspose, Bint8.data(), k, n, n, "B
     // unpacked");
diff --git a/bench/PackedRequantizeAcc32Benchmark.cc b/bench/PackedRequantizeAcc32Benchmark.cc
index 3c1c419072..0a6c3531df 100644
--- a/bench/PackedRequantizeAcc32Benchmark.cc
+++ b/bench/PackedRequantizeAcc32Benchmark.cc
@@ -167,7 +167,7 @@ static void performance_test() {
     ttot *= 1e9; // convert to ns
 
     if (flush) {
-      ((volatile char*)(llc.data()))[0] = llc.data()[0] + 1;
+      ((volatile char*)(llc.data()))[0] = llc[0] + 1;
     }
 
     cout << setw(6) << m << ", " << setw(6) << n << ", " << setw(6) << k
@@ -300,7 +300,7 @@ static void performance_test() {
       }
     }
     if (flush) {
-      ((volatile char*)(llc.data()))[0] = llc.data()[0] + 1;
+      ((volatile char*)(llc.data()))[0] = llc[0] + 1;
     }
     // printMatrix(matrix_op_t::NoTranspose, Bint8.data(), k, n, n, "B
     // unpacked");
diff --git a/bench/RowwiseAdagradBenchmark.cc b/bench/RowwiseAdagradBenchmark.cc
index d1af5b23fa..03829c9338 100644
--- a/bench/RowwiseAdagradBenchmark.cc
+++ b/bench/RowwiseAdagradBenchmark.cc
@@ -170,14 +170,14 @@ static void run_benchmark(
     for (size_t i = 0; i < w.size(); ++i) {
       assert(fabs(w[i] - w_ref[i]) < 1e-5);
       if (fabs(w[i] - w_ref[i]) >= 1e-5) {
-        fprintf(stderr, "%ld %f %f\n", i, w[i], w_ref[i]);
+        fprintf(stderr, "%zu %f %f\n", i, w[i], w_ref[i]);
       }
     }
 
     for (size_t i = 0; i < h.size(); ++i) {
       assert(fabs(h[i] - h_ref[i]) < 1e-5);
       if (fabs(h[i] - h_ref[i]) >= 1e-5) {
-        fprintf(stderr, "%ld %f %f\n", i, h[i], h_ref[i]);
+        fprintf(stderr, "%zu %f %f\n", i, h[i], h_ref[i]);
       }
     }
 
diff --git a/bench/RowwiseAdagradFusedBenchmark.cc b/bench/RowwiseAdagradFusedBenchmark.cc
index 31c6e2764b..1818397cac 100644
--- a/bench/RowwiseAdagradFusedBenchmark.cc
+++ b/bench/RowwiseAdagradFusedBenchmark.cc
@@ -8,10 +8,10 @@
 
 #include
 #include
-#include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/bench/SparseAdagradBenchmark.cc b/bench/SparseAdagradBenchmark.cc
index e0d02a01dd..ce7b40932c 100644
--- a/bench/SparseAdagradBenchmark.cc
+++ b/bench/SparseAdagradBenchmark.cc
@@ -177,14 +177,14 @@ static void run_benchmark(
     for (size_t i = 0; i < w.size(); ++i) {
       assert(fabs(w[i] - w_ref[i]) < 1e-5);
       if (fabs(w[i] - w_ref[i]) >= 1e-5) {
-        fprintf(stderr, "%ld %f %f\n", i, w[i], w_ref[i]);
+        fprintf(stderr, "%zu %f %f\n", i, w[i], w_ref[i]);
       }
     }
 
     for (size_t i = 0; i < h.size(); ++i) {
       assert(fabs(h[i] - h_ref[i]) < 1e-5);
       if (fabs(h[i] - h_ref[i]) >= 1e-5) {
-        fprintf(stderr, "%ld %f %f\n", i, h[i], h_ref[i]);
+        fprintf(stderr, "%zu %f %f\n", i, h[i], h_ref[i]);
       }
     }
 
diff --git a/bench/SparseDenseMMInt8Benchmark.cc b/bench/SparseDenseMMInt8Benchmark.cc
index 3d4c0ad5d8..b5fff6b848 100644
--- a/bench/SparseDenseMMInt8Benchmark.cc
+++ b/bench/SparseDenseMMInt8Benchmark.cc
@@ -18,7 +18,7 @@
 using namespace std;
 using namespace fbgemm;
 
-int main(int, char**) {
+int main(int /*unused*/, char** /*unused*/) {
   vector> shapes = getSparseMatrixShapes();
 
   // C is MxN -> CT is NxM
diff --git a/include/fbgemm/ConvUtils.h b/include/fbgemm/ConvUtils.h
index e14ed8c9ae..e46a163b84 100644
--- a/include/fbgemm/ConvUtils.h
+++ b/include/fbgemm/ConvUtils.h
@@ -124,11 +124,11 @@ struct conv_param_t {
   std::string toString() const {
     std::string dim_string[3] = {"T", "H", "W"};
-    std::string out = "";
+    std::string out;
     out += "MB:" + std::to_string(MB) + ", ";
     out += "IC:" + std::to_string(IC) + ", ";
     out += "OC:" + std::to_string(OC) + ", ";
-    if (SPATIAL_DIM <= 3) {
+    if constexpr (SPATIAL_DIM <= 3) {
       for (int d = 0; d < SPATIAL_DIM; ++d) {
         out += "I" + dim_string[3 - SPATIAL_DIM + d] + ":" +
             std::to_string(IN_DIM[d]) + ", ";
@@ -139,7 +139,7 @@ struct conv_param_t {
       }
     }
     out += "G:" + std::to_string(G) + ", ";
-    if (SPATIAL_DIM <= 3) {
+    if constexpr (SPATIAL_DIM <= 3) {
       for (int d = 0; d < SPATIAL_DIM; ++d) {
         out += "K" + dim_string[3 - SPATIAL_DIM + d] + ":" +
             std::to_string(K[d]) + ", ";
diff --git a/include/fbgemm/Fbgemm.h b/include/fbgemm/Fbgemm.h
index c9c21f2b5c..dd373fab7c 100644
--- a/include/fbgemm/Fbgemm.h
+++ b/include/fbgemm/Fbgemm.h
@@ -247,7 +247,7 @@ class PackMatrix {
   /**
    * @brief Print the packed block.
    */
-  void printPackedMatrix(std::string name) {
+  void printPackedMatrix(const std::string& name) {
     static_cast(this)->printPackedMatrix(name);
   }
 
@@ -482,7 +482,7 @@ class FBGEMM_API PackBMatrix final
    */
   void unpack(T* origin_buf, const BlockingFactors* params = nullptr);
 
-  ~PackBMatrix() {}
+  ~PackBMatrix() override = default;
 
  private:
  matrix_op_t trans_;
@@ -752,7 +752,7 @@ class FBGEMM_API PackAWithIm2Col
    */
   static int rowOffsetBufferSize(const BlockingFactors* params = nullptr);
 
-  ~PackAWithIm2Col() {
+  ~PackAWithIm2Col() override {
     if (rowOffsetAllocatedHere) {
       fbgemmAlignedFree(row_offset_);
     }
@@ -842,7 +842,7 @@ class FBGEMM_API PackAWithRowOffset final
    */
   static int rowOffsetBufferSize(const BlockingFactors* params = nullptr);
 
-  ~PackAWithRowOffset() {
+  ~PackAWithRowOffset() override {
     if (rowOffsetAllocatedHere) {
       fbgemmAlignedFree(row_offset_);
     }
@@ -934,7 +934,7 @@ class FBGEMM_API PackAWithQuantRowOffset final
    */
   static int rowOffsetBufferSize(const BlockingFactors* params = nullptr);
 
-  ~PackAWithQuantRowOffset() {
+  ~PackAWithQuantRowOffset() override {
     if (rowOffsetAllocatedHere) {
       fbgemmAlignedFree(row_offset_);
     }
@@ -967,7 +967,7 @@ class FBGEMM_API DoNothing {
  public:
   using outType = outT;
   using inpType = inT;
-  DoNothing() {}
+  DoNothing() = default;
   template
   int f(
       outType* /* unused */,
diff --git a/include/fbgemm/FbgemmConvert.h b/include/fbgemm/FbgemmConvert.h
index bb279b2b89..055654a716 100644
--- a/include/fbgemm/FbgemmConvert.h
+++ b/include/fbgemm/FbgemmConvert.h
@@ -8,7 +8,6 @@
 
 #pragma once
 
-#include
 #include "fbgemm/Types.h"
 #include "fbgemm/Utils.h"
diff --git a/include/fbgemm/FbgemmFP16.h b/include/fbgemm/FbgemmFP16.h
index a8a7173645..598dd1aec2 100644
--- a/include/fbgemm/FbgemmFP16.h
+++ b/include/fbgemm/FbgemmFP16.h
@@ -13,10 +13,6 @@
 
 #include
 #include
-#include
-#include
-#include
-#include
 
 #include "./FbgemmPackMatrixB.h" // @manual
 #include "./FloatConversion.h" // @manual
diff --git a/include/fbgemm/FbgemmFPCommon.h b/include/fbgemm/FbgemmFPCommon.h
index 6ac34a8cff..512282ea59 100644
--- a/include/fbgemm/FbgemmFPCommon.h
+++ b/include/fbgemm/FbgemmFPCommon.h
@@ -14,6 +14,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #if defined(FBGEMM_FP16_FALLBACK_TO_REF_KERNEL) || \
     defined(FBGEMM_FP32_FALLBACK_TO_REF_KERNEL)
@@ -161,7 +163,7 @@ void cblas_gemm_compute(
 #endif
 #endif
   GemmParams gp;
-  int i_begin, i_end;
+  int i_begin = 0, i_end = 0;
   i_begin = 0;
   i_end = m;
   for (auto m0 = i_begin; m0 < i_end; m0 += mb_max) {
@@ -169,7 +171,7 @@ void cblas_gemm_compute(
     assert(mb < static_cast(partition.size()));
     for (auto k_ind = 0; k_ind < k; k_ind += Bp.blockRowSize()) {
       // set up proper accumulation to avoid "Nan" problem
-      float beta_;
+      float beta_ = NAN;
       if (k_ind == 0) {
         // accumulate of beta != 0.0
         // do not!!! accumulate otherwise
@@ -231,7 +233,7 @@ void cblas_gemm_compute(
       }
 #endif
       if ((n % Bp.blockColSize()) == 0) {
-        int64_t jb_begin, jb_end;
+        int64_t jb_begin = 0, jb_end = 0;
         fbgemmPartition1D(
             thread_id, num_threads, gp.b_block_cols, jb_begin, jb_end);
         gp.B += gp.k * Bp.blockColSize() * jb_begin;
@@ -253,7 +255,7 @@ void cblas_gemm_compute(
       } else {
         int last_blk_col = nbcol * Bp.blockColSize();
         if (nbcol) {
-          int64_t jb_begin, jb_end;
+          int64_t jb_begin = 0, jb_end = 0;
           fbgemmPartition1D(
               thread_id, num_threads, gp.b_block_cols, jb_begin, jb_end);
           gp.B += gp.k * Bp.blockColSize() * jb_begin;
diff --git a/include/fbgemm/FbgemmI8Spmdm.h b/include/fbgemm/FbgemmI8Spmdm.h
index df66fa7472..3d53d36473 100644
--- a/include/fbgemm/FbgemmI8Spmdm.h
+++ b/include/fbgemm/FbgemmI8Spmdm.h
@@ -124,12 +124,12 @@ class FBGEMM_API CompressedSparseColumn {
   std::vector ic_; // in channels
 
   // Cache IsHyperSparse to minimize its overhead.
-  mutable bool hyper_sparse_;
+  mutable bool hyper_sparse_{false};
   // Whether we can reuse the cached hyper_sparse_ is determined by checking
   // if NumOfNonZeros() is same as old_nnz_ saved in previous invocation of
   // IsHyperSparse call.
-  mutable std::int32_t old_nnz_;
+  mutable std::int32_t old_nnz_{-1};
 };
 
 } // namespace fbgemm
diff --git a/include/fbgemm/FbgemmPackMatrixB.h b/include/fbgemm/FbgemmPackMatrixB.h
index 5ef9545b3b..c67a829ca3 100644
--- a/include/fbgemm/FbgemmPackMatrixB.h
+++ b/include/fbgemm/FbgemmPackMatrixB.h
@@ -11,11 +11,7 @@
 
 #include
 #include
-#include
-#include
 #include
-#include
-#include
 
 #include "SimdUtils.h" // @manual
 #include "Types.h" // @manual
@@ -277,13 +273,13 @@ class PackedGemmMatrixB {
   T* pmat() const {
     return pmat_;
   }
-  inline int blockRowSize() const {
+  int blockRowSize() const {
     return brow_;
   }
-  inline int blockColSize() const {
+  int blockColSize() const {
     return bcol_;
   }
-  inline int kernelNumColBlocks() const {
+  int kernelNumColBlocks() const {
     return kernel_ncol_blocks_;
   }
diff --git a/include/fbgemm/OutputProcessing-inl.h b/include/fbgemm/OutputProcessing-inl.h
index 2f6042d7c3..8013030190 100644
--- a/include/fbgemm/OutputProcessing-inl.h
+++ b/include/fbgemm/OutputProcessing-inl.h
@@ -1,3 +1,5 @@
+#include
+
 /*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
@@ -17,7 +19,7 @@ inline int memCopy::f(
     int ld_out,
     int ld_in) const {
   static_assert(
-      std::is_same::value,
+      std::is_same_v,
       "input and output data type must be of same type");
   // only copy if destination is not the same as source
   if (out + block.row_start * ld_out + block.col_start != inp) {
@@ -74,7 +76,7 @@ ReQuantizeOutput::f(
     int ld_out,
     int ld_in) const {
   static_assert(
-      std::is_same::value,
+      std::is_same_v,
       "input data type must be of int32_t type");
   int ncol_per_group = ncols_ / groups_;
   assert(
@@ -82,14 +84,14 @@ ReQuantizeOutput::f(
       "ReQuantizeOutput should be called at most 1 group at a time.");
   int g = block.col_start / ncol_per_group;
   if constexpr (
-      instSet == inst_set_t::anyarch || !std::is_same::value) {
+      instSet == inst_set_t::anyarch || !std::is_same_v) {
     for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
       for (int j = block.col_start; j < block.col_start + block.col_size; ++j) {
         inT raw = inp[(i - block.row_start) * ld_in + (j - block.col_start)];
         if (Aq_zero_point_) {
           raw -= Aq_zero_point_ * q_col_offsets_[j];
         }
-        int Bq_zero_point_idx;
+        int Bq_zero_point_idx = 0;
         if constexpr (Q_GRAN == QuantizationGranularity::TENSOR) {
           Bq_zero_point_idx = 0;
         } else if constexpr (Q_GRAN == QuantizationGranularity::GROUP) {
@@ -103,9 +105,9 @@ ReQuantizeOutput::f(
           raw -= q_row_offsets_[i - block.row_start] *
               Bq_zero_point_[Bq_zero_point_idx];
         }
-        float raw_f;
+        float raw_f = NAN;
         if (bias_) {
-          if constexpr (std::is_same::value) {
+          if constexpr (std::is_same_v) {
             raw_f = raw;
             raw_f += bias_[j] / act_times_w_scale_[Bq_zero_point_idx];
           } else {
@@ -203,10 +205,10 @@ inline int ReQuantizeForFloat::f(
     int ld_out,
     int ld_in) const {
   static_assert(
-      std::is_same::value,
+      std::is_same_v,
       "input data type is of not expected type");
   static_assert(
-      std::is_same::value,
+      std::is_same_v,
       "output data type is of not expected type");
   int ncol_per_group = ncols_ / groups_;
   assert(
@@ -214,14 +216,14 @@ inline int ReQuantizeForFloat::f(
       "ReQuantizeOutput should be called at most 1 group at a time.");
   int g = block.col_start / ncol_per_group;
   if constexpr (
-      instSet == inst_set_t::anyarch || !std::is_same::value) {
+      instSet == inst_set_t::anyarch || !std::is_same_v) {
     for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
       for (int j = block.col_start; j < block.col_start + block.col_size; ++j) {
         inT raw = inp[(i - block.row_start) * ld_in + j - block.col_start];
         if (Aq_zero_point_) {
           raw -= Aq_zero_point_ * q_col_offsets_[j];
         }
-        int Bq_zero_point_idx;
+        int Bq_zero_point_idx = 0;
         if constexpr (Q_GRAN == QuantizationGranularity::TENSOR) {
           Bq_zero_point_idx = 0;
         } else if constexpr (Q_GRAN == QuantizationGranularity::GROUP) {
diff --git a/include/fbgemm/PackingTraits-inl.h b/include/fbgemm/PackingTraits-inl.h
index 30fdfe4d52..2029c631eb 100644
--- a/include/fbgemm/PackingTraits-inl.h
+++ b/include/fbgemm/PackingTraits-inl.h
@@ -57,7 +57,7 @@ struct PackingTraits<
     T,
     std::int32_t,
     inst_set_t::avx2,
-    typename std::enable_if::value>::type> {
+    std::enable_if_t::value>> {
   static constexpr int MR{12}; ///< Register block for M dimension.
   static constexpr int NR_MIN{8}; ///< Minimum register block for N dimension.
                                   ///< 8 because 8*ROW_INTERLEAVE int8 elements
@@ -105,7 +105,7 @@ struct PackingTraits<
     T,
     std::int16_t,
     inst_set_t::avx2,
-    typename std::enable_if::value>::type> {
+    std::enable_if_t::value>> {
   static constexpr int MR{3}; ///< Register block for M dimension.
   static constexpr int NR_MIN{
       16}; ///< Minimum register block for N dimension.
@@ -200,7 +200,7 @@ struct PackingTraits<
     T,
     std::int32_t,
     inst_set_t::avx512,
-    typename std::enable_if::value>::type> {
+    std::enable_if_t::value>> {
   static constexpr int MR{14}; ///< Register block for M dimension.
   static constexpr int NR_MIN{
       16}; ///< Minimum register block for N dimension.
@@ -251,7 +251,7 @@ struct PackingTraits<
     T,
     std::int32_t,
     inst_set_t::avx512_ymm,
-    typename std::enable_if::value>::type> {
+    std::enable_if_t::value>> {
   static constexpr int MR{7}; ///< Register block for M dimension.
   static constexpr int NR_MIN{16}; ///< Minimum register block for N dimension.
                                    ///< 8 because 8*ROW_INTERLEAVE int8 elements
@@ -300,7 +300,7 @@ struct PackingTraits<
     T,
     std::int16_t,
     inst_set_t::avx512,
-    typename std::enable_if::value>::type> {
+    std::enable_if_t::value>> {
   static constexpr int MR{6}; ///< Register block for M dimension
   static constexpr int NR_MIN{
       32}; ///< Minimum register block for N dimension;
@@ -351,7 +351,7 @@ struct PackingTraits<
     T,
     std::int16_t,
     inst_set_t::avx512_ymm,
-    typename std::enable_if::value>::type> {
+    std::enable_if_t::value>> {
   static constexpr int MR{6}; ///< Register block for M dimension.
   static constexpr int NR_MIN{
       16}; ///< Minimum register block for N dimension.
@@ -396,7 +396,7 @@
 template
 struct is_16or32bit {
   static constexpr bool value =
-      std::is_same::value || std::is_same::value;
+      std::is_same_v || std::is_same_v;
 };
 
 /**
@@ -414,8 +414,8 @@ struct PackingTraits<
     T,
     accT,
     inst_set_t::avx512_vnni,
-    typename std::enable_if<
-        is_8bit::value && is_16or32bit::value>::type> {
+    std::enable_if_t<
+        is_8bit::value && is_16or32bit::value>> {
   static constexpr int MR{8}; ///< Register block for M dimension.
   static constexpr int NR_MIN{
       16}; ///< Minimum register block for N dimension.
@@ -469,8 +469,8 @@ struct PackingTraits<
     T,
     accT,
     inst_set_t::avx512_vnni_ymm,
-    typename std::enable_if<
-        is_8bit::value && is_16or32bit::value>::type> {
+    std::enable_if_t<
+        is_8bit::value && is_16or32bit::value>> {
   static constexpr int MR{4}; ///< Register block for M dimension.
   static constexpr int NR_MIN{
       16}; ///< Minimum register block for N dimension.
diff --git a/include/fbgemm/Utils.h b/include/fbgemm/Utils.h
index 8836792ece..85ce3e517e 100644
--- a/include/fbgemm/Utils.h
+++ b/include/fbgemm/Utils.h
@@ -125,12 +125,12 @@ FBGEMM_API void transpose_simd(
 /**
  * @brief Explicitly set instruction set to be used
  */
-FBGEMM_API void fbgemmForceIsa(inst_set_t);
+FBGEMM_API void fbgemmForceIsa(inst_set_t /*isa*/);
 
 /**
  * @brief Enable AVX512-256 path for Intel(r) Xeon(r) D servers
  */
-FBGEMM_API void fbgemmEnableAvx512Ymm(bool);
+FBGEMM_API void fbgemmEnableAvx512Ymm(bool /*flag*/);
 
 /**
  * @brief Are we running on a Xeon-D cpu?
@@ -175,12 +175,12 @@ FBGEMM_API inst_set_t fbgemmInstructionSet();
 /**
  * @brief Is ISA is wide vector ZMM
  */
-FBGEMM_API bool isZmm(inst_set_t);
+FBGEMM_API bool isZmm(inst_set_t /*isa*/);
 
 /**
  * @brief Is ISA is wide vector ZMM
 */
-FBGEMM_API bool isYmm(inst_set_t);
+FBGEMM_API bool isYmm(inst_set_t /*isa*/);
 
 /**
  * @brief Helper struct to enable autotuning of FBGEMM packing and kernels.
@@ -212,7 +212,7 @@ struct FBGEMM_API thread_type_t {
   int n_thread_id;
 
   std::string toString() const {
-    std::string out = "";
+    std::string out;
     out += "g num threads: " + std::to_string(g_num_threads) + ", ";
     out += "m num threads: " + std::to_string(m_num_threads) + ", ";
     out += "n num threads: " + std::to_string(n_num_threads) + ", ";
diff --git a/include/fbgemm/UtilsAvx2.h b/include/fbgemm/UtilsAvx2.h
index 761e4bba1b..bc365bde85 100644
--- a/include/fbgemm/UtilsAvx2.h
+++ b/include/fbgemm/UtilsAvx2.h
@@ -31,7 +31,7 @@ struct FBGEMM_API block_type_t {
   int col_size;
 
   std::string toString() const {
-    std::string out = "";
+    std::string out;
     out += "row start:" + std::to_string(row_start) + ", ";
     out += "row size:" + std::to_string(row_size) + ", ";
     out += "col start:" + std::to_string(col_start) + ", ";
diff --git a/src/EmbeddingSpMDM.cc b/src/EmbeddingSpMDM.cc
index bebaac7be2..d10b251a90 100644
--- a/src/EmbeddingSpMDM.cc
+++ b/src/EmbeddingSpMDM.cc
@@ -13,11 +13,8 @@
 #include // @manual
 #include
-#include
 #include
-#include
 #include
-#include
 #include
 
 #include "./CodeCache.h" // @manual
 #include "./EmbeddingSpMDMAutovec.h" // @manual
@@ -278,7 +275,7 @@ GenEmbeddingSpMDMLookup<
     if (!use_offsets) {
       filename += "_use_lengths";
     }
-    if (ROWWISE_SPARSE) {
+    if constexpr (ROWWISE_SPARSE) {
       filename += "_rowwise_sparse";
     }
     filename += "_out_stride_" + std::to_string(output_stride);
@@ -306,7 +303,7 @@ GenEmbeddingSpMDMLookup<
     x86::Gp out = a->gpz(reg_id); // 11
 
     x86::Gp compressed_indices_table;
-    if (ROWWISE_SPARSE) {
+    if constexpr (ROWWISE_SPARSE) {
       ++reg_id;
       compressed_indices_table = a->gpz(reg_id); // 12
     }
@@ -320,7 +317,7 @@ GenEmbeddingSpMDMLookup<
 
     asmjit::FuncDetail func;
 
-    if (ROWWISE_SPARSE) {
+    if constexpr (ROWWISE_SPARSE) {
       func.init(
           asmjit::FuncSignatureT<
               bool,
@@ -375,7 +372,7 @@ GenEmbeddingSpMDMLookup<
             : asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14));
 
     asmjit::FuncArgsAssignment args(&func);
-    if (ROWWISE_SPARSE) {
+    if constexpr (ROWWISE_SPARSE) {
       args.assignAll(
           output_size,
          index_size,
@@ -603,7 +600,7 @@ GenEmbeddingSpMDMLookup<
       a->cmp(scratchReg1_, data_size);
       a->jae(error);
 
-      if (ROWWISE_SPARSE) {
+      if constexpr (ROWWISE_SPARSE) {
        a->mov(
            scratchReg1_.r32(),
            x86::dword_ptr(
@@ -645,7 +642,7 @@ GenEmbeddingSpMDMLookup<
        }
        a->bind(pref_dist_reset_end);
 
-        if (ROWWISE_SPARSE) {
+        if constexpr (ROWWISE_SPARSE) {
          asmjit::Label rowwise_sparse_pref_corner_case_begin = a->newLabel();
          asmjit::Label rowwise_sparse_pref_corner_case_end = a->newLabel();
@@ -677,7 +674,7 @@ GenEmbeddingSpMDMLookup<
         a->add(weights, static_cast(sizeof(float)));
       }
 
-      if (ROWWISE_SPARSE) {
+      if constexpr (ROWWISE_SPARSE) {
         a->cmp(scratchReg1_.r32(), static_cast(-1));
         a->je(LoopDataIndexBegin);
       }
diff --git a/src/EmbeddingSpMDMAutovec.cc b/src/EmbeddingSpMDMAutovec.cc
index 0da016ee6d..dfbb5dc99e 100644
--- a/src/EmbeddingSpMDMAutovec.cc
+++ b/src/EmbeddingSpMDMAutovec.cc
@@ -20,7 +20,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 
@@ -341,7 +340,7 @@ static bool ALWAYS_INLINE EmbeddingSpMDMNBit_autovec(
     // We currently only support int4 to int4 for sequential TBE in this nbit
     // kernel. Note that assert() will be ignored in release mode, so we check
     // here to double check and also avoid "unused variable" warning
-    if (!(input_bit_rate == 4 && output_bit_rate == 4)) {
+    if (input_bit_rate != 4 || output_bit_rate != 4) {
      WARN_ONCE("no_bag is only supported for int4 to int4");
      return false;
    }
@@ -1110,10 +1109,10 @@ template
 ALWAYS_INLINE constexpr FixedParameter fixed(T value) {
   return FixedParameter{value};
 }
-static constexpr VariableParameter var = VariableParameter();
+constexpr VariableParameter var = VariableParameter();
 
 template
-ALWAYS_INLINE bool match(VariableParameter, T) {
+ALWAYS_INLINE bool match(VariableParameter /*unused*/, T /*unused*/) {
   return true;
 }
 template
@@ -1122,11 +1121,11 @@ ALWAYS_INLINE bool match(FixedParameter fixed_parameter, T value) {
 }
 
 template
-ALWAYS_INLINE T specialize(VariableParameter, T value) {
+ALWAYS_INLINE T specialize(VariableParameter /*unused*/, T value) {
   return value;
 }
 template
-ALWAYS_INLINE T specialize(FixedParameter fixed_parameter, T) {
+ALWAYS_INLINE T specialize(FixedParameter fixed_parameter, T /*unused*/) {
   return fixed_parameter.value;
 }
 } // namespace specialization_helper
diff --git a/src/EmbeddingSpMDMAvx512.cc b/src/EmbeddingSpMDMAvx512.cc
index 3820748bb9..3c0a74550b 100644
--- a/src/EmbeddingSpMDMAvx512.cc
+++ b/src/EmbeddingSpMDMAvx512.cc
@@ -271,7 +271,7 @@ static inline void compressed_indices_remap_avx512_helper(
   }
   for (int i = 0; i < UNROLL; ++i) {
     __m512i indices_v;
-    if (USE_MASK) {
+    if constexpr (USE_MASK) {
       indices_v = mask_load(
           zero_v,
           mask_rem_v[i],
@@ -284,7 +284,7 @@ static inline void compressed_indices_remap_avx512_helper(
 
     // gather remapped indices from the mapping table
     __m512i remapped_indices_v;
-    if (USE_MASK) {
+    if constexpr (USE_MASK) {
       remapped_indices_v = mask_gather(
           zero_v, mask_rem_v[i], indices_v, compressed_indices_mapping);
       // mov -1 to not used places in the vector
@@ -297,8 +297,8 @@ static inline void compressed_indices_remap_avx512_helper(
     }
 
     typename reg_t::w_reg_t weights_v;
-    if (HAS_WEIGHTS) {
-      if (USE_MASK) {
+    if constexpr (HAS_WEIGHTS) {
+      if constexpr (USE_MASK) {
         weights_v = mask_load_weights(
             zero_v,
             mask_rem_v[i],
@@ -313,7 +313,7 @@ static inline void compressed_indices_remap_avx512_helper(
 
     // Now remove -1 from the remapped indices
     auto mask_indices_v = gen_mask(remapped_indices_v, zero_v);
-    if (USE_MASK) {
+    if constexpr (USE_MASK) {
       auto out_indices_v =
           compress(zero_v, mask_indices_v, remapped_indices_v);
 
@@ -328,8 +328,8 @@ static inline void compressed_indices_remap_avx512_helper(
           remapped_indices_v);
     }
 
-    if (HAS_WEIGHTS) {
-      if (USE_MASK) {
+    if constexpr (HAS_WEIGHTS) {
+      if constexpr (USE_MASK) {
         mask_compress_and_store_weights(
             reinterpret_cast(
                 out_weights + offsets[i] + count_indices[i]),
@@ -352,7 +352,7 @@ template
 void compressed_indices_remap_avx512(
-    std::int32_t offsets_len,
+    std::int32_t offsets_numel,
     const IndexType* indices,
     const int32_t* compressed_indices_mapping,
     const IndexType* offsets,
@@ -366,7 +366,7 @@ void compressed_indices_remap_avx512(
   constexpr int UNROLL = 8;
   constexpr int VLEN = get_vlen();
   int k = 1;
-  for (; k < (offsets_len - 1) / UNROLL * UNROLL; k += UNROLL) {
+  for (; k < (offsets_numel - 1) / UNROLL * UNROLL; k += UNROLL) {
     int32_t len[UNROLL];
     int32_t rem[UNROLL];
     for (int l = 0; l < UNROLL; ++l) {
@@ -448,9 +448,9 @@ void compressed_indices_remap_avx512(
     }
   }
 
-  // work on remaining offsets_len serially
+  // work on remaining offsets_numel serially
   constexpr int UNROLL_REM = 1;
-  for (; k < offsets_len; ++k) {
+  for (; k < offsets_numel; ++k) {
     int32_t len[UNROLL_REM];
     int32_t rem[UNROLL_REM] = {0};
     for (int l = 0; l < UNROLL_REM; ++l) {
@@ -506,13 +506,13 @@ void compressed_indices_remap_avx512(
 
   // Results are stored at input offsets in output variables
   // copy results to right output locations
-  for (int i = 1; i < offsets_len; ++i) {
+  for (int i = 1; i < offsets_numel; ++i) {
     int out_len = out_offsets[i] - out_offsets[i - 1];
     mymemcpy(
         reinterpret_cast(out_indices + offsets[i - 1]),
         reinterpret_cast(out_indices + out_offsets[i - 1]),
         out_len * sizeof(IndexType));
-    if (HAS_WEIGHTS) {
+    if constexpr (HAS_WEIGHTS) {
       mymemcpy(
           reinterpret_cast(out_weights + offsets[i - 1]),
          reinterpret_cast(out_weights + out_offsets[i - 1]),
diff --git a/src/EmbeddingSpMDMNBit.cc b/src/EmbeddingSpMDMNBit.cc
index 2ffe733c7c..53a5834153 100644
--- a/src/EmbeddingSpMDMNBit.cc
+++ b/src/EmbeddingSpMDMNBit.cc
@@ -13,11 +13,8 @@
 #include // @manual
 #include
 #include
-#include
 #include
-#include
 #include
-#include
 #include
 
 #include "./CodeCache.h" // @manual
 #include "./EmbeddingSpMDMAutovec.h" // @manual
@@ -244,7 +241,7 @@ GenEmbeddingSpMDMNBitLookup<
     if (!use_offsets) {
       filename += "_use_lengths";
     }
-    if (ROWWISE_SPARSE) {
+    if constexpr (ROWWISE_SPARSE) {
       filename += "_rowwise_sparse";
     }
     if (!scale_bias_last) {
@@ -271,7 +268,7 @@ GenEmbeddingSpMDMNBitLookup<
     x86::Gp out = a->gpz(reg_id); // 11
 
     x86::Gp compressed_indices_table;
-    if (ROWWISE_SPARSE) {
+    if constexpr (ROWWISE_SPARSE) {
       ++reg_id;
       compressed_indices_table = a->gpz(reg_id); // 12
     }
@@ -291,7 +288,7 @@ GenEmbeddingSpMDMNBitLookup<
 
     asmjit::FuncDetail func;
 
-    if (ROWWISE_SPARSE) {
+    if constexpr (ROWWISE_SPARSE) {
       func.init(
           asmjit::FuncSignatureT<
               bool,
@@ -339,7 +336,7 @@ GenEmbeddingSpMDMNBitLookup<
             : asmjit::Support::bitMask(8, 9, 10, 11, 12, 13, 14));
 
     asmjit::FuncArgsAssignment args(&func);
-    if (ROWWISE_SPARSE) {
+    if constexpr (ROWWISE_SPARSE) {
       args.assignAll(
           output_size,
          index_size,
@@ -627,7 +624,7 @@ GenEmbeddingSpMDMNBitLookup<
       a->cmp(scratchReg1_, data_size);
       a->jae(error);
 
-      if (ROWWISE_SPARSE) {
+      if constexpr (ROWWISE_SPARSE) {
        a->mov(
            scratchReg1_.r32(),
            x86::dword_ptr(
@@ -669,7 +666,7 @@ GenEmbeddingSpMDMNBitLookup<
        }
        a->bind(pref_dist_reset_end);
 
-        if (ROWWISE_SPARSE) {
+        if constexpr (ROWWISE_SPARSE) {
          asmjit::Label rowwise_sparse_pref_corner_case_begin = a->newLabel();
          asmjit::Label rowwise_sparse_pref_corner_case_end = a->newLabel();
@@ -702,7 +699,7 @@ GenEmbeddingSpMDMNBitLookup<
         a->add(weights, static_cast(sizeof(float)));
       }
 
-      if (ROWWISE_SPARSE) {
+      if constexpr (ROWWISE_SPARSE) {
         a->cmp(scratchReg1_.r32(), static_cast(-1));
         a->je(LoopDataIndexBegin);
       }
diff --git a/src/EmbeddingStatsTracker.cc b/src/EmbeddingStatsTracker.cc
index 184ce536d9..5ae5b3c4dc 100644
--- a/src/EmbeddingStatsTracker.cc
+++ b/src/EmbeddingStatsTracker.cc
@@ -21,8 +21,8 @@ EmbeddingStatsTracker& EmbeddingStatsTracker::getInstance() {
 void EmbeddingStatsTracker::recordPattern(
     int64_t rows,
     int64_t dims,
-    DataType input_type,
-    DataType output_type,
+    DataType input_data_type,
+    DataType output_data_type,
     int64_t batch_size,
     int64_t bag_size) {
   if (!is_stats_enabled() || bag_size == 0) {
@@ -32,7 +32,7 @@ void EmbeddingStatsTracker::recordPattern(
 
   // Create the entry and ensure the pattern exists
   AccessPatternEntry key(
-      rows, dims, batch_size, bag_size, input_type, output_type);
+      rows, dims, batch_size, bag_size, input_data_type, output_data_type);
   auto result = tables_.find(key);
   if (result == tables_.end()) {
     tables_[key] = 1;
diff --git a/src/FbgemmFP16.cc b/src/FbgemmFP16.cc
index 90c9f28fe6..036066607c 100644
--- a/src/FbgemmFP16.cc
+++ b/src/FbgemmFP16.cc
@@ -125,7 +125,7 @@ constexpr kernel_array_t kernel_fp16_avx512 = {
 } // namespace
 
 template <>
-const isa_descriptor& getIsaHandlers(inst_set_t isa, float16) {
+const isa_descriptor& getIsaHandlers(inst_set_t isa, float16 /*unused*/) {
   static isa_descriptor avx2_descriptor =
       std::make_tuple(kernel_fp16_avx2, partition_avx2);
   static isa_descriptor avx512_descriptor =
diff --git a/src/FbgemmFloat16Convert.cc b/src/FbgemmFloat16Convert.cc
index e67d34e103..c463dc2393 100644
--- a/src/FbgemmFloat16Convert.cc
+++ b/src/FbgemmFloat16Convert.cc
@@ -74,8 +74,8 @@ void RoundToFloat16(
     bool clamp,
     bool clamp_denorms) {
   std::vector data_fp16(size);
-  FloatToFloat16_simd(input, &(data_fp16[0]), size, /*do_clip=*/clamp);
-  Float16ToFloat_simd(&(data_fp16[0]), output, size);
+  FloatToFloat16_simd(input, data_fp16.data(), size, /*do_clip=*/clamp);
+  Float16ToFloat_simd(data_fp16.data(), output, size);
   if (clamp_denorms) {
     // FloatToFloat16_simd always preserve fp16 denorm, so we need to manually
     // clamp.
diff --git a/src/FbgemmI64.cc b/src/FbgemmI64.cc
index a0f58c5aff..31cc8d0dca 100644
--- a/src/FbgemmI64.cc
+++ b/src/FbgemmI64.cc
@@ -158,8 +158,7 @@ CodeGenBase::getOrCreate(
   }
 #endif
 
-  const int maxMRegs = mRegBlockSize;
-  (void)maxMRegs; // Suppress unused variable warning
+  const int maxMRegs [[maybe_unused]] = mRegBlockSize;
   const int maxNRegs = nRegBlockSize / vectorLen;
   assert(
       maxMRegs * maxNRegs <= 30 &&
diff --git a/src/FbgemmI8DepthwiseAvx2-inl.h b/src/FbgemmI8DepthwiseAvx2-inl.h
index 6b4f7542a1..ab748f7642 100644
--- a/src/FbgemmI8DepthwiseAvx2-inl.h
+++ b/src/FbgemmI8DepthwiseAvx2-inl.h
@@ -230,7 +230,7 @@ static ALWAYS_INLINE void requantize_(
 
   // convert to float
   __m256 xf_v, yf_v, zf_v, wf_v;
-  if (HAS_BIAS) { // static if
+  if constexpr (HAS_BIAS) { // static if
     if constexpr (std::is_same_v) {
       __m256 x_bias_v, y_bias_v, z_bias_v, w_bias_v;
       if constexpr (
@@ -441,7 +441,7 @@ static ALWAYS_INLINE void requantize_(
 
   // Convert to float
   __m256 xf_v;
-  if (HAS_BIAS) { // static if
+  if constexpr (HAS_BIAS) { // static if
     if constexpr (std::is_same_v) {
       __m256 x_bias_v;
       if constexpr (
diff --git a/src/FbgemmI8Spmdm.cc b/src/FbgemmI8Spmdm.cc
index 33bb7e30ca..6e0c2c47b5 100644
--- a/src/FbgemmI8Spmdm.cc
+++ b/src/FbgemmI8Spmdm.cc
@@ -32,9 +32,8 @@ namespace fbgemm {
 
 CompressedSparseColumn::CompressedSparseColumn(int num_of_rows, int num_of_cols)
     : num_rows_(num_of_rows),
-      colptr_(num_of_cols + 1),
-      hyper_sparse_(false),
-      old_nnz_(-1) {}
+      colptr_(num_of_cols + 1)
+      {}
 
 double CompressedSparseColumn::Density() const {
   return static_cast(NumOfNonZeros()) / (NumOfRows() * NumOfCols());
diff --git a/src/FbgemmSparseDenseAvx512.cc b/src/FbgemmSparseDenseAvx512.cc
index f7009283f8..adf4773fab 100644
--- a/src/FbgemmSparseDenseAvx512.cc
+++ b/src/FbgemmSparseDenseAvx512.cc
@@ -33,7 +33,7 @@ void SparseDenseMMAvx512(
   // size of col_idx is equal to nnzs
   constexpr int VLEN = 16;
   int j = 0;
-  const int effective_N = ((int)((N + VLEN - 1) / (2 * VLEN))) * (2 * VLEN);
+  const int effective_N = (((N + VLEN - 1) / (2 * VLEN))) * (2 * VLEN);
   for (; j < effective_N; j += 2 * VLEN) {
     // r1 is for j:j+VLEN
     // r2 is for j+VLEN:j+2*VLEN
diff --git a/src/FbgemmSparseDenseInt8Avx2.cc b/src/FbgemmSparseDenseInt8Avx2.cc
index 26edf16f0b..86016e8ed6 100644
--- a/src/FbgemmSparseDenseInt8Avx2.cc
+++ b/src/FbgemmSparseDenseInt8Avx2.cc
@@ -69,8 +69,7 @@ void SparseDenseInt8MMAvx2(
   // Calcualtes accum ? C += A * B : C = A * B
   constexpr int VLEN_INT8 = 32;
   constexpr int VLEN_INT32 = 8;
-  constexpr int rowBlockSize = BCSRMatrix<>::RB;
-  (void)rowBlockSize; // Suppress unused variable warning
+  constexpr int rowBlockSize [[maybe_unused]] = BCSRMatrix<>::RB;
   constexpr int colBlockSize = BCSRMatrix<>::CB;
   constexpr int colTileSize = BCSRMatrix<>::COLTILE;
diff --git a/src/FbgemmSparseDenseInt8Avx512.cc b/src/FbgemmSparseDenseInt8Avx512.cc
index 69974444da..827cba1270 100644
--- a/src/FbgemmSparseDenseInt8Avx512.cc
+++ b/src/FbgemmSparseDenseInt8Avx512.cc
@@ -38,7 +38,7 @@ requantizeForMM(__m512i x[], int rowIdx, trRequantizationParams_t& rParams) {
       0x0C, 0x08, 0x04, 0x00);
   // clang-format on
   int32_t row_offset = 0;
-  if (!ACT_ZP_0) {
+  if constexpr (!ACT_ZP_0) {
     row_offset = rParams.act_zero_point * rParams.weight_row_offsets[rowIdx];
   }
   __m512i row_offset_v = _mm512_set1_epi32(row_offset);
@@ -49,7 +49,7 @@ requantizeForMM(__m512i x[], int rowIdx, trRequantizationParams_t& rParams) {
   }
 
   __m512 bias_v;
-  if (HAS_BIAS) {
+  if constexpr (HAS_BIAS) {
     float bias =
         rParams.bias[rowIdx] / rParams.act_times_w_scale[weight_zeropoint_idx];
     bias_v = _mm512_set1_ps(bias);
@@ -63,7 +63,7 @@ requantizeForMM(__m512i x[], int rowIdx, trRequantizationParams_t& rParams) {
     act_times_w_div_c_v = _mm512_set1_ps(
         rParams.act_times_w_scale[weight_zeropoint_idx] / rParams.C_scale);
   }
-  if (!ACT_ZP_0) {
+  if constexpr (!ACT_ZP_0) {
     x[0] = _mm512_sub_epi32(x[0], row_offset_v);
     x[1] = _mm512_sub_epi32(x[1], row_offset_v);
     x[2] = _mm512_sub_epi32(x[2], row_offset_v);
@@ -71,7 +71,7 @@ requantizeForMM(__m512i x[], int rowIdx, trRequantizationParams_t& rParams) {
   }
 
   __m512 xf_v, yf_v, zf_v, wf_v;
-  if (HAS_BIAS) {
+  if constexpr (HAS_BIAS) {
     xf_v = _mm512_add_ps(_mm512_cvtepi32_ps(x[0]), bias_v);
     yf_v = _mm512_add_ps(_mm512_cvtepi32_ps(x[1]), bias_v);
     zf_v = _mm512_add_ps(_mm512_cvtepi32_ps(x[2]), bias_v);
@@ -101,7 +101,7 @@ requantizeForMM(__m512i x[], int rowIdx, trRequantizationParams_t& rParams) {
       _mm512_packs_epi32(z_rounded_v, w_rounded_v), C_zero_point_epi16_v);
   // _mm512_packus_epi16 takes care of saturating to uint8 range
   __m512i xyzw_clamped_v = _mm512_packus_epi16(xy_packed_v, zw_packed_v);
-  if (FUSE_RELU) {
+  if constexpr (FUSE_RELU) {
     xyzw_clamped_v = _mm512_max_epu8(C_zero_point_epi8_v, xyzw_clamped_v);
   }
 
@@ -225,7 +225,7 @@ static inline void loadBRows(
     __mmask64 mask_int8_v = 0) {
   int idx = 0;
   for (; idx < ROWSIZE; ++idx) {
-    if (MASKLOAD) {
+    if constexpr (MASKLOAD) {
       br_v[idx] = _mm512_maskz_loadu_epi8(mask_int8_v, B_start + idx * ld);
     } else {
       br_v[idx] = _mm512_loadu_si512(B_start + idx * ld);
diff --git a/src/FbgemmSparseDenseVectorInt8Avx512.cc b/src/FbgemmSparseDenseVectorInt8Avx512.cc
index 8c3b372081..d141d62e61 100644
--- a/src/FbgemmSparseDenseVectorInt8Avx512.cc
+++ b/src/FbgemmSparseDenseVectorInt8Avx512.cc
@@ -50,7 +50,7 @@ static inline void requantizeForMV(
   int i = 0;
   for (; i < len / VLEN_INT32 * VLEN_INT32; i += VLEN_INT32) {
     __m512i x_v = _mm512_loadu_si512(src + i);
-    if (!ACT_ZP_0) {
+    if constexpr (!ACT_ZP_0) {
       __m512i weight_row_offset_v =
           _mm512_loadu_si512(rParams.weight_row_offsets + i);
       __m512i act_zero_point_v = _mm512_set1_epi32(rParams.act_zero_point);
@@ -67,7 +67,7 @@ static inline void requantizeForMV(
     __m512 act_times_w_div_c_v = _mm512_div_ps(act_times_w_scale_v, c_scale_v);
 
     __m512 xf_v;
-    if (HAS_BIAS) {
+    if constexpr (HAS_BIAS) {
       __m512 bias_v = _mm512_loadu_ps(rParams.bias + i);
       bias_v = _mm512_div_ps(bias_v, act_times_w_scale_v);
       xf_v = _mm512_add_ps(_mm512_cvtepi32_ps(x_v), bias_v);
@@ -81,7 +81,7 @@ static inline void requantizeForMV(
     __m512i x_clamped_v =
         _mm512_packs_epi32(x_added_v, _mm512_setzero_si512());
     x_clamped_v = _mm512_packus_epi16(x_clamped_v, _mm512_setzero_si512());
-    if (FUSE_RELU) {
+    if constexpr (FUSE_RELU) {
       x_clamped_v = _mm512_max_epu8(C_zero_point_epi8_v, x_clamped_v);
     }
     x_clamped_v = _mm512_permutexvar_epi32(permute_mask_v, x_clamped_v);
@@ -96,7 +96,7 @@ static inline void requantizeForMV(
     __mmask16 mask_int32_v = (1ULL << rem_int32) - 1;
     __m512i x_v = _mm512_maskz_loadu_epi32(mask_int32_v, src + i);
 
-    if (!ACT_ZP_0) {
+    if constexpr (!ACT_ZP_0) {
       __m512i weight_row_offset_v = _mm512_maskz_loadu_epi32(
           mask_int32_v, rParams.weight_row_offsets + i);
       __m512i act_zero_point_v = _mm512_set1_epi32(rParams.act_zero_point);
@@ -114,7 +114,7 @@ static inline void requantizeForMV(
     __m512 act_times_w_div_c_v = _mm512_div_ps(act_times_w_scale_v, c_scale_v);
 
     __m512 xf_v;
-    if (HAS_BIAS) {
+    if constexpr (HAS_BIAS) {
       __m512 bias_v = _mm512_maskz_loadu_ps(mask_int32_v, rParams.bias + i);
       bias_v = _mm512_div_ps(bias_v, act_times_w_scale_v);
       xf_v = _mm512_add_ps(_mm512_cvtepi32_ps(x_v), bias_v);
@@ -128,7 +128,7 @@ static inline void requantizeForMV(
     __m512i x_clamped_v =
         _mm512_packs_epi32(x_added_v, _mm512_setzero_si512());
     x_clamped_v = _mm512_packus_epi16(x_clamped_v, _mm512_setzero_si512());
-    if (FUSE_RELU) {
+    if constexpr (FUSE_RELU) {
       x_clamped_v = _mm512_max_epu8(C_zero_point_epi8_v, x_clamped_v);
     }
     x_clamped_v = _mm512_permutexvar_epi32(permute_mask_v, x_clamped_v);
@@ -149,8 +149,7 @@ void SparseDenseInt8MVAvx512(
     trRequantizationParams_t& rParams,
     bool accum,
     int thread_id,
-    int num_threads) {
-  (void)num_threads; // Suppress unused variable warning
+    int num_threads [[maybe_unused]]) {
   // Calcualtes accum ? C += A * B : C = A * B
   constexpr int VLEN_INT32 = 16;
diff --git a/src/GenerateKernelDirectConvU8S8S32ACC32.cc b/src/GenerateKernelDirectConvU8S8S32ACC32.cc
index 8ba0815531..e565d4156b 100644
--- a/src/GenerateKernelDirectConvU8S8S32ACC32.cc
+++ b/src/GenerateKernelDirectConvU8S8S32ACC32.cc
@@ -207,8 +207,7 @@ DirectConvCodeGenBase::getOrCreateDirectConv(
   }
 #endif
 
-  const int maxMRegs = mRegBlockSize;
-  (void)maxMRegs; // Suppress unused variable warning
+  const int maxMRegs [[maybe_unused]] = mRegBlockSize;
   const int maxNRegs = nRegBlockSize * row_interleave / vectorLen;
   assert(
       maxMRegs * maxNRegs <= numRegs - 4 &&
@@ -634,8 +633,7 @@ DirectConvCodeGenBase::
   }
 #endif
 
-  const int maxMRegs = mRegBlockSize;
-  (void)maxMRegs; // Suppress unused variable warning
+  const int maxMRegs [[maybe_unused]] = mRegBlockSize;
   const int maxNRegs = nRegBlockSize * row_interleave / vectorLen;
   assert(
       maxMRegs * maxNRegs <= numRegs - 4 &&
diff --git a/src/GenerateKernelU8S8S32ACC16.cc b/src/GenerateKernelU8S8S32ACC16.cc
index ca8b750672..e9c5139f9a 100644
--- a/src/GenerateKernelU8S8S32ACC16.cc
+++ b/src/GenerateKernelU8S8S32ACC16.cc
@@ -103,15 +103,14 @@ CodeGenBase::getOrCreate(
     bool accum,
     int32_t mc,
     int32_t nc,
-    int32_t kc) {
-  (void)kc; // Suppress unused variable warning
+    int32_t kc [[maybe_unused]]) {
   constexpr int vectorLen = simd_info::WIDTH_BYTES;
 
   int kBlock = 0;
   int nBlock = 0;
   int mRegBlockSize = 0;
   int nRegBlockSize = 0;
-  int nRegBlockSizeMin = 0;
+  int nRegBlockSizeMin [[maybe_unused]] = 0;
   int row_interleave = 0;
 
   if (blocking_params) {
@@ -131,7 +130,6 @@ CodeGenBase::getOrCreate(
     row_interleave = PackingTraits::ROW_INTERLEAVE;
   }
-  (void)nRegBlockSizeMin; // Suppress unused variable warning
 
   auto kernelSig = std::make_tuple(
       accum, mc, nc, nBlock, kBlock, mRegBlockSize, nRegBlockSize);
@@ -158,10 +156,8 @@ CodeGenBase::getOrCreate(
     assert(
         kc % row_interleave == 0 && "kc must be a multiple of row_interleave");
     assert(nc % nRegBlockSizeMin == 0 && "nc must be a multiple of NR_MIN");
-    const int maxMRegs = mRegBlockSize;
-    const int maxNRegs = nRegBlockSize * row_interleave / vectorLen;
-    (void)maxMRegs; // Suppress unused variable warning
-    (void)maxNRegs; // Suppress unused variable warning
+    const int maxMRegs [[maybe_unused]] = mRegBlockSize;
+    const int maxNRegs [[maybe_unused]] = nRegBlockSize * row_interleave / vectorLen;
     assert(
         maxMRegs * maxNRegs <= 13 &&
         "MR*(NR*ROW_INTERLEAVE*8/256"
diff --git a/src/GenerateKernelU8S8S32ACC16Avx512.cc b/src/GenerateKernelU8S8S32ACC16Avx512.cc
index bfebc152be..50b1b12a7d 100644
--- a/src/GenerateKernelU8S8S32ACC16Avx512.cc
+++ b/src/GenerateKernelU8S8S32ACC16Avx512.cc
@@ -78,7 +78,7 @@ CodeGenBase::getOrCreate(
   int nBlock = 0;
   int mRegBlockSize = 0;
   int nRegBlockSize = 0;
-  int nRegBlockSizeMin = 0;
+  int nRegBlockSizeMin [[maybe_unused]] = 0;
   int row_interleave = 0;
 
   if (blocking_params) {
@@ -96,7 +96,6 @@ CodeGenBase::getOrCreate(
     nRegBlockSizeMin = PackingTraits::NR_MIN;
     row_interleave = PackingTraits::ROW_INTERLEAVE;
   }
-  (void)nRegBlockSizeMin; // Suppress unused variable warning
 
   auto kernelSig = std::make_tuple(
       accum, mc, nc, nBlock, kBlock, mRegBlockSize, nRegBlockSize);
@@ -123,8 +122,7 @@ CodeGenBase::getOrCreate(
     assert(
         kc % row_interleave == 0 && "kc must be a multiple of row_interleave");
     assert(nc % nRegBlockSizeMin == 0 && "nc must be a multiple of NR_MIN");
-    const int maxMRegs = mRegBlockSize;
-    (void)maxMRegs; // Suppress unused variable warning
+    const int maxMRegs [[maybe_unused]] = mRegBlockSize;
     const int maxNRegs = nRegBlockSize * row_interleave / vectorLen;
     assert(
         (maxMRegs + 1) * maxNRegs <= 29 &&
diff --git a/src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc
index b81c21cb7b..9cee00bdf6 100644
--- a/src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc
+++ b/src/GenerateKernelU8S8S32ACC16Avx512VNNI.cc
@@ -10,7 +10,6 @@
 
 namespace fbgemm {
 
-namespace x86 = asmjit::x86;
 
 /**
  * Get or Create the AVX512 instructions for 16-bit Accumulation macro-kernel.
diff --git a/src/GenerateKernelU8S8S32ACC32.cc b/src/GenerateKernelU8S8S32ACC32.cc
index d71dbe7f33..a0f22ea8b5 100644
--- a/src/GenerateKernelU8S8S32ACC32.cc
+++ b/src/GenerateKernelU8S8S32ACC32.cc
@@ -108,8 +108,7 @@ CodeGenBase::getOrCreate(
     bool accum,
     int32_t mc,
     int32_t nc,
-    int32_t kc) {
-  (void)kc; // Suppress unused variable warning
+    int32_t kc [[maybe_unused]]) {
   using VecRegT = typename simd_info::vec_reg_t;
   constexpr int numRegs = simd_info::NUM_VEC_REGS;
   static constexpr int vectorLen = simd_info::WIDTH_BYTES;
@@ -118,7 +117,7 @@ CodeGenBase::getOrCreate(
   int nBlock = 0;
   int mRegBlockSize = 0;
   int nRegBlockSize = 0;
-  int nRegBlockSizeMin = 0;
+  int nRegBlockSizeMin [[maybe_unused]] = 0;
   int row_interleave = 0;
 
   if (blocking_params) {
@@ -136,7 +135,6 @@ CodeGenBase::getOrCreate(
     nRegBlockSizeMin = PackingTraits::NR_MIN;
     row_interleave = PackingTraits::ROW_INTERLEAVE;
   }
-  (void)nRegBlockSizeMin; // Suppress unused variable warning
 
   auto kernelSig = std::make_tuple(
       accum, mc, nc, nBlock, kBlock, mRegBlockSize, nRegBlockSize);
@@ -162,8 +160,7 @@ CodeGenBase::getOrCreate(
     assert(
         kc % row_interleave == 0 && "kc must be a multiple of row_interleave");
     assert(nc % nRegBlockSizeMin == 0 && "nc must be a multiple of NR_MIN");
-    const int maxMRegs = mRegBlockSize;
-    (void)maxMRegs; // Suppress unused variable warning
+    const int maxMRegs [[maybe_unused]] = mRegBlockSize;
     const int maxNRegs = nRegBlockSize * row_interleave / vectorLen;
     assert(
         maxMRegs * maxNRegs <= numRegs - 4 &&
diff --git a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
index d4b4981f71..fb35171fe6 100644
--- a/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
+++ b/src/GenerateKernelU8S8S32ACC32Avx512VNNI.cc
@@ -59,8 +59,7 @@ CodeGenBase::getOrCreate(
     bool accum,
     int32_t mc,
     int32_t nc,
-    int32_t kc) {
-  (void)kc; // Suppress unused variable warning
+    int32_t kc [[maybe_unused]]) {
   static constexpr int vectorLen = simd_info::WIDTH_BYTES;
   static constexpr inst_set_t storeInstType =
       simd_info::WIDTH_BITS == 512 ? inst_set_t::avx512
@@ -70,7 +69,7 @@ CodeGenBase::getOrCreate(
   int nBlock = 0;
   int mRegBlockSize = 0;
   int nRegBlockSize = 0;
-  int nRegBlockSizeMin = 0;
+  int nRegBlockSizeMin [[maybe_unused]] = 0;
   int row_interleave = 0;
 
   if (blocking_params) {
@@ -88,7 +87,6 @@ CodeGenBase::getOrCreate(
     nRegBlockSizeMin = PackingTraits::NR_MIN;
     row_interleave = PackingTraits::ROW_INTERLEAVE;
   }
-  (void)nRegBlockSizeMin; // Suppress unused variable warning
 
   auto kernelSig = std::make_tuple(
       accum, mc, nc, nBlock, kBlock, mRegBlockSize, nRegBlockSize);
@@ -115,9 +113,8 @@ CodeGenBase::getOrCreate(
     assert(
         kc % row_interleave == 0 && "kc must be a multiple of row_interleave");
     assert(nc % nRegBlockSizeMin == 0 && "nc must be a multiple of NR_MIN");
-    const int maxMRegs = mRegBlockSize;
+    const int maxMRegs [[maybe_unused]] = mRegBlockSize;
     const int maxNRegs = nRegBlockSize * row_interleave / vectorLen;
-    (void)maxMRegs; // Suppress unused variable warning
     assert(
         maxMRegs * maxNRegs <= 30 &&
         "MR*(NR*ROW_INTERLEAVE*8/512) \
diff --git a/src/GroupwiseConv.cc b/src/GroupwiseConv.cc
index 5b90bf5612..b973161620 100644
--- a/src/GroupwiseConv.cc
+++ b/src/GroupwiseConv.cc
@@ -889,7 +889,7 @@ static void dispatchOutputProcessing(
     int ld_in,
     int groups,
     int C_per_G,
-    true_type) {
+    true_type /*unused*/) {
   constexpr QuantizationGranularity Q_GRAN = processOutputType::QGRANType;
   constexpr int FUSE_RELU = processOutputType::RELU_FUSED;
   bool b_symmetric = (Q_GRAN == QuantizationGranularity::TENSOR &&
diff --git a/src/PackAMatrix.cc b/src/PackAMatrix.cc
index e4b09f56d9..dfc3e588d6 100644
--- a/src/PackAMatrix.cc
+++ b/src/PackAMatrix.cc
@@ -170,18 +170,18 @@ void PackAMatrix::pack(const block_type_t& block) {
 }
 
 template
-int32_t PackAMatrix::addr(int32_t r, int32_t c) const {
-  int32_t block_row_id = r / BaseType::blockRowSize();
+int32_t PackAMatrix::addr(int32_t i, int32_t j) const {
+  int32_t block_row_id = i / BaseType::blockRowSize();
   int32_t brow_offset = (block_row_id * BaseType::blockCols()) *
       (BaseType::blockRowSize() * BaseType::blockColSize());
 
-  int32_t block_col_id = c / BaseType::blockColSize();
+  int32_t block_col_id = j / BaseType::blockColSize();
   int32_t bcol_offset =
       block_col_id * BaseType::blockRowSize() * BaseType::blockColSize();
   int32_t block_offset = brow_offset + bcol_offset;
   int32_t inblock_offset =
-      (r % BaseType::blockRowSize()) * BaseType::blockColSize() +
-      (c % BaseType::blockColSize());
+      (i % BaseType::blockRowSize()) * BaseType::blockColSize() +
+      (j % BaseType::blockColSize());
 
   int32_t index = block_offset + inblock_offset;
diff --git a/src/PackAWithQuantRowOffset.cc b/src/PackAWithQuantRowOffset.cc
index 110b795bc0..6b3049f0a9 100644
--- a/src/PackAWithQuantRowOffset.cc
+++ b/src/PackAWithQuantRowOffset.cc
@@ -10,7 +10,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -190,18 +189,18 @@ void PackAWithQuantRowOffset::pack(const block_type_t& block) {
 }
 
 template
-int32_t PackAWithQuantRowOffset::addr(int32_t r, int32_t c) const {
-  int32_t block_row_id = r / BaseType::blockRowSize();
+int32_t PackAWithQuantRowOffset::addr(int32_t i, int32_t j) const {
+  int32_t block_row_id = i / BaseType::blockRowSize();
   int32_t brow_offset = (block_row_id * BaseType::blockCols()) *
       (BaseType::blockRowSize() * BaseType::blockColSize());
-  int32_t block_col_id = c / BaseType::blockColSize();
+  int32_t block_col_id = j / BaseType::blockColSize();
   int32_t bcol_offset =
       block_col_id * BaseType::blockRowSize() * BaseType::blockColSize();
   int32_t block_offset = brow_offset + bcol_offset;
   int32_t inblock_offset =
-      (r % BaseType::blockRowSize()) * BaseType::blockColSize() +
-      (c % BaseType::blockColSize());
+      (i % BaseType::blockRowSize()) * BaseType::blockColSize() +
+      (j % BaseType::blockColSize());
 
   int32_t index = block_offset + inblock_offset;
diff --git a/src/PackAWithRowOffset.cc b/src/PackAWithRowOffset.cc
index e0d03e4fc8..f19254f2ec 100644
--- a/src/PackAWithRowOffset.cc
+++ b/src/PackAWithRowOffset.cc
@@ -169,18 +169,18 @@ void PackAWithRowOffset::pack(const block_type_t& block) {
 }
 
 template
-int32_t PackAWithRowOffset::addr(int32_t r, int32_t c) const {
-  int32_t block_row_id = r / BaseType::blockRowSize();
+int32_t PackAWithRowOffset::addr(int32_t i, int32_t j) const {
+  int32_t block_row_id = i / BaseType::blockRowSize();
   int32_t brow_offset = (block_row_id * BaseType::blockCols()) *
       (BaseType::blockRowSize() * BaseType::blockColSize());
-  int32_t block_col_id = c / BaseType::blockColSize();
+  int32_t block_col_id = j / BaseType::blockColSize();
   int32_t bcol_offset =
       block_col_id * BaseType::blockRowSize() * BaseType::blockColSize();
   int32_t block_offset = brow_offset + bcol_offset;
   int32_t inblock_offset =
-      (r % BaseType::blockRowSize()) * BaseType::blockColSize() +
-      (c % BaseType::blockColSize());
+      (i % BaseType::blockRowSize()) * BaseType::blockColSize() +
+      (j % BaseType::blockColSize());
 
   int32_t index = block_offset + inblock_offset;
diff --git a/src/PackBMatrix.cc b/src/PackBMatrix.cc
index 79a2ec884a..b7d23fb1c6 100644
--- a/src/PackBMatrix.cc
+++ b/src/PackBMatrix.cc
@@ -368,18 +368,18 @@ void PackBMatrix::unpack(
 }
 
 template
-int32_t PackBMatrix::addr(int32_t r, int32_t c) const {
-  int32_t block_row_id = r / BaseType::blockRowSize();
+int32_t PackBMatrix::addr(int32_t i, int32_t j) const {
+  int32_t block_row_id = i / BaseType::blockRowSize();
   int32_t brow_offset = (block_row_id * BaseType::blockCols()) *
       (BaseType::blockRowSize() * BaseType::blockColSize());
-  int32_t block_col_id = c / BaseType::blockColSize();
+  int32_t block_col_id = j / BaseType::blockColSize();
   int32_t bcol_offset =
       block_col_id * BaseType::blockRowSize() * BaseType::blockColSize();
   int32_t block_offset = brow_offset + bcol_offset;
-  int32_t inblock_offset = (r % BaseType::blockRowSize() / row_interleave_) *
+  int32_t inblock_offset = (i % BaseType::blockRowSize() / row_interleave_) *
           BaseType::blockColSize() * row_interleave_ +
-      (c % BaseType::blockColSize()) * row_interleave_ + r % row_interleave_;
+      (j % BaseType::blockColSize()) * row_interleave_ + i % row_interleave_;
 
   int32_t index = block_offset + inblock_offset;
diff --git a/src/PackMatrix.cc b/src/PackMatrix.cc
index c4b2f92d01..def130922a 100644
--- a/src/PackMatrix.cc
+++ b/src/PackMatrix.cc
@@ -9,9 +9,7 @@
 #define FBGEMM_EXPORTS
 
 #include
-#include
 #include
-#include
 #include "fbgemm/Fbgemm.h"
 
 namespace fbgemm {
@@ -20,10 +18,10 @@ template
 PackMatrix::PackMatrix(
     int32_t rows,
     int32_t cols,
-    inpType* buf,
+    inpType* pmat,
     int groups,
     const BlockingFactors* params)
-    : buf_(buf), nrows_(rows), ncols_(cols), G_(groups) {
+    : buf_(pmat), nrows_(rows), ncols_(cols), G_(groups) {
   bufAllocatedHere_ = false;
   blocking_params = params;
   if (!cpuinfo_initialize()) {
diff --git a/src/PackWeightsForConv.cc b/src/PackWeightsForConv.cc
index 986f7dc37f..55c9c042ed 100644
--- a/src/PackWeightsForConv.cc
+++ b/src/PackWeightsForConv.cc
@@ -127,7 +127,7 @@ bool PackWeightsForConv::isPackingCompliant(
 template
 std::string PackWeightsForConv::mismatchingParams(
     const conv_param_t& test_conv_p) {
-  std::string msg = "";
+  std::string msg;
 
   auto combineStr = [](const std::string& id,
                        const std::string& str1,
diff --git a/src/PackWeightsForDirectConv.cc b/src/PackWeightsForDirectConv.cc
index b40da63c07..4ea693be3b 100644
--- a/src/PackWeightsForDirectConv.cc
+++ b/src/PackWeightsForDirectConv.cc
@@ -82,7 +82,7 @@ void PackedDirectConvMatrix::col_offsets_with_zero_pt_s8acc32_DirectConvT(
   // at initialization stage like other quantized conv implementation.
   // Thus the col_offsets computation will be invoked at forward pass,
   // and only the first pass will prepare the col_offsets.
-  if (first_call == false) {
+  if (!first_call) {
     return;
   }
   int IC = conv_p.IC;
@@ -243,7 +243,7 @@ void fbgemmDirectConv(
     return;
   }
 
-  if (SPATIAL_DIM != 2) {
+  if constexpr (SPATIAL_DIM != 2) {
     assert(false && "1d/3d direct conv not supported");
   } else {
     if (conv_p.transposed) {
diff --git a/src/QuantUtils.cc b/src/QuantUtils.cc
index 4b7da50ff3..e1d21d5b26 100644
--- a/src/QuantUtils.cc
+++ b/src/QuantUtils.cc
@@ -8,8 +8,6 @@
 #define FBGEMM_EXPORTS
 
 #include
-#include
-#include
 #include
 
 #include "fbgemm/QuantUtils.h"
diff --git a/src/QuantUtilsAvx2.cc b/src/QuantUtilsAvx2.cc
index ebbb080131..756db579d7 100644
--- a/src/QuantUtilsAvx2.cc
+++ b/src/QuantUtilsAvx2.cc
@@ -278,23 +278,23 @@ SPECIALIZE_FUSEDDQAVX2(int8_t)
 
 #undef SPECIALIZE_FUSEDDQAVX2
 
-void FindMinMax(const float* a, float* min, float* max, int64_t len) {
+void FindMinMax(const float* m, float* min, float* max, int64_t len) {
   if (len <= 0) {
     *min = 0.0f;
     *max = 0.0f;
     return;
   }
 
-  float temp_min = *a, temp_max = *a;
+  float temp_min = *m, temp_max = *m;
   int64_t i = 0;
 
 #ifdef __AVX__
-  __m256 min_v = _mm256_set1_ps(*a), max_v = _mm256_set1_ps(*a);
+  __m256 min_v = _mm256_set1_ps(*m), max_v = _mm256_set1_ps(*m);
   constexpr int VLEN = 8;
   if (len >= VLEN) {
     for (; i < len / VLEN * VLEN; i += VLEN) {
-      min_v = _mm256_min_ps(min_v, _mm256_loadu_ps(a + i));
-      max_v = _mm256_max_ps(max_v, _mm256_loadu_ps(a + i));
+      min_v = _mm256_min_ps(min_v, _mm256_loadu_ps(m + i));
+      max_v = _mm256_max_ps(max_v, _mm256_loadu_ps(m + i));
     }
 
     float min_buf[VLEN], max_buf[VLEN];
@@ -308,8 +308,8 @@ void FindMinMax(const float* a, float* min, float* max, int64_t len) {
 #endif
 
   for (; i < len; i++) {
-    temp_min = std::min(temp_min, a[i]);
-    temp_max = std::max(temp_max, a[i]);
+    temp_min = std::min(temp_min, m[i]);
+    temp_max = std::max(temp_max, m[i]);
   }
   *min = temp_min;
   *max = temp_max;
@@ -1413,8 +1413,7 @@ void requantizeOutputProcessingGConvAvx2(
           _mm256_castsi256_si128(x_clamped_v));
     } // j loop vectorized
 
-    const int64_t remainder = block.col_start + block.col_size - j;
-    (void)remainder; // Suppress unused variable warning
+    const int64_t remainder [[maybe_unused]] = block.col_start + block.col_size - j;
     assert(remainder == 0);
   } // i loop
 }
diff --git a/src/QuantUtilsAvx512.cc b/src/QuantUtilsAvx512.cc
index 830aecd4bc..38b20f0183 100644
--- a/src/QuantUtilsAvx512.cc
+++ b/src/QuantUtilsAvx512.cc
@@ -12,10 +12,7 @@
     (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)))
 #include
 #endif
-#include //for std::min/std::max
 #include
-#include //for nearbyint
-#include //for numeric_limits
 
 namespace fbgemm {
 
@@ -133,7 +130,7 @@ void requantizeOutputProcessingGConvAvx512(
           mask, inp + (i - block.row_start) * ld_in + (j - block.col_start));
     }
-    if (!A_SYMMETRIC) {
+    if constexpr (!A_SYMMETRIC) {
      __m512i col_off_raw_v;
      if constexpr (C_PER_G != 8) {
        col_off_raw_v = _mm512_loadu_si512(
@@ -146,7 +143,7 @@ void requantizeOutputProcessingGConvAvx512(
       x_v = _mm512_sub_epi32(x_v, col_off_v);
     }
diff --git a/src/QuantUtilsAvx512.cc b/src/QuantUtilsAvx512.cc
index 830aecd4bc..38b20f0183 100644
--- a/src/QuantUtilsAvx512.cc
+++ b/src/QuantUtilsAvx512.cc
@@ -12,10 +12,7 @@
     (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)))
 #include <immintrin.h>
 #endif
-#include <algorithm> //for std::min/std::max
 #include <cassert>
-#include <cmath> //for nearbyint
-#include <limits> //for numeric_limits
 
 namespace fbgemm {
@@ -133,7 +130,7 @@ void requantizeOutputProcessingGConvAvx512(
           mask,
           inp + (i - block.row_start) * ld_in + (j - block.col_start));
     }
-    if (!A_SYMMETRIC) {
+    if constexpr (!A_SYMMETRIC) {
       __m512i col_off_raw_v;
       if constexpr (C_PER_G != 8) {
         col_off_raw_v = _mm512_loadu_si512(
@@ -146,7 +143,7 @@ void requantizeOutputProcessingGConvAvx512(
       x_v = _mm512_sub_epi32(x_v, col_off_v);
     }
 
-    if (!B_SYMMETRIC) {
+    if constexpr (!B_SYMMETRIC) {
       __m512i row_offset_v;
 
       if constexpr (C_PER_G == 2) {
@@ -209,8 +206,6 @@ void requantizeOutputProcessingGConvAvx512(
             _mm512_broadcast_i32x4(
                 _mm_loadu_si128(reinterpret_cast<const __m128i*>(
                     r.B_zero_point + quant_param_idx))));
-      } else if constexpr (C_PER_G == 8) {
-        B_zero_point_v = _mm512_set1_epi32(r.B_zero_point[quant_param_idx]);
       } else {
         B_zero_point_v = _mm512_set1_epi32(r.B_zero_point[quant_param_idx]);
       }
@@ -219,7 +214,7 @@ void requantizeOutputProcessingGConvAvx512(
       x_v = _mm512_sub_epi32(x_v, row_offset_v);
     }
     __m512 xf_v;
-    if (HAS_BIAS) {
+    if constexpr (HAS_BIAS) {
       if constexpr (is_same_v<BIAS_TYPE, float>) {
         __m512 x_bias_v;
         if constexpr (C_PER_G != 8) {
@@ -305,8 +300,6 @@ void requantizeOutputProcessingGConvAvx512(
           permute_mask_v_g4,
           _mm512_broadcast_f32x4(
               _mm_loadu_ps(r.C_multiplier + quant_param_idx)));
-    } else if constexpr (C_PER_G == 8) {
-      multiplier_v = _mm512_set1_ps(r.C_multiplier[quant_param_idx]);
     } else {
       multiplier_v = _mm512_set1_ps(r.C_multiplier[quant_param_idx]);
     }

diff --git a/src/RefImplementations.cc b/src/RefImplementations.cc
index 00e6916d4e..83e6343af6 100644
--- a/src/RefImplementations.cc
+++ b/src/RefImplementations.cc
@@ -17,7 +17,6 @@
 #include
 #include
-#include
 #include
 #include
 #include
@@ -1456,7 +1455,7 @@ bool EmbeddingSpMDMNBit_ref(
   // We currently only support int4 to int4 for sequential TBE in this nbit
   // kernel. Note that assert() will be ignored in release mode, so we check
   // here to double check and also avoid "unused variable" warning
-  if (!(input_bit_rate == 4 && output_bit_rate == 4)) {
+  if (input_bit_rate != 4 || output_bit_rate != 4) {
     WARN_ONCE("no_bag is only supported for int4 to int4");
     return false;
   }

diff --git a/src/RowWiseSparseAdagradFused.cc b/src/RowWiseSparseAdagradFused.cc
index 0b1373ae7c..113e25c6e2 100644
--- a/src/RowWiseSparseAdagradFused.cc
+++ b/src/RowWiseSparseAdagradFused.cc
@@ -774,8 +774,8 @@ typename ReturnFunctionSignature::
 
 // Per-thread global buffer for random number generating, with max vector size
 constexpr size_t VLEN_MAX = simd_info<inst_set_t::avx512>::WIDTH_32BIT_ELEMS;
-alignas(64) static thread_local uint32_t g_rnd128v_buffer[4 * VLEN_MAX];
-static thread_local bool g_rnd128v_initialized = false;
+alignas(64) thread_local uint32_t g_rnd128v_buffer[4 * VLEN_MAX];
+thread_local bool g_rnd128v_initialized = false;
 
 void rand_initialize() {
   // Splitmix64: http://prng.di.unimi.it/splitmix64.c
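Most of the edits in these files swap `if (FLAG)` for `if constexpr (FLAG)` where FLAG is a template parameter (A_SYMMETRIC, B_SYMMETRIC, HAS_BIAS, ...). A minimal sketch of why that matters (hypothetical function, not from the diff): the discarded branch is not instantiated at all, so it produces no code, no warnings, and may even use arguments that would be invalid on the other path.

```cpp
#include <cstdio>

// When HAS_BIAS is false, the bias branch is discarded at instantiation time
// rather than merely optimized away, so `bias` may safely be nullptr.
template <bool HAS_BIAS>
float applyBias(float x, const float* bias, int i) {
  if constexpr (HAS_BIAS) {
    return x + bias[i]; // only instantiated when HAS_BIAS is true
  } else {
    return x;
  }
}

int main() {
  float b[] = {1.0f};
  std::printf("%f\n", applyBias<true>(2.0f, b, 0));        // 3.0
  std::printf("%f\n", applyBias<false>(2.0f, nullptr, 0)); // 2.0
}
```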
diff --git a/src/TransposeUtilsAvx2.h b/src/TransposeUtilsAvx2.h
index 3a7921b3be..8230168dfb 100644
--- a/src/TransposeUtilsAvx2.h
+++ b/src/TransposeUtilsAvx2.h
@@ -549,7 +549,7 @@ inline static void transpose_kernel_8x16_avx2(
   // f : f0 f1 f2 f3 f4 f5 f6 f7 ... f15
   // g : g0 g1 g2 g3 g4 g5 g6 g7 ... g15
   // h : h0 h1 h2 h3 h4 h5 h6 h7 ... h15
-  if (MREM || NREM) {
+  if constexpr (MREM || NREM) {
     load_with_remainders_i16(src, ld_src, r, mrem, nrem);
   } else {
     r[0] = _mm256_loadu_si256(
@@ -659,7 +659,7 @@ inline static void transpose_kernel_8x16_avx2(
   r[7] = _mm256_unpackhi_epi64(__t3, __t7); // 7, 15
 
   // stores back 16 rows:
-  if (MREM || NREM) {
+  if constexpr (MREM || NREM) {
     store_with_remainders_i16(dst, ld_dst, r, mrem, nrem);
   } else {
     _mm_storeu_si128(

diff --git a/src/Utils.cc b/src/Utils.cc
index 1e91d67ea5..9dd55b7fcc 100644
--- a/src/Utils.cc
+++ b/src/Utils.cc
@@ -365,11 +365,8 @@ bool isYmm(inst_set_t isa) {
 bool fbgemmIsIntelXeonD() {
   auto const pkgInfo = cpuinfo_get_packages();
-  if (strstr(pkgInfo->name, "Intel Xeon D-") ||
-      cpuinfo_get_packages_count() == 1) {
-    return true;
-  }
-  return false;
+  return strstr(pkgInfo->name, "Intel Xeon D-") ||
+      cpuinfo_get_packages_count() == 1;
 }
 
 bool fbgemmHasAvx512Support() {
@@ -423,7 +420,8 @@ void fbgemmPartition1DBlocked(
     int64_t& start,
     int64_t& end) {
   if (block_size == 1) {
-    return fbgemmPartition1D(thread_id, num_threads, total_work, start, end);
+    fbgemmPartition1D(thread_id, num_threads, total_work, start, end);
+    return;
   }
   int64_t total_work_in_blocks = total_work / block_size;
   int64_t start_block = 0, end_block = 0;
@@ -629,7 +627,7 @@ void update_prefsum_and_offset_in_range(
 
 void combine_prefix_sum(
     const int nthreads,
-    const int64_t elements_count,
+    const int64_t elements_count [[maybe_unused]],
     const int64_t* const histogram,
     int64_t* const histogram_ps) {
   int64_t offset = 0;
@@ -638,13 +636,11 @@ void combine_prefix_sum(
   // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will
   // work only in debug build.
   assert(offset == elements_count);
-  // Suppress unused variable warning
-  (void)elements_count;
 }
 
 void combine_prefix_sum_for_msb(
     const int nthreads,
-    const int64_t elements_count,
+    const int64_t elements_count [[maybe_unused]],
     const int64_t* const histogram,
     int64_t* const histogram_ps) {
   int64_t offset = 0;
@@ -655,8 +651,6 @@ void combine_prefix_sum_for_msb(
   // TODO(DamianSzwichtenberg): Is assert sufficient? In most cases, it will
   // work only in debug build.
   assert(offset == elements_count);
-  // Suppress unused variable warning
-  (void)elements_count;
 }
 
 template
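The Utils.cc hunks replace the old `(void)var;` suppression idiom with the C++17 `[[maybe_unused]]` attribute for values that are only read inside assert(), which compiles away under NDEBUG. A minimal sketch of the pattern (hypothetical function, not from the diff):

```cpp
#include <cassert>
#include <cstdint>

// `expected` is only consulted by assert(); [[maybe_unused]] keeps release
// builds (where NDEBUG strips the assert) warning-free without a (void) cast.
int64_t checkedSum(const int64_t* v, int n, int64_t expected [[maybe_unused]]) {
  int64_t s = 0;
  for (int i = 0; i < n; ++i) {
    s += v[i];
  }
  assert(s == expected); // unused in release builds
  return s;
}
```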
diff --git a/src/UtilsAvx512.cc b/src/UtilsAvx512.cc
index 1898129a52..a2bcc276f2 100644
--- a/src/UtilsAvx512.cc
+++ b/src/UtilsAvx512.cc
@@ -1493,7 +1493,7 @@ static void transpose_16x16_block(
     int mrem = 16,
     int nrem = 16) {
   __m512i r[8];
-  if (MREM || NREM) {
+  if constexpr (MREM || NREM) {
     load_with_remainders_i16(src, ld_src, r, mrem, nrem);
   } else {
     __m256i t00 =
@@ -1556,7 +1556,7 @@ static void transpose_16x16_block(
   }
   __m512i u[8];
   core_transpose_16x16_block(r, u);
-  if (MREM || NREM) {
+  if constexpr (MREM || NREM) {
     store_with_remainders_i16(dst, ld_dst, u, mrem, nrem);
   } else {
     _mm256_storeu_si256(
@@ -1639,7 +1639,7 @@ static void transpose_16x32_block(
   // 15_00 15_01 15_02 15_03 15_04 15_05 15_06 15_07
 
   __m512i r[8];
-  if (MREM || NREM) {
+  if constexpr (MREM || NREM) {
     load_with_remainders_i8(src, ld_src, r, mrem, nrem);
   } else {
     __m256i t00 =
@@ -1710,7 +1710,7 @@ static void transpose_16x32_block(
 
   __m512i u[8];
   core_transpose_16x32_block_i8(r, u);
-  if (MREM || NREM) {
+  if constexpr (MREM || NREM) {
     store_with_remainders_i8(dst, ld_dst, u, mrem, nrem);
   } else {
     _mm_storeu_si128(

diff --git a/src/codegen_fp16fp32.cc b/src/codegen_fp16fp32.cc
index a05680035d..2fc510f49e 100644
--- a/src/codegen_fp16fp32.cc
+++ b/src/codegen_fp16fp32.cc
@@ -240,8 +240,6 @@ int main(int argc, const char* argv[]) {
       hdrfile << "using GemmParams" << d_type.second << " = GemmParams;\n\n";
 
-      unsigned labelId = 0;
-
       bool fixedA = false, fixedB = false, fixedC = false;
 
       vector<vector<unsigned>>& ukernel_shape = s.shapes;
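The transpose kernels here are templated on whether the tile has a row/column remainder (MREM/NREM), so the hot full-tile instantiation carries no bounds checks while a second instantiation handles the ragged matrix edge. A hedged sketch of the idea with a toy 4x4 copy kernel (hypothetical, much simpler than the FBGEMM shuffle-based transposes):

```cpp
#include <cstring>

template <bool MREM, bool NREM>
static void copy_tile_4x4(const float* src, int ld_src, float* dst, int ld_dst,
                          int mrem = 4, int nrem = 4) {
  if constexpr (MREM || NREM) {
    for (int i = 0; i < mrem; ++i) { // bounds-checked edge path
      std::memcpy(dst + i * ld_dst, src + i * ld_src, nrem * sizeof(float));
    }
  } else {
    for (int i = 0; i < 4; ++i) {    // branch-free full-tile path
      std::memcpy(dst + i * ld_dst, src + i * ld_src, 4 * sizeof(float));
    }
  }
}

// The caller picks the instantiation once per tile:
//   full tiles: copy_tile_4x4<false, false>(src, lds, dst, ldd);
//   edge tiles: copy_tile_4x4<true, true>(src, lds, dst, ldd, mrem, nrem);
```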
diff --git a/src/spmmUtilsAvx2.cc b/src/spmmUtilsAvx2.cc
index 4c4d8cfd89..b96aecbde9 100644
--- a/src/spmmUtilsAvx2.cc
+++ b/src/spmmUtilsAvx2.cc
@@ -36,7 +36,7 @@ FBGEMM_API void trRequantizeOpt(
 
   // Broadcasted act_times_w_scale / C_scale
   __m256 act_times_w_div_c_v;
-  if (Q_GRAN != QuantizationGranularity::OUT_CHANNEL) {
+  if constexpr (Q_GRAN != QuantizationGranularity::OUT_CHANNEL) {
    act_times_w_div_c_v = _mm256_set1_ps(r.act_times_w_scale[0] / r.C_scale);
   }
 
@@ -67,7 +67,7 @@ FBGEMM_API void trRequantizeOpt(
   for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
     // Scale weight_row_offset with act_zero_point
     int32_t row_offset = 0;
-    if (!ACT_SYMMETRIC) {
+    if constexpr (!ACT_SYMMETRIC) {
       row_offset = r.act_zero_point * r.weight_row_offsets[i];
     }
 
@@ -78,7 +78,7 @@ FBGEMM_API void trRequantizeOpt(
       weight_zeropoint_idx = i;
     }
     __m256 bias_v;
-    if (HAS_BIAS) {
+    if constexpr (HAS_BIAS) {
       float bias = r.bias[i] / r.act_times_w_scale[weight_zeropoint_idx];
       bias_v = _mm256_set1_ps(bias);
     }
@@ -107,13 +107,13 @@ FBGEMM_API void trRequantizeOpt(
               inp + (i - block.row_start) * ld_in + (j - block.col_start) +
               3 * VLEN));
 
-      if (!ACT_SYMMETRIC) {
+      if constexpr (!ACT_SYMMETRIC) {
        x_v = _mm256_sub_epi32(x_v, row_offset_v);
        y_v = _mm256_sub_epi32(y_v, row_offset_v);
        z_v = _mm256_sub_epi32(z_v, row_offset_v);
        w_v = _mm256_sub_epi32(w_v, row_offset_v);
       }
-      if (!WEIGHT_SYMMETRIC) {
+      if constexpr (!WEIGHT_SYMMETRIC) {
        __m256i col_offset_v = _mm256_mullo_epi32(
            _mm256_loadu_si256(reinterpret_cast<const __m256i*>(
                r.act_col_offsets + j - block.col_start)),
@@ -152,7 +152,7 @@ FBGEMM_API void trRequantizeOpt(
        * FP32 value with ties to even with default MXCSR rounding mode.
        */
       __m256 xf_v, yf_v, zf_v, wf_v;
-      if (HAS_BIAS) {
+      if constexpr (HAS_BIAS) {
        xf_v = _mm256_add_ps(_mm256_cvtepi32_ps(x_v), bias_v);
        yf_v = _mm256_add_ps(_mm256_cvtepi32_ps(y_v), bias_v);
        zf_v = _mm256_add_ps(_mm256_cvtepi32_ps(z_v), bias_v);
@@ -225,10 +225,10 @@ FBGEMM_API void trRequantizeOpt(
       __m256i x_v = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(
           inp + (i - block.row_start) * ld_in + (j - block.col_start)));
 
-      if (!ACT_SYMMETRIC) {
+      if constexpr (!ACT_SYMMETRIC) {
        x_v = _mm256_sub_epi32(x_v, row_offset_v);
       }
-      if (!WEIGHT_SYMMETRIC) {
+      if constexpr (!WEIGHT_SYMMETRIC) {
        __m256i col_offset_v = _mm256_mullo_epi32(
            _mm256_loadu_si256(reinterpret_cast<const __m256i*>(
                r.act_col_offsets + j - block.col_start)),
@@ -236,7 +236,7 @@ FBGEMM_API void trRequantizeOpt(
        x_v = _mm256_sub_epi32(x_v, col_offset_v);
       }
       __m256 xf_v;
-      if (HAS_BIAS) {
+      if constexpr (HAS_BIAS) {
        xf_v = _mm256_add_ps(_mm256_cvtepi32_ps(x_v), bias_v);
       } else {
        xf_v = _mm256_cvtepi32_ps(x_v);
@@ -272,10 +272,10 @@ FBGEMM_API void trRequantizeOpt(
       __m256i x_v = _mm256_maskload_epi32(
           inp + (i - block.row_start) * ld_in + (j - block.col_start), mask_v);
 
-      if (!ACT_SYMMETRIC) {
+      if constexpr (!ACT_SYMMETRIC) {
        x_v = _mm256_sub_epi32(x_v, row_offset_v);
       }
-      if (!WEIGHT_SYMMETRIC) {
+      if constexpr (!WEIGHT_SYMMETRIC) {
        __m256i col_offset_v = _mm256_mullo_epi32(
            _mm256_maskload_epi32(
                r.act_col_offsets + j - block.col_start, mask_v),
@@ -284,7 +284,7 @@ FBGEMM_API void trRequantizeOpt(
       }
 
       __m256 xf_v;
-      if (HAS_BIAS) {
+      if constexpr (HAS_BIAS) {
        xf_v = _mm256_add_ps(_mm256_cvtepi32_ps(x_v), bias_v);
       } else {
        xf_v = _mm256_cvtepi32_ps(x_v);

diff --git a/test/Bfloat16ConvertTest.cc b/test/Bfloat16ConvertTest.cc
index a6ac9cb304..db257ab6fe 100644
--- a/test/Bfloat16ConvertTest.cc
+++ b/test/Bfloat16ConvertTest.cc
@@ -67,7 +67,7 @@ TEST(FBGemmBfloat16Test, Conversion_simd2) {
     int m = s[0];
     int n = s[1];
 
-    cerr << "m = " << m << " n = " << n << endl;
+    cerr << "m = " << m << " n = " << n << '\n';
     aligned_vector<float> A_fp32_ref(m * n); // fp32 type
     aligned_vector<bfloat16> A_bfloat16(m * n); // bfloat16 type
     aligned_vector<float> A_fp32_final(m * n); // fp32 type

diff --git a/test/EmbeddingSpMDMTest.cc b/test/EmbeddingSpMDMTest.cc
index f1011d9d4e..e077a64724 100644
--- a/test/EmbeddingSpMDMTest.cc
+++ b/test/EmbeddingSpMDMTest.cc
@@ -13,7 +13,7 @@
 #include
 #include
-#include
+#include
 #include "./EmbeddingSpMDMTestUtils.h"
 #include "fbgemm/Fbgemm.h"

diff --git a/test/EmbeddingSpMDMTestUtils.cc b/test/EmbeddingSpMDMTestUtils.cc
index d0083e11ec..9dfe7f9bb8 100644
--- a/test/EmbeddingSpMDMTestUtils.cc
+++ b/test/EmbeddingSpMDMTestUtils.cc
@@ -39,7 +39,7 @@ int GenerateLengthsIndicesWeights(
   }
 
   // Compute the number of indices
-  int lengths_sum = accumulate(lengths.begin(), lengths.end(), 0);
+  int64_t lengths_sum = accumulate(lengths.begin(), lengths.end(), 0l);
 
   // Generate indices
   indices.resize(lengths_sum);
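The EmbeddingSpMDMTestUtils.cc change is subtler than it looks: std::accumulate's accumulator takes the type of its third argument, so `0` (int) would sum the lengths in 32 bits even when the result is stored in an int64_t. Passing `0l` makes the accumulation 64-bit (on LP64 platforms, where long is 64 bits). A small self-contained illustration:

```cpp
#include <cstdint>
#include <numeric>
#include <vector>

int main() {
  // 100k bags of 50k indices each: the true sum (5e9) exceeds INT32_MAX.
  std::vector<int> lengths(100000, 50000);
  // With `0` as the initial value the accumulator would be int and overflow;
  // `0l` keeps the running sum in a long, matching the int64_t fix above.
  int64_t lengths_sum = std::accumulate(lengths.begin(), lengths.end(), 0l);
  return lengths_sum == 5000000000l ? 0 : 1;
}
```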
diff --git a/test/Float16ConvertTest.cc b/test/Float16ConvertTest.cc
index e1b46f37a3..aa549fede4 100644
--- a/test/Float16ConvertTest.cc
+++ b/test/Float16ConvertTest.cc
@@ -98,7 +98,7 @@ TEST_P(FBGemmFloat16Test, Conversion_simd2) {
     int m = s[0];
     int n = s[1];
 
-    cerr << "m = " << m << " n = " << n << endl;
+    cerr << "m = " << m << " n = " << n << '\n';
     aligned_vector<float> A_fp32_ref(m * n); // fp32 type
     aligned_vector<float16> A_float16(m * n); // float16 type
     aligned_vector<float> A_fp32_final(m * n); // fp32 type
@@ -150,7 +150,7 @@ TEST_P(FBGemmFloat16Test, Conversion_fake_rounding) {
   for (auto s : shapes) {
     int m = s[0];
 
-    cerr << "m = " << m << endl;
+    cerr << "m = " << m << '\n';
     aligned_vector<float> A_fp32_ref(m); // fp32 type
     aligned_vector<float16> A_float16(m); // float16 type
     aligned_vector<float> A_fp32_final(m); // fp32 type

diff --git a/test/I8DirectconvTest.cc b/test/I8DirectconvTest.cc
index 4f01a47ad3..e4261c86ab 100644
--- a/test/I8DirectconvTest.cc
+++ b/test/I8DirectconvTest.cc
@@ -547,8 +547,6 @@ TEST_P(FBGemmDirectConvTransTest, Test2D) {
         conv_p.OC);
   }
 
-  string runType;
-
   PackedDirectConvMatrix packedB(conv_p.IC, conv_p.OC, kernel_dim, Bint8.data());
 
   DoNothing<> doNothingObj{};

diff --git a/test/I8SpmdmTest.cc b/test/I8SpmdmTest.cc
index 682fedfcbe..3db38c5405 100644
--- a/test/I8SpmdmTest.cc
+++ b/test/I8SpmdmTest.cc
@@ -16,7 +16,7 @@
 #include
 
 #ifdef _OPENMP
-#include
+#include
 #include
 #endif

diff --git a/test/PackedRequantizeTest.cc b/test/PackedRequantizeTest.cc
index 91912153cb..f1776f2381 100644
--- a/test/PackedRequantizeTest.cc
+++ b/test/PackedRequantizeTest.cc
@@ -7,7 +7,6 @@
  */
 #include
-#include
 #include
 #include
 #include
@@ -111,9 +110,9 @@ static vector<vector<int>> GetShapes_() {
  */
 TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, Test) {
   vector<vector<int>> shapes(GetShapes_());
-  matrix_op_t atrans, btrans;
+  matrix_op_t atrans{}, btrans{};
   bool test_ld = false;
-  QuantizationGranularity q_granularity;
+  QuantizationGranularity q_granularity{};
   tie(atrans, btrans, test_ld, q_granularity) = GetParam();
 
   for (auto shape : shapes) {
@@ -379,9 +378,9 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, Test) {
  */
 TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, TestFloatInputOutput) {
   vector<vector<int>> shapes(GetShapes_());
-  matrix_op_t atrans, btrans;
+  matrix_op_t atrans{}, btrans{};
   bool test_ld = false;
-  QuantizationGranularity q_granularity;
+  QuantizationGranularity q_granularity{};
   tie(atrans, btrans, test_ld, q_granularity) = GetParam();
 
   for (auto shape : shapes) {
@@ -635,7 +634,7 @@ TEST_P(fbgemmu8s8acc32WithQuantGranularityTest, TestFloatInputOutput) {
  */
 TEST_P(fbgemmu8s8acc32Test, TestSymmetricQuantizedInputOutput) {
   vector<vector<int>> shapes(GetShapes_());
-  matrix_op_t atrans, btrans;
+  matrix_op_t atrans{}, btrans{};
   bool test_ld = false;
   tie(atrans, btrans, test_ld) = GetParam();
 
@@ -779,7 +778,7 @@ TEST_P(fbgemmu8s8acc32Test, TestSymmetricQuantizedInputOutput) {
  */
 TEST_P(fbgemmPackUnpackAcc32Test, TestPackUnpack) {
   vector<vector<int>> shapes(GetShapes_());
-  matrix_op_t btrans;
+  matrix_op_t btrans{};
   bool test_ld = false;
   tie(btrans, test_ld) = GetParam();
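The PackedRequantizeTest changes value-initialize the enum locals with `{}` so they hold a definite value before `tie()` overwrites them, satisfying cppcoreguidelines-init-variables without changing behavior. A minimal sketch of the pattern (the enum here is a stand-in, not fbgemm's actual matrix_op_t definition):

```cpp
#include <tuple>

enum class matrix_op_t { NoTranspose, Transpose }; // stand-in enum

int main() {
  // {} zero-initializes the enums (here, NoTranspose) so no read of an
  // indeterminate value is possible even if tie() were skipped on some path.
  matrix_op_t atrans{}, btrans{};
  bool test_ld = false;
  std::tie(atrans, btrans, test_ld) = std::make_tuple(
      matrix_op_t::Transpose, matrix_op_t::NoTranspose, true);
  return test_ld ? 0 : 1;
}
```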
diff --git a/test/QuantUtilsTest.cc b/test/QuantUtilsTest.cc
index f5f305a263..4460c75b18 100644
--- a/test/QuantUtilsTest.cc
+++ b/test/QuantUtilsTest.cc
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include
+#include
 #include
 #include

diff --git a/test/SparseDenseMMInt8Test.cc b/test/SparseDenseMMInt8Test.cc
index 3053d7799a..fd61384738 100644
--- a/test/SparseDenseMMInt8Test.cc
+++ b/test/SparseDenseMMInt8Test.cc
@@ -7,7 +7,7 @@
  */
 
 #include
-#include
+#include
 #include
 
 #include "bench/BenchUtils.h" // @manual

diff --git a/test/SparsePackUnpackTest.cc b/test/SparsePackUnpackTest.cc
index 616640b4a9..cb34cfb8aa 100644
--- a/test/SparsePackUnpackTest.cc
+++ b/test/SparsePackUnpackTest.cc
@@ -7,7 +7,7 @@
  */
 
 #include
-#include
+#include
 #include
 
 #include "bench/BenchUtils.h" // @manual

diff --git a/test/TestUtils.cc b/test/TestUtils.cc
index 6016375fc6..a2a84e6837 100644
--- a/test/TestUtils.cc
+++ b/test/TestUtils.cc
@@ -123,13 +123,13 @@ ::testing::AssertionResult floatCloseAll(
       match = relDiff <= rtol;
     }
     if (!match) {
-      ss << " mismatch at (" << i << ") " << std::endl;
-      ss << "\t ref: " << a[i] << " test: " << b[i] << std::endl;
+      ss << " mismatch at (" << i << ") " << '\n';
+      ss << "\t ref: " << a[i] << " test: " << b[i] << '\n';
       if (consider_absDiff) {
-        ss << "\t absolute diff: " << absDiff << " > " << atol << std::endl;
+        ss << "\t absolute diff: " << absDiff << " > " << atol << '\n';
       }
       if (consider_relDiff) {
-        ss << "\t relative diff: " << relDiff << " > " << rtol << std::endl;
+        ss << "\t relative diff: " << relDiff << " > " << rtol << '\n';
      }
      return ::testing::AssertionFailure()
          << " results do not match. " << ss.str();
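The endl-to-'\n' substitutions throughout the tests are presumably driven by clang-tidy's performance-avoid-endl (the .clang-tidy change at the top of this diff enables performance*): std::endl writes a newline and flushes the stream, and per-line flushes dominate in logging loops. A short illustration of the preferred pattern:

```cpp
#include <iostream>

int main() {
  for (int m : {64, 128, 256}) {
    std::cerr << "m = " << m << '\n'; // no flush per line
  }
  std::cerr.flush(); // one explicit flush at the end, if needed at all
}
```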