
Commit f6100fc

cyyever authored and facebook-github-bot committed
Use if constexpr (#4426)
Summary:
Pull Request resolved: #4426
X-link: facebookresearch/FBGEMM#1493

Add `if constexpr` to all possible if statements.

Pull Request resolved: #4422

Reviewed By: gchalump

Differential Revision: D77571436

Pulled By: q10

fbshipit-source-id: 056aee5283dfb6b9f2c39ba987383fa6ce394a6b
1 parent 80ed942 · commit f6100fc

26 files changed: +87 −83 lines changed
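
The pattern applied across all 26 files is the same: a runtime `if (std::is_same<...>::value)` on a template parameter becomes `if constexpr`, so the branch is selected during template instantiation and the not-taken arm is discarded instead of surviving as dead code for the optimizer to remove. A minimal standalone sketch of the before/after (illustrative only, not FBGEMM code; `out_type_name` is a hypothetical function):

#include <cstdint>
#include <iostream>
#include <type_traits>

// Illustrative sketch of the transformation in this commit: with
// `if constexpr` the condition is evaluated at compile time, and the
// discarded branches generate no code in the instantiation.
template <typename OutType>
const char* out_type_name() {
  if constexpr (std::is_same_v<OutType, float>) { // was: if (std::is_same<OutType, float>::value)
    return "fp32";
  } else if constexpr (std::is_same_v<OutType, std::uint16_t>) {
    return "fp16/bf16";
  } else {
    return "unknown";
  }
}

int main() {
  std::cout << out_type_name<float>() << '\n';         // prints: fp32
  std::cout << out_type_name<std::uint16_t>() << '\n'; // prints: fp16/bf16
}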

bench/EmbeddingSpMDM8BitBenchmark.cc

Lines changed: 4 additions & 4 deletions
@@ -261,10 +261,10 @@ int run_benchmark(
   for (size_t i = 0; i < output.size(); ++i) {
     float tmp1 = 0;
     float tmp2 = 0;
-    if (std::is_same<OutType, float>::value) {
+    if constexpr (std::is_same<OutType, float>::value) {
       tmp1 = output[i];
       tmp2 = output_ref[i];
-    } else if (std::is_same<OutType, uint16_t>::value) {
+    } else if constexpr (std::is_same<OutType, uint16_t>::value) {
       if (is_bf16_out) {
         tmp1 = cpu_bf162float(output[i]);
         tmp2 = cpu_bf162float(output_ref[i]);
@@ -288,9 +288,9 @@ int run_benchmark(
 #pragma omp barrier
 #endif
   if (fbgemm_get_thread_num() == 0) {
-    if (std::is_same<OutType, float>::value) {
+    if constexpr (std::is_same<OutType, float>::value) {
       cout << "out type fp32";
-    } else if (std::is_same<OutType, uint16_t>::value) {
+    } else if constexpr (std::is_same<OutType, uint16_t>::value) {
       if (is_bf16_out) {
         cout << "out type bf16";
       } else {

bench/EmbeddingSpMDMNBitBenchmark.cc

Lines changed: 6 additions & 6 deletions
@@ -375,10 +375,10 @@ int run_benchmark(
   for (size_t i = 0; i < output.size(); ++i) {
     float tmp1 = 0;
     float tmp2 = 0;
-    if (std::is_same<OutType, float>::value) {
+    if constexpr (std::is_same<OutType, float>::value) {
       tmp1 = output[i];
       tmp2 = output_ref[i];
-    } else if (std::is_same<OutType, uint16_t>::value) {
+    } else if constexpr (std::is_same<OutType, uint16_t>::value) {
       if (is_bf16_out) {
         tmp1 = cpu_bf162float(output[i]);
         tmp2 = cpu_bf162float(output_ref[i]);
@@ -411,10 +411,10 @@ int run_benchmark(
   for (size_t i = 0; i < output_autovec.size(); ++i) {
     float tmp1 = 0;
     float tmp2 = 0;
-    if (std::is_same<OutType, float>::value) {
+    if constexpr (std::is_same<OutType, float>::value) {
       tmp1 = output_autovec[i];
       tmp2 = output_ref[i];
-    } else if (std::is_same<OutType, uint16_t>::value) {
+    } else if constexpr (std::is_same<OutType, uint16_t>::value) {
       if (is_bf16_out) {
         tmp1 = cpu_bf162float(output_autovec[i]);
         tmp2 = cpu_bf162float(output_ref[i]);
@@ -437,9 +437,9 @@ int run_benchmark(
 #endif
   }
 
-  if (std::is_same<OutType, float>::value) {
+  if constexpr (std::is_same<OutType, float>::value) {
     cout << "out type fp32, ";
-  } else if (std::is_same<OutType, uint16_t>::value) {
+  } else if constexpr (std::is_same<OutType, uint16_t>::value) {
     if (is_bf16_out) {
       cout << "out type bf16, ";
     } else {

fbgemm_gpu/src/quantize_ops/quantize_ops_cpu.cpp

Lines changed: 6 additions & 6 deletions
@@ -130,7 +130,7 @@ Tensor _fusednbitrowwise_to_float_cpu(
       (ncols - 2 * sizeof(at::Half)) * num_elem_per_byte;
 
   Tensor output;
-  if (std::is_same<output_t, float>::value) {
+  if constexpr (std::is_same<output_t, float>::value) {
     output = at::empty(
         {nrows, output_columns}, // 4 = sizeof(float)
         input.options().dtype(at::kFloat));
@@ -167,15 +167,15 @@ Tensor _fusednbitrowwise_sbfront_to_float_or_half_cpu(
       (ncols - 2 * sizeof(at::Half)) * num_elem_per_byte;
 
   Tensor output;
-  if (std::is_same<output_t, float>::value) {
+  if constexpr (std::is_same<output_t, float>::value) {
     output = at::empty(
         {nrows, output_columns}, // 4 = sizeof(float)
         input.options().dtype(at::kFloat));
-  } else if (std::is_same<output_t, at::Half>::value) {
+  } else if constexpr (std::is_same<output_t, at::Half>::value) {
     output = at::empty(
         {nrows, output_columns}, // 2 = sizeof(half)
         input.options().dtype(at::kHalf));
-  } else if (std::is_same<output_t, at::BFloat16>::value) {
+  } else if constexpr (std::is_same<output_t, at::BFloat16>::value) {
     output = at::empty(
         {nrows, output_columns}, // 2 = sizeof(half)
         input.options().dtype(at::kBFloat16));
@@ -258,7 +258,7 @@ Tensor float_or_half_to_fused8bitrowwise_cpu(const Tensor& input) {
       input.options().dtype(at::kByte)); // at::kBytes for uint8_t
   FBGEMM_DISPATCH_FLOAT_AND_HALF(
       input.scalar_type(), "float_or_half_to_fused8bitrowwise_cpu", [&] {
-        if (std::is_same<scalar_t, float>::value) {
+        if constexpr (std::is_same<scalar_t, float>::value) {
          _float_to_fused8bitrowwise_cpu_out(output, input);
        } else { // scalar_t = at::Half
          _half_to_fused8bitrowwise_cpu_out(output, input);
@@ -419,7 +419,7 @@ Tensor float_or_half_to_fusednbitrowwise_cpu(
   Tensor output;
   FBGEMM_DISPATCH_FLOAT_AND_HALF(
       input.scalar_type(), "float_or_half_to_fusednbitrowwise_cpu", [&] {
-        if (std::is_same<scalar_t, float>::value) {
+        if constexpr (std::is_same<scalar_t, float>::value) {
          output = _float_to_fusednbitrowwise_cpu<float>(input, bit_rate);
        } else { // scalar_t = at::Half
          output =

fbgemm_gpu/src/ssd_split_embeddings_cache/ssd_split_embeddings_cache_cuda.cu

Lines changed: 1 addition & 1 deletion
@@ -141,7 +141,7 @@ Tensor masked_index_impl(
   const auto func_name = is_index_put ? "masked_index_put_kernel"
                                       : "masked_index_select_kernel";
 #endif
-  if (std::is_same_v<value_t, uint8_t>) {
+  if constexpr (std::is_same_v<value_t, uint8_t>) {
     TORCH_CHECK(D % 16 == 0, "D needs to be padded to be multiple of 16");
   }
   FBGEMM_DISPATCH_INTEGRAL_TYPES(

include/fbgemm/FbgemmPackMatrixB.h

Lines changed: 3 additions & 3 deletions
@@ -65,7 +65,7 @@ class PackedGemmMatrixB {
       const int brow = 512)
       : nrow_(nrow), ncol_(ncol), brow_(brow), kernel_ncol_blocks_(2) {
 #ifdef FBGEMM_ENABLE_KLEIDIAI
-    if (std::is_same<T, float16>::value) {
+    if constexpr (std::is_same<T, float16>::value) {
       kernel_ncol_blocks_ = 1;
     }
 #endif
@@ -94,7 +94,7 @@ class PackedGemmMatrixB {
         size_(size),
         kernel_ncol_blocks_(2) {
 #ifdef FBGEMM_ENABLE_KLEIDIAI
-    if (std::is_same<T, float16>::value) {
+    if constexpr (std::is_same<T, float16>::value) {
       kernel_ncol_blocks_ = 1;
     }
 #endif
@@ -122,7 +122,7 @@ class PackedGemmMatrixB {
         size_(size),
         kernel_ncol_blocks_(kernel_ncol_blocks) {
 #ifdef FBGEMM_ENABLE_KLEIDIAI
-    if (std::is_same<T, float16>::value) {
+    if constexpr (std::is_same<T, float16>::value) {
       kernel_ncol_blocks_ = 1;
     }
 #endif

include/fbgemm/OutputProcessing-inl.h

Lines changed: 1 addition & 1 deletion
@@ -104,7 +104,7 @@ ReQuantizeOutput<FUSE_RELU, Q_GRAN, BIAS_TYPE, outT, inT, nextOPType>::f(
       }
       float raw_f;
       if (bias_) {
-        if (std::is_same<BIAS_TYPE, float>::value) {
+        if constexpr (std::is_same<BIAS_TYPE, float>::value) {
           raw_f = raw;
           raw_f += bias_[j] / act_times_w_scale_[Bq_zero_point_idx];
         } else {

include/fbgemm/Utils.h

Lines changed: 1 addition & 1 deletion
@@ -447,7 +447,7 @@ void nbit_embedding_sanity_check(
   assert(
       (input_bit_rate == 2 || input_bit_rate == 4) &&
       "input_bit_rate must be 2 or 4");
-  if (std::is_same<OutType, uint8_t>::value) {
+  if constexpr (std::is_same<OutType, uint8_t>::value) {
     assert(
         (no_bag && input_bit_rate == 4 && output_bit_rate == 4) &&
         "we currently only support int4 to int4 for sequential TBE");

src/DirectConv.h

Lines changed: 2 additions & 2 deletions
@@ -113,9 +113,9 @@ class DirectConvCodeGenBase {
       int NR) {
     std::ostringstream oss;
     oss << "directconv_";
-    if (std::is_same<accT, std::int16_t>::value) {
+    if constexpr (std::is_same<accT, std::int16_t>::value) {
       oss << "acc16_";
-    } else if (std::is_same<accT, std::int32_t>::value) {
+    } else if constexpr (std::is_same<accT, std::int32_t>::value) {
       oss << "acc32_";
     } else {
       oss << "unknown_";

src/EmbeddingSpMDM.cc

Lines changed: 3 additions & 3 deletions
@@ -862,7 +862,7 @@ GenEmbeddingSpMDMLookup<
           a->vmulps(out_vreg, out_vreg, vlen_inv_vreg);
         }
 
-        if (std::is_same_v<outType, float>) {
+        if constexpr (std::is_same_v<outType, float>) {
           if (remainder && vec_idx + v == num_vec_regs_per_block - 1) {
             if (instSet == inst_set_t::avx2) {
               a->vmaskmovps(dst_addr, mask_vreg, out_vreg.ymm());
@@ -1042,7 +1042,7 @@ typename EmbeddingSpMDMKernelSignature<inType, indxType, offsetType, outType>::
     output_stride = block_size;
   }
   if (input_stride == -1) {
-    if (std::is_same_v<inType, uint8_t>) {
+    if constexpr (std::is_same_v<inType, uint8_t>) {
       const auto scale_bias_offset =
           2 * (scale_bias_last ? sizeof(float) : sizeof(uint16_t));
       input_stride = block_size + scale_bias_offset;
@@ -1351,7 +1351,7 @@ GenerateEmbeddingSpMDMRowWiseSparse(
     bool use_offsets) {
 #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
   int64_t input_stride = block_size;
-  if (std::is_same_v<inType, uint8_t>) {
+  if constexpr (std::is_same_v<inType, uint8_t>) {
     const auto scale_bias_offset = 2 * sizeof(float);
     input_stride = block_size + scale_bias_offset;
   }

src/EmbeddingSpMDMAutovec.cc

Lines changed: 4 additions & 4 deletions
@@ -54,7 +54,7 @@ static inline void fill_output(
     const float* src,
     const int64_t block_size,
     const bool is_bf16_out) {
-  if (std::is_same_v<OutType, float>) {
+  if constexpr (std::is_same_v<OutType, float>) {
     for (int j = 0; j < block_size; ++j) {
       out[j] = src[j];
     }
@@ -72,7 +72,7 @@ static inline void fill_output(
 template <typename OutType>
 static inline EmbeddingStatsTracker::DataType get_output_type(
     const bool is_bf16_out) {
-  if (std::is_same_v<OutType, float>) {
+  if constexpr (std::is_same_v<OutType, float>) {
     return EmbeddingStatsTracker::DataType::FP32;
   } else if (std::is_same_v<OutType, uint16_t> && is_bf16_out) {
     return EmbeddingStatsTracker::DataType::BF16;
@@ -1139,7 +1139,7 @@ template <typename InType>
 static int64_t stride_SpMDMWithStrides(
     int64_t block_size,
     bool scale_bias_last) {
-  if (std::is_same_v<InType, uint8_t>) {
+  if constexpr (std::is_same_v<InType, uint8_t>) {
     const size_t scale_bias_offset =
         2 * (scale_bias_last ? sizeof(float) : sizeof(uint16_t));
     return block_size + scale_bias_offset;
@@ -1215,7 +1215,7 @@ typename EmbeddingSpMDMKernelSignature<InType, IndexType, OffsetType, OutType>::
   } else { \
     weights = nullptr; \
   } \
-  if (std::is_same<InType, uint8_t>::value) { \
+  if constexpr (std::is_same<InType, uint8_t>::value) { \
    assert(!specialize(IS_BF16_IN, is_bf16_in)); \
    return EmbeddingSpMDM8Bit_autovec( \
        specialize(BLOCK_SIZE, block_size), \
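
Worth noting: in the hunks above, both arms of each rewritten `if` already compiled for every instantiation (the conversions involved are all well-formed), so the change is chiefly about pruning dead branches at compile time. `if constexpr` additionally allows branches that are only well-formed for one type, which a runtime `if` cannot express. A hedged sketch of that stronger property (hypothetical `decode_bf16` and `as_float`, stand-ins loosely modeled on `cpu_bf162float` and `fill_output`, not the actual FBGEMM implementations):

#include <cstdint>
#include <cstring>
#include <type_traits>

// Hypothetical stand-in for cpu_bf162float: widen a bf16 bit pattern
// into the corresponding float. It takes a pointer, so calling it with
// a float* would be a hard compile error, not just a warning.
inline float decode_bf16(const std::uint16_t* p) {
  const std::uint32_t bits = static_cast<std::uint32_t>(*p) << 16;
  float f;
  std::memcpy(&f, &bits, sizeof(f)); // bf16 occupies the high 16 bits of fp32
  return f;
}

template <typename OutType>
float as_float(const OutType& v, bool is_bf16_out) {
  if constexpr (std::is_same_v<OutType, float>) {
    return v;
  } else {
    // This branch is discarded when OutType = float, so `&v` only has
    // to be a const uint16_t* in the uint16_t instantiation; a plain
    // runtime `if` would reject the whole template for float.
    return is_bf16_out ? decode_bf16(&v) : static_cast<float>(v);
  }
}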
