[Kernel][Bugfix] Fixup some warnings in nvfp4_blockwise_moe when CUDA < 12.8 (#20324)

tlrmchlsmth · web-flow · commit 3be8d312a216 · 2025-07-01T18:05:47.000-07:00
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
diff --git a/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu b/csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu
@@ -335,8 +335,10 @@ void run_fp4_blockwise_scaled_group_mm(
   TORCH_CHECK(status == cutlass::Status::kSuccess, "Failed to run GEMM");
 }
 
+#if defined ENABLE_NVFP4 && ENABLE_NVFP4
 constexpr auto FLOAT4_E2M1X2 = at::ScalarType::Byte;
 constexpr auto SF_DTYPE = at::ScalarType::Float8_e4m3fn;
+#endif
 
 #define CHECK_TYPE(x, st, m) \
   TORCH_CHECK(x.scalar_type() == st, ": Inconsistency of Tensor type:", m)

Original file line number	Diff line number	Diff line change
`@@ -335,8 +335,10 @@ void run_fp4_blockwise_scaled_group_mm(`
`335`	`335`	`TORCH_CHECK(status == cutlass::Status::kSuccess, "Failed to run GEMM");`
`336`	`336`	`}`
`337`	`337`
	`338`	`+#if defined ENABLE_NVFP4 && ENABLE_NVFP4`
`338`	`339`	`constexpr auto FLOAT4_E2M1X2 = at::ScalarType::Byte;`
`339`	`340`	`constexpr auto SF_DTYPE = at::ScalarType::Float8_e4m3fn;`
	`341`	`+#endif`
`340`	`342`
`341`	`343`	`#define CHECK_TYPE(x, st, m) \`
`342`	`344`	`TORCH_CHECK(x.scalar_type() == st, ": Inconsistency of Tensor type:", m)`