
Commit 1ce8ecb

properly skip float8 inference tests without fbgemm
Summary: Makes the float8 inference tests pass (by skipping them) if `fbgemm_gpu_genai` is not installed, which is convenient for local development.

Test Plan:

```
pytest test/quantization/quantize_/workflows/float8/test_float8_tensor.py -s -x
```

Reviewers:

Subscribers:

Tasks:

Tags:

ghstack-source-id: 357c0eb
ghstack-comment-id: 3460951577
Pull-Request: #3255
Parent commit: 3577306
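The new guards call a small availability helper, `_is_fbgemm_gpu_genai_available()`, whose implementation is not part of this diff. A minimal sketch of how such a check is commonly written, assuming an `importlib` probe; the module path `fbgemm_gpu.experimental.gen_ai` is an assumption about what the `fbgemm_gpu_genai` package exposes, not taken from torchao:

```python
import importlib.util


# Hypothetical sketch only -- the actual torchao helper may differ
# (e.g. it could also enforce a minimum package version).
def _is_fbgemm_gpu_genai_available() -> bool:
    try:
        # find_spec returns None when the module cannot be located; it
        # raises ModuleNotFoundError if a parent package is missing.
        return importlib.util.find_spec("fbgemm_gpu.experimental.gen_ai") is not None
    except ModuleNotFoundError:
        return False
```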


test/quantization/quantize_/workflows/float8/test_float8_tensor.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -294,6 +294,7 @@ def test_slice_and_copy_similar_to_vllm(self, granularity):
         self._test_slice_and_copy_similar_to_vllm(config)
 
     @unittest.skipIf(not is_sm_at_least_90(), "Need sm90+")
+    @unittest.skipIf(not _is_fbgemm_gpu_genai_available(), "Need fbgemm_gpu_genai")
     def test_bmm(self):
         # only support per row quantization
         config = Float8DynamicActivationFloat8WeightConfig(granularity=PerRow())
@@ -406,6 +407,7 @@ def test_cat(self, granularity, sizes):
         self.assertEqual(cat_qweight2.scale, ref_scale)
 
     @unittest.skipIf(not is_sm_at_least_90(), "Need sm90+")
+    @unittest.skipIf(not _is_fbgemm_gpu_genai_available(), "Need fbgemm_gpu_genai")
     def test_moe_weight_reshape_ops(self):
         # only per row quantization is supported for bmm
         granularity = PerRow()
@@ -416,6 +418,7 @@ def test_moe_weight_reshape_ops(self):
     # that should be moved here after v1 config is deprecated:
     # https://github.com/pytorch/ao/issues/2649
     @unittest.skipIf(not is_sm_at_least_90(), "Need sm90+")
+    @unittest.skipIf(not _is_fbgemm_gpu_genai_available(), "Need fbgemm_gpu_genai")
     def test_expected_gpu_kernel_fbgemm(self):
         """Making sure KernelPreference.FBGEMM calls correct quantize and gemm kernels
         and the bias add happens in the gemm kernel for per row quantization
```
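For readers who want to reuse this pattern outside torchao, here is a self-contained sketch of the same stacked `unittest.skipIf` guard; the test class, probe, and constants are illustrative stand-ins, not code from this commit:

```python
import importlib.util
import unittest

# Assumed probes; the real torchao helpers (is_sm_at_least_90,
# _is_fbgemm_gpu_genai_available) are more involved than this.
HAS_FBGEMM_GPU_GENAI = importlib.util.find_spec("fbgemm_gpu") is not None
HAS_SM90 = False  # stand-in for a CUDA compute-capability check


class ExampleFloat8Test(unittest.TestCase):
    # The skipIf conditions are evaluated at class-definition time;
    # whichever is true marks the test as skipped rather than failed.
    @unittest.skipIf(not HAS_SM90, "Need sm90+")
    @unittest.skipIf(not HAS_FBGEMM_GPU_GENAI, "Need fbgemm_gpu_genai")
    def test_guarded_kernel_path(self):
        self.assertTrue(True)  # placeholder body


if __name__ == "__main__":
    unittest.main()
```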
