Commit 8f6fdda

fixing tests that aren't skipping
Parent: 89ec74b

1 file changed, 55 additions, 22 deletions

test/quantization/test_moe_quant.py
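Why the move from decorators to in-body skips: the diff itself doesn't say, but the commit title points at a known pitfall. `@parameterized.expand` generates the real test methods at class-creation time, and with affected versions of `parameterized`, `unittest.skipIf` decorators stacked above it attach only to the template method that `expand` discards, not to the generated tests, so the skip conditions were silently ignored and the tests ran on machines that should have skipped them. A condition checked inside the body with `self.skipTest(...)` is evaluated at run time by every expanded variant. A minimal sketch of the pitfall and the fix; the `HAS_GPU` flag is a hypothetical stand-in for `torch.cuda.is_available()`:

import unittest

from parameterized import parameterized

HAS_GPU = False  # hypothetical stand-in for torch.cuda.is_available()

class SkipDemo(unittest.TestCase):
    # Broken: skipIf sits above parameterized.expand, so it wraps the
    # template method that expand discards; with affected parameterized
    # versions, the generated variants (test_broken_0_a, test_broken_1_b)
    # run anyway and hit the fail() below.
    @unittest.skipIf(not HAS_GPU, "Need GPU available")
    @parameterized.expand([("a",), ("b",)])
    def test_broken(self, name):
        self.fail("ran even though the skip condition holds")

    # Fixed: the condition lives in the body, so every expanded variant
    # evaluates it at run time and skips cleanly.
    @parameterized.expand([("a",), ("b",)])
    def test_fixed(self, name):
        if not HAS_GPU:
            self.skipTest("Need GPU available")

if __name__ == "__main__":
    unittest.main()

Running this sketch on a machine where `HAS_GPU` is False shows the broken variants failing while the fixed ones report as skipped, which is exactly the pattern applied throughout the diff below.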
@@ -92,15 +92,18 @@ def _test_impl_moe_quant(
         self.assertGreaterEqual(compute_error(out_q, out), 10)
         self.assertGreaterEqual(compute_error(out_qc, out), 10)

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_5, "Test only enabled for 2.5+")
     @parameterized.expand(
         [
             ("single_token", 1, False),
             ("multiple_tokens", 8, False),
         ]
     )
     def test_int4wo_fake_dim(self, name, num_tokens, fullgraph):
+        if not torch.cuda.is_available():
+            self.skipTest("Need CUDA available")
+        if not TORCH_VERSION_AT_LEAST_2_5:
+            self.skipTest("Test only enabled for 2.5+")
+
         config = MoEQuantConfig(Int4WeightOnlyConfig())
         tensor_impl_class = TensorCoreTiledAQTTensorImpl

@@ -111,16 +114,20 @@ def test_int4wo_fake_dim(self, name, num_tokens, fullgraph):
             fullgraph=fullgraph,
         )

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(not is_sm_at_least_90(), "Requires CUDA capability >= 9.0")
-    @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_5, "Test only enabled for 2.5+")
     @parameterized.expand(
         [
             ("single_token", 1, True),
             ("multiple_tokens", 8, False),
         ]
     )
     def test_int4wo_base(self, name, num_tokens, fullgraph):
+        if not torch.cuda.is_available():
+            self.skipTest("Need CUDA available")
+        if not is_sm_at_least_90():
+            self.skipTest("Requires CUDA capability >= 9.0")
+        if not TORCH_VERSION_AT_LEAST_2_5:
+            self.skipTest("Test only enabled for 2.5+")
+
         config = Int4WeightOnlyConfig()
         tensor_impl_class = TensorCoreTiledAQTTensorImpl

@@ -131,15 +138,18 @@ def test_int4wo_base(self, name, num_tokens, fullgraph):
             fullgraph=fullgraph,
         )

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_5, "Test only enabled for 2.5+")
     @parameterized.expand(
         [
             ("single_token", 1, False),
             ("multiple_tokens", 8, False),
         ]
     )
     def test_int8wo_fake_dim(self, name, num_tokens, fullgraph):
+        if not torch.cuda.is_available():
+            self.skipTest("Need CUDA available")
+        if not TORCH_VERSION_AT_LEAST_2_5:
+            self.skipTest("Test only enabled for 2.5+")
+
         config = MoEQuantConfig(Int8WeightOnlyConfig())
         tensor_impl_class = PlainAQTTensorImpl

@@ -150,15 +160,18 @@ def test_int8wo_fake_dim(self, name, num_tokens, fullgraph):
             fullgraph=fullgraph,
         )

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_5, "Test only enabled for 2.5+")
     @parameterized.expand(
         [
             ("single_token", 1, True),
             ("multiple_tokens", 8, False),
         ]
     )
     def test_int8wo_base(self, name, num_tokens, fullgraph):
+        if not torch.cuda.is_available():
+            self.skipTest("Need CUDA available")
+        if not TORCH_VERSION_AT_LEAST_2_5:
+            self.skipTest("Test only enabled for 2.5+")
+
         config = Int8WeightOnlyConfig()
         tensor_impl_class = PlainAQTTensorImpl

@@ -169,14 +182,16 @@ def test_int8wo_base(self, name, num_tokens, fullgraph):
             fullgraph=fullgraph,
         )

-    @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_5, "Test only enabled for 2.5+")
     @parameterized.expand(
         [
             ("single_token", 1, True),
             ("multiple_tokens", 8, False),
         ]
     )
     def test_int8wo_base_cpu(self, name, num_tokens, fullgraph):
+        if not TORCH_VERSION_AT_LEAST_2_5:
+            self.skipTest("Test only enabled for 2.5+")
+
         config = Int8WeightOnlyConfig()
         tensor_impl_class = PlainAQTTensorImpl

@@ -188,14 +203,17 @@ def test_int8wo_base_cpu(self, name, num_tokens, fullgraph):
             device="cpu",
         )

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_5, "Test only enabled for 2.5+")
     @parameterized.expand(
         [
             ("multiple_tokens", 32, False),
         ]
     )
     def test_int8dq_fake_dim(self, name, num_tokens, fullgraph):
+        if not torch.cuda.is_available():
+            self.skipTest("Need CUDA available")
+        if not TORCH_VERSION_AT_LEAST_2_5:
+            self.skipTest("Test only enabled for 2.5+")
+
         config = MoEQuantConfig(Int8DynamicActivationInt8WeightConfig())
         base_class = LinearActivationQuantizedTensor

@@ -207,14 +225,17 @@ def test_int8dq_fake_dim(self, name, num_tokens, fullgraph):
             fullgraph=fullgraph,
         )

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_5, "Test only enabled for 2.5+")
     @parameterized.expand(
         [
             ("multiple_tokens", 32, False),
         ]
     )
     def test_int8dq_base(self, name, num_tokens, fullgraph):
+        if not torch.cuda.is_available():
+            self.skipTest("Need CUDA available")
+        if not TORCH_VERSION_AT_LEAST_2_5:
+            self.skipTest("Test only enabled for 2.5+")
+
         config = Int8DynamicActivationInt8WeightConfig()
         base_class = LinearActivationQuantizedTensor

@@ -226,15 +247,18 @@ def test_int8dq_base(self, name, num_tokens, fullgraph):
             fullgraph=fullgraph,
         )

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(not is_sm_at_least_90(), "Requires CUDA capability >= 9.0")
     @parameterized.expand(
         [
             ("single_token", 1, False),
             ("multiple_tokens", 8, False),
         ]
     )
     def test_fp8wo_fake_dim(self, name, num_tokens, fullgraph):
+        if not torch.cuda.is_available():
+            self.skipTest("Need CUDA available")
+        if not is_sm_at_least_90():
+            self.skipTest("Requires CUDA capability >= 9.0")
+
         config = MoEQuantConfig(Float8WeightOnlyConfig())
         tensor_impl_class = Float8AQTTensorImpl

@@ -245,15 +269,18 @@ def test_fp8wo_fake_dim(self, name, num_tokens, fullgraph):
             fullgraph=fullgraph,
         )

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(not is_sm_at_least_90(), "Requires CUDA capability >= 9.0")
     @parameterized.expand(
         [
             ("single_token", 1, True),
             ("multiple_tokens", 8, False),
         ]
     )
     def test_fp8wo_base(self, name, num_tokens, fullgraph):
+        if not torch.cuda.is_available():
+            self.skipTest("Need CUDA available")
+        if not is_sm_at_least_90():
+            self.skipTest("Requires CUDA capability >= 9.0")
+
         config = Float8WeightOnlyConfig()
         tensor_impl_class = Float8AQTTensorImpl

@@ -264,15 +291,18 @@ def test_fp8wo_base(self, name, num_tokens, fullgraph):
             fullgraph=fullgraph,
         )

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(not is_sm_at_least_90(), "Requires CUDA capability >= 9.0")
     @parameterized.expand(
         [
             ("single_token", 1, False),
             ("multiple_tokens", 8, False),
         ]
     )
     def test_fp8dq_fake_dim(self, name, num_tokens, fullgraph):
+        if not torch.cuda.is_available():
+            self.skipTest("Need CUDA available")
+        if not is_sm_at_least_90():
+            self.skipTest("Requires CUDA capability >= 9.0")
+
         config = MoEQuantConfig(Float8DynamicActivationFloat8WeightConfig())
         base_class = LinearActivationQuantizedTensor

@@ -283,15 +313,18 @@ def test_fp8dq_fake_dim(self, name, num_tokens, fullgraph):
             fullgraph=fullgraph,
         )

-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(not is_sm_at_least_90(), "Requires CUDA capability >= 9.0")
     @parameterized.expand(
         [
             ("single_token", 1, True),
             ("multiple_tokens", 8, False),
         ]
     )
     def test_fp8dq_base(self, name, num_tokens, fullgraph):
+        if not torch.cuda.is_available():
+            self.skipTest("Need CUDA available")
+        if not is_sm_at_least_90():
+            self.skipTest("Requires CUDA capability >= 9.0")
+
         config = Float8DynamicActivationFloat8WeightConfig()
         base_class = LinearActivationQuantizedTensor

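One way to confirm the fix locally (a standard pytest flag, not part of this commit): run the file on a CPU-only machine with skip reporting enabled, and the CUDA-gated cases should now be listed as skipped with their reasons rather than running and failing:

python -m pytest test/quantization/test_moe_quant.py -rs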