@@ -103,7 +103,7 @@ def test_int4wo_fake_dim(self, name, num_tokens, fullgraph):
103
103
self .skipTest ("Need CUDA available" )
104
104
if not TORCH_VERSION_AT_LEAST_2_5 :
105
105
self .skipTest ("Test only enabled for 2.5+" )
106
-
106
+
107
107
config = MoEQuantConfig (Int4WeightOnlyConfig ())
108
108
tensor_impl_class = TensorCoreTiledAQTTensorImpl
109
109
@@ -127,7 +127,7 @@ def test_int4wo_base(self, name, num_tokens, fullgraph):
127
127
self .skipTest ("Requires CUDA capability >= 9.0" )
128
128
if not TORCH_VERSION_AT_LEAST_2_5 :
129
129
self .skipTest ("Test only enabled for 2.5+" )
130
-
130
+
131
131
config = Int4WeightOnlyConfig ()
132
132
tensor_impl_class = TensorCoreTiledAQTTensorImpl
133
133
@@ -149,7 +149,7 @@ def test_int8wo_fake_dim(self, name, num_tokens, fullgraph):
149
149
self .skipTest ("Need CUDA available" )
150
150
if not TORCH_VERSION_AT_LEAST_2_5 :
151
151
self .skipTest ("Test only enabled for 2.5+" )
152
-
152
+
153
153
config = MoEQuantConfig (Int8WeightOnlyConfig ())
154
154
tensor_impl_class = PlainAQTTensorImpl
155
155
@@ -171,7 +171,7 @@ def test_int8wo_base(self, name, num_tokens, fullgraph):
171
171
self .skipTest ("Need CUDA available" )
172
172
if not TORCH_VERSION_AT_LEAST_2_5 :
173
173
self .skipTest ("Test only enabled for 2.5+" )
174
-
174
+
175
175
config = Int8WeightOnlyConfig ()
176
176
tensor_impl_class = PlainAQTTensorImpl
177
177
@@ -191,7 +191,7 @@ def test_int8wo_base(self, name, num_tokens, fullgraph):
191
191
def test_int8wo_base_cpu (self , name , num_tokens , fullgraph ):
192
192
if not TORCH_VERSION_AT_LEAST_2_5 :
193
193
self .skipTest ("Test only enabled for 2.5+" )
194
-
194
+
195
195
config = Int8WeightOnlyConfig ()
196
196
tensor_impl_class = PlainAQTTensorImpl
197
197
@@ -213,7 +213,7 @@ def test_int8dq_fake_dim(self, name, num_tokens, fullgraph):
213
213
self .skipTest ("Need CUDA available" )
214
214
if not TORCH_VERSION_AT_LEAST_2_5 :
215
215
self .skipTest ("Test only enabled for 2.5+" )
216
-
216
+
217
217
config = MoEQuantConfig (Int8DynamicActivationInt8WeightConfig ())
218
218
base_class = LinearActivationQuantizedTensor
219
219
@@ -235,7 +235,7 @@ def test_int8dq_base(self, name, num_tokens, fullgraph):
235
235
self .skipTest ("Need CUDA available" )
236
236
if not TORCH_VERSION_AT_LEAST_2_5 :
237
237
self .skipTest ("Test only enabled for 2.5+" )
238
-
238
+
239
239
config = Int8DynamicActivationInt8WeightConfig ()
240
240
base_class = LinearActivationQuantizedTensor
241
241
@@ -258,7 +258,7 @@ def test_fp8wo_fake_dim(self, name, num_tokens, fullgraph):
258
258
self .skipTest ("Need CUDA available" )
259
259
if not is_sm_at_least_90 ():
260
260
self .skipTest ("Requires CUDA capability >= 9.0" )
261
-
261
+
262
262
config = MoEQuantConfig (Float8WeightOnlyConfig ())
263
263
tensor_impl_class = Float8AQTTensorImpl
264
264
@@ -280,7 +280,7 @@ def test_fp8wo_base(self, name, num_tokens, fullgraph):
280
280
self .skipTest ("Need CUDA available" )
281
281
if not is_sm_at_least_90 ():
282
282
self .skipTest ("Requires CUDA capability >= 9.0" )
283
-
283
+
284
284
config = Float8WeightOnlyConfig ()
285
285
tensor_impl_class = Float8AQTTensorImpl
286
286
@@ -302,7 +302,7 @@ def test_fp8dq_fake_dim(self, name, num_tokens, fullgraph):
302
302
self .skipTest ("Need CUDA available" )
303
303
if not is_sm_at_least_90 ():
304
304
self .skipTest ("Requires CUDA capability >= 9.0" )
305
-
305
+
306
306
config = MoEQuantConfig (Float8DynamicActivationFloat8WeightConfig ())
307
307
base_class = LinearActivationQuantizedTensor
308
308
@@ -324,7 +324,7 @@ def test_fp8dq_base(self, name, num_tokens, fullgraph):
324
324
self .skipTest ("Need CUDA available" )
325
325
if not is_sm_at_least_90 ():
326
326
self .skipTest ("Requires CUDA capability >= 9.0" )
327
-
327
+
328
328
config = Float8DynamicActivationFloat8WeightConfig ()
329
329
base_class = LinearActivationQuantizedTensor
330
330
0 commit comments