Fix mixtral-moe generate.py device handling and swapped quant configs; fix float8 layout dispatch check

HDCharles · HDCharles · commit 89ec74b26dd9 · 2025-05-08T11:07:18.000-07:00
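Summary:
- generate.py: the "int4wo" branch now wraps Int4WeightOnlyConfig in MoEQuantConfig (it previously wrapped Float8WeightOnlyConfig), and the "fp8wo-base" branch now uses Float8WeightOnlyConfig (it previously used Int4WeightOnlyConfig).
- generate.py: pass device=device to quantize_.
- generate.py: comment out the per-iteration "Time for inference" and "Bandwidth achieved" prints.
- float8_layout.py: in __torch_dispatch__, the membership list contained the boolean expression `func is aten.index.Tensor` instead of the op `aten.index.Tensor`; the list now holds both ops (aten.select.int, aten.index.Tensor).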

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
diff --git a/torchao/_models/mixtral-moe/generate.py b/torchao/_models/mixtral-moe/generate.py
@@ -271,10 +271,10 @@ def main(
             config = Int4WeightOnlyConfig()
 
         elif "int4wo" in moe_quant:
-            config = MoEQuantConfig(Float8WeightOnlyConfig())
+            config = MoEQuantConfig(Int4WeightOnlyConfig())
 
         elif "fp8wo-base" in moe_quant:
-            config = Int4WeightOnlyConfig()
+            config = Float8WeightOnlyConfig()
 
         elif "fp8wo" in moe_quant:
             config = MoEQuantConfig(Float8WeightOnlyConfig())
@@ -297,7 +297,7 @@ def main(
             )
 
         if config is not None:
-            quantize_(model, config, filter_fn=cond_ffn_filter)
+            quantize_(model, config, filter_fn=cond_ffn_filter, device=device)
             print(
                 f"Time to apply quantization to model: {time.time() - t0:.02f} seconds"
             )
@@ -392,10 +392,10 @@ def callback(x):
         tokens_generated = y.size(-1) - prompt_length
         tokens_sec = tokens_generated / t
         aggregate_metrics["tokens_per_sec"].append(tokens_sec)
-        print(
-            f"Time for inference {i + 1}: {t:.02f} sec total, {tokens_sec:.02f} tokens/sec"
-        )
-        print(f"Bandwidth achieved: {model_size * tokens_sec / 1e9:.02f} GB/s")
+        # print(
+        #     f"Time for inference {i + 1}: {t:.02f} sec total, {tokens_sec:.02f} tokens/sec"
+        # )
+        # print(f"Bandwidth achieved: {model_size * tokens_sec / 1e9:.02f} GB/s")
 
         if i == 0 and device == "cuda" and memory_profile is not None:
             snapshot = torch.cuda.memory._snapshot()
diff --git a/torchao/dtypes/floatx/float8_layout.py b/torchao/dtypes/floatx/float8_layout.py
@@ -159,7 +159,7 @@ def __torch_dispatch__(cls, func, types, args, kwargs):
             raise ValueError(
                 f"Not supported args for copy_ due to metadata mistach: {args[0], args[1]}"
             )
-        elif func in [aten.select.int, func is aten.index.Tensor]:
+        elif func in [aten.select.int, aten.index.Tensor]:
             return return_and_correct_aliasing(
                 func,
                 args,

Original file line number	Diff line number	Diff line change
`@@ -159,7 +159,7 @@ def __torch_dispatch__(cls, func, types, args, kwargs):`
`159`	`159`	`raise ValueError(`
`160`	`160`	`f"Not supported args for copy_ due to metadata mistach: {args[0], args[1]}"`
`161`	`161`	`)`
`162`		`- elif func in [aten.select.int, func is aten.index.Tensor]:`
	`162`	`+ elif func in [aten.select.int, aten.index.Tensor]:`
`163`	`163`	`return return_and_correct_aliasing(`
`164`	`164`	`func,`
`165`	`165`	`args,`