Skip to content

Commit c2660fd

Browse files
committed
fixing CI
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:
1 parent f316a1c commit c2660fd

File tree

5 files changed

+13
-4
lines changed

5 files changed

+13
-4
lines changed

torchao/_models/mixtral-moe/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
This is the benchmarking setup primarily used for testing quantized MoE. You can reproduce the above numbers by running
2+
3+
`sh scripts/prepare.sh`

torchao/_models/mixtral-moe/scripts/download.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def hf_download(repo_id: Optional[str] = None, hf_token: Optional[str] = None) -
3737
parser.add_argument(
3838
"--repo_id",
3939
type=str,
40-
default="checkpoints/mistralai/Mixtral-8x7B-Instruct-v0.1",
40+
default="mistralai/Mixtral-8x7B-Instruct-v0.1",
4141
help="Repository ID to download from.",
4242
)
4343
parser.add_argument(
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
python scripts/download.py --repo_id mistralai/Mixtral-8x7B-Instruct-v0.1
2+
python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/mistralai/Mixtral-8x7B-v0.1

torchao/experimental/tests/test_int8_dynamic_activation_intx_weight.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,9 @@ def test_moe_quant_intx(self):
646646
from torchao.quantization.utils import compute_error
647647

648648
with torch.device("cpu"):
649-
model = MOEFeedForwardAOQuantizable(512, 256, 8, 2, empty_init=False).to(torch.bfloat16)
649+
model = MOEFeedForwardAOQuantizable(512, 256, 8, 2, empty_init=False).to(
650+
torch.bfloat16
651+
)
650652
x = torch.randn(8, 512, dtype=torch.bfloat16)
651653

652654
out = model(x).clone()

torchao/quantization/prototype/moe_quant/quantizable_moe_modules.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,10 @@ def forward(
120120
ordered_token_indices = (
121121
ordered_token_activations.div(top_k).floor().to(torch.int64)
122122
) # [T]
123-
if not expert_indices.is_cuda: # histc doesn't work on cpu for integers
124-
num_tokens_per_expert = torch.bincount(expert_indices.view(-1)+1, minlength=self.num_experts+1)
123+
if not expert_indices.is_cuda: # histc doesn't work on cpu for integers
124+
num_tokens_per_expert = torch.bincount(
125+
expert_indices.view(-1) + 1, minlength=self.num_experts + 1
126+
)
125127
else:
126128
num_tokens_per_expert = torch.histc(
127129
expert_indices,

0 commit comments

Comments
 (0)