[Bugfix] Fix topk_ids indices_type for cutlass w8a8 fp8 moe

minosfuture · minosfuture · commit b76ba2e6d8f3 · 2025-06-24T14:59:58.000-07:00
Signed-off-by: Ming Yang <yming@meta.com>
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -605,8 +605,7 @@ def apply(
             num_expert_group=num_expert_group,
             custom_routing_function=custom_routing_function,
             scoring_func=scoring_func,
-            e_score_correction_bias=e_score_correction_bias,
-            indices_type=torch.uint32)
+            e_score_correction_bias=e_score_correction_bias)
 
         return self.fused_experts(
             x,