We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 906e05d · commit 8aeaa91 · Copy full SHA for 8aeaa91
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -368,6 +368,7 @@ def __init__(
368
"weights")
369
self.input_quant = self.quant_config.target_scheme_map["Linear"].get(
370
"input_activations")
371
+ self.topk_indices_dtype = None
372
373
per_tensor = (self.weight_quant.strategy == QuantizationStrategy.TENSOR
374
and self.input_quant.strategy
@@ -738,6 +739,7 @@ def __init__(
738
739
740
from vllm.model_executor.layers.fused_moe.cutlass_moe import (
741
cutlass_moe_fp8)
742
743
self.fused_experts = cutlass_moe_fp8 # type: ignore
744
self.disable_expert_map = False
745
0 commit comments