Skip to content

Commit 6ca83a4

Browse files
committed
Fix after rebase: the cutlass_moe_fp8 signature has changed
Signed-off-by: Ming Yang <yming@meta.com>
1 parent 376bcde commit 6ca83a4

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -885,19 +885,25 @@ def apply(
885885
scoring_func=scoring_func,
886886
e_score_correction_bias=e_score_correction_bias)
887887

888+
a1_scale = layer.w13_input_scale
889+
a2_scale = layer.w2_input_scale
890+
per_act_token = a1_scale.numel() != 1 if a1_scale is not None else (
891+
a2_scale.numel() != 1 if a2_scale is not None else False)
892+
888893
return self.fused_experts(
889894
x,
890895
layer.w13_weight,
891896
layer.w2_weight,
892897
topk_weights,
893898
topk_ids,
899+
per_act_token=per_act_token,
894900
activation=activation,
895901
global_num_experts=global_num_experts,
896902
expert_map=None if self.disable_expert_map else expert_map,
897903
w1_scale=layer.w13_weight_scale,
898904
w2_scale=layer.w2_weight_scale,
899-
a1_scale=layer.w13_input_scale,
900-
a2_scale=layer.w2_input_scale,
905+
a1_scale=a1_scale,
906+
a2_scale=a2_scale,
901907
)
902908

903909

0 commit comments

Comments
 (0)