File tree Expand file tree Collapse file tree 1 file changed +8
-2
lines changed
vllm/model_executor/layers/quantization/compressed_tensors Expand file tree Collapse file tree 1 file changed +8
-2
lines changed Original file line number Diff line number Diff line change @@ -885,19 +885,25 @@ def apply(
885
885
scoring_func = scoring_func ,
886
886
e_score_correction_bias = e_score_correction_bias )
887
887
888
+ a1_scale = layer .w13_input_scale
889
+ a2_scale = layer .w2_input_scale
890
+ per_act_token = a1_scale .numel () != 1 if a1_scale is not None else (
891
+ a2_scale .numel () != 1 if a2_scale is not None else False )
892
+
888
893
return self .fused_experts (
889
894
x ,
890
895
layer .w13_weight ,
891
896
layer .w2_weight ,
892
897
topk_weights ,
893
898
topk_ids ,
899
+ per_act_token = per_act_token ,
894
900
activation = activation ,
895
901
global_num_experts = global_num_experts ,
896
902
expert_map = None if self .disable_expert_map else expert_map ,
897
903
w1_scale = layer .w13_weight_scale ,
898
904
w2_scale = layer .w2_weight_scale ,
899
- a1_scale = layer . w13_input_scale ,
900
- a2_scale = layer . w2_input_scale ,
905
+ a1_scale = a1_scale ,
906
+ a2_scale = a2_scale ,
901
907
)
902
908
903
909
You can’t perform that action at this time.
0 commit comments