Skip to content

Commit f69fcb5

Browse files
author
Dingli
committed
bugfix
Signed-off-by: Dingli <dingli40@huawei.com>
1 parent 78a00c3 commit f69fcb5

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

vllm_ascend/models/deepseek_v2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
228228

229229
cv_parallel = VLLM_ENABLE_CV_PARALLEL and not is_prefill
230230

231-
if self.n_shared_experts is not None
231+
if self.n_shared_experts is not None:
232232
if not cv_parallel:
233233
shared_output = self.shared_experts(hidden_states)
234234
else:

vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def apply_mlp(hidden_states_wrapper: List[torch.Tensor],
127127
output_dtype=torch.bfloat16,
128128
)
129129
if shared_experts.down_proj.reduce_results and shared_experts.down_proj.tp_size > 1:
130-
shared_output = tensor_model_parallel_all_reduce(x)
130+
shared_output = tensor_model_parallel_all_reduce(shared_output)
131131
if shared_experts:
132132
return hidden_states, shared_output
133133
return hidden_states

0 commit comments

Comments (0)