Commit f05ee46

zzzzwwjjwhx-sjtu authored and committed
fix: fix deepseek accuracy when ep_size=1
Signed-off-by: zzzzwwjj <1183291235@qq.com>
1 parent a990949 commit f05ee46

File tree

2 files changed (+2, -0 lines)


vllm_ascend/ops/fused_moe.py

Lines changed: 1 addition & 0 deletions

@@ -198,6 +198,7 @@ def fused_experts(
     num_experts = w1.shape[0]
     dtype = hidden_states.dtype
     device = hidden_states.device
+    topk_weights = topk_weights.to(dtype)
     # assert dtype in [torch.float32, torch.float16, torch.bfloat16
     #                  ], "Only float32, float16, and bfloat16 are supported"

vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 1 addition & 0 deletions

@@ -342,6 +342,7 @@ def fused_experts(hidden_states: torch.Tensor,
     num_experts = w1.shape[0]
     dtype = hidden_states.dtype
     device = hidden_states.device
+    topk_weights = topk_weights.to(dtype)

     if expert_map is not None:
         # Generate token indices and flatten
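Both hunks apply the same one-line fix: the router's top-k weights (typically produced by a float32 softmax) are cast to the activation dtype before being used to combine expert outputs. The sketch below is a minimal, hypothetical illustration of that pattern; `combine_expert_outputs` and its shapes are assumptions for demonstration, not the actual vllm-ascend implementation.

```python
import torch

def combine_expert_outputs(expert_out: torch.Tensor,
                           topk_weights: torch.Tensor) -> torch.Tensor:
    # Mirror the commit's fix: cast router weights (often float32) to the
    # activation dtype before the weighted sum over the top-k experts.
    topk_weights = topk_weights.to(expert_out.dtype)
    # expert_out: [tokens, top_k, hidden]; weights broadcast over hidden dim.
    return (expert_out * topk_weights.unsqueeze(-1)).sum(dim=1)

hidden = torch.randn(4, 2, 8, dtype=torch.float16)  # [tokens, top_k, hidden]
weights = torch.softmax(torch.randn(4, 2), dim=-1)  # float32 router weights
out = combine_expert_outputs(hidden, weights)
print(out.dtype)  # torch.float16
```

Without the cast, mixing float32 weights with half-precision activations can change the dtype or numeric path of the combine step, which is consistent with the accuracy issue this commit reports for ep_size=1.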
