add mc2 mask

weiguihua2 · weiguihua2 · commit c1979f40b6c5 · 2025-07-08T10:05:42.000+08:00
Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
diff --git a/vllm_ascend/attention/mla_v1.py b/vllm_ascend/attention/mla_v1.py
@@ -523,8 +523,8 @@ def build(
                                                   num_reqs_pad_size]
             else:
                 seq_lens_list = seq_lens.tolist()
-            mc2_mask = self.generate_active_mask(
-                num_actual_tokens, num_reqs + num_reqs_pad_size)
+            mc2_mask = self.generate_active_mask(num_actual_tokens,
+                                                 num_reqs + num_reqs_pad_size)
 
             decode_metadata = AscendMLADecodeMetadata(
                 input_positions=input_positions,
diff --git a/vllm_ascend/ops/fused_moe.py b/vllm_ascend/ops/fused_moe.py
@@ -138,7 +138,7 @@ def fused_experts_with_mc2(
     # NOTE: Currently, when in A3 or in torchair graph, we need to pass in some extra param into dispatch & combine
     need_extra_args = (get_ascend_soc_version() == AscendSocVersion.A3
                        or is_torchair)
-    
+
     # NOTE: Currently, when in A3, we need to pass in some extra param into dispatch & combine
     a3_need_extra_args = get_ascend_soc_version() == AscendSocVersion.A3
 
@@ -1168,7 +1168,7 @@ def forward(self,
         if shared_experts:
             if not self.enable_multistream_moe or fused_moe_state != FusedMoEState.MC2:
                 shared_hidden_states = shared_experts(hidden_states)
-        
+
         attn_metadata = get_forward_context().attn_metadata
         mc2_mask = attn_metadata.decode.mc2_mask if attn_metadata is not None and attn_metadata.decode is not None else None
 
@@ -1180,8 +1180,8 @@ def forward(self,
                 router_logits = nn.functional.pad(
                     router_logits, (0, 0, 0, tp_size - num_tokens))
                 if mc2_mask is not None:
-                    mc2_mask = nn.functional.pad(
-                        mc2_mask, (0, tp_size - num_tokens))
+                    mc2_mask = nn.functional.pad(mc2_mask,
+                                                 (0, tp_size - num_tokens))
             chunk_hidden_states = torch.tensor_split(hidden_states,
                                                      tp_size,
                                                      dim=0)
diff --git a/vllm_ascend/quantization/w8a8_dynamic.py b/vllm_ascend/quantization/w8a8_dynamic.py
@@ -233,7 +233,7 @@ def fused_experts_with_mc2(
     # NOTE: Currently, when in A3 or in torchair graph, we need to pass in some extra param into dispatch & combine
     need_extra_args = (get_ascend_soc_version() == AscendSocVersion.A3
                        or is_torchair)
-    
+
     # NOTE: Currently, when in A3, we need to pass in some extra param into dispatch & combine
     a3_need_extra_args = get_ascend_soc_version() == AscendSocVersion.A3