
Commit 343955c

[CI] Follow vLLM FusedMoEParallelConfig interface change and clean up unused config (#1625)

Upstream commit vllm-project/vllm@78fe775 reverted vLLM's FusedMoEParallelConfig interface change. This PR does the same on the vllm-ascend side to fix the CI error.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>

1 parent 4e91018 commit 343955c
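For context on the upstream change: vLLM's FusedMoEParallelConfig.make() dropped the world_size_ keyword argument when commit 78fe775 reverted the earlier interface change. A minimal before/after sketch, inferred entirely from the fused_moe.py diff below (the upstream definition itself is not part of this commit):

# Before the upstream revert: callers passed an explicit world size.
moe_parallel_config = FusedMoEParallelConfig.make(
    tp_size_=tp_size,
    dp_size_=dp_size,
    world_size_=get_world_group().world_size,  # keyword removed by the revert
    vllm_parallel_config=vllm_config.parallel_config)

# After the upstream revert: no world_size_ keyword; the world size is
# presumably derived inside vLLM from vllm_parallel_config.
moe_parallel_config = FusedMoEParallelConfig.make(
    tp_size_=tp_size,
    dp_size_=dp_size,
    vllm_parallel_config=vllm_config.parallel_config)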

File tree: 4 files changed, +13 −26 lines

- tests/ut/test_ascend_config.py
- vllm_ascend/ascend_config.py
- vllm_ascend/attention/attention_v1_torchair.py
- vllm_ascend/ops/fused_moe.py

tests/ut/test_ascend_config.py

Lines changed: 3 additions & 3 deletions

@@ -5,8 +5,8 @@
 from transformers import PretrainedConfig
 from vllm.config import ModelConfig, VllmConfig
 
-from vllm_ascend.ascend_config import (check_ascend_config,
-                                       check_torchair_supported,
+from vllm_ascend.ascend_config import (_check_torchair_supported,
+                                       check_ascend_config,
                                        clear_ascend_config, get_ascend_config,
                                        init_ascend_config)
 
@@ -248,5 +248,5 @@ def test_check_torchair_supported(self):
         test_cases = [('deepseek_v3', True), ('PanguProMoE', True),
                       ('qwen', False), ('llama', False)]
         for model_type, expected_output in test_cases:
-            self.assertEqual(check_torchair_supported(model_type),
+            self.assertEqual(_check_torchair_supported(model_type),
                              expected_output)

vllm_ascend/ascend_config.py

Lines changed: 3 additions & 3 deletions

@@ -21,7 +21,7 @@
 TORCHAIR_MODEL_LIST = ["deepseek", "pangu"]
 
 
-def check_torchair_supported(model_type: str):
+def _check_torchair_supported(model_type: str):
     for supported_model in TORCHAIR_MODEL_LIST:
         if supported_model in model_type.lower():
             return True
@@ -147,10 +147,10 @@ def check_ascend_config(vllm_config, enforce_eager):
     else:
         # torchair_graph case
         if ascend_config.torchair_graph_config.enabled:
-            # torchair_graph is supported for deepseek model only currently.
+            # torchair_graph is supported for deepseek/pangu model only.
             if vllm_config.model_config:
                 model_type = vllm_config.model_config.hf_config.model_type
-                if not check_torchair_supported(model_type):
+                if not _check_torchair_supported(model_type):
                     raise NotImplementedError(
                         "Torchair graph mode only works with following model types:"
                         f"{TORCHAIR_MODEL_LIST}.")

vllm_ascend/attention/attention_v1_torchair.py

Lines changed: 0 additions & 3 deletions

@@ -27,7 +27,6 @@
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.worker.gpu_input_batch import InputBatch
 
-from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.attention.attention_v1 import AscendAttentionState
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
                                nd_to_nz_2d)
@@ -160,8 +159,6 @@ class AscendAttentionTorchairMetadataBuilder:
 
     def __init__(self, runner):
         self.runner = runner
-        self.torchair_graph_enabled = get_ascend_config(
-        ).torchair_graph_config.enabled
 
     def reorder_batch(self, input_batch: "InputBatch",
                       scheduler_output: "SchedulerOutput") -> bool:

vllm_ascend/ops/fused_moe.py

Lines changed: 7 additions & 17 deletions

@@ -26,8 +26,7 @@
 from vllm.distributed import (GroupCoordinator, get_tensor_model_parallel_rank,
                               get_tensor_model_parallel_world_size,
                               tensor_model_parallel_all_reduce)
-from vllm.distributed.parallel_state import (get_dp_group, get_tp_group,
-                                             get_world_group)
+from vllm.distributed.parallel_state import get_dp_group, get_tp_group
 from vllm.forward_context import get_forward_context
 from vllm.model_executor.layers.fused_moe.layer import (
     FusedMoE, UnquantizedFusedMoEMethod, determine_expert_map)
@@ -1119,21 +1118,12 @@ def __init__(
 
         vllm_config = get_current_vllm_config()
 
-        if vllm_version_is("0.9.1"):
-            self.moe_parallel_config = FusedMoEParallelConfig.make(
-                tp_size_=(tp_size if tp_size is not None else
-                          get_tensor_model_parallel_world_size()),
-                dp_size_=(dp_size if dp_size is not None else
-                          get_dp_group().world_size),
-                vllm_parallel_config=vllm_config.parallel_config)
-        else:
-            self.moe_parallel_config = FusedMoEParallelConfig.make(
-                tp_size_=(tp_size if tp_size is not None else
-                          get_tensor_model_parallel_world_size()),
-                dp_size_=(dp_size if dp_size is not None else
-                          get_dp_group().world_size),
-                world_size_=get_world_group().world_size,
-                vllm_parallel_config=vllm_config.parallel_config)
+        self.moe_parallel_config = FusedMoEParallelConfig.make(
+            tp_size_=(tp_size if tp_size is not None else
+                      get_tensor_model_parallel_world_size()),
+            dp_size_=(dp_size
+                      if dp_size is not None else get_dp_group().world_size),
+            vllm_parallel_config=vllm_config.parallel_config)
 
         self.top_k = top_k
         self.num_experts = num_experts
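The deleted branch gated on vllm_version_is("0.9.1") to pick between the two call shapes. A hypothetical alternative, not part of this commit, would feature-detect the installed vLLM's signature instead of pinning version strings; the FusedMoEParallelConfig import path below is an assumption and may differ across vLLM releases:

import inspect

from vllm.distributed.parallel_state import get_world_group
# Assumed import path; FusedMoEParallelConfig has moved between modules
# across vLLM releases.
from vllm.model_executor.layers.fused_moe.config import FusedMoEParallelConfig


def make_moe_parallel_config(tp_size_, dp_size_, vllm_parallel_config):
    """Hypothetical shim: pass world_size_ only if the installed vLLM accepts it."""
    kwargs = dict(tp_size_=tp_size_, dp_size_=dp_size_,
                  vllm_parallel_config=vllm_parallel_config)
    # Inspect the upstream signature at runtime rather than branching on a
    # version string, so either vLLM interface works unchanged.
    if "world_size_" in inspect.signature(FusedMoEParallelConfig.make).parameters:
        kwargs["world_size_"] = get_world_group().world_size
    return FusedMoEParallelConfig.make(**kwargs)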
