
Commit 5ed12bb

Rename enable_multistream_shared_expert to enable_multistream_moe
Signed-off-by: sdmyzlp <lrwei2@petalmail.com>
1 parent d0e13fd commit 5ed12bb

5 files changed, +13 -13 lines changed

docs/source/user_guide/additional_config.md

Lines changed: 2 additions & 2 deletions
@@ -38,11 +38,11 @@ The details of each config option are as follows:
 | Name | Type | Default | Description |
 | ---- | ---- | ------- | ----------- |
 | `enabled` | bool | `False` | Whether to enable torchair graph mode |
+| `enable_multistream_moe`| bool | `False` | Whether to enable multistream shared expert |
 | `enable_view_optimize` | bool | `True` | Whether to enable torchair view optimization |
 | `use_cached_graph` | bool | `False` | Whether to use cached graph |
 | `graph_batch_sizes` | list[int] | `[]` | The batch size for torchair graph cache |
 | `graph_batch_sizes_init` | bool | `False` | Init graph batch size dynamically if `graph_batch_sizes` is empty |
-| `enable_multistream_shared_expert`| bool | `False` | Whether to enable multistream shared expert |
 
 **ascend_scheduler_config**
 
@@ -63,7 +63,7 @@ A full example of additional configuration is as follows:
         "use_cached_graph": true,
         "graph_batch_sizes": [1, 2, 4, 8],
         "graph_batch_sizes_init": false,
-        "enable_multistream_shared_expert": false
+        "enable_multistream_moe": false
     },
     "ascend_scheduler_config": {
         "enabled": true,

tests/singlecard/test_ascend_config.py

Lines changed: 2 additions & 2 deletions
@@ -58,7 +58,7 @@ def test_run_with_ascend_config():
             "use_cached_graph": True,
             "graph_batch_sizes": [1, 2, 4, 8],
             "graph_batch_sizes_init": False,
-            "enable_multistream_shared_expert": True,
+            "enable_multistream_moe": True,
         },
         "ascend_scheduler_config": {
             "enabled": True,
@@ -79,7 +79,7 @@ def test_run_with_ascend_config():
                 1, 2, 4, 8
             ]
             assert not ascend_config.torchair_graph_config.graph_batch_sizes_init
-            assert ascend_config.torchair_graph_config.enable_multistream_shared_expert
+            assert ascend_config.torchair_graph_config.enable_multistream_moe
             assert ascend_config.ascend_scheduler_config.enabled
             assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
             assert ascend_config.expert_tensor_parallel_size == 1

vllm_ascend/ascend_config.py

Lines changed: 2 additions & 2 deletions
@@ -53,8 +53,8 @@ def __init__(self, torchair_graph_config):
             "graph_batch_sizes", [])
         self.graph_batch_sizes_init = torchair_graph_config.get(
             "graph_batch_sizes_init", False)
-        self.enable_multistream_shared_expert = torchair_graph_config.get(
-            "enable_multistream_shared_expert", False)
+        self.enable_multistream_moe = torchair_graph_config.get(
+            "enable_multistream_moe", False)
         self.enable_view_optimize = torchair_graph_config.get(
             "enable_view_optimize", True)
 
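Because the flag is looked up with `dict.get()` and a `False` default, the old key is simply ignored after the rename. A standalone sketch of that behaviour (the class below is illustrative, not the repository's actual `TorchairGraphConfig`):

```python
class TorchairGraphConfigSketch:
    """Illustrative stand-in for the renamed-option lookup."""

    def __init__(self, torchair_graph_config: dict):
        # dict.get() falls back to False when the key is absent, so the
        # feature stays off unless the user opts in with the new name.
        self.enable_multistream_moe = torchair_graph_config.get(
            "enable_multistream_moe", False)


cfg = TorchairGraphConfigSketch({"enable_multistream_shared_expert": True})
assert cfg.enable_multistream_moe is False  # old key is silently ignored
```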

vllm_ascend/models/deepseek_v2.py

Lines changed: 3 additions & 3 deletions
@@ -224,8 +224,8 @@ def __init__(
 
         ascend_config = get_ascend_config()
         self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled
-        self.enable_multistream_shared_expert = \
-            ascend_config.torchair_graph_config.enable_multistream_shared_expert
+        self.enable_multistream_moe = \
+            ascend_config.torchair_graph_config.enable_multistream_moe
 
     def forward(
         self,
@@ -248,7 +248,7 @@ def forward(
 
         num_tokens, hidden_size = hidden_states.shape
 
-        multistream = self.enable_multistream_shared_expert and not is_prefill
+        multistream = self.enable_multistream_moe and not is_prefill
 
         old_hidden_states = hidden_states.clone()
 
vllm_ascend/ops/fused_moe.py

Lines changed: 4 additions & 4 deletions
@@ -1027,8 +1027,8 @@ def __init__(
 
         ascend_config = get_ascend_config()
         self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled
-        self.enable_multistream_shared_expert = \
-            ascend_config.torchair_graph_config.enable_multistream_shared_expert
+        self.enable_multistream_moe = \
+            ascend_config.torchair_graph_config.enable_multistream_moe
 
         if self.scoring_func != "softmax" and not self.use_grouped_topk:
             raise ValueError("Only softmax scoring function is supported for "
@@ -1123,7 +1123,7 @@ def forward(self,
             enable_force_load_balance=enable_force_load_balance,
             **kwargs)
 
-        if self.enable_multistream_shared_expert and not is_prefill:
+        if self.enable_multistream_moe and not is_prefill:
             hidden_states, shared_output = hidden_states
 
         if self.dp_size > 1:
@@ -1148,6 +1148,6 @@ def forward(self,
         if self.reduce_results and (self.tp_size > 1 or self.ep_size > 1):
             hidden_states = tensor_model_parallel_all_reduce(hidden_states)
 
-        if self.enable_multistream_shared_expert and not is_prefill:
+        if self.enable_multistream_moe and not is_prefill:
             return hidden_states, shared_output
         return hidden_states
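All of the call sites above gate on the same condition: the flag is set and the request is in decode rather than prefill, in which case the MoE layer also hands back the shared-expert output as a second tensor. A condensed sketch of that pattern (function and variable names are illustrative, not the repository's actual forward paths):

```python
from typing import Optional, Tuple

import torch


def moe_forward_sketch(
    hidden_states: torch.Tensor,
    enable_multistream_moe: bool,
    is_prefill: bool,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
    """Illustrative only: decode-only gating controlled by the renamed flag."""
    multistream = enable_multistream_moe and not is_prefill

    shared_output: Optional[torch.Tensor] = None
    if multistream:
        # In the real code the shared-expert output is computed on a
        # separate stream and returned alongside the routed-expert output.
        shared_output = hidden_states.clone()

    return hidden_states, shared_output


# Prefill never takes the multistream path, even with the flag enabled.
x = torch.randn(4, 8)
_, shared = moe_forward_sketch(x, enable_multistream_moe=True, is_prefill=True)
assert shared is None
```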
