
Commit 2c0fde8

wanghanqingLYT authored and yangcheng (AJ) committed
improve the implementation of communication between the main process and the eplb process
1 parent 66f7388 commit 2c0fde8

File tree: 4 files changed, 23 additions and 14 deletions


vllm_ascend/eplb/adaptor/vllm_adaptor.py

Lines changed: 3 additions & 3 deletions
@@ -32,7 +32,7 @@ def __init__(self, model, **args):
         self.param_dict = dict(self.model.named_parameters())
         self.num_dense_layers = self.model.config.first_k_dense_replace
         self.num_moe_layers = self.model.config.num_hidden_layers - self.num_dense_layers
-        self.global_expert_num = 256
+        self.global_expert_num = self.model.config.n_routed_experts

         # TODO: init self.expert_weight_names depending on different model types, only deepseek v3 w8a8 is supported here
         self.expert_weight_names = ["w13_weight", "w2_weight", "w13_weight_scale", "w13_weight_offset",
@@ -92,7 +92,7 @@ def do_update_expert_weight(self, layer_id, expert_id_before_replace, buffer_ten
         expert_tensor = self.param_dict[complete_name].data[local_expert_id]
         expert_tensor.copy_(self.buffer_tensor_dict[name][buffer_tensor_id])

-    def generate_index_dicts(self,tensor_2d):
+    def generate_index_dicts(self, tensor_2d):
         dict_list = []
         current_idx = 0
@@ -137,7 +137,7 @@ def do_update_log2phy_map(self, layer_id, updated_log2phy_map):
         rank_id = torch.distributed.get_rank()
         if self.log2phy_map_per_layer[layer_id] is not None:
             self.log2phy_map_per_layer[layer_id].copy_(updated_log2phy_map[rank_id])
-
+
     def global2local(self,
                      placement: torch.Tensor,
                      E_local: int
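
Note on this hunk: the global expert count is no longer hard-coded to 256 but read from the model config's n_routed_experts, so the adaptor follows whatever the loaded checkpoint declares. A minimal sketch of the derived values, using a stand-in config object with illustrative DeepSeek-V3-style numbers (the SimpleNamespace below is not the real model config):

# Sketch only: stand-in config with illustrative DeepSeek-V3-style values.
from types import SimpleNamespace

config = SimpleNamespace(
    num_hidden_layers=61,      # total transformer layers
    first_k_dense_replace=3,   # leading dense (non-MoE) layers
    n_routed_experts=256,      # routed experts per MoE layer
)

num_dense_layers = config.first_k_dense_replace
num_moe_layers = config.num_hidden_layers - num_dense_layers
global_expert_num = config.n_routed_experts  # was the hard-coded 256 before this commit

print(num_dense_layers, num_moe_layers, global_expert_num)  # 3 58 256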

vllm_ascend/eplb/core/policy/policy_factory.py

Lines changed: 1 addition & 1 deletion
@@ -10,6 +10,6 @@ class PolicyFactory:
     def generate_policy(policy_type: int, config: DynamicConfig) -> EplbPolicy:
         policy = {
             0:MockLoadBalance , # MockLoadBalance
-            1:DynamicEP, # When real eplb algorithm is ready, recover this
+            1:DynamicEP,
         }
         return policy.get(policy_type, MockLoadBalance)(config)
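
For context, generate_policy dispatches on an integer policy type and falls back to MockLoadBalance for unknown values; this hunk only drops a stale comment next to DynamicEP. A self-contained sketch of that dispatch pattern, with placeholder stub classes standing in for the real eplb policy classes:

# Sketch of the dict-based dispatch with a fallback; the classes here are illustrative stubs.
class DynamicConfig: ...

class EplbPolicy:
    def __init__(self, config: DynamicConfig):
        self.config = config

class MockLoadBalance(EplbPolicy): ...
class DynamicEP(EplbPolicy): ...

def generate_policy(policy_type: int, config: DynamicConfig) -> EplbPolicy:
    policy = {
        0: MockLoadBalance,
        1: DynamicEP,
    }
    return policy.get(policy_type, MockLoadBalance)(config)

assert isinstance(generate_policy(1, DynamicConfig()), DynamicEP)
assert isinstance(generate_policy(99, DynamicConfig()), MockLoadBalance)  # unknown type -> fallback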

vllm_ascend/eplb/core/worker/eplb_worker.py

Lines changed: 12 additions & 9 deletions
@@ -128,7 +128,7 @@ def compose_expert_update_info(self, updated_expert_maps, current_expert_maps):

             if not torch.isin(torch.tensor(expert_id), experts_to_send).any():
                 # if expert_id are not sent out from any npu, it will be copied from one npu holding this expert
-                candidate_src_rank_indices = torch.where(current_expert_maps_this_layer[:, expert_id] != -1)
+                candidate_src_rank_indices = torch.where(current_expert_maps_this_layer[:, expert_id] != -1)[0]
             else:
                 candidate_src_rank_indices = src_rank_indices[experts_to_send == expert_id]
@@ -245,7 +245,7 @@ def __init__(self, shared_dict, planner_q, block_update_q, policy_type: int = 0,
         self.worker = EplbWorker(self.shared_dict, self.policy_type, self.enable_d2d)


-    def worker_process(self,planner_q,block_update_q):
+    def worker_process(self, planner_q, block_update_q):
         """
         Subprocess entry: bind to specified NPU, loop waiting for planner_q to wake up, call do_update, then notify main process update is complete.
         """
@@ -254,14 +254,17 @@ def worker_process(self,planner_q,block_update_q):

                 planner_q.get()

-                update_info = self.worker.do_update()
+                update_info_generator = self.worker.do_update()
+                update_info_list = []

-                for (a,b,c,d) in update_info:
-                    while True:
-                        if not block_update_q.empty():
-                            continue
-                        block_update_q.put((a,b,c,d))
-                        break
+                for (send_info , recv_info , new_expert_map, layer_id) in update_info_generator:
+                    update_info_list.append((send_info , recv_info , new_expert_map, layer_id))
+
+                while True:
+                    if not block_update_q.empty():
+                        continue
+                    block_update_q.put(update_info_list)
+                    break

             except Exception as e:
                 logger.warning(f"[EPLB subprocess Exiting due to error: {e}", exc_info=True)
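
Two things are worth noting about this file. First, the [0] appended to the torch.where call matters because torch.where with only a condition returns a tuple of index tensors, one per dimension; indexing [0] yields the plain 1-D tensor of candidate source ranks, matching the shape produced by the src_rank_indices branch. A toy illustration (the expert map values are made up):

# torch.where(cond) returns a tuple of index tensors; [0] extracts the 1-D rank indices.
import torch

# Toy per-layer map: rows = ranks, columns = global experts, -1 = expert not held.
current_expert_maps_this_layer = torch.tensor([[ 0, -1],
                                               [-1,  1],
                                               [ 2, -1]])
expert_id = 0

as_tuple = torch.where(current_expert_maps_this_layer[:, expert_id] != -1)
as_tensor = torch.where(current_expert_maps_this_layer[:, expert_id] != -1)[0]
print(as_tuple)   # (tensor([0, 2]),)  -- a tuple, awkward to index like a tensor
print(as_tensor)  # tensor([0, 2])     -- ranks currently holding expert 0

Second, and this is the point of the commit message: the worker no longer puts one (send_info, recv_info, new_expert_map, layer_id) tuple on block_update_q per MoE layer; it drains the do_update() generator into a list and enqueues the whole batch with a single put. A reduced sketch of the producer side, where fake_do_update is a stand-in for EplbWorker.do_update():

# Producer-side sketch: collect all per-layer update tuples, then enqueue once.
from multiprocessing import Queue

def fake_do_update(num_moe_layers: int = 4):
    # Stand-in for EplbWorker.do_update(): yields one tuple per MoE layer.
    for layer_id in range(num_moe_layers):
        yield ({}, {}, f"expert_map_layer_{layer_id}", layer_id)

def worker_side(block_update_q: Queue) -> None:
    update_info_list = []
    for (send_info, recv_info, new_expert_map, layer_id) in fake_do_update():
        update_info_list.append((send_info, recv_info, new_expert_map, layer_id))
    # One put per EPLB round instead of one put (plus busy-wait) per layer.
    block_update_q.put(update_info_list)

if __name__ == "__main__":
    q = Queue()
    worker_side(q)
    print(len(q.get()))  # 4: all layers' update info arrives as a single message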

vllm_ascend/eplb/eplb_updator.py

Lines changed: 7 additions & 1 deletion
@@ -41,6 +41,7 @@ def init_eplb(self):
         self.update_in_flight = False

         self.reqs = []
+        self.update_info_all = []

         self.cur_iterations: torch.int64 = 0

@@ -88,8 +89,12 @@ def wakeup_eplb_worker(self):
     def forward_before(self):
         self.get_init_expert_map()

+        # Batch after eplb process being triggered, get update info provided by eplb process
+        if self.update_in_flight and self.weight_update_counter == 0:
+            self.update_info_all = self.block_update_queue.get()
+
         if self.update_in_flight and self.weight_update_counter < self.num_moe_layers:
-            (expert_send_info, expert_recv_info, updated_expert_map, layer_id) = self.block_update_queue.get()
+            (expert_send_info, expert_recv_info, updated_expert_map, layer_id) = self.update_info_all.pop(0)
             rank_id = torch.distributed.get_rank()
             expert_send_info_this_rank = expert_send_info[rank_id] if rank_id in expert_send_info else []
             expert_recv_info_this_rank = expert_recv_info[rank_id] if rank_id in expert_recv_info else []
@@ -100,6 +105,7 @@ def forward_before(self):
             if self.weight_update_counter == self.num_moe_layers:
                 self.weight_update_counter = 0
                 self.update_in_flight = False
+                self.update_info_all = []

         # set asynchronous stream for d2d expert weight update
         self.reqs = []
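
The consumer side mirrors the worker change: forward_before now does a single blocking get when an update is in flight and no layer has been applied yet (weight_update_counter == 0), then pops one layer's info per forward step until all MoE layers are done, at which point update_info_all is cleared. A condensed, self-contained sketch of that state machine (queue contents and num_moe_layers are placeholders, and the actual D2D weight copy is omitted):

# Consumer-side sketch of the batched protocol driven from forward_before().
import queue

num_moe_layers = 4
block_update_queue = queue.Queue()
block_update_queue.put([({}, {}, f"map_{i}", i) for i in range(num_moe_layers)])

update_in_flight = True
weight_update_counter = 0
update_info_all = []

for _ in range(num_moe_layers):  # each iteration ~ one forward_before() call
    if update_in_flight and weight_update_counter == 0:
        update_info_all = block_update_queue.get()  # one blocking get per EPLB round
    if update_in_flight and weight_update_counter < num_moe_layers:
        send_info, recv_info, new_map, layer_id = update_info_all.pop(0)
        weight_update_counter += 1                  # the real code updates this layer's weights here
        if weight_update_counter == num_moe_layers:
            weight_update_counter = 0
            update_in_flight = False
            update_info_all = []

print(update_in_flight)  # False: every layer consumed from the single batched message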
