Skip to content

Commit a9584dd

Browse files
Author: lt
Commit message: add get expert workload
Commit: a9584dd (1 parent: 2e824cd)

File tree

4 files changed: +30 additions, −12 deletions

vllm_ascend/eplb/adaptor/vllm_adaptor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def get_expert_tensor(self, layer_id, global_expert_id_to_send):
7171

7272
def get_rank_expert_workload(
7373
self,
74-
num_moe_layers: int,
74+
num_moe_layers: int,
7575
) -> torch.Tensor:
7676
# 收集各层 topk_ids -> list of [B, K]
7777
all_topk_ids = [self.model.get_topk_ids(i) for i in range(num_moe_layers)]

vllm_ascend/eplb/eplb_updator.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from vllm_ascend.eplb.core.worker.eplb_worker import EplbProcess
2323
from vllm_ascend.eplb.core.loader.device_transfer_loader import D2DExpertWeightLoader
2424

25+
2526
class EplbUpdator:
2627

2728
def __init__(self, redundant_enable):
@@ -34,7 +35,7 @@ def set_adaptor(self, adaptor):
3435

3536
def init_eplb(self, redundant_enable):
3637

37-
self.redundant_enable = redundant_enable
38+
self.redundant_enable = redundant_enable
3839
self.num_iterations: torch.int64 = 130
3940

4041
self.weight_update_counter = 0
@@ -63,18 +64,25 @@ def init_eplb(self, redundant_enable):
6364
})
6465

6566
self.eplb = EplbProcess(
66-
shared_dict = self.shared_dict,
67-
planner_q = self.planner_block_queue,
68-
block_update_q = self.block_update_queue,
69-
redundant_enable = self.redundant_enable,
70-
policy_type = 2,
71-
enable_d2d = True
67+
shared_dict=self.shared_dict,
68+
planner_q=self.planner_block_queue,
69+
block_update_q=self.block_update_queue,
70+
redundant_enable=self.redundant_enable,
71+
policy_type=2,
72+
enable_d2d=True
7273
)
7374

7475
self.eplb_process = self.eplb._launch_process()
7576

77+
# todo - 新增 eplb 周期统计
78+
79+
7680
logger.info(f"[ModelRunner] Launched EPLB process (pid={self.eplb_process.pid})")
7781

82+
def get_expert_load(self) -> str:
83+
"""todo 确认moe_load的值是什么类型"""
84+
# return '{"a":"b"}' # mock
85+
return self.shared_dict['moe_load']
7886

7987
def get_update_iteration(self):
8088
self.cur_iterations = self.cur_iterations + 1
@@ -101,14 +109,16 @@ def forward_before(self):
101109
self.weight_loading = True
102110

103111
if self.update_in_flight and self.weight_loading and self.weight_update_counter < self.num_moe_layers:
104-
(expert_send_info, expert_recv_info, updated_expert_map, log2phy_map, layer_id) = self.update_info_all.pop(0)
112+
(expert_send_info, expert_recv_info, updated_expert_map, log2phy_map, layer_id) = self.update_info_all.pop(
113+
0)
105114
rank_id = torch.distributed.get_rank()
106115
self.eplb_loader.set_log2phy_map(log2phy_map)
107116
expert_send_info_this_rank = expert_send_info[rank_id] if rank_id in expert_send_info else []
108117
expert_recv_info_this_rank = expert_recv_info[rank_id] if rank_id in expert_recv_info else []
109-
#logger.info(f"check update info, layer = {layer_id}, send = {expert_send_info_this_rank}, recv = {expert_recv_info_this_rank}")
118+
# logger.info(f"check update info, layer = {layer_id}, send = {expert_send_info_this_rank}, recv = {expert_recv_info_this_rank}")
110119
self.eplb_loader.generate_expert_d2d_transfer_task(expert_send_info_this_rank,
111-
expert_recv_info_this_rank, updated_expert_map[rank_id], layer_id + 3)
120+
expert_recv_info_this_rank, updated_expert_map[rank_id],
121+
layer_id + 3)
112122
self.weight_update_counter += 1
113123
if self.weight_update_counter == self.num_moe_layers:
114124
self.weight_update_counter = 0
@@ -177,7 +187,7 @@ def warm_up_eplb(self):
177187
continue
178188
comm_op_list.append(
179189
dist.P2POp(dist.irecv, src_tensor, src_rank)
180-
)
190+
)
181191
if comm_op_list:
182192
reqs = dist.batch_isend_irecv(comm_op_list)
183193

vllm_ascend/worker/model_runner_v1.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1422,6 +1422,9 @@ def _profile_multimodal(self) -> None:
14221422
# Cache the dummy encoder outputs.
14231423
self.encoder_cache["tmp"] = dict(enumerate(dummy_encoder_outputs))
14241424

1425+
def do_get_expert_load(self) -> str:
1426+
return self.eplb_updator.get_expert_load()
1427+
14251428
@torch.inference_mode()
14261429
def _dummy_run(
14271430
self,

vllm_ascend/worker/worker_v1.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,11 @@ def execute_model(
180180
output = self.model_runner.execute_model(scheduler_output)
181181
return output if self.is_driver_worker else None
182182

183+
def get_expert_load(self) -> str:
184+
""" todo 一共几个worker"""
185+
moe_load = self.model_runner.do_get_expert_load()
186+
return moe_load
187+
183188
def load_model(self) -> None:
184189
if self.vllm_config.model_config.enable_sleep_mode:
185190
allocator = CaMemAllocator.get_instance()

Comments (0)