Skip to content

Commit b8b6175

Browse files
committed
init matrix core acclib dir struct
1 parent d5e4ef5 commit b8b6175

File tree

7 files changed

+106
-0
lines changed

7 files changed

+106
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# TODO

vllm_ascend/eplb/core/policy/__init__.py

Whitespace-only changes.
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
2+
from abc import abstractmethod
3+
4+
5+
class DynamicConfig:
    """Class-level default settings handed to ``EplbPolicy`` instances."""

    # Placement policy; None by default.
    placement_policy = None

    # Maximum number of experts that may be moved per layer on one host.
    max_transferred_expert_per_layer = 100

    # Number of dies across which all experts of the whole cluster are spread.
    ep_worldsize = 64

    # Number of dies on each host.
    num_die_per_host = 8
13+
14+
15+
class EplbPolicy:
    """Base class for expert-placement rebalancing policies.

    Concrete policies override :meth:`rebalance_experts`. This class does not
    use ``ABCMeta``, so ``@abstractmethod`` on its own does not prevent
    instantiation; the base method therefore raises ``NotImplementedError``
    rather than silently returning ``None``.
    """

    def __init__(self, config: "DynamicConfig"):
        # Keep the configuration available to the concrete policy.
        self.config = config

    @abstractmethod
    def rebalance_experts(self, current_expert_table, expert_workload):
        """Take the workload and return the expert replication and placement
        that satisfies the relevant constraints.

        INPUT:
            current_expert_table: [layerId, rankId, expert_num_i]
            expert_workload = expert_table[layer0][rankId][expert_num_i]

        RETURNED: (res, expert_table)
            res:
                1 -- table_changed
                0 -- not_changed

            expert_table: [layerId, rankId, expert_num_i]
                expert_num_i --- [0, MaxExpertPerRank]
                expertID = expert_table[layer0][rankId][expert_num_i]
                array_values:
                    [0, 1, 2, 3, 248]
                    [4, 5, 6, 7, 254]
                    [8, 9, 10, 11, 71]
                    ...
                    [252, 253, 254, 255, 0]

        NOTE(review): MockLoadBalance.rebalance_experts returns a 3-tuple
        (res, list, expert_table) rather than the 2-tuple described here --
        confirm which contract is intended.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement rebalance_experts()"
        )
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
2+
import copy
3+
import random
4+
5+
from .eplb_policy import EplbPolicy, DynamicConfig
6+
7+
8+
class MockLoadBalance(EplbPolicy):
    """Mock policy that swaps the last expert slot of two random ranks per layer."""

    def __init__(self, config: DynamicConfig):
        super().__init__(config)

    def rebalance_experts(self, current_expert_table, expert_workload):
        """Return a copy of the table with one random last-slot swap per layer.

        The input table is never mutated; all edits happen on a deep copy.
        ``expert_workload`` is accepted for interface compatibility but is not
        used by this mock.

        Args:
            current_expert_table: indexed as [layer][rank][slot].
            expert_workload: ignored.

        Returns:
            A 3-tuple ``(changed, per_layer_values, new_table)``:
            ``changed`` is 1 if at least one swap was performed, else 0;
            ``per_layer_values`` is ``[0, -1, -2, ...]`` with one entry per
            layer (presumably a per-layer priority -- confirm against the
            caller); ``new_table`` is the modified deep copy.
        """
        new_table = copy.deepcopy(current_expert_table)
        num_layers = len(current_expert_table)
        changed = 0

        for i in range(num_layers):
            layer = new_table[i]
            num_card = len(layer)
            # random.sample needs at least two ranks to pick from; a layer
            # with fewer ranks has nothing to swap, so leave it untouched.
            if num_card < 2:
                continue

            # Pick two distinct ranks at random.
            first, second = random.sample(range(num_card), 2)

            # Swap the redundant experts (the last slot of each rank).
            layer[first][-1], layer[second][-1] = (
                layer[second][-1],
                layer[first][-1],
            )
            changed = 1

        return changed, [-i for i in range(num_layers)], new_table
27+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Copyright Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
2+
from .eplb_policy import EplbPolicy, DynamicConfig
3+
from .mock_load_balance import MockLoadBalance
4+
from .dynamic_ep import DynamicEP
5+
6+
7+
class PolicyFactory:
    """Builds an ``EplbPolicy`` instance from an integer policy type."""

    @staticmethod
    def generate_policy(policy_type: int, config: DynamicConfig) -> EplbPolicy:
        """Instantiate the policy registered for ``policy_type``.

        Type 0 maps to ``MockLoadBalance`` and type 1 to ``DynamicEP``; any
        other value falls back to ``MockLoadBalance``.
        """
        registry = {
            0: MockLoadBalance,
            1: DynamicEP,
        }
        try:
            policy_cls = registry[policy_type]
        except KeyError:
            # Unknown policy types fall back to the mock policy.
            policy_cls = MockLoadBalance
        return policy_cls(config)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#TODO
2+
Used for writing converted (NZ-format) and stacked expert tensors to SSD
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# TODO
2+
Load expert weights from SSD or via D2D (device-to-device) transfer
3+
4+
matrixaccLib-EPLB:
5+
6+
Input 热度表
7+
8+
output
9+
加载到hbm的 tensor
10+
11+
12+
step1. collect
13+
14+
step2. eplb algo
15+
step3. expert weight loading(ssd->host->hbm or d2d hbm) hbm buffer, 与后处理或者attention 计算掩盖
16+
17+
step4. expert table apply & hbm buffer copy
18+
19+
20+

0 commit comments

Comments
 (0)