init matrix core acclib dir struct

raindaywhu · raindaywhu · commit b8b6175f13e0 · 2025-05-30T10:18:43.000+08:00
diff --git a/vllm_ascend/eplb/core/mempool/mem_cache.py b/vllm_ascend/eplb/core/mempool/mem_cache.py
@@ -0,0 +1 @@
+# TODO
diff --git a/vllm_ascend/eplb/core/policy/__init__.py b/vllm_ascend/eplb/core/policy/__init__.py
diff --git a/vllm_ascend/eplb/core/policy/eplb_policy.py b/vllm_ascend/eplb/core/policy/eplb_policy.py
@@ -0,0 +1,42 @@
+# Copyright Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+from abc import abstractmethod
+
+
+class DynamicConfig:
+    # Settings object passed to EplbPolicy; every value is a class-level default.
+    placement_policy = None
+
+    max_transferred_expert_per_layer = 100
+    # Maximum number of experts that may be moved per layer on a single host.
+
+    ep_worldsize = 64  # number of dies the cluster's experts are distributed across
+    num_die_per_host = 8  # number of dies on each host
+
+
+class EplbPolicy:
+    """Base class for expert rebalancing policies.
+
+    Concrete policies override ``rebalance_experts``.
+    """
+
+    def __init__(self, config: DynamicConfig):
+        """Keep ``config`` on the instance for the concrete policy to read."""
+        self.config = config
+
+    @abstractmethod
+    def rebalance_experts(self, current_expert_table, expert_workload):
+        """Compute expert replication and placement under the configured limits.
+
+        Args:
+            current_expert_table: Current placement, indexed as
+                ``[layer_id][rank_id][expert_num_i]``.
+            expert_workload: Per-expert workload. The original note gives it
+                the same ``[layer_id][rank_id][expert_num_i]`` indexing.
+
+        Returns:
+            ``(res, expert_table)`` where ``res`` is 1 if the table changed
+            and 0 if it did not, and ``expert_table`` is indexed as
+            ``[layer_id][rank_id][expert_num_i]`` with ``expert_num_i`` in
+            ``[0, MaxExpertPerRank]``. Each entry is an expert id, i.e.
+            ``expert_id = expert_table[layer0][rank_id][expert_num_i]``.
+            Example rows for one layer::
+
+                [0, 1, 2, 3, 248]
+                [4, 5, 6, 7, 254]
+                [8, 9, 10, 11, 71]
+                ...
+                [252, 253, 254, 255, 0]
+
+            NOTE(review): MockLoadBalance in this change returns three
+            elements (flag, a per-layer list, table) -- confirm which shape
+            callers expect.
+
+        Raises:
+            NotImplementedError: Always, when the subclass did not override
+                this method.
+        """
+        # This class does not use ABCMeta, so @abstractmethod alone does not
+        # stop instantiation; fail loudly instead of silently returning None.
+        raise NotImplementedError(
+            f"{type(self).__name__} must implement rebalance_experts()"
+        )
diff --git a/vllm_ascend/eplb/core/policy/mock_load_balance.py b/vllm_ascend/eplb/core/policy/mock_load_balance.py
@@ -0,0 +1,27 @@
+# Copyright Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+import copy
+import random
+
+from .eplb_policy import EplbPolicy, DynamicConfig
+
+
+class MockLoadBalance(EplbPolicy):
+    """Mock policy: per layer, swap the last expert slot of two random cards."""
+
+    def __init__(self, config: DynamicConfig):
+        super().__init__(config)
+
+    def rebalance_experts(self, current_expert_table, expert_workload):
+        """Return a copy of the table with one random last-slot swap per layer.
+
+        Args:
+            current_expert_table: Placement indexed as
+                ``[layer][card][slot]``; it is deep-copied, never mutated.
+                The card count is taken from the first layer.
+            expert_workload: Unused by this mock policy.
+
+        Returns:
+            A 3-tuple ``(changed, per_layer_list, new_table)``:
+            ``changed`` is 1 when swaps were performed and 0 when the table
+            is empty or has fewer than two cards (no swap is possible);
+            ``per_layer_list`` is ``[0, -1, ..., -(num_layers - 1)]``;
+            ``new_table`` is the modified deep copy.
+        """
+        new_table = copy.deepcopy(current_expert_table)
+        num_layers = len(current_expert_table)
+        num_card = len(current_expert_table[0]) if num_layers else 0
+        layer_values = [-i for i in range(num_layers)]
+
+        # A swap needs two distinct cards; report "not changed" rather than
+        # failing on an empty table or a single-card layout.
+        if num_card < 2:
+            return 0, layer_values, new_table
+
+        for i in range(num_layers):
+            layer = new_table[i]
+            # Pick two distinct cards at random.
+            first, second = random.sample(range(num_card), 2)
+
+            # Swap the redundant expert, stored in the last slot of each card.
+            layer[first][-1], layer[second][-1] = (
+                layer[second][-1],
+                layer[first][-1],
+            )
+        return 1, layer_values, new_table
+
diff --git a/vllm_ascend/eplb/core/policy/policy_factory.py b/vllm_ascend/eplb/core/policy/policy_factory.py
@@ -0,0 +1,14 @@
+# Copyright Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
+from .eplb_policy import EplbPolicy, DynamicConfig
+from .mock_load_balance import MockLoadBalance
+from .dynamic_ep import DynamicEP
+
+
+class PolicyFactory:
+    """Creates policy objects from an integer selector."""
+
+    @staticmethod
+    def generate_policy(policy_type: int, config: DynamicConfig) -> EplbPolicy:
+        """Instantiate the policy registered for ``policy_type``.
+
+        0 selects MockLoadBalance and 1 selects DynamicEP; any other value
+        falls back to MockLoadBalance. The chosen class is called with
+        ``config`` as its only argument.
+        """
+        registry = {0: MockLoadBalance, 1: DynamicEP}
+        policy_cls = registry.get(policy_type, MockLoadBalance)
+        return policy_cls(config)
diff --git a/vllm_ascend/eplb/core/tool/convert_tool.py b/vllm_ascend/eplb/core/tool/convert_tool.py
@@ -0,0 +1,2 @@
+# TODO
+# Used for converting NZ & stacked expert tensors to SSD.
diff --git a/vllm_ascend/eplb/core/worker/eplb_updator.py b/vllm_ascend/eplb/core/worker/eplb_updator.py
@@ -0,0 +1,20 @@
+# TODO
+# Load expert weights from SSD or via d2d transfer.
+
+# matrixaccLib-EPLB:
+
+# Input: expert hotness (workload) table
+
+# Output:
+# tensors loaded into HBM
+
+
+# step1. collect
+
+# step2. eplb algo
+# step3. expert weight loading (ssd->host->hbm or d2d hbm) into an hbm buffer, overlapped with post-processing or attention computation
+
+# step4. expert table apply & hbm buffer copy
+
+
+

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+#TODO`
	`2`	`+Using for converted NZ & stacked expert tensor to SSD`