
Commit 4c7b8bc

Simplify the Config code (#2770)
* simplify the code
* fix vl
* delete config
* fix
* perfect code
* fix ci
* fix xpu
* fix xpu
* fix server
* resolve conflict
* fix mtp
* resolve conflict
* fix xpu
* fix xpu
* fix vl
* fix log
* fix qwen moe
* fix qwen moe
* fix qwen moe
1 parent 2e81792 commit 4c7b8bc

34 files changed: +557 -917 lines changed

fastdeploy/config.py

Lines changed: 273 additions & 268 deletions
Large diffs are not rendered by default.
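Although the full config.py diff is not rendered, the smaller hunks below show the shape of the change: model fields such as num_hidden_layers, kv_lora_rank and qk_rope_head_dim are now read directly from model_config rather than a model-specific sub-config like deepseekv3, and parallel_config.tensor_parallel_degree is renamed to tensor_parallel_size. A minimal sketch of the resulting access pattern, assuming only the field names visible in the hunks below (the import path and any other details are illustrative, not taken from the full config.py diff):

# Sketch of the flattened access pattern implied by this commit.
# Assumption: FDConfig is importable from fastdeploy.config; constructing a
# real FDConfig requires more arguments than shown here.
from fastdeploy.config import FDConfig

def describe(fd_config: FDConfig) -> None:
    model_cfg = fd_config.model_config
    parallel_cfg = fd_config.parallel_config
    # Renamed: num_layers -> num_hidden_layers.
    print("hidden layers:", model_cfg.num_hidden_layers)
    # MLA fields now live on model_config directly, not model_config.deepseekv3.
    print("kv_lora_rank:", model_cfg.kv_lora_rank)
    # Renamed: tensor_parallel_degree -> tensor_parallel_size.
    print("tensor parallel size:", parallel_cfg.tensor_parallel_size)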

fastdeploy/model_executor/layers/attention/append_attn_backend.py

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ def __init__(self, fd_config: FDConfig, kv_num_heads: int, num_heads: int,
        self.kv_num_heads: int = kv_num_heads
        self.num_heads: int = num_heads
        self.head_dim: int = fd_config.model_config.head_dim
-        self.num_layers: int = fd_config.model_config.num_layers
+        self.num_layers: int = fd_config.model_config.num_hidden_layers
        self.max_partition_size: int = int(
            os.getenv("FLAGS_max_partition_size", 32768))

fastdeploy/model_executor/layers/attention/attention.py

Lines changed: 2 additions & 2 deletions
@@ -67,10 +67,10 @@ def __init__(
            ValueError: If the `v_head_dim` is less than 0.
        """
        super().__init__()
-        self.num_heads: int = fd_config.model_config.num_attention_heads // fd_config.parallel_config.tensor_parallel_degree
+        self.num_heads: int = fd_config.model_config.num_attention_heads // fd_config.parallel_config.tensor_parallel_size
        self.head_dim: int = fd_config.model_config.head_dim
        self.kv_num_heads: int = \
-            max(1, fd_config.model_config.num_key_value_heads // fd_config.parallel_config.tensor_parallel_degree)
+            max(1, fd_config.model_config.num_key_value_heads // fd_config.parallel_config.tensor_parallel_size)
        self.layer_id: int = layer_id
        self.v_head_dim: int = v_head_dim if v_head_dim > 0 else self.head_dim
        self.rope_type: str = rope_type
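The two changed lines above only rename the divisor, but they also document how heads are sharded: each tensor-parallel rank keeps num_attention_heads // tensor_parallel_size query heads and at least one KV head. A standalone sketch of that arithmetic, with example values (not tied to any real FDConfig):

# Head-sharding arithmetic from the hunk above, as a standalone sketch.
def shard_heads(num_attention_heads: int, num_key_value_heads: int,
                tensor_parallel_size: int) -> tuple[int, int]:
    local_q_heads = num_attention_heads // tensor_parallel_size
    # GQA models may have fewer KV heads than TP ranks; keep at least one per rank.
    local_kv_heads = max(1, num_key_value_heads // tensor_parallel_size)
    return local_q_heads, local_kv_heads

# Example: 64 query heads, 8 KV heads, TP=16 -> (4, 1); KV heads get replicated.
print(shard_heads(64, 8, 16))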

fastdeploy/model_executor/layers/attention/flash_attn_backend.py

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@ def __init__(self, fd_config: FDConfig, kv_num_heads: int, num_heads: int,
        self.head_dim = fd_config.model_config.head_dim
        self.hidden_size = fd_config.model_config.hidden_size
        self.block_size = fd_config.parallel_config.block_size
-        self.num_layers: int = fd_config.model_config.num_layers
+        self.num_layers: int = fd_config.model_config.num_hidden_layers

        self.speculative_method = fd_config.speculative_config.method
        self.use_speculate = self.speculative_method is not None

fastdeploy/model_executor/layers/attention/iluvatar_attn_backend.py

Lines changed: 1 addition & 1 deletion
@@ -102,7 +102,7 @@ def __init__(self, llm_config: FDConfig, kv_num_heads: int, num_heads: int,
        self.head_dim = head_dim
        # note: scale need to change if using MLA
        self.attention_metadata.scale = 1.0 / sqrt(head_dim)
-        self.num_layers = llm_config.model_config.num_layers
+        self.num_layers = llm_config.model_config.num_hidden_layers
        self.record_block_table_metadata = {}
        self.only_use_flash_attn = int(
            os.getenv("FD_ILUVATAR_ONLY_USE_FLASH_ATTN", 0)) == 1

fastdeploy/model_executor/layers/attention/mla_attention_backend.py

Lines changed: 8 additions & 8 deletions
@@ -113,18 +113,18 @@ def __init__(self, fd_config: FDConfig, kv_num_heads: int, num_heads: int,
        self.kv_num_heads: int = kv_num_heads
        self.num_heads: int = num_heads
        self.head_dim: int = fd_config.model_config.head_dim
-        self.num_layers: int = fd_config.model_config.num_layers
+        self.num_layers: int = fd_config.model_config.num_hidden_layers

        # For Multi Head Latent Attention
-        self.kv_lora_rank: int = fd_config.model_config.deepseekv3.kv_lora_rank
-        self.qk_rope_head_dim: int = fd_config.model_config.deepseekv3.qk_rope_head_dim
-        self.qk_head_dim: int = fd_config.model_config.deepseekv3.qk_nope_head_dim \
-            + fd_config.model_config.deepseekv3.qk_rope_head_dim
+        self.kv_lora_rank: int = fd_config.model_config.kv_lora_rank
+        self.qk_rope_head_dim: int = fd_config.model_config.qk_rope_head_dim
+        self.qk_head_dim: int = fd_config.model_config.qk_nope_head_dim \
+            + fd_config.model_config.qk_rope_head_dim
        self.attn_softmax_scale: float = self.qk_head_dim**-0.5
-        if fd_config.model_config.deepseekv3.rope_scaling:
-            mscale_all_dim = fd_config.model_config.deepseekv3.rope_scaling.get(
+        if fd_config.model_config.rope_scaling:
+            mscale_all_dim = fd_config.model_config.rope_scaling.get(
                "mscale_all_dim", False) # 1.0
-            scaling_factor = fd_config.model_config.deepseekv3.rope_scaling[
+            scaling_factor = fd_config.model_config.rope_scaling[
                "factor"] # 40
            mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim))
            self.attn_softmax_scale = self.attn_softmax_scale * mscale * mscale
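Beyond moving the MLA fields off the deepseekv3 sub-config, this hunk shows how the softmax scale is derived when rope_scaling is configured: the base scale qk_head_dim ** -0.5 is multiplied by mscale squared. A self-contained sketch of that computation; the yarn_get_mscale body below follows the common YaRN formulation and is an assumption, since the real helper is not part of this diff, and the numeric values are examples only:

import math

# Assumed YaRN mscale definition (common formulation); FastDeploy's actual
# yarn_get_mscale may differ in detail.
def yarn_get_mscale(scale: float = 1.0, mscale: float = 1.0) -> float:
    if scale <= 1.0:
        return 1.0
    return 0.1 * mscale * math.log(scale) + 1.0

qk_nope_head_dim, qk_rope_head_dim = 128, 64            # example head dims
qk_head_dim = qk_nope_head_dim + qk_rope_head_dim
attn_softmax_scale = qk_head_dim ** -0.5                # base scale

rope_scaling = {"factor": 40, "mscale_all_dim": 1.0}    # example rope_scaling dict
mscale = yarn_get_mscale(rope_scaling["factor"], float(rope_scaling["mscale_all_dim"]))
attn_softmax_scale = attn_softmax_scale * mscale * mscale
print(attn_softmax_scale)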

fastdeploy/model_executor/layers/attention/utils.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ def init_rank_and_device_id(fd_config: FDConfig):

    """
    rank = (fd_config.parallel_config.expert_parallel_rank *
-            fd_config.parallel_config.tensor_parallel_degree + fd_config.parallel_config.tensor_parallel_rank)
+            fd_config.parallel_config.tensor_parallel_size + fd_config.parallel_config.tensor_parallel_rank)

    cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES", None)
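The rank formula above flattens the (expert_parallel_rank, tensor_parallel_rank) pair into a single rank index, with tensor parallelism as the fast-moving dimension. A quick standalone illustration of that mapping (example sizes, not tied to any real deployment):

# Rank flattening as used in init_rank_and_device_id (standalone illustration).
def global_rank(expert_parallel_rank: int, tensor_parallel_rank: int,
                tensor_parallel_size: int) -> int:
    return expert_parallel_rank * tensor_parallel_size + tensor_parallel_rank

# Example: EP rank 2 with TP size 4 -> its TP ranks 0..3 map to ranks 8..11.
print([global_rank(2, tp, 4) for tp in range(4)])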

fastdeploy/model_executor/layers/attention/xpu_attn_backend.py

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ def __init__(self, fd_config: FDConfig, kv_num_heads: int, num_heads: int,
        self.kv_num_heads: int = kv_num_heads
        self.num_heads: int = num_heads
        self.head_dim: int = head_dim
-        self.num_layers: int = fd_config.model_config.num_layers
+        self.num_layers: int = fd_config.model_config.num_hidden_layers

        # pd_disaggregation
        self.use_pd_disaggregation: int = int(

fastdeploy/model_executor/layers/backends/gcu/attention/flash_attn_backend.py

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,7 @@ def __init__(self, fd_config: FDConfig, kv_num_heads: int, num_heads: int,
        self.num_heads = num_heads
        self.head_dim = head_dim
        self.scaling = 1.0 / (self.head_dim**0.5)
-        self.num_layers = fd_config.model_config.num_layers
+        self.num_layers = fd_config.model_config.num_hidden_layers
        self.position_ids_base = paddle.arange(self.max_seq_len)

        # TODO(zhengjun): Need to adapt the allocation logic and

fastdeploy/model_executor/layers/backends/gcu/attention/mem_efficient_attn_backend.py

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,7 @@ def __init__(self, fd_config: FDConfig, kv_num_heads: int, num_heads: int,
        self.num_heads = num_heads
        self.head_dim = head_dim
        self.scaling = 1.0 / (self.head_dim**0.5)
-        self.num_layers = fd_config.model_config.num_layers
+        self.num_layers = fd_config.model_config.num_hidden_layers
        self.position_ids_base = paddle.arange(self.max_seq_len)

        # TODO(zhengjun): Need to adapt the allocation logic and
