
Commit 137eaf1

fix xpu
1 parent be39406 commit 137eaf1


14 files changed: +19 −19 lines changed


fastdeploy/engine/engine.py

Lines changed: 2 additions & 2 deletions

@@ -1194,10 +1194,10 @@ def detect_thread():
                             r'set state for layer (\d+)',
                             line)):
                         progress = eval(match.group(
-                            1)) * 1.0 / self.cfg.model_config.num_layers
+                            1)) * 1.0 / self.cfg.model_config.num_hidden_layers
                         self.worker_init_status["layer_loadding"] = progress
                         if self.worker_init_status[
-                                "layer_loadding"] == self.cfg.model_config.num_layers - 1:
+                                "layer_loadding"] == self.cfg.model_config.num_hidden_layers - 1:
                             self.worker_init_status["finished"] = True

             self.checking_worker_status_thread = threading.Thread(
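
As context for this hunk: the engine tails the worker log, and each "set state for layer N" message advances a loading-progress fraction whose denominator is now the config's num_hidden_layers. A minimal standalone sketch of that calculation (illustrative names only, and using int() rather than the eval() seen in the diff):

    import re

    def layer_loading_progress(log_line: str, num_hidden_layers: int):
        """Return the fraction of layers loaded, or None if the line does not match."""
        match = re.search(r"set state for layer (\d+)", log_line)
        if match is None:
            return None
        # Progress is the reported layer index divided by the total hidden layer count.
        return int(match.group(1)) / num_hidden_layers

    # Example: layer 24 of a 48-layer model reports 0.5.
    print(layer_loading_progress("set state for layer 24", 48))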

fastdeploy/model_executor/layers/attention/iluvatar_attn_backend.py

Lines changed: 1 addition & 1 deletion

@@ -101,7 +101,7 @@ def __init__(self, llm_config: FDConfig, kv_num_heads: int, num_heads: int,
         self.head_dim = head_dim
         # note: scale need to change if using MLA
         self.attention_metadata.scale = 1.0 / sqrt(head_dim)
-        self.num_layers = llm_config.model_config.num_layers
+        self.num_layers = llm_config.model_config.num_hidden_layers
         self.record_block_table_metadata = {}
         self.only_use_flash_attn = int(
             os.getenv("FD_ILUVATAR_ONLY_USE_FLASH_ATTN", 0)) == 1

fastdeploy/model_executor/layers/backends/gcu/attention/flash_attn_backend.py

Lines changed: 1 addition & 1 deletion

@@ -91,7 +91,7 @@ def __init__(self, fd_config: FDConfig, kv_num_heads: int, num_heads: int,
         self.num_heads = num_heads
         self.head_dim = head_dim
         self.scaling = 1.0 / (self.head_dim**0.5)
-        self.num_layers = fd_config.model_config.num_layers
+        self.num_layers = fd_config.model_config.num_hidden_layers
         self.position_ids_base = paddle.arange(self.max_seq_len)

         # TODO(zhengjun): Need to adapt the allocation logic and

fastdeploy/model_executor/layers/backends/gcu/attention/mem_efficient_attn_backend.py

Lines changed: 1 addition & 1 deletion

@@ -90,7 +90,7 @@ def __init__(self, fd_config: FDConfig, kv_num_heads: int, num_heads: int,
         self.num_heads = num_heads
         self.head_dim = head_dim
         self.scaling = 1.0 / (self.head_dim**0.5)
-        self.num_layers = fd_config.model_config.num_layers
+        self.num_layers = fd_config.model_config.num_hidden_layers
         self.position_ids_base = paddle.arange(self.max_seq_len)

         # TODO(zhengjun): Need to adapt the allocation logic and
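
The three attention-backend hunks above (iluvatar and the two gcu backends) use the layer count only to size per-layer bookkeeping. A minimal sketch of that pattern (illustrative class, not the FastDeploy backend API):

    import math

    class AttentionBackendSketch:
        def __init__(self, num_hidden_layers: int, head_dim: int):
            self.head_dim = head_dim
            self.scaling = 1.0 / math.sqrt(head_dim)
            # One metadata slot per transformer layer, sized from the renamed config field.
            self.num_layers = num_hidden_layers
            self.per_layer_metadata = {i: {} for i in range(self.num_layers)}

    backend = AttentionBackendSketch(num_hidden_layers=32, head_dim=128)
    print(backend.num_layers, backend.scaling)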

fastdeploy/model_executor/load_weight_utils.py

Lines changed: 1 addition & 1 deletion

@@ -43,7 +43,7 @@ def load_ep_checkpoint(model_path: str,
     filtered_map = {k: v for k, v in weight_list.items() if "experts" not in k}
     num_local_ffn_keys = []

-    for i in range(config.moe_layer_start_index, config.num_layers):
+    for i in range(config.moe_layer_start_index, config.num_hidden_layers):
         for j in range(
                 config.num_experts_start_offset,
                 config.num_experts_start_offset + config.num_experts_per_rank,
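
The loop in load_ep_checkpoint walks the MoE layers, from moe_layer_start_index up to (but not including) num_hidden_layers, and inside each layer the slice of experts owned by the current expert-parallel rank. A minimal sketch of that enumeration (the key format below is hypothetical; the real checkpoint key names are not shown in this diff):

    def local_expert_keys(moe_layer_start_index: int,
                          num_hidden_layers: int,
                          num_experts_start_offset: int,
                          num_experts_per_rank: int) -> list:
        """Enumerate (layer, expert) pairs owned by this rank as key strings."""
        keys = []
        for layer in range(moe_layer_start_index, num_hidden_layers):
            for expert in range(num_experts_start_offset,
                                num_experts_start_offset + num_experts_per_rank):
                keys.append(f"layers.{layer}.experts.{expert}")
        return keys

    # Example: MoE layers 1..2 of a 3-layer model, 4 experts per rank starting at expert 8.
    print(local_expert_keys(1, 3, 8, 4))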

fastdeploy/model_executor/models/deepseek_v3.py

Lines changed: 1 addition & 1 deletion

@@ -758,5 +758,5 @@ def get_tensor_parallel_split_mappings(num_layers):

         return final_actions

-    mappings = get_tensor_parallel_split_mappings(config.num_layers)
+    mappings = get_tensor_parallel_split_mappings(config.num_hidden_layers)
     return mappings

fastdeploy/model_executor/models/ernie4_5_moe.py

Lines changed: 1 addition & 1 deletion

@@ -618,7 +618,7 @@ def get_tensor_parallel_split_mappings(num_layers, moe_num_experts,
     elif isinstance(config.moe_layer_start_index, int):
         moe_layer_start_index = config.moe_layer_start_index

-    mappings = get_tensor_parallel_split_mappings(config.num_layers,
+    mappings = get_tensor_parallel_split_mappings(config.num_hidden_layers,
                                                   moe_num_experts,
                                                   moe_layer_start_index,
                                                   config.prefix_name)

fastdeploy/model_executor/models/ernie4_5_mtp.py

Lines changed: 1 addition & 1 deletion

@@ -237,7 +237,7 @@ def get_tensor_parallel_split_mappings(num_layers, moe_num_experts,

     moe_num_experts = 0
     mappings = get_tensor_parallel_split_mappings(
-        config.num_layers,
+        config.num_hidden_layers,
         moe_num_experts,
         config.moe_layer_start_index,
     )

fastdeploy/model_executor/models/qwen2.py

Lines changed: 1 addition & 1 deletion

@@ -427,6 +427,6 @@ def get_tensor_parallel_split_mappings(num_layers):

         return final_actions

-    mappings = get_tensor_parallel_split_mappings(config.num_layers)
+    mappings = get_tensor_parallel_split_mappings(config.num_hidden_layers)

     return mappings

fastdeploy/model_executor/models/qwen3.py

Lines changed: 1 addition & 1 deletion

@@ -357,5 +357,5 @@ def get_tensor_parallel_split_mappings(num_layers):

         return final_actions

-    mappings = get_tensor_parallel_split_mappings(config.num_layers)
+    mappings = get_tensor_parallel_split_mappings(config.num_hidden_layers)
     return mappings
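
The four model files above (deepseek_v3, ernie4_5_moe/mtp, qwen2, qwen3) all feed the layer count into a get_tensor_parallel_split_mappings helper; the only change here is which config field supplies that count. A minimal sketch of the pattern (the per-weight split actions below are hypothetical; each model file defines its own):

    def get_tensor_parallel_split_mappings(num_layers: int) -> dict:
        """Map per-layer weight names to a tensor-parallel split action."""
        final_actions = {}
        for layer in range(num_layers):
            # In the real models every weight gets its own column-/row-split rule.
            final_actions[f"layers.{layer}.self_attn.qkv_proj.weight"] = "split_columns"
            final_actions[f"layers.{layer}.mlp.down_proj.weight"] = "split_rows"
        return final_actions

    # After this commit the call sites read the layer count from config.num_hidden_layers:
    # mappings = get_tensor_parallel_split_mappings(config.num_hidden_layers)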
