
Commit bad53c6

[vl]remove duplicated load logic (#2744)
1 parent 1694082 commit bad53c6

File tree: 11 files changed (+510 / -632 lines)


fastdeploy/config.py

Lines changed: 12 additions & 14 deletions
@@ -18,7 +18,7 @@

 from dataclasses import dataclass, field
 from enum import Enum
-from typing import Literal, Optional
+from typing import Literal, Optional, Union

 from paddleformers.transformers.configuration_utils import PretrainedConfig

@@ -72,8 +72,10 @@ def __init__(
         rope_theta: int = 10000,
         rope_3d: bool = False,
         ori_vocab_size: int | None = None,
-        moe_layer_start_index: int | None = None,
-        moe_layer_end_index: int | None = None,
+        moe_layer_start_index: Union[int, list[int], None] = None,
+        moe_num_experts: Union[int, list[int], None] = None,
+        moe_layer_end_index: Union[int, list[int], None] = None,
+        moe_num_shared_experts: int | None = None,
         num_hidden_layers: int | None = None,
         prefix_name="",
         freeze_embedding=False,
@@ -110,14 +112,10 @@ def __init__(
         self.prefix_name = prefix_name
         self.freeze_embedding = freeze_embedding
         self.rope_head_dim = rope_head_dim
-        moe_num_experts = kwargs.get("moe_num_experts", 0)
-        if moe_layer_start_index is not None:
-            self.moe_layer_start_index = moe_layer_start_index
-        elif moe_num_experts == 0:
-            self.moe_layer_start_index = self.num_layers
-            self.moe_num_experts = 0
-        if moe_layer_end_index is not None:
-            self.moe_layer_end_index = moe_layer_end_index
+        self.moe_layer_start_index = moe_layer_start_index
+        self.moe_num_experts = moe_num_experts
+        self.moe_num_shared_experts = moe_num_shared_experts
+        self.moe_layer_end_index = moe_layer_end_index
         self.ffn_hidden_size = ffn_hidden_size
         self.rope_3d = rope_3d
         self.start_layer_index = start_layer_index
@@ -132,15 +130,15 @@ class MoEConfig:
     """
     Configuration for MoE.
     """
-    num_experts: int = -1
+    num_experts: Union[int, list[int], None] = None
     top_k: int = 8
     moe_intermediate_size: int = -1
     num_experts_per_rank: int = -1
     num_experts_start_offset: int = -1

     moe_num_shared_experts = (0, )
-    moe_layer_start_index = 0
-    moe_layer_end_index = None
+    moe_layer_start_index: Union[int, list[int], None] = None
+    moe_layer_end_index: Union[int, list[int], None] = None
     moe_use_aux_free: bool = False
     num_max_dispatch_tokens_per_rank = 256
     im_patch_id = (
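The Union annotations above let the MoE fields hold either a single value or a list (e.g. the VL model can carry one expert count per modality). A minimal sketch of how a caller might normalize such a field; the helper name is hypothetical and not part of this commit:

from typing import Union

def as_expert_list(value: Union[int, list[int], None]) -> list[int]:
    # Hypothetical helper: fold an int-or-list MoE setting into list form.
    if value is None:
        return []
    if isinstance(value, int):
        return [value]
    return list(value)

# as_expert_list(64) -> [64]; as_expert_list([64, 64]) -> [64, 64]; as_expert_list(None) -> []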

fastdeploy/model_executor/model_loader.py

Lines changed: 4 additions & 1 deletion
@@ -28,6 +28,8 @@
     Ernie4_5_PretrainedModel
 from fastdeploy.model_executor.models.ernie4_5_mtp import \
     Ernie4_5_MTPPretrainedModel
+from fastdeploy.model_executor.models.ernie4_5_vl.ernie4_5_vl_moe import \
+    Ernie4_5_VLPretrainedModel
 from fastdeploy.model_executor.models.model_base import ModelRegistry
 from fastdeploy.model_executor.models.qwen2 import Qwen2PretrainedModel
 from fastdeploy.model_executor.models.qwen3 import Qwen3PretrainedModel
@@ -42,6 +44,7 @@
     "Qwen3MoeForCausalLM": Qwen3MoePretrainedModel,
     "Ernie4_5_ForCausalLM": Ernie4_5_PretrainedModel,
     "DeepseekV3ForCausalLM": DeepSeekV3PretrainedModel,
+    "Ernie4_5_VLMoeForConditionalGeneration": Ernie4_5_VLPretrainedModel,
 }
@@ -94,7 +97,7 @@ def load_model(self, fd_config: FDConfig) -> nn.Layer:

         if fd_config.load_config.dynamic_load_weight:
             # register rl model
-            import fastdeploy.rl
+            import fastdeploy.rl  # noqa
             architectures = architectures + "RL"

         with context:
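Registering Ernie4_5_VLPretrainedModel under its architecture string lets the loader resolve the VL checkpoint's pretrained class by name, and the # noqa on import fastdeploy.rl keeps linters from flagging an import kept only for its registration side effect. A rough sketch of the lookup pattern, with placeholder string values instead of the real classes:

# Sketch of the architecture-name lookup; values are placeholders, not the real classes.
MODEL_CLASSES = {
    "Ernie4_5_VLMoeForConditionalGeneration": "Ernie4_5_VLPretrainedModel",
    "Ernie4_5_ForCausalLM": "Ernie4_5_PretrainedModel",
}

def resolve_pretrained_class(architecture: str) -> str:
    # Fail loudly when an unregistered architecture is requested.
    try:
        return MODEL_CLASSES[architecture]
    except KeyError as exc:
        raise ValueError(f"Unsupported architecture: {architecture}") from exc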

fastdeploy/model_executor/models/ernie4_5_moe.py

Lines changed: 8 additions & 22 deletions
@@ -26,6 +26,7 @@
 from paddleformers.utils.log import logger

 from fastdeploy.config import FDConfig, ModelConfig
+from fastdeploy.model_executor.forward_meta import ForwardMeta
 from fastdeploy.model_executor.graph_optimization.decorator import \
     support_graph_optimization
 from fastdeploy.model_executor.layers.activation import SiluAndMul
@@ -41,7 +42,6 @@
 from fastdeploy.model_executor.models.utils import \
     LayerIdPlaceholder as layerid
 from fastdeploy.model_executor.models.utils import WeightMeta
-from fastdeploy.model_executor.forward_meta import ForwardMeta


 class Ernie4_5_MLP(nn.Layer):
@@ -599,27 +599,13 @@ def get_tensor_parallel_split_mappings(num_layers, moe_num_experts,
             start_layer = (moe_layer_start_index
                            if moe_layer_start_index > 0 else num_layers)
             final_actions = build_expanded_keys(
-                num_layers,
-                moe_num_experts,
-                start_layer,
-                base_actions,
+                base_actions, num_layers, start_layer, moe_num_experts
             )
             return final_actions
-
-        moe_num_experts = 0
-        if isinstance(config.moe_num_experts, list):
-            moe_num_experts = sum(config.moe_num_experts)
-        elif isinstance(config.moe_num_experts, int):
-            moe_num_experts = config.moe_num_experts
-
-        moe_layer_start_index = -1
-        if isinstance(config.moe_layer_start_index, list):
-            moe_layer_start_index = min(config.moe_layer_start_index)
-        elif isinstance(config.moe_layer_start_index, int):
-            moe_layer_start_index = config.moe_layer_start_index
-
-        mappings = get_tensor_parallel_split_mappings(config.num_layers,
-                                                      moe_num_experts,
-                                                      moe_layer_start_index,
-                                                      config.prefix_name)
+        mappings = get_tensor_parallel_split_mappings(
+            config.num_layers,
+            config.moe_num_experts,
+            config.moe_layer_start_index,
+            config.prefix_name,
+        )
         return mappings
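For reference, the call-site normalization deleted by this hunk amounted to the following; equivalent handling of list-valued settings now sits behind get_tensor_parallel_split_mappings / build_expanded_keys, so the caller passes config values through untouched. Shown as a standalone sketch, not code from the repository:

from typing import Union

def flatten_moe_settings(
    moe_num_experts: Union[int, list[int], None],
    moe_layer_start_index: Union[int, list[int], None],
) -> tuple[int, int]:
    # Reproduces the removed logic: sum a list of expert counts, take the
    # earliest start index, and fall back to 0 / -1 for missing values.
    total = sum(moe_num_experts) if isinstance(moe_num_experts, list) else (moe_num_experts or 0)
    if isinstance(moe_layer_start_index, list):
        start = min(moe_layer_start_index)
    elif isinstance(moe_layer_start_index, int):
        start = moe_layer_start_index
    else:
        start = -1
    return total, start

# flatten_moe_settings([64, 64], [28, 28]) -> (128, 28)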

fastdeploy/model_executor/models/ernie4_5_vl/dfnrope/modeling.py

Lines changed: 19 additions & 9 deletions
@@ -29,6 +29,8 @@
     flash_attn_unpadded as flash_attn_varlen_func
 from paddleformers.transformers.model_utils import PretrainedModel

+from fastdeploy.model_executor.layers.utils import get_tensor
+
 from .activation import ACT2FN
 from .configuration import DFNRopeVisionTransformerConfig

@@ -487,10 +489,10 @@ class DFNRopeVisionTransformerPretrainedModel(PretrainedModel):

     config_class = DFNRopeVisionTransformerConfig

-    def __init__(self, config) -> None:
+    def __init__(self, config, prefix_name: str = "") -> None:
         super().__init__(config)
         self.spatial_merge_size = config.spatial_merge_size
-
+        self.prefix_name = prefix_name
         self.patch_embed = PatchEmbed(
             patch_size=config.patch_size,
             in_channels=config.in_channels,
@@ -723,10 +725,18 @@ def get_tensor_parallel_split_mappings(depth):
         mappings = get_tensor_parallel_split_mappings(vision_config.depth)
         return mappings

-    def set_state_dict(self, state_dict, *args, **kwargs):
-        """_summary_
-
-        Args:
-            state_dict (_type_): _description_
-        """
-        super().set_state_dict(state_dict, *args, **kwargs)
+    def load_state_dict(self, state_dict):
+        params_dict = dict(self.named_parameters())
+        for param_name, param in params_dict.items():
+            state_dict_key = f"{self.prefix_name}.{param_name}"
+            if state_dict_key not in state_dict:
+                raise ValueError(
+                    f"The key {state_dict_key} does not exist in state_dict. "
+                )
+            tensor = get_tensor(state_dict.pop(state_dict_key))
+            if param.shape != tensor.shape:
+                raise ValueError(
+                    f"{state_dict_key} param.shape={param.shape} tensor.shape={tensor.shape}"
+                )
+            else:
+                param.copy_(tensor, False)
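The new load_state_dict replaces the old pass-through set_state_dict: each vision-tower parameter must appear in the checkpoint under its prefix_name-qualified key, and a missing key or shape mismatch raises a ValueError. A toy illustration of the prefixed-key lookup, with plain dicts standing in for named_parameters() and the checkpoint:

# Toy illustration of the prefixed-key lookup the new loader performs.
prefix_name = "vision_model"                                 # assumed prefix, set at construction
params = {"patch_embed.proj.weight": None}                   # stand-in for dict(self.named_parameters())
state_dict = {"vision_model.patch_embed.proj.weight": None}  # stand-in checkpoint

for param_name in params:
    key = f"{prefix_name}.{param_name}"
    if key not in state_dict:
        raise ValueError(f"The key {key} does not exist in state_dict.")
    tensor = state_dict.pop(key)  # pop so leftover, unconsumed keys are easy to spot afterwards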
