@@ -41,16 +41,18 @@ def huggingface(model_config: Gemma3Config, quantization: Quantization) -> Exter
 
     mapping = ExternMapping()
 
+    mlc_prefix = "language_model."
+    hf_prefix = "language_model." if not model_config.is_text_model else ""
     for i in range(model_config.text_config.num_hidden_layers):
         # Add gates in MLP
-        mlp = f"language_model.model.layers.{i}.mlp"
-        mlc_name = f"{mlp}.gate_up_proj.weight"
+        mlp = f"model.layers.{i}.mlp"
+        mlc_name = f"{mlc_prefix + mlp}.gate_up_proj.weight"
         mlc_param = named_parameters[mlc_name]
         mapping.add_mapping(
             mlc_name,
             [
-                f"{mlp}.gate_proj.weight",
-                f"{mlp}.up_proj.weight",
+                f"{hf_prefix + mlp}.gate_proj.weight",
+                f"{hf_prefix + mlp}.up_proj.weight",
             ],
             functools.partial(
                 lambda gate, up, dtype: np.concatenate([gate, up], axis=0).astype(dtype),
@@ -59,88 +61,88 @@ def huggingface(model_config: Gemma3Config, quantization: Quantization) -> Exter
         )
         # Modify RMS layernorm weights, since Gemma model adds 1 to the weights
         # We add 1 to the weights here for efficiency purpose
-        mlc_name = f"language_model.model.layers.{i}.input_layernorm.weight"
-        mlc_param = named_parameters[mlc_name]
+        mlc_name = f"model.layers.{i}.input_layernorm.weight"
+        mlc_param = named_parameters[mlc_prefix + mlc_name]
         mapping.add_mapping(
-            mlc_name,
-            [mlc_name],
+            mlc_prefix + mlc_name,
+            [hf_prefix + mlc_name],
             functools.partial(
                 lambda x, dtype: (x + 1).astype(dtype),
-                dtype=named_parameters[mlc_name].dtype,
+                dtype=named_parameters[mlc_prefix + mlc_name].dtype,
             ),
         )
 
-        mlc_name = f"language_model.model.layers.{i}.post_attention_layernorm.weight"
-        mlc_param = named_parameters[mlc_name]
+        mlc_name = f"model.layers.{i}.post_attention_layernorm.weight"
+        mlc_param = named_parameters[mlc_prefix + mlc_name]
         mapping.add_mapping(
-            mlc_name,
-            [mlc_name],
+            mlc_prefix + mlc_name,
+            [hf_prefix + mlc_name],
             functools.partial(
                 lambda x, dtype: (x + 1).astype(dtype),
-                dtype=named_parameters[mlc_name].dtype,
+                dtype=named_parameters[mlc_prefix + mlc_name].dtype,
             ),
         )
 
-        mlc_name = f"language_model.model.layers.{i}.pre_feedforward_layernorm.weight"
-        mlc_param = named_parameters[mlc_name]
+        mlc_name = f"model.layers.{i}.pre_feedforward_layernorm.weight"
+        mlc_param = named_parameters[mlc_prefix + mlc_name]
         mapping.add_mapping(
-            mlc_name,
-            [mlc_name],
+            mlc_prefix + mlc_name,
+            [hf_prefix + mlc_name],
             functools.partial(
                 lambda x, dtype: (x + 1).astype(dtype),
-                dtype=named_parameters[mlc_name].dtype,
+                dtype=named_parameters[mlc_prefix + mlc_name].dtype,
             ),
         )
 
-        mlc_name = f"language_model.model.layers.{i}.post_feedforward_layernorm.weight"
-        mlc_param = named_parameters[mlc_name]
+        mlc_name = f"model.layers.{i}.post_feedforward_layernorm.weight"
+        mlc_param = named_parameters[mlc_prefix + mlc_name]
         mapping.add_mapping(
-            mlc_name,
-            [mlc_name],
+            mlc_prefix + mlc_name,
+            [hf_prefix + mlc_name],
             functools.partial(
                 lambda x, dtype: (x + 1).astype(dtype),
-                dtype=named_parameters[mlc_name].dtype,
+                dtype=named_parameters[mlc_prefix + mlc_name].dtype,
             ),
         )
 
-        mlc_name = f"language_model.model.layers.{i}.self_attn.k_norm.weight"
-        mlc_param = named_parameters[mlc_name]
+        mlc_name = f"model.layers.{i}.self_attn.k_norm.weight"
+        mlc_param = named_parameters[mlc_prefix + mlc_name]
         mapping.add_mapping(
-            mlc_name,
-            [mlc_name],
+            mlc_prefix + mlc_name,
+            [hf_prefix + mlc_name],
             functools.partial(
                 lambda x, dtype: (x + 1).astype(dtype),
-                dtype=named_parameters[mlc_name].dtype,
+                dtype=named_parameters[mlc_prefix + mlc_name].dtype,
             ),
         )
 
-        mlc_name = f"language_model.model.layers.{i}.self_attn.q_norm.weight"
-        mlc_param = named_parameters[mlc_name]
+        mlc_name = f"model.layers.{i}.self_attn.q_norm.weight"
+        mlc_param = named_parameters[mlc_prefix + mlc_name]
         mapping.add_mapping(
-            mlc_name,
-            [mlc_name],
+            mlc_prefix + mlc_name,
+            [hf_prefix + mlc_name],
             functools.partial(
                 lambda x, dtype: (x + 1).astype(dtype),
-                dtype=named_parameters[mlc_name].dtype,
+                dtype=named_parameters[mlc_prefix + mlc_name].dtype,
             ),
         )
 
-    mlc_name = "language_model.model.norm.weight"
-    mlc_param = named_parameters[mlc_name]
+    mlc_name = "model.norm.weight"
+    mlc_param = named_parameters[mlc_prefix + mlc_name]
     mapping.add_mapping(
-        mlc_name,
-        [mlc_name],
+        mlc_prefix + mlc_name,
+        [hf_prefix + mlc_name],
         functools.partial(
             lambda x, dtype: (x + 1).astype(dtype),
-            dtype=named_parameters[mlc_name].dtype,
+            dtype=named_parameters[mlc_prefix + mlc_name].dtype,
         ),
     )
 
     for mlc_name, mlc_param in named_parameters.items():
         if mlc_name not in mapping.param_map:
             mapping.add_mapping(
                 mlc_name,
-                [mlc_name],
+                [hf_prefix + mlc_name[len(mlc_prefix):]],
                 functools.partial(
                     lambda x, dtype: x.astype(dtype),
                     dtype=mlc_param.dtype,
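
The net effect of this change: MLC-side parameter names always carry the `language_model.` prefix, while the HuggingFace-side names carry it only for multimodal checkpoints, whose language-model weights live under `language_model.*`; text-only Gemma 3 checkpoints store them directly under `model.*`. A minimal standalone sketch of that resolution logic, using a hypothetical `is_text_model` flag in place of the real `Gemma3Config` field:

```python
# Sketch of the prefix resolution introduced above (hypothetical standalone
# function, not part of the loader). MLC names are always prefixed; HF names
# are prefixed only when the checkpoint is multimodal.
def resolve_names(i: int, is_text_model: bool) -> tuple[str, str]:
    mlc_prefix = "language_model."
    hf_prefix = "language_model." if not is_text_model else ""
    name = f"model.layers.{i}.input_layernorm.weight"
    return mlc_prefix + name, hf_prefix + name

# Text-only checkpoint: HF weights live under "model.*".
assert resolve_names(0, True) == (
    "language_model.model.layers.0.input_layernorm.weight",
    "model.layers.0.input_layernorm.weight",
)
# Multimodal checkpoint: HF weights live under "language_model.model.*".
assert resolve_names(0, False) == (
    "language_model.model.layers.0.input_layernorm.weight",
    "language_model.model.layers.0.input_layernorm.weight",
)
```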
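As the in-code comment notes, Gemma's RMSNorm scales by `(1 + weight)` rather than `weight`, so the converter folds the `+1` into the stored tensor once at load time instead of paying for the addition on every forward pass. A small numpy check of that equivalence, assuming a standard RMS normalization (this mirrors the `lambda x, dtype: (x + 1).astype(dtype)` used above):

```python
import numpy as np

def rms_normalize(x: np.ndarray, eps: float = 1e-6) -> np.ndarray:
    # Plain RMS normalization, no learned scale applied yet.
    return x / np.sqrt(np.mean(x * x, axis=-1, keepdims=True) + eps)

x = np.random.randn(4, 8).astype(np.float32)
w = np.random.randn(8).astype(np.float32)

converted_w = (w + 1.0).astype(np.float32)    # what the loader stores
runtime_out = rms_normalize(x) * converted_w  # standard RMSNorm at runtime
gemma_out = rms_normalize(x) * (1.0 + w)      # Gemma's (1 + weight) definition
np.testing.assert_allclose(runtime_out, gemma_out)
```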