@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte",                           # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings",               # falcon
             "word_embeddings",                           # bloom
-            "model.embed_tokens",                        # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 granite-hybrid
+            "model.embed_tokens",                        # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 granite-hybrid exaone4
             "tok_embeddings",                            # llama-pth
             "embeddings.word_embeddings",                # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
@@ -62,7 +62,7 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out",                 # gptneox
-            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
+            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone exaone4 olmoe olmo2 phimoe
             "output",                    # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",            # phi2
@@ -76,7 +76,7 @@ class TensorNameMap:
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm",  # gptneox
             "transformer.ln_f",           # gpt2 gpt-j falcon jais exaone
-            "model.norm",                 # llama-hf baichuan internlm2 olmoe olmo2 phimoe
+            "model.norm",                 # llama-hf baichuan internlm2 olmoe olmo2 phimoe exaone4
             "norm",                       # llama-pth
             "transformer.norm_f",         # mpt dbrx
             "ln_f",                       # refact bloom qwen gpt2
@@ -168,7 +168,7 @@ class TensorNameMap:
 
         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj",          # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.q_proj",          # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.q_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wq",                    # llama-pth
             "encoder.layer.{bid}.attention.self.query",     # bert
@@ -183,7 +183,7 @@ class TensorNameMap:
 
         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj",          # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.k_proj",          # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.k_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wk",                    # llama-pth
             "encoder.layer.{bid}.attention.self.key",       # bert
@@ -199,7 +199,7 @@ class TensorNameMap:
 
         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj",        # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.v_proj",        # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "layers.{bid}.attention.wv",                  # llama-pth
             "encoder.layer.{bid}.attention.self.value",   # bert
             "transformer.layer.{bid}.attention.v_lin",    # distillbert
@@ -219,7 +219,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj",       # mpt
             "transformer.h.{bid}.self_attention.dense",     # falcon
             "h.{bid}.self_attention.dense",                 # bloom
-            "model.layers.{bid}.self_attn.o_proj",          # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.o_proj",          # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.linear_attn",     # deci
             "layers.{bid}.attention.wo",                    # llama-pth
             "encoder.layer.{bid}.attention.output.dense",   # bert
@@ -252,7 +252,7 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2  # ge
+            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2 exaone4  # ge
             "model.layers.{bid}.post_self_attn_layernorm",  # glm-4-0414
         ),
 
@@ -293,7 +293,7 @@ class TensorNameMap:
 
         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
-            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2
+            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2 exaone4
             "model.layers.{bid}.post_mlp_layernorm",          # glm-4-0414
             "model.layers.{bid}.feed_forward.up_proj",
         ),
@@ -325,7 +325,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.up_proj",     # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h",    # falcon
             "h.{bid}.mlp.dense_h_to_4h",                # bloom
-            "model.layers.{bid}.mlp.up_proj",           # llama-hf refact nemotron olmo2
+            "model.layers.{bid}.mlp.up_proj",           # llama-hf refact nemotron olmo2 exaone4
             "layers.{bid}.feed_forward.w3",             # llama-pth
             "encoder.layer.{bid}.intermediate.dense",   # bert
             "transformer.layer.{bid}.ffn.lin1",         # distillbert
@@ -378,7 +378,7 @@ class TensorNameMap:
 
         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2
+            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2 exaone4
             "layers.{bid}.feed_forward.w1",      # llama-pth
             "transformer.h.{bid}.mlp.w2",        # qwen
             "transformer.h.{bid}.mlp.c_fc2",     # jais
@@ -415,7 +415,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.down_proj",   # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h",    # falcon
             "h.{bid}.mlp.dense_4h_to_h",                # bloom
-            "model.layers.{bid}.mlp.down_proj",         # llama-hf nemotron olmo2
+            "model.layers.{bid}.mlp.down_proj",         # llama-hf nemotron olmo2 exaone4
             "layers.{bid}.feed_forward.w2",             # llama-pth
             "encoder.layer.{bid}.output.dense",         # bert
             "transformer.layer.{bid}.ffn.lin2",         # distillbert
@@ -462,7 +462,7 @@ class TensorNameMap:
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm",         # persimmon
             "model.layers.{bid}.self_attn.query_layernorm",     # hunyuan
-            "model.layers.{bid}.self_attn.q_norm",              # cohere olmoe chameleon olmo2
+            "model.layers.{bid}.self_attn.q_norm",              # cohere olmoe chameleon olmo2 exaone4
             "transformer.blocks.{bid}.attn.q_ln",               # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm",             # openelm
@@ -472,7 +472,7 @@ class TensorNameMap:
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",         # persimmon
             "model.layers.{bid}.self_attn.key_layernorm",       # hunyuan
-            "model.layers.{bid}.self_attn.k_norm",              # cohere olmoe chameleon olmo2
+            "model.layers.{bid}.self_attn.k_norm",              # cohere olmoe chameleon olmo2 exaone4
             "transformer.blocks.{bid}.attn.k_ln",               # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm",             # openelm