@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte",  # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings",  # falcon
             "word_embeddings",  # bloom
-            "model.embed_tokens",  # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
+            "model.embed_tokens",  # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 exaone4
             "tok_embeddings",  # llama-pth
             "embeddings.word_embeddings",  # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
@@ -62,7 +62,7 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out",  # gptneox
-            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
+            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone exaone4 olmoe olmo2 phimoe
             "output",  # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",  # phi2
@@ -76,7 +76,7 @@ class TensorNameMap:
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm",  # gptneox
             "transformer.ln_f",  # gpt2 gpt-j falcon jais exaone
-            "model.norm",  # llama-hf baichuan internlm2 olmoe olmo2 phimoe
+            "model.norm",  # llama-hf baichuan internlm2 olmoe olmo2 phimoe exaone4
             "norm",  # llama-pth
             "transformer.norm_f",  # mpt dbrx
             "ln_f",  # refact bloom qwen gpt2
@@ -168,7 +168,7 @@ class TensorNameMap:

         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.q_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wq",  # llama-pth
             "encoder.layer.{bid}.attention.self.query",  # bert
@@ -183,7 +183,7 @@ class TensorNameMap:

         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.k_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wk",  # llama-pth
             "encoder.layer.{bid}.attention.self.key",  # bert
@@ -199,7 +199,7 @@ class TensorNameMap:

         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "layers.{bid}.attention.wv",  # llama-pth
             "encoder.layer.{bid}.attention.self.value",  # bert
             "transformer.layer.{bid}.attention.v_lin",  # distillbert
@@ -219,7 +219,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj",  # mpt
             "transformer.h.{bid}.self_attention.dense",  # falcon
             "h.{bid}.self_attention.dense",  # bloom
-            "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.linear_attn",  # deci
             "layers.{bid}.attention.wo",  # llama-pth
             "encoder.layer.{bid}.attention.output.dense",  # bert
@@ -252,7 +252,7 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2  # ge
+            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2 exaone4  # ge
             "model.layers.{bid}.post_self_attn_layernorm",  # glm-4-0414
         ),

@@ -290,7 +290,7 @@ class TensorNameMap:

         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
-            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2
+            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2 exaone4
             "model.layers.{bid}.post_mlp_layernorm",  # glm-4-0414
         ),

@@ -321,7 +321,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.up_proj",  # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h",  # falcon
             "h.{bid}.mlp.dense_h_to_4h",  # bloom
-            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact nemotron olmo2
+            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact nemotron olmo2 exaone4
             "layers.{bid}.feed_forward.w3",  # llama-pth
             "encoder.layer.{bid}.intermediate.dense",  # bert
             "transformer.layer.{bid}.ffn.lin1",  # distillbert
@@ -373,7 +373,7 @@ class TensorNameMap:

         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2
+            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2 exaone4
             "layers.{bid}.feed_forward.w1",  # llama-pth
             "transformer.h.{bid}.mlp.w2",  # qwen
             "transformer.h.{bid}.mlp.c_fc2",  # jais
@@ -410,7 +410,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.down_proj",  # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h",  # falcon
             "h.{bid}.mlp.dense_4h_to_h",  # bloom
-            "model.layers.{bid}.mlp.down_proj",  # llama-hf nemotron olmo2
+            "model.layers.{bid}.mlp.down_proj",  # llama-hf nemotron olmo2 exaone4
             "layers.{bid}.feed_forward.w2",  # llama-pth
             "encoder.layer.{bid}.output.dense",  # bert
             "transformer.layer.{bid}.ffn.lin2",  # distillbert
@@ -457,7 +457,7 @@ class TensorNameMap:
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm",  # persimmon
             "model.layers.{bid}.self_attn.query_layernorm",  # hunyuan
-            "model.layers.{bid}.self_attn.q_norm",  # cohere olmoe chameleon olmo2
+            "model.layers.{bid}.self_attn.q_norm",  # cohere olmoe chameleon olmo2 exaone4
             "transformer.blocks.{bid}.attn.q_ln",  # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm",  # openelm
@@ -467,7 +467,7 @@ class TensorNameMap:
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",  # persimmon
             "model.layers.{bid}.self_attn.key_layernorm",  # hunyuan
-            "model.layers.{bid}.self_attn.k_norm",  # cohere olmoe chameleon olmo2
+            "model.layers.{bid}.self_attn.k_norm",  # cohere olmoe chameleon olmo2 exaone4
             "transformer.blocks.{bid}.attn.k_ln",  # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm",  # openelm