@@ -304,9 +304,8 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.gate",                        # qwen2moe olmoe
             "transformer.decoder_layer.{bid}.router",             # Grok
             "transformer.blocks.{bid}.ffn.router.layer",          # dbrx
-            "model.layers.{bid}.feed_forward.router",             # jamba
             "model.layers.{bid}.block_sparse_moe.router.layer",   # granitemoe
-            "model.layers.{bid}.feed_forward.router",             # llama4
+            "model.layers.{bid}.feed_forward.router",             # llama4 jamba
             "encoder.layers.{bid}.mlp.router.layer",              # nomic-bert-moe
             "model.layers.{bid}.mlp.gate.wg",                     # hunyuan
         ),
@@ -348,10 +347,9 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers",       # jina-bert-v2 (GEGLU)
             "encoder.layer.{bid}.mlp.up_gated_layer",     # jina-v2-code (GEGLU)
             "model.layers.{bid}.residual_mlp.w3",         # arctic
-            "model.layers.{bid}.feed_forward.up_proj",    # jamba
             "encoder.layers.{bid}.mlp.dense_h_to_4h",     # chatglm
             "transformer.h.{bid}.mlp.c_fc_1",             # exaone
-            "model.layers.{bid}.feed_forward.up_proj",    # llama4
+            "model.layers.{bid}.feed_forward.up_proj",    # llama4 jamba
             "transformer_encoder.{bid}.ffn.w12",          # neobert
         ),

@@ -390,9 +388,8 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_w",     # jina-bert-v2 (split up/gate, no longer used)
             "transformer.h.{bid}.mlp.linear_1",           # refact
             "model.layers.{bid}.residual_mlp.w1",         # arctic
-            "model.layers.{bid}.feed_forward.gate_proj",  # jamba
             "transformer.h.{bid}.mlp.c_fc_0",             # exaone
-            "model.layers.{bid}.feed_forward.gate_proj",  # llama4
+            "model.layers.{bid}.feed_forward.gate_proj",  # llama4 jamba
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -436,10 +433,9 @@ class TensorNameMap:
             "transformer.layers.{bid}.ffn.proj_2",          # openelm
             "model.layers.{bid}.residual_mlp.w2",           # arctic
             "encoder.layer.{bid}.mlp.down_layer",           # jina-bert-v2
-            "model.layers.{bid}.feed_forward.down_proj",    # jamba
             "encoder.layers.{bid}.mlp.dense_4h_to_h",       # chatglm
             "model.layers.h.{bid}.mlp.c_proj",              # exaone
-            "model.layers.{bid}.feed_forward.down_proj",    # llama4
+            "model.layers.{bid}.feed_forward.down_proj",    # llama4 jamba
             "transformer_encoder.{bid}.ffn.w3",             # neobert
         ),

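For context, each entry in these tuples is a Hugging Face checkpoint tensor-name template that TensorNameMap expands per block index and maps to a single GGUF tensor; since llama4 and jamba checkpoints use the same "model.layers.{bid}.feed_forward.*" names, the duplicate jamba rows can be folded into the llama4 rows as above. The following is a minimal, self-contained sketch of that expansion, not the actual gguf-py implementation: build_map and the trimmed template tuple are illustrative, and "blk.{bid}.ffn_gate_inp" is assumed here as the GGUF-side name for MODEL_TENSOR.FFN_GATE_INP.

# Illustrative sketch (not the real gguf-py code) of how TensorNameMap-style
# templates resolve: each "{bid}" placeholder is expanded per block, so one
# entry such as "model.layers.{bid}.feed_forward.router" covers both
# llama4 and jamba checkpoints.

FFN_GATE_INP_TEMPLATES = (
    "model.layers.{bid}.mlp.gate",             # qwen2moe olmoe
    "model.layers.{bid}.feed_forward.router",  # llama4 jamba
)

def build_map(templates: tuple[str, ...], gguf_template: str, n_blocks: int) -> dict[str, str]:
    """Expand {bid} in every template and map each HF tensor name to its GGUF name."""
    mapping: dict[str, str] = {}
    for bid in range(n_blocks):
        for tmpl in templates:
            mapping[tmpl.format(bid=bid)] = gguf_template.format(bid=bid)
    return mapping

if __name__ == "__main__":
    # Assumed GGUF-side target name for the MoE router (FFN_GATE_INP) tensor.
    tmap = build_map(FFN_GATE_INP_TEMPLATES, "blk.{bid}.ffn_gate_inp", n_blocks=2)
    print(tmap["model.layers.0.feed_forward.router"])  # llama4/jamba -> blk.0.ffn_gate_inp
    print(tmap["model.layers.1.mlp.gate"])             # qwen2moe     -> blk.1.ffn_gate_inp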