
Commit 4d6a179

gguf-py : avoid adding duplicate tensor mappings for Jamba
Some of the tensor names are common with Llama4
1 parent 452207f
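
Why the duplicates were redundant: TensorNameMap expands each "{bid}" pattern into concrete per-block names and stores them in a flat lookup table, so two identical patterns listed for two architectures produce exactly the same keys. A minimal sketch of that reasoning (not the actual gguf-py code; ModelTensor and build_mapping here are hypothetical stand-ins):

    from enum import Enum, auto

    class ModelTensor(Enum):
        FFN_GATE_INP = auto()

    # One shared pattern now carries both architecture comments.
    BLOCK_MAPPINGS: dict[ModelTensor, tuple[str, ...]] = {
        ModelTensor.FFN_GATE_INP: (
            "model.layers.{bid}.mlp.gate",             # qwen2moe olmoe
            "model.layers.{bid}.feed_forward.router",  # llama4 jamba
        ),
    }

    def build_mapping(n_blocks: int) -> dict[str, ModelTensor]:
        mapping: dict[str, ModelTensor] = {}
        for tensor, patterns in BLOCK_MAPPINGS.items():
            for pattern in patterns:
                for bid in range(n_blocks):
                    # A duplicate pattern would rewrite the same key with the
                    # same value, so a single shared entry is sufficient.
                    mapping[pattern.format(bid=bid)] = tensor
        return mapping

    assert build_mapping(2)["model.layers.1.feed_forward.router"] is ModelTensor.FFN_GATE_INP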


gguf-py/gguf/tensor_mapping.py

Lines changed: 4 additions & 8 deletions
@@ -304,9 +304,8 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.gate",                        # qwen2moe olmoe
             "transformer.decoder_layer.{bid}.router",             # Grok
             "transformer.blocks.{bid}.ffn.router.layer",          # dbrx
-            "model.layers.{bid}.feed_forward.router",             # jamba
             "model.layers.{bid}.block_sparse_moe.router.layer",   # granitemoe
-            "model.layers.{bid}.feed_forward.router",             # llama4
+            "model.layers.{bid}.feed_forward.router",             # llama4 jamba
             "encoder.layers.{bid}.mlp.router.layer",              # nomic-bert-moe
             "model.layers.{bid}.mlp.gate.wg",                     # hunyuan
         ),
@@ -348,10 +347,9 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers",     # jina-bert-v2 (GEGLU)
             "encoder.layer.{bid}.mlp.up_gated_layer",   # jina-v2-code (GEGLU)
             "model.layers.{bid}.residual_mlp.w3",       # arctic
-            "model.layers.{bid}.feed_forward.up_proj",  # jamba
             "encoder.layers.{bid}.mlp.dense_h_to_4h",   # chatglm
             "transformer.h.{bid}.mlp.c_fc_1",           # exaone
-            "model.layers.{bid}.feed_forward.up_proj",  # llama4
+            "model.layers.{bid}.feed_forward.up_proj",  # llama4 jamba
             "transformer_encoder.{bid}.ffn.w12",        # neobert
         ),

@@ -390,9 +388,8 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_w",     # jina-bert-v2 (split up/gate, no longer used)
             "transformer.h.{bid}.mlp.linear_1",           # refact
             "model.layers.{bid}.residual_mlp.w1",         # arctic
-            "model.layers.{bid}.feed_forward.gate_proj",  # jamba
             "transformer.h.{bid}.mlp.c_fc_0",             # exaone
-            "model.layers.{bid}.feed_forward.gate_proj",  # llama4
+            "model.layers.{bid}.feed_forward.gate_proj",  # llama4 jamba
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -436,10 +433,9 @@ class TensorNameMap:
             "transformer.layers.{bid}.ffn.proj_2",        # openelm
             "model.layers.{bid}.residual_mlp.w2",         # arctic
             "encoder.layer.{bid}.mlp.down_layer",         # jina-bert-v2
-            "model.layers.{bid}.feed_forward.down_proj",  # jamba
             "encoder.layers.{bid}.mlp.dense_4h_to_h",     # chatglm
             "model.layers.h.{bid}.mlp.c_proj",            # exaone
-            "model.layers.{bid}.feed_forward.down_proj",  # llama4
+            "model.layers.{bid}.feed_forward.down_proj",  # llama4 jamba
             "transformer_encoder.{bid}.ffn.w3",           # neobert
         ),

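After the change, both architectures resolve the shared Hugging Face tensor name through the single merged entry. A quick sanity check, as a sketch only (assumes gguf-py from this branch, where MODEL_ARCH.JAMBA is expected to exist alongside MODEL_ARCH.LLAMA4):

    from gguf.constants import MODEL_ARCH, MODEL_TENSOR
    from gguf.tensor_mapping import get_tensor_name_map

    # MODEL_ARCH.JAMBA is assumed to be defined on this branch.
    for arch in (MODEL_ARCH.LLAMA4, MODEL_ARCH.JAMBA):
        tmap = get_tensor_name_map(arch, n_blocks=1)
        # Both architectures map the shared name to the same tensor type.
        assert tmap.get_type("model.layers.0.feed_forward.router") == MODEL_TENSOR.FFN_GATE_INP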