@@ -287,6 +287,7 @@ class MODEL_ARCH(IntEnum):
287
287
CHAMELEON = auto()
288
288
WAVTOKENIZER_DEC = auto()
289
289
PLM = auto()
290
+ BAILINGMOE = auto()
290
291
291
292
292
293
class MODEL_TENSOR(IntEnum):
@@ -490,6 +491,7 @@ class MODEL_TENSOR(IntEnum):
490
491
MODEL_ARCH.CHAMELEON: "chameleon",
491
492
MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
492
493
MODEL_ARCH.PLM: "plm",
494
+ MODEL_ARCH.BAILINGMOE: "bailingmoe",
493
495
}
494
496
495
497
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -1667,6 +1669,28 @@ class MODEL_TENSOR(IntEnum):
1667
1669
MODEL_TENSOR.POSNET_ATTN_V,
1668
1670
MODEL_TENSOR.POSNET_ATTN_OUT,
1669
1671
],
1672
+ MODEL_ARCH.BAILINGMOE: [
1673
+ MODEL_TENSOR.TOKEN_EMBD,
1674
+ MODEL_TENSOR.OUTPUT_NORM,
1675
+ MODEL_TENSOR.OUTPUT,
1676
+ MODEL_TENSOR.ROPE_FREQS,
1677
+ MODEL_TENSOR.ATTN_NORM,
1678
+ MODEL_TENSOR.ATTN_Q,
1679
+ MODEL_TENSOR.ATTN_K,
1680
+ MODEL_TENSOR.ATTN_V,
1681
+ MODEL_TENSOR.ATTN_OUT,
1682
+ MODEL_TENSOR.FFN_GATE_INP,
1683
+ MODEL_TENSOR.FFN_NORM,
1684
+ MODEL_TENSOR.FFN_GATE,
1685
+ MODEL_TENSOR.FFN_DOWN,
1686
+ MODEL_TENSOR.FFN_UP,
1687
+ MODEL_TENSOR.FFN_GATE_EXP,
1688
+ MODEL_TENSOR.FFN_DOWN_EXP,
1689
+ MODEL_TENSOR.FFN_UP_EXP,
1690
+ MODEL_TENSOR.FFN_GATE_SHEXP,
1691
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
1692
+ MODEL_TENSOR.FFN_UP_SHEXP,
1693
+ ],
1670
1694
# TODO
1671
1695
}
1672
1696
@@ -1719,6 +1743,9 @@ class MODEL_TENSOR(IntEnum):
1719
1743
MODEL_TENSOR.ROPE_FREQS,
1720
1744
MODEL_TENSOR.ATTN_ROT_EMBD,
1721
1745
],
1746
+ MODEL_ARCH.BAILINGMOE: [
1747
+ MODEL_TENSOR.ROPE_FREQS,
1748
+ ],
1722
1749
}
1723
1750
1724
1751
#
0 commit comments