@@ -45,6 +45,9 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_GEMMA3N, "gemma3n" },
     { LLM_ARCH_STARCODER2, "starcoder2" },
     { LLM_ARCH_MAMBA, "mamba" },
+    { LLM_ARCH_MAMBA2, "mamba2" },
+    { LLM_ARCH_JAMBA, "jamba" },
+    { LLM_ARCH_FALCON_H1, "falcon-h1" },
     { LLM_ARCH_XVERSE, "xverse" },
     { LLM_ARCH_COMMAND_R, "command-r" },
     { LLM_ARCH_COHERE2, "cohere2" },
@@ -70,13 +73,17 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_ARWKV7, "arwkv7" },
     { LLM_ARCH_GRANITE, "granite" },
     { LLM_ARCH_GRANITE_MOE, "granitemoe" },
+    { LLM_ARCH_GRANITE_HYBRID, "granitehybrid" },
     { LLM_ARCH_CHAMELEON, "chameleon" },
     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
     { LLM_ARCH_PLM, "plm" },
     { LLM_ARCH_BAILINGMOE, "bailingmoe" },
     { LLM_ARCH_DOTS1, "dots1" },
     { LLM_ARCH_ARCEE, "arcee" },
     { LLM_ARCH_ERNIE4_5, "ernie4_5" },
+    { LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
+    { LLM_ARCH_SMOLLM3, "smollm3" },
+    { LLM_ARCH_LFM2, "lfm2" },
     { LLM_ARCH_UNKNOWN, "(unknown)" },
 };

@@ -149,7 +156,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
     { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
     { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
-    { LLM_KV_ATTENTION_LAYER_INDICES, "%s.attention.layer_indices" },

     { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
     { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
@@ -170,6 +176,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
     { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
     { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
+    { LLM_KV_SSM_GROUP_COUNT, "%s.ssm.group_count" },
     { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },

     { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
@@ -182,6 +189,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {

     { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },

+    { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
+
     { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
     { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
     { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
@@ -1004,6 +1013,77 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
         },
     },
+    {
+        LLM_ARCH_MAMBA2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+        },
+    },
+    {
+        LLM_ARCH_JAMBA,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
+            { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_FALCON_H1,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_XVERSE,
         {
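The per-layer entries above are printf-style templates: the "%d" placeholder is filled in with the block index when a concrete tensor name is resolved (llama.cpp does this through its LLM_TN helpers). A minimal sketch of that substitution, not part of the patch, using a hypothetical blk_tensor_name helper and an assumed buffer size:

#include <cstdio>
#include <string>

// Sketch only: render a per-layer tensor name from one of the "blk.%d.*"
// format strings in LLM_TENSOR_NAMES.
static std::string blk_tensor_name(const char * fmt, int il, const char * suffix) {
    char buf[128];                              // assumed size, illustration only
    std::snprintf(buf, sizeof(buf), fmt, il);   // "blk.%d.ssm_in" -> "blk.3.ssm_in"
    return std::string(buf) + "." + suffix;     // -> "blk.3.ssm_in.weight"
}

With the LLM_ARCH_MAMBA2 table added above, blk_tensor_name("blk.%d.ssm_norm", 3, "weight") would yield "blk.3.ssm_norm.weight", the kind of name that is then looked up in the GGUF file.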
@@ -1564,6 +1644,43 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
         },
     },
+    {
+        LLM_ARCH_GRANITE_HYBRID,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            // mamba(2) ssm layers
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+            // attention layers
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            // dense FFN
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            // moe FFN
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+            // shared expert
+            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+        },
+    },
     {
         LLM_ARCH_CHAMELEON,
         {
@@ -1676,6 +1793,67 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_HUNYUAN_MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_SMOLLM3,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_LFM2,
+        {
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" },
+            { LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
+            { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+        }
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
@@ -1760,7 +1938,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
     {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
     {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
+    {LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
@@ -1839,6 +2021,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SHORTCONV_CONV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
+    {LLM_TENSOR_SHORTCONV_INPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
 };

 LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
@@ -1894,6 +2079,7 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
 bool llm_arch_is_recurrent(const llm_arch & arch) {
     switch (arch) {
         case LLM_ARCH_MAMBA:
+        case LLM_ARCH_MAMBA2:
         case LLM_ARCH_RWKV6:
         case LLM_ARCH_RWKV6QWEN2:
         case LLM_ARCH_RWKV7:
@@ -1905,9 +2091,12 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
 }

 bool llm_arch_is_hybrid(const llm_arch & arch) {
-    // TODO: There are currently no hybrid models! Once there are, this will be
-    // the place to identify them
     switch (arch) {
+        case LLM_ARCH_JAMBA:
+        case LLM_ARCH_FALCON_H1:
+        case LLM_ARCH_GRANITE_HYBRID:
+        case LLM_ARCH_LFM2:
+            return true;
         default:
             return false;
     }
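A minimal sketch (not part of the patch) of how a caller might combine the two predicates touched here to decide what kind of per-sequence state a model needs; it assumes the in-tree llama-arch.h header and the enum values and declarations shown in the hunks above:

#include "llama-arch.h"

// Sketch only: classify a model's memory needs from its architecture.
static const char * memory_kind(llm_arch arch) {
    if (llm_arch_is_hybrid(arch)) {
        // e.g. LLM_ARCH_JAMBA, LLM_ARCH_FALCON_H1, LLM_ARCH_GRANITE_HYBRID, LLM_ARCH_LFM2
        return "attention KV cache + recurrent state";
    }
    if (llm_arch_is_recurrent(arch)) {
        // e.g. LLM_ARCH_MAMBA, LLM_ARCH_MAMBA2, the RWKV families
        return "recurrent state only";
    }
    return "attention KV cache only";
}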