
Commit 6ddff4d

talk-llama : sync llama.cpp
ggml-ci
1 parent 6d64e4a commit 6ddff4d

24 files changed: +3289 -1148 lines changed

examples/talk-llama/llama-arch.cpp

Lines changed: 192 additions & 3 deletions
@@ -45,6 +45,9 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_GEMMA3N, "gemma3n" },
     { LLM_ARCH_STARCODER2, "starcoder2" },
     { LLM_ARCH_MAMBA, "mamba" },
+    { LLM_ARCH_MAMBA2, "mamba2" },
+    { LLM_ARCH_JAMBA, "jamba" },
+    { LLM_ARCH_FALCON_H1, "falcon-h1" },
     { LLM_ARCH_XVERSE, "xverse" },
     { LLM_ARCH_COMMAND_R, "command-r" },
     { LLM_ARCH_COHERE2, "cohere2" },
@@ -70,13 +73,17 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_ARWKV7, "arwkv7" },
     { LLM_ARCH_GRANITE, "granite" },
     { LLM_ARCH_GRANITE_MOE, "granitemoe" },
+    { LLM_ARCH_GRANITE_HYBRID, "granitehybrid" },
     { LLM_ARCH_CHAMELEON, "chameleon" },
     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
     { LLM_ARCH_PLM, "plm" },
     { LLM_ARCH_BAILINGMOE, "bailingmoe" },
     { LLM_ARCH_DOTS1, "dots1" },
     { LLM_ARCH_ARCEE, "arcee" },
     { LLM_ARCH_ERNIE4_5, "ernie4_5" },
+    { LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
+    { LLM_ARCH_SMOLLM3, "smollm3" },
+    { LLM_ARCH_LFM2, "lfm2" },
     { LLM_ARCH_UNKNOWN, "(unknown)" },
 };
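
For context (not part of this commit): the string in each entry is the value stored under general.architecture in GGUF metadata, and a reverse lookup over this table is how a loaded model maps back to an llm_arch value. A minimal sketch of such a lookup (the enum and helper names below are illustrative only):

// Illustration only: reverse lookup over an LLM_ARCH_NAMES-style table.
#include <map>
#include <string>

enum llm_arch_example { ARCH_MAMBA2, ARCH_JAMBA, ARCH_FALCON_H1, ARCH_UNKNOWN };

static const std::map<llm_arch_example, const char *> ARCH_NAMES = {
    { ARCH_MAMBA2,    "mamba2"    },
    { ARCH_JAMBA,     "jamba"     },
    { ARCH_FALCON_H1, "falcon-h1" },
};

static llm_arch_example arch_from_string(const std::string & name) {
    for (const auto & kv : ARCH_NAMES) {
        if (name == kv.second) {
            return kv.first; // enum key whose registered name matches
        }
    }
    return ARCH_UNKNOWN;
}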

@@ -149,7 +156,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
     { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
     { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },
-    { LLM_KV_ATTENTION_LAYER_INDICES, "%s.attention.layer_indices" },

     { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
     { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
@@ -170,6 +176,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" },
     { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" },
     { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
+    { LLM_KV_SSM_GROUP_COUNT, "%s.ssm.group_count" },
     { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" },

     { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
@@ -182,6 +189,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {

     { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },

+    { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
+
     { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
     { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
     { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
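
For context (not part of this commit): the "%s" prefix in each key is replaced with the architecture name when metadata is read, so the new LLM_KV_SSM_GROUP_COUNT key resolves to e.g. "mamba2.ssm.group_count" and LLM_KV_SHORTCONV_L_CACHE to "lfm2.shortconv.l_cache". A hedged sketch of that expansion (the helper name is hypothetical):

// Illustration only: expand a "%s"-prefixed metadata key for a given architecture name.
#include <cstdio>
#include <string>

static std::string expand_kv(const char * fmt, const char * arch_name) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), fmt, arch_name);
    return buf;
}

// expand_kv("%s.ssm.group_count",   "mamba2") -> "mamba2.ssm.group_count"
// expand_kv("%s.shortconv.l_cache", "lfm2")   -> "lfm2.shortconv.l_cache"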
@@ -1004,6 +1013,77 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
         },
     },
+    {
+        LLM_ARCH_MAMBA2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+        },
+    },
+    {
+        LLM_ARCH_JAMBA,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
+            { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_FALCON_H1,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_XVERSE,
         {
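
For context (not part of this commit): the "blk.%d" strings are per-layer tensor name formats; the layer index is substituted at load time to locate each tensor in the GGUF file (together with a ".weight"/".bias" suffix, which is an assumption here and not shown in this diff). A minimal sketch with a hypothetical helper:

// Illustration only: build the concrete per-layer tensor name.
#include <cstdio>
#include <string>

static std::string tensor_name(const char * fmt, int il, const char * suffix) {
    char buf[256];
    std::snprintf(buf, sizeof(buf), fmt, il);
    return std::string(buf) + "." + suffix;
}

// tensor_name("blk.%d.ssm_norm", 3, "weight") -> "blk.3.ssm_norm.weight"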
@@ -1564,6 +1644,43 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
         },
     },
+    {
+        LLM_ARCH_GRANITE_HYBRID,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            // mamba(2) ssm layers
+            { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
+            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
+            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
+            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
+            { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
+            { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" },
+            { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
+            // attention layers
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            // dense FFN
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            // moe FFN
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+            // shared expert
+            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+        },
+    },
     {
         LLM_ARCH_CHAMELEON,
         {
@@ -1676,6 +1793,67 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_HUNYUAN_MOE,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
+            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
+        },
+    },
+    {
+        LLM_ARCH_SMOLLM3,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
+    {
+        LLM_ARCH_LFM2,
+        {
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" },
+            { LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
+            { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+        }
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
@@ -1760,7 +1938,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
     {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
     {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
+    {LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SSM_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
@@ -1839,6 +2021,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_SHORTCONV_CONV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
+    {LLM_TENSOR_SHORTCONV_INPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
 };

 LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
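
For context (not part of this commit): each LLM_TENSOR_INFOS entry records the layer class a tensor belongs to and the ggml op that consumes it, which is what llm_tensor_info_for() (visible in the next hunk header) returns. One plausible use of that information, shown with simplified stand-in types rather than the real ones:

// Illustration only: matrix-multiply weights (e.g. the new LFM2 shortconv
// projections, GGML_OP_MUL_MAT) are typical quantization/offload candidates,
// while small element-wise tensors such as the new SSM norms (GGML_OP_MUL)
// usually stay in full precision.
enum example_op    { OP_MUL, OP_MUL_MAT, OP_SSM_CONV };
enum example_layer { LAYER_INPUT, LAYER_REPEATING, LAYER_OUTPUT };

struct example_tensor_info {
    example_layer layer;
    example_op    op;
};

static bool is_matmul_weight(const example_tensor_info & info) {
    return info.op == OP_MUL_MAT;
}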
@@ -1894,6 +2079,7 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
 bool llm_arch_is_recurrent(const llm_arch & arch) {
     switch (arch) {
         case LLM_ARCH_MAMBA:
+        case LLM_ARCH_MAMBA2:
         case LLM_ARCH_RWKV6:
         case LLM_ARCH_RWKV6QWEN2:
         case LLM_ARCH_RWKV7:
@@ -1905,9 +2091,12 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
 }

 bool llm_arch_is_hybrid(const llm_arch & arch) {
-    // TODO: There are currently no hybrid models! Once there are, this will be
-    // the place to identify them
     switch (arch) {
+        case LLM_ARCH_JAMBA:
+        case LLM_ARCH_FALCON_H1:
+        case LLM_ARCH_GRANITE_HYBRID:
+        case LLM_ARCH_LFM2:
+            return true;
         default:
             return false;
     }
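
For context (not part of this commit): these predicates can be used by a loader to pick a memory layout. Purely recurrent architectures (now including Mamba-2) keep only SSM/RWKV state, while the newly listed hybrids (Jamba, Falcon-H1, Granite Hybrid, LFM2) combine that state with a regular attention KV cache. A rough sketch of such a switch; the enum and function below are hypothetical, not llama.cpp code:

// Illustration only: pick a cache layout from the architecture classification.
#include "llama-arch.h"

enum cache_kind { CACHE_KV_ONLY, CACHE_RECURRENT_ONLY, CACHE_HYBRID };

static cache_kind pick_cache(const llm_arch & arch) {
    if (llm_arch_is_hybrid(arch)) {
        return CACHE_HYBRID;         // attention KV cache + recurrent state
    }
    if (llm_arch_is_recurrent(arch)) {
        return CACHE_RECURRENT_ONLY; // recurrent state only
    }
    return CACHE_KV_ONLY;            // plain attention KV cache
}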

examples/talk-llama/llama-arch.h

Lines changed: 17 additions & 1 deletion
@@ -49,6 +49,9 @@ enum llm_arch {
     LLM_ARCH_GEMMA3N,
     LLM_ARCH_STARCODER2,
     LLM_ARCH_MAMBA,
+    LLM_ARCH_MAMBA2,
+    LLM_ARCH_JAMBA,
+    LLM_ARCH_FALCON_H1,
     LLM_ARCH_XVERSE,
     LLM_ARCH_COMMAND_R,
     LLM_ARCH_COHERE2,
@@ -74,13 +77,17 @@ enum llm_arch {
     LLM_ARCH_ARWKV7,
     LLM_ARCH_GRANITE,
     LLM_ARCH_GRANITE_MOE,
+    LLM_ARCH_GRANITE_HYBRID,
     LLM_ARCH_CHAMELEON,
     LLM_ARCH_WAVTOKENIZER_DEC,
     LLM_ARCH_PLM,
     LLM_ARCH_BAILINGMOE,
     LLM_ARCH_DOTS1,
     LLM_ARCH_ARCEE,
     LLM_ARCH_ERNIE4_5,
+    LLM_ARCH_HUNYUAN_MOE,
+    LLM_ARCH_SMOLLM3,
+    LLM_ARCH_LFM2,
     LLM_ARCH_UNKNOWN,
 };

@@ -153,7 +160,6 @@ enum llm_kv {
     LLM_KV_ATTENTION_SCALE,
     LLM_KV_ATTENTION_KEY_LENGTH_MLA,
     LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
-    LLM_KV_ATTENTION_LAYER_INDICES,

     LLM_KV_ROPE_DIMENSION_COUNT,
     LLM_KV_ROPE_DIMENSION_SECTIONS,
@@ -174,6 +180,7 @@ enum llm_kv {
     LLM_KV_SSM_CONV_KERNEL,
     LLM_KV_SSM_STATE_SIZE,
     LLM_KV_SSM_TIME_STEP_RANK,
+    LLM_KV_SSM_GROUP_COUNT,
     LLM_KV_SSM_DT_B_C_RMS,

     LLM_KV_WKV_HEAD_SIZE,
@@ -221,6 +228,8 @@ enum llm_kv {

     LLM_KV_CLASSIFIER_OUTPUT_LABELS,

+    LLM_KV_SHORTCONV_L_CACHE,
+
     // deprecated:
     LLM_KV_TOKENIZER_PREFIX_ID,
     LLM_KV_TOKENIZER_SUFFIX_ID,
@@ -291,8 +300,12 @@ enum llm_tensor {
     LLM_TENSOR_SSM_CONV1D,
     LLM_TENSOR_SSM_X,
     LLM_TENSOR_SSM_DT,
+    LLM_TENSOR_SSM_DT_NORM,
     LLM_TENSOR_SSM_A,
+    LLM_TENSOR_SSM_B_NORM,
+    LLM_TENSOR_SSM_C_NORM,
     LLM_TENSOR_SSM_D,
+    LLM_TENSOR_SSM_NORM,
     LLM_TENSOR_SSM_OUT,
     LLM_TENSOR_TIME_MIX_W0,
     LLM_TENSOR_TIME_MIX_W1,
@@ -386,6 +399,9 @@ enum llm_tensor {
     LLM_TENSOR_POS_NET_ATTN_K,
     LLM_TENSOR_POS_NET_ATTN_V,
     LLM_TENSOR_POS_NET_ATTN_OUT,
+    LLM_TENSOR_SHORTCONV_CONV,
+    LLM_TENSOR_SHORTCONV_INPROJ,
+    LLM_TENSOR_SHORTCONV_OUTPROJ,
 };

 enum llm_tensor_layer {
