
Commit 4a4f426

gabriellarson and CISC authored
model : add Kimi-K2 support (ggml-org#14654)
* Kimi-K2 conversion
* add Kimi_K2 pre type
* Kimi-K2
* Kimi-K2 unicode
* Kimi-K2
* LLAMA_MAX_EXPERTS 384
* fix vocab iteration
* regex space fix
* add kimi-k2 to pre_computed_hashes
* Updated with kimi-k2 get_vocab_base_pre hash
* fix whitespaces
* fix flake errors
* remove more unicode.cpp whitespaces
* change set_vocab() flow
* add moonshotai-Kimi-K2.jinja to /models/templates/
* update moonshotai-Kimi-K2.jinja
* add kimi-k2 chat template
* add kimi-k2
* update NotImplementedError
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* except Exception
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* LLM_CHAT_TEMPLATE_KIMI_K2 if(add_ass){}

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
1 parent ba1ceb3 commit 4a4f426

10 files changed, +345 -2 lines changed

convert_hf_to_gguf.py

Lines changed: 55 additions & 1 deletion

@@ -840,6 +840,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "169bf0296a13c4d9b7672313f749eb36501d931022de052aad6e36f2bf34dd51":
             # ref: https://huggingface.co/LiquidAI/LFM2-Tokenizer
             res = "lfm2"
+        if chkhsh == "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890":
+            # ref: https://huggingface.co/moonshotai/Kimi-K2-Base
+            res = "kimi-k2"

         if res is None:
             logger.warning("\n")

@@ -5739,7 +5742,58 @@ class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2

     def set_vocab(self):
-        self._set_vocab_gpt2()
+        try:
+            self._set_vocab_gpt2()
+            return
+        except Exception:
+            pass
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        if tokpre == "kimi-k2":
+            # Build merges list using the approach similar to HunYuanMoE
+            merges = []
+            vocab = {}
+            mergeable_ranks = tokenizer.model._mergeable_ranks
+            for token, rank in mergeable_ranks.items():
+                vocab[QwenModel.token_bytes_to_string(token)] = rank
+                if len(token) == 1:
+                    continue
+                merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+                if len(merged) == 2:
+                    merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+            # Build token list
+            vocab_size = self.hparams["vocab_size"]
+            special_tokens = tokenizer.special_tokens
+            reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+            tokens: list[str] = []
+            toktypes: list[int] = []
+
+            for i in range(vocab_size):
+                if i not in reverse_vocab:
+                    tokens.append(f"[PAD{i}]")
+                    toktypes.append(gguf.TokenType.UNUSED)
+                else:
+                    token = reverse_vocab[i]
+                    tokens.append(token)
+                    if i in special_tokens.values():
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
+
+            self.gguf_writer.add_tokenizer_model("gpt2")
+            self.gguf_writer.add_tokenizer_pre(tokpre)
+            self.gguf_writer.add_token_list(tokens)
+            self.gguf_writer.add_token_types(toktypes)
+            self.gguf_writer.add_token_merges(merges)
+
+            special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+            special_vocab.add_to_gguf(self.gguf_writer)
+        else:
+            raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")

     def set_gguf_parameters(self):
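Note: the fallback above rebuilds the BPE merge list from the tokenizer's tiktoken-style _mergeable_ranks, the same approach the HunYuanMoE converter uses: a multi-byte token contributes a merge when applying all lower-ranked merges to its bytes leaves exactly two parts. A minimal self-contained sketch of that idea, using a toy vocabulary and a simplified stand-in for the script's QwenModel.bpe helper:

def bpe(ranks: dict[bytes, int], token: bytes, max_rank: int) -> list[bytes]:
    # Split into single bytes, then repeatedly apply the lowest-ranked
    # merge whose rank is below max_rank, until none applies.
    parts = [bytes([b]) for b in token]
    while True:
        best = None
        for i in range(len(parts) - 1):
            rank = ranks.get(parts[i] + parts[i + 1])
            if rank is not None and rank < max_rank and (best is None or rank < best[0]):
                best = (rank, i)
        if best is None:
            return parts
        i = best[1]
        parts = parts[:i] + [parts[i] + parts[i + 1]] + parts[i + 2:]

ranks = {b"a": 0, b"b": 1, b"ab": 2, b"aba": 3}  # toy mergeable_ranks
merges = []
for token, rank in ranks.items():
    if len(token) == 1:
        continue
    merged = bpe(ranks, token, max_rank=rank)
    if len(merged) == 2:
        merges.append(b" ".join(merged).decode())
print(merges)  # ['a b', 'ab a']
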
convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions

@@ -146,6 +146,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-1B-Base", "chkhsh": "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86"},
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-7B-Base", "chkhsh": "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896"},
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-34B-Base", "chkhsh": "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b"},
+    {"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"},
 ]
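For reference, the chkhsh registered here is the fingerprint the update script computes by encoding a fixed check string and hashing the resulting token ids. A sketch of the scheme (the real multilingual check text is a long constant defined in the conversion scripts and is only a placeholder here):

from hashlib import sha256
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("moonshotai/Kimi-K2-Base", trust_remote_code=True)
chktxt = "..."  # placeholder: the actual check text lives in convert_hf_to_gguf.py
chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
print(chkhsh)  # with the scripts' real check text, this should reproduce the hash above
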
models/templates/moonshotai-Kimi-K2.jinja

Lines changed: 43 additions & 0 deletions

@@ -0,0 +1,43 @@
+{%- if tools -%}
+<|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
+{%- endif -%}
+{%- for message in messages -%}
+{%- if loop.first and messages[0]['role'] != 'system' -%}
+<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
+{%- endif -%}
+{%- if message['role'] == 'system' -%}
+<|im_system|>system<|im_middle|>
+{%- elif message['role'] == 'user' -%}
+<|im_user|>user<|im_middle|>
+{%- elif message['role'] == 'assistant' -%}
+<|im_assistant|>assistant<|im_middle|>
+{%- elif message['role'] == 'tool' -%}
+<|im_system|>tool<|im_middle|>
+{%- endif -%}
+{%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
+{%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
+<|tool_calls_section_begin|>
+{%- for tool_call in message['tool_calls'] -%}
+{%- set func_name = tool_call['function']['name'] -%}
+{%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
+<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
+{%- endfor -%}
+<|tool_calls_section_end|>
+{%- elif message['role'] == 'tool' -%}
+## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
+{%- elif message['content'] is string -%}
+{{ message['content'] }}
+{%- elif message['content'] is not none -%}
+{% for content in message['content'] -%}
+{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+<|media_start|>image<|media_content|><|media_pad|><|media_end|>
+{% else -%}
+{{ content['text'] }}
+{%- endif -%}
+{%- endfor -%}
+{%- endif -%}
+<|im_end|>
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+<|im_assistant|>assistant<|im_middle|>
+{%- endif -%}
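
To sanity-check the token layout, the template can be rendered with plain jinja2 (a sketch; in practice transformers applies it via tokenizer.apply_chat_template):

from jinja2 import Template

with open("models/templates/moonshotai-Kimi-K2.jinja") as f:
    tmpl = Template(f.read())

prompt = tmpl.render(
    messages=[{"role": "user", "content": "Hello"}],
    add_generation_prompt=True,
)
print(prompt)
# <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello<|im_end|><|im_assistant|>assistant<|im_middle|>

Because every tag uses whitespace-trimming markers ({%- ... -%}), the rendered prompt is a single continuous string with no stray newlines between role blocks.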

src/llama-chat.cpp

Lines changed: 23 additions & 0 deletions

@@ -65,6 +65,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
     { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM },
     { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE },
+    { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 },
 };

 llm_chat_template llm_chat_template_from_str(const std::string & name) {

@@ -188,6 +189,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_DOTS1;
     } else if (tmpl_contains("<|startoftext|>") && tmpl_contains("<|extra_4|>")) {
         return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
+    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
+        return LLM_CHAT_TEMPLATE_KIMI_K2;
     }
     return LLM_CHAT_TEMPLATE_UNKNOWN;
 }

@@ -680,6 +683,26 @@ int32_t llm_chat_apply_template(
                 ss << "<|startoftext|>" << message->content << "<|extra_0|>";
             }
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
+        // moonshotai/Kimi-K2-Instruct
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << "<|im_system|>system<|im_middle|>";
+            } else if (role == "user") {
+                ss << "<|im_user|>user<|im_middle|>";
+            } else if (role == "assistant") {
+                ss << "<|im_assistant|>assistant<|im_middle|>";
+            } else if (role == "tool") {
+                ss << "<|im_system|>tool<|im_middle|>";
+            }
+
+            ss << message->content << "<|im_end|>";
+
+            if (add_ass) {
+                ss << "<|im_assistant|>assistant<|im_middle|>";
+            }
+        }
     } else {
         // template not supported
         return -1;
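
As a worked example: for a single user message "Hello" with add_ass = true, this builtin path emits

<|im_user|>user<|im_middle|>Hello<|im_end|><|im_assistant|>assistant<|im_middle|>

Note that, unlike the Jinja file above, the C++ fallback does not inject the default "You are a helpful assistant" system message when the chat lacks one.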

src/llama-chat.h

Lines changed: 1 addition & 0 deletions

@@ -45,6 +45,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_SMOLVLM,
     LLM_CHAT_TEMPLATE_DOTS1,
     LLM_CHAT_TEMPLATE_HUNYUAN_MOE,
+    LLM_CHAT_TEMPLATE_KIMI_K2,
     LLM_CHAT_TEMPLATE_UNKNOWN,
 };
src/llama-hparams.h

Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@

 // bump if necessary
 #define LLAMA_MAX_LAYERS 512
-#define LLAMA_MAX_EXPERTS 256 // DeepSeekV3
+#define LLAMA_MAX_EXPERTS 384 // Kimi-K2

 enum llama_expert_gating_func_type {
     LLAMA_EXPERT_GATING_FUNC_TYPE_NONE = 0,

src/llama-vocab.cpp

Lines changed: 11 additions & 0 deletions

@@ -405,6 +405,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                     "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
                 };
                 break;
+            case LLAMA_VOCAB_PRE_TYPE_KIMI_K2:
+                regex_exprs = {
+                    // K2 trigger pattern - this will activate the custom K2 handler in unicode.cpp
+                    // The custom handler implements all K2 patterns with proper Han character exclusion
+                    "\\p{Han}+",
+                };
+                break;
             case LLAMA_VOCAB_PRE_TYPE_SUPERBPE:
                 regex_exprs = {
                     "\\p{N}+",

@@ -1954,6 +1961,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "hunyuan") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN;
                 clean_spaces = false;
+            } else if (
+                tokenizer_pre == "kimi-k2") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_KIMI_K2;
+                clean_spaces = false;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }
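
As the in-diff comment notes, "\\p{Han}+" acts as a sentinel rather than as the full pre-tokenizer: matching it routes text through a dedicated K2 handler in unicode.cpp. As a quick illustration of the Unicode property class itself (Python's third-party regex module, not llama.cpp code):

import regex  # supports \p{...} property classes, unlike the stdlib re module

print(regex.findall(r"\p{Han}+", "GGUF 转换 test 测试 123"))
# ['转换', '测试']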

src/llama-vocab.h

Lines changed: 1 addition & 0 deletions

@@ -45,6 +45,7 @@ enum llama_vocab_pre_type {
     LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
     LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
     LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 36,
+    LLAMA_VOCAB_PRE_TYPE_KIMI_K2 = 37,
 };

 struct LLM_KV;
