From 38d591031777b5fb29e1e67371a01002a066d5bf Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:35:45 -0500
Subject: [PATCH 01/21] Kimi-K2 conversion

---
 convert_hf_to_gguf.py | 48 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 8afb425b156f2..0a6b1daf932f7 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5563,7 +5563,53 @@ class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2
 
     def set_vocab(self):
-        self._set_vocab_gpt2()
+        if(self.hparams["vocab_size"]==163840): # Kimi-K2 model
+            from transformers import AutoTokenizer
+            tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+            tokpre = "kimi-k2" # TODO: add identifier hash
+
+            # Build merges list using the approach similar to HunYuanMoE
+            merges = []
+            vocab = {}
+            mergeable_ranks = tokenizer.model._mergeable_ranks
+            for token, rank in mergeable_ranks.items():
+                vocab[QwenModel.token_bytes_to_string(token)] = rank
+                if len(token) == 1:
+                    continue
+                merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+                if len(merged) == 2:
+                    merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+
+            # Build token list
+            vocab_size = self.hparams["vocab_size"]
+            special_tokens = tokenizer.special_tokens
+            reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+            tokens: list[str] = []
+            toktypes: list[int] = []
+
+            for i in range(tokenizer.vocab_size):
+                if i not in reverse_vocab:
+                    tokens.append(f"[PAD{i}]")
+                    toktypes.append(gguf.TokenType.UNUSED)
+                else:
+                    token = reverse_vocab[i]
+                    tokens.append(token)
+                    if i in special_tokens.values():
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
+
+            self.gguf_writer.add_tokenizer_model("gpt2")
+            self.gguf_writer.add_tokenizer_pre(tokpre)
+            self.gguf_writer.add_token_list(tokens)
+            self.gguf_writer.add_token_types(toktypes)
+            self.gguf_writer.add_token_merges(merges)
+
+            special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+            special_vocab.add_to_gguf(self.gguf_writer)
+        else:
+            self._set_vocab_gpt2()
 
     def set_gguf_parameters(self):

From bf674c3663af63b09aa8e10ff18501aa08d1f4b9 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:37:34 -0500
Subject: [PATCH 02/21] add Kimi_K2 pre type

---
 src/llama-vocab.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 46a1ccecb51fc..1ce8fd307e2d3 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -45,6 +45,7 @@ enum llama_vocab_pre_type {
    LLAMA_VOCAB_PRE_TYPE_PIXTRAL        = 34,
    LLAMA_VOCAB_PRE_TYPE_SEED_CODER     = 35,
    LLAMA_VOCAB_PRE_TYPE_HUNYUAN        = 36,
+   LLAMA_VOCAB_PRE_TYPE_KIMI_K2        = 37,
 };
 
 struct LLM_KV;

From d80d7fc1591bc5228418d8f3a26ebf52ec8842c4 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:39:30 -0500
Subject: [PATCH 03/21] Kimi-K2

---
 src/llama-vocab.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index e0e578d6394d8..602e5a2743e6d 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -404,6 +404,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                     "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
                 };
                 break;
+            case LLAMA_VOCAB_PRE_TYPE_KIMI_K2:
+                regex_exprs = {
+                    // K2 trigger pattern - this will activate the custom K2 handler in unicode.cpp
+                    // The custom handler implements all K2 patterns with proper Han character exclusion
+                    "\\p{Han}+",
+                };
+                break;
             case LLAMA_VOCAB_PRE_TYPE_SUPERBPE:
                 regex_exprs = {
                     "\\p{N}+",
@@ -1665,6 +1672,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "hunyuan") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN;
                 clean_spaces = false;
+            } else if (
+                tokenizer_pre == "kimi-k2") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_KIMI_K2;
+                clean_spaces = false;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }

From f8a643061157b2fc714d4c6c7487cebb440c6d9d Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:40:19 -0500
Subject: [PATCH 04/21] Kimi-K2 unicode

---
 src/unicode.cpp | 202 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 202 insertions(+)

diff --git a/src/unicode.cpp b/src/unicode.cpp
index 43a4581b961fe..78ae4bf88af1d 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -557,6 +557,173 @@ static std::vector<size_t> unicode_regex_split_stl(const std::string & text, con
     return bpe_offsets;
 }
 
+// K2 system regex patterns (from tokenization_kimi.py): 
+// [\p{Han}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+
+static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string & text, const std::vector<size_t> & offsets) {
+    std::vector<size_t> bpe_offsets;
+    bpe_offsets.reserve(offsets.size());
+
+    const auto cpts = unicode_cpts_from_utf8(text);
+
+    size_t start = 0;
+    for (auto offset : offsets) {
+        const size_t offset_ini = start;
+        const size_t offset_end = start + offset;
+        assert(offset_end <= cpts.size());
+        start = offset_end;
+
+        static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
+        auto _get_cpt = [&] (const size_t pos) -> uint32_t {
+            return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
+        };
+
+        auto _get_flags = [&] (const size_t pos) -> unicode_cpt_flags {
+            return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags_from_cpt(cpts[pos]) : unicode_cpt_flags{};
+        };
+
+        size_t _prev_end = offset_ini;
+        auto _add_token = [&] (const size_t end) -> size_t {
+            assert(_prev_end <= end && end <= offset_end);
+            size_t len = end - _prev_end;
+            if (len > 0) {
+                bpe_offsets.push_back(len);
+            }
+            _prev_end = end;
+            return len;
+        };
+
+        for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
+            const uint32_t cpt = _get_cpt(pos);
+            const auto flags = _get_flags(pos);
+
+            // Pattern 1: [\p{Han}]+ (Chinese characters)
+            if (unicode_cpt_is_han(cpt)) {
+                while (unicode_cpt_is_han(_get_cpt(pos))) {
+                    pos++;
+                }
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 2 & 3: Letter words excluding Han characters with optional contractions
+            // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?:'s|'t|'re|'ve|'m|'ll|'d)?
+            // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
+            if (flags.is_letter && !unicode_cpt_is_han(cpt)) {
+                // Handle optional leading non-letter/non-number character
+                bool has_leading_char = false;
+                if (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number)) {
+                    has_leading_char = true;
+                    pos++;
+                }
+                
+                // Match letter sequence (excluding Han characters)
+                bool has_letters = false;
+                while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
+                    has_letters = true;
+                    pos++;
+                }
+                
+                // Only proceed if we found letters (after potentially skipping leading char)
+                if (has_letters || (!has_leading_char && _get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos)))) {
+                    if (!has_letters) pos++; // consume the first letter if we didn't already
+                    
+                    // Continue consuming letters
+                    while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
+                        pos++;
+                    }
+                    
+                    // Check for optional contractions (?:'s|'t|'re|'ve|'m|'ll|'d)
+                    if (_get_cpt(pos) == '\'' && pos + 1 < offset_end) {
+                        uint32_t cpt_next = unicode_tolower(_get_cpt(pos + 1));
+                        if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
+                            pos += 2;
+                        } else if (pos + 2 < offset_end) {
+                            uint32_t cpt_next_next = unicode_tolower(_get_cpt(pos + 2));
+                            if ((cpt_next == 'r' && cpt_next_next == 'e') ||
+                                (cpt_next == 'v' && cpt_next_next == 'e') ||
+                                (cpt_next == 'l' && cpt_next_next == 'l')) {
+                                pos += 3;
+                            }
+                        }
+                    }
+                    
+                    _add_token(pos);
+                    continue;
+                } else if (has_leading_char) {
+                    // We consumed a leading char but found no letters, backtrack
+                    pos--;
+                }
+            }
+
+            // Pattern 4: \p{N}{1,3} (numbers 1-3 digits)
+            if (flags.is_number) {
+                size_t ini = pos;
+                while (_get_flags(pos).is_number) {
+                    if (++pos - ini >= 3) {
+                        _add_token(pos);
+                        ini = pos;
+                    }
+                }
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 5:  ?[^\s\p{L}\p{N}]+[\r\n]* (optional space + non-word chars + optional newlines)
+            auto flags2 = (cpt == ' ' ? _get_flags(pos + 1) : flags);
+            if (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) {
+                pos += (cpt == ' ');
+                while (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) {
+                    flags2 = _get_flags(++pos);
+                }
+                // Match optional [\r\n]*
+                uint32_t cpt2 = _get_cpt(pos);
+                while (cpt2 == '\r' || cpt2 == '\n') {
+                    cpt2 = _get_cpt(++pos);
+                }
+                _add_token(pos);
+                continue;
+            }
+
+            // Count whitespace characters
+            size_t num_whitespaces = 0;
+            size_t last_end_r_or_n = 0;
+            while (_get_flags(pos + num_whitespaces).is_whitespace) {
+                uint32_t cpt2 = _get_cpt(pos + num_whitespaces);
+                if (cpt2 == '\r' || cpt2 == '\n') {
+                    last_end_r_or_n = pos + num_whitespaces + 1;
+                }
+                num_whitespaces++;
+            }
+
+            // Pattern 6: \s*[\r\n]+ (whitespace with newlines)
+            if (last_end_r_or_n > 0) {
+                pos = last_end_r_or_n;
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 7: \s+(?!\S) (trailing whitespace)
+            if (num_whitespaces > 1 && _get_cpt(pos + num_whitespaces) != OUT_OF_RANGE) {
+                pos += num_whitespaces - 1;
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 8: \s+ (general whitespace)
+            if (num_whitespaces > 0) {
+                pos += num_whitespaces;
+                _add_token(pos);
+                continue;
+            }
+
+            // No matches - consume single character
+            _add_token(++pos);
+        }
+    }
+
+    return bpe_offsets;
+}
+
 static std::vector<size_t> unicode_regex_split_custom(const std::string & text, const std::string & regex_expr, const std::vector<size_t> & offsets) {
     std::vector<size_t> bpe_offsets;
 
@@ -567,6 +734,9 @@ static std::vector<size_t> unicode_regex_split_custom(const std::string & text,
             regex_expr == "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+") {
 
         bpe_offsets = unicode_regex_split_custom_llama3(text, offsets);
+    } else if (regex_expr == "\\p{Han}+") {
+        // K2's first pattern - handle all K2 patterns together
+        bpe_offsets = unicode_regex_split_custom_kimi_k2(text, offsets);
     }
 
     return bpe_offsets;
@@ -672,6 +842,38 @@ uint32_t unicode_tolower(uint32_t cpt) {
     return cpt;  // Return the original code point if no lowercase mapping is found
 }
 
+bool unicode_cpt_is_han(uint32_t cpt) {
+    // Han character ranges (Chinese/CJK characters)
+    // CJK Unified Ideographs (most common)
+    if (cpt >= 0x4E00 && cpt <= 0x9FFF) return true;
+    
+    // CJK Extension A
+    if (cpt >= 0x3400 && cpt <= 0x4DBF) return true;
+    
+    // CJK Extension B
+    if (cpt >= 0x20000 && cpt <= 0x2A6DF) return true;
+    
+    // CJK Extension C
+    if (cpt >= 0x2A700 && cpt <= 0x2B73F) return true;
+    
+    // CJK Extension D 
+    if (cpt >= 0x2B740 && cpt <= 0x2B81F) return true;
+    
+    // CJK Extension E
+    if (cpt >= 0x2B820 && cpt <= 0x2CEAF) return true;
+    
+    // CJK Extension F
+    if (cpt >= 0x2CEB0 && cpt <= 0x2EBEF) return true;
+    
+    // CJK Compatibility Ideographs
+    if (cpt >= 0xF900 && cpt <= 0xFAFF) return true;
+    
+    // CJK Compatibility Ideographs Supplement
+    if (cpt >= 0x2F800 && cpt <= 0x2FA1F) return true;
+    
+    return false;
+}
+
 std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
     // unicode categories
     static const std::map<std::string, int> k_ucat_enum = {
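For orientation, the interval tests in unicode_cpt_is_han() above can be exercised in isolation. The following is a self-contained sketch (independent of llama.cpp's headers; is_han is a local stand-in for the new function). It shows why Pattern 1 does not swallow kana or other non-Han CJK scripts: they fall outside these ranges and are left to the letter patterns instead.

    #include <cstdint>
    #include <cstdio>

    // Same interval tests as unicode_cpt_is_han(): CJK Unified Ideographs,
    // Extensions A-F, and the two Compatibility Ideographs blocks.
    static bool is_han(uint32_t cpt) {
        return (cpt >= 0x4E00  && cpt <= 0x9FFF)  ||  // CJK Unified Ideographs
               (cpt >= 0x3400  && cpt <= 0x4DBF)  ||  // Extension A
               (cpt >= 0x20000 && cpt <= 0x2A6DF) ||  // Extension B
               (cpt >= 0x2A700 && cpt <= 0x2B73F) ||  // Extension C
               (cpt >= 0x2B740 && cpt <= 0x2B81F) ||  // Extension D
               (cpt >= 0x2B820 && cpt <= 0x2CEAF) ||  // Extension E
               (cpt >= 0x2CEB0 && cpt <= 0x2EBEF) ||  // Extension F
               (cpt >= 0xF900  && cpt <= 0xFAFF)  ||  // Compatibility Ideographs
               (cpt >= 0x2F800 && cpt <= 0x2FA1F);    // Compatibility Supplement
    }

    int main() {
        printf("%d\n", is_han(0x4F60));  // 1: U+4F60 is a CJK Unified Ideograph
        printf("%d\n", is_han(0x3042));  // 0: U+3042 is hiragana, not Han
        printf("%d\n", is_han('a'));     // 0: ASCII letter
        return 0;
    }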
From 7e5d0ee35faf951d70f28bdc2693520f634321be Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:40:43 -0500
Subject: [PATCH 05/21] Kimi-K2

---
 src/unicode.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/unicode.h b/src/unicode.h
index c27098df7d4be..0a5fa2a78ceff 100644
--- a/src/unicode.h
+++ b/src/unicode.h
@@ -63,4 +63,6 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8);
 
 uint32_t unicode_tolower(uint32_t cpt);
 
+bool unicode_cpt_is_han(uint32_t cpt);
+
 std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);

From 5d10e3ad0b1b83e8fbc2a7812e95a848ca646a23 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:59:26 -0500
Subject: [PATCH 06/21] LLAMA_MAX_EXPERTS 384

---
 src/llama-hparams.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-hparams.h b/src/llama-hparams.h
index d0500e4d0fd77..9116a3743c993 100644
--- a/src/llama-hparams.h
+++ b/src/llama-hparams.h
@@ -6,7 +6,7 @@
 
 // bump if necessary
 #define LLAMA_MAX_LAYERS  512
-#define LLAMA_MAX_EXPERTS 256  // DeepSeekV3
+#define LLAMA_MAX_EXPERTS 384  // Kimi-K2
 
 enum llama_expert_gating_func_type {
     LLAMA_EXPERT_GATING_FUNC_TYPE_NONE   = 0,

From 30c58409caf1fa926d503557b923271a5b53e9d0 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sun, 13 Jul 2025 12:09:03 -0500
Subject: [PATCH 07/21] fix vocab iteration

---
 convert_hf_to_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 0a6b1daf932f7..807e306888991 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5588,7 +5588,7 @@ def set_vocab(self):
             tokens: list[str] = []
             toktypes: list[int] = []
 
-            for i in range(tokenizer.vocab_size):
+            for i in range(vocab_size):
                 if i not in reverse_vocab:
                     tokens.append(f"[PAD{i}]")
                     toktypes.append(gguf.TokenType.UNUSED)

From 273ea092b12a55ebafaf9bd4b31cbeee29325ee9 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sun, 13 Jul 2025 19:08:06 -0500
Subject: [PATCH 08/21] regex space fix

---
 src/unicode.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/unicode.cpp b/src/unicode.cpp
index 78ae4bf88af1d..bf56d6be2e068 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -608,7 +608,12 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
             // Pattern 2 & 3: Letter words excluding Han characters with optional contractions
             // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?:'s|'t|'re|'ve|'m|'ll|'d)?
             // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
-            if (flags.is_letter && !unicode_cpt_is_han(cpt)) {
+            // Check if current char is a letter OR if current char could be a leading char and next char is a letter
+            bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han(cpt)) ||
+                                     (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) && 
+                                      _get_flags(pos + 1).is_letter && !unicode_cpt_is_han(_get_cpt(pos + 1)));
+            
+            if (is_letter_pattern) {
                 // Handle optional leading non-letter/non-number character
                 bool has_leading_char = false;
                 if (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number)) {

From 4def0639a429bacc0c3aaced25852abdc25aa3e8 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 08:55:29 -0500
Subject: [PATCH 09/21] add kimi-k2 to pre_computed_hashes

---
 convert_hf_to_gguf_update.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 16f4acfe7834f..c091273c0d724 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -146,6 +146,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-1B-Base", "chkhsh": "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86"},
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-7B-Base", "chkhsh": "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896"},
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-34B-Base", "chkhsh": "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b"},
+    {"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"},
 ]
 

From b023e530b061a74610cfd4f8a9e9c656b17cf6dd Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 08:57:38 -0500
Subject: [PATCH 10/21] Updated with kimi-k2 get_vocab_base_pre hash

---
 convert_hf_to_gguf.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 807e306888991..cf5566993ee8c 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -840,6 +840,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "169bf0296a13c4d9b7672313f749eb36501d931022de052aad6e36f2bf34dd51":
             # ref: https://huggingface.co/LiquidAI/LFM2-Tokenizer
             res = "lfm2"
+        if chkhsh == "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890":
+            # ref: https://huggingface.co/moonshotai/Kimi-K2-Base
+            res = "kimi-k2"
 
         if res is None:
             logger.warning("\n")
@@ -5566,7 +5569,7 @@ def set_vocab(self):
         if(self.hparams["vocab_size"]==163840): # Kimi-K2 model
             from transformers import AutoTokenizer
             tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
-            tokpre = "kimi-k2" # TODO: add identifier hash
+            tokpre = self.get_vocab_base_pre(tokenizer)
 
             # Build merges list using the approach similar to HunYuanMoE
             merges = []

From ecb345ff26ddc21a08722c3c2665121cf2313a00 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 15:48:34 -0500
Subject: [PATCH 11/21] fix whitespaces

---
 src/unicode.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/unicode.cpp b/src/unicode.cpp
index bf56d6be2e068..375c9f9b694ee 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -557,7 +557,7 @@ static std::vector<size_t> unicode_regex_split_stl(const std::string & text, con
     return bpe_offsets;
 }
 
-// K2 system regex patterns (from tokenization_kimi.py): 
+// K2 system regex patterns (from tokenization_kimi.py):
 // [\p{Han}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+
 static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string & text, const std::vector<size_t> & offsets) {
     std::vector<size_t> bpe_offsets;
@@ -610,7 +610,7 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
             // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
             // Check if current char is a letter OR if current char could be a leading char and next char is a letter
             bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han(cpt)) ||
-                                     (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) && 
+                                     (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) &&
                                       _get_flags(pos + 1).is_letter && !unicode_cpt_is_han(_get_cpt(pos + 1)));
 
             if (is_letter_pattern) {
@@ -861,7 +861,7 @@ bool unicode_cpt_is_han(uint32_t cpt) {
     // CJK Extension C
     if (cpt >= 0x2A700 && cpt <= 0x2B73F) return true;
 
-    // CJK Extension D 
+    // CJK Extension D
     if (cpt >= 0x2B740 && cpt <= 0x2B81F) return true;
 
     // CJK Extension E

From 6fda4fe0d8511978218dd00de280b6623905d271 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 16:35:45 -0500
Subject: [PATCH 12/21] fix flake errors

---
 convert_hf_to_gguf.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index cf5566993ee8c..ecfc2236336e1 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5582,7 +5582,6 @@ def set_vocab(self):
                 merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
                 if len(merged) == 2:
                     merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
-
 
             # Build token list
             vocab_size = self.hparams["vocab_size"]
@@ -5590,7 +5589,7 @@ def set_vocab(self):
             reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
             tokens: list[str] = []
             toktypes: list[int] = []
-            
+
             for i in range(vocab_size):
                 if i not in reverse_vocab:
                     tokens.append(f"[PAD{i}]")
@@ -5602,13 +5601,13 @@ def set_vocab(self):
                         toktypes.append(gguf.TokenType.CONTROL)
                     else:
                         toktypes.append(gguf.TokenType.NORMAL)
-            
+
             self.gguf_writer.add_tokenizer_model("gpt2")
             self.gguf_writer.add_tokenizer_pre(tokpre)
             self.gguf_writer.add_token_list(tokens)
             self.gguf_writer.add_token_types(toktypes)
             self.gguf_writer.add_token_merges(merges)
-            
+
             special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
             special_vocab.add_to_gguf(self.gguf_writer)
         else:
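The whitespace cleanups in this and the next patch keep touching the leading-character lookahead that patch 08 introduced. For intuition, here is an ASCII-only miniature of the " ?letters" portion of patterns 2/3 (a standalone sketch with hypothetical names, not llama.cpp code; the real matcher also excludes Han characters, handles contractions, and has separate digit/punctuation patterns):

    #include <cctype>
    #include <cstdio>
    #include <string>
    #include <vector>

    // An optional single non-letter/non-digit, non-CR/LF lead character may be
    // glued onto the following letter run - the behavior the lookahead preserves.
    static std::vector<std::string> split_words(const std::string & s) {
        std::vector<std::string> out;
        size_t pos = 0;
        while (pos < s.size()) {
            unsigned char c = s[pos];
            bool lead = !(c == '\r' || c == '\n' || isalpha(c) || isdigit(c));
            size_t start = pos;
            size_t p = pos + (lead ? 1 : 0);
            if (p < s.size() && isalpha((unsigned char) s[p])) {
                while (p < s.size() && isalpha((unsigned char) s[p])) p++;
                out.push_back(s.substr(start, p - start));
                pos = p;
            } else {
                out.push_back(s.substr(pos, 1)); // fallback: single character
                pos++;
            }
        }
        return out;
    }

    int main() {
        for (const auto & t : split_words(" hello world")) printf("[%s]", t.c_str());
        printf("\n"); // prints: [ hello][ world]
        return 0;
    }

Without the lookahead, a chunk beginning with a space never enters the letter branch at all, because the space itself is not a letter; the space would then fall through to the whitespace patterns and " hello" would split as " " + "hello".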
From 29c153243992700f7ad5d6d40230e1be13ca5045 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 16:45:59 -0500
Subject: [PATCH 13/21] remove more unicode.cpp whitespaces

---
 src/unicode.cpp | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/unicode.cpp b/src/unicode.cpp
index 375c9f9b694ee..65f3665171582 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -612,7 +612,7 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
             bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han(cpt)) ||
                                      (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) &&
                                       _get_flags(pos + 1).is_letter && !unicode_cpt_is_han(_get_cpt(pos + 1)));
-            
+
             if (is_letter_pattern) {
                 // Handle optional leading non-letter/non-number character
                 bool has_leading_char = false;
@@ -620,23 +620,23 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
                     has_leading_char = true;
                     pos++;
                 }
-                
+
                 // Match letter sequence (excluding Han characters)
                 bool has_letters = false;
                 while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
                     has_letters = true;
                     pos++;
                 }
-                
+
                 // Only proceed if we found letters (after potentially skipping leading char)
                 if (has_letters || (!has_leading_char && _get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos)))) {
                     if (!has_letters) pos++; // consume the first letter if we didn't already
-                    
+
                     // Continue consuming letters
                     while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
                         pos++;
                     }
-                    
+
                     // Check for optional contractions (?:'s|'t|'re|'ve|'m|'ll|'d)
                     if (_get_cpt(pos) == '\'' && pos + 1 < offset_end) {
                         uint32_t cpt_next = unicode_tolower(_get_cpt(pos + 1));
@@ -651,7 +651,7 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
                             }
                         }
                     }
-                    
+
                     _add_token(pos);
                     continue;
                 } else if (has_leading_char) {
@@ -851,31 +851,31 @@ bool unicode_cpt_is_han(uint32_t cpt) {
     // Han character ranges (Chinese/CJK characters)
     // CJK Unified Ideographs (most common)
     if (cpt >= 0x4E00 && cpt <= 0x9FFF) return true;
-    
+
     // CJK Extension A
     if (cpt >= 0x3400 && cpt <= 0x4DBF) return true;
-    
+
     // CJK Extension B
     if (cpt >= 0x20000 && cpt <= 0x2A6DF) return true;
-    
+
     // CJK Extension C
     if (cpt >= 0x2A700 && cpt <= 0x2B73F) return true;
-    
+
    // CJK Extension D
     if (cpt >= 0x2B740 && cpt <= 0x2B81F) return true;
-    
+
     // CJK Extension E
     if (cpt >= 0x2B820 && cpt <= 0x2CEAF) return true;
-    
+
     // CJK Extension F
     if (cpt >= 0x2CEB0 && cpt <= 0x2EBEF) return true;
-    
+
     // CJK Compatibility Ideographs
     if (cpt >= 0xF900 && cpt <= 0xFAFF) return true;
-    
+
     // CJK Compatibility Ideographs Supplement
     if (cpt >= 0x2F800 && cpt <= 0x2FA1F) return true;
-    
+
     return false;
 }
 

From 6ef6aa1cd341a0dd1fb12630b20f353d7dc3fc9c Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 18:06:01 -0500
Subject: [PATCH 14/21] change set_vocab() flow

---
 convert_hf_to_gguf.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index ecfc2236336e1..b66eac85d2e2c 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5566,11 +5566,17 @@ class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2
 
     def set_vocab(self):
-        if(self.hparams["vocab_size"]==163840): # Kimi-K2 model
-            from transformers import AutoTokenizer
-            tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
-            tokpre = self.get_vocab_base_pre(tokenizer)
+        try:
+            self._set_vocab_gpt2()
+            return
+        except:
+            pass
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+        tokpre = self.get_vocab_base_pre(tokenizer)
 
+        if tokpre == "kimi-k2":
             # Build merges list using the approach similar to HunYuanMoE
             merges = []
             vocab = {}
@@ -5611,7 +5617,7 @@ def set_vocab(self):
             special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
             special_vocab.add_to_gguf(self.gguf_writer)
         else:
-            self._set_vocab_gpt2()
+            raise NotImplementedError(f"{self.dir_model} is not supported yet!")
 
     def set_gguf_parameters(self):

From 02f4a631678bc435c5cea7a54ab4222ee32e8453 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 00:31:04 -0500
Subject: [PATCH 15/21] add moonshotai-Kimi-K2.jinja to /models/templates/

---
 models/templates/moonshotai-Kimi-K2.jinja | 37 +++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 models/templates/moonshotai-Kimi-K2.jinja

diff --git a/models/templates/moonshotai-Kimi-K2.jinja b/models/templates/moonshotai-Kimi-K2.jinja
new file mode 100644
index 0000000000000..be3627b0a4778
--- /dev/null
+++ b/models/templates/moonshotai-Kimi-K2.jinja
@@ -0,0 +1,37 @@
+{% if tools -%}
+    {{ '<|im_system|>tool_declare<|im_middle|>' -}}
+    {{- tools | tojson -}}
+    {{ '<|im_end|>' -}}
+{%- endif -%}
+
+{%- for message in messages -%}
+    {%- if loop.first and messages[0]['role'] != 'system' -%}
+        {{ '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>' }}
+    {%- endif -%}
+    {%- if message['role'] == 'system' -%}
+        {{ '<|im_system|>system<|im_middle|>' }}
+    {%- elif message['role'] == 'user' -%}
+        {{ '<|im_user|>user<|im_middle|>' }}
+    {%- elif message['role'] == 'assistant' -%}
+        {{ '<|im_assistant|>assistant<|im_middle|>' }}
+    {%- elif message['role'] == 'tool' -%}
+        {{ '<|im_system|>tool<|im_middle|>' }}
+    {%- endif -%}
+
+    {%- if message['content'] is string -%}
+        {{- message['content'] + '<|im_end|>' -}}
+    {%- else -%}
+        {%- for content in message['content'] -%}
+            {%- if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+                {{ '<|media_start|>image<|media_content|><|media_pad|><|media_end|>' }}
+            {%- else -%}
+                {{ content['text'] }}
+            {%- endif -%}
+        {%- endfor -%}
+        {{ '<|im_end|>' }}
+    {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt -%}
+    {{ '<|im_assistant|>assistant<|im_middle|>' }}
+{%- endif -%}
\ No newline at end of file

From 5a730aedf7f485ba44f71030aa9ec72cc86ca120 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 10:07:26 -0500
Subject: [PATCH 16/21] update moonshotai-Kimi-K2.jinja

---
 models/templates/moonshotai-Kimi-K2.jinja | 50 +++++++++++++----------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/models/templates/moonshotai-Kimi-K2.jinja b/models/templates/moonshotai-Kimi-K2.jinja
index be3627b0a4778..ecb49a210852c 100644
--- a/models/templates/moonshotai-Kimi-K2.jinja
+++ b/models/templates/moonshotai-Kimi-K2.jinja
@@ -1,37 +1,43 @@
-{% if tools -%}
-    {{ '<|im_system|>tool_declare<|im_middle|>' -}}
-    {{- tools | tojson -}}
-    {{ '<|im_end|>' -}}
+{%- if tools -%}
+    <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
 {%- endif -%}
-
 {%- for message in messages -%}
     {%- if loop.first and messages[0]['role'] != 'system' -%}
-        {{ '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>' }}
+        <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
     {%- endif -%}
     {%- if message['role'] == 'system' -%}
-        {{ '<|im_system|>system<|im_middle|>' }}
+        <|im_system|>system<|im_middle|>
     {%- elif message['role'] == 'user' -%}
-        {{ '<|im_user|>user<|im_middle|>' }}
+        <|im_user|>user<|im_middle|>
     {%- elif message['role'] == 'assistant' -%}
-        {{ '<|im_assistant|>assistant<|im_middle|>' }}
+        <|im_assistant|>assistant<|im_middle|>
     {%- elif message['role'] == 'tool' -%}
-        {{ '<|im_system|>tool<|im_middle|>' }}
+        <|im_system|>tool<|im_middle|>
     {%- endif -%}
-
-    {%- if message['content'] is string -%}
-        {{- message['content'] + '<|im_end|>' -}}
-    {%- else -%}
-        {%- for content in message['content'] -%}
-            {%- if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
-                {{ '<|media_start|>image<|media_content|><|media_pad|><|media_end|>' }}
-            {%- else -%}
+    {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
+        {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
+        <|tool_calls_section_begin|>
+        {%- for tool_call in message['tool_calls'] -%}
+            {%- set func_name = tool_call['function']['name'] -%}
+            {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
+            <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
+        {%- endfor -%}
+        <|tool_calls_section_end|>
+    {%- elif message['role'] == 'tool' -%}
+        ## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
+    {%- elif message['content'] is string -%}
+        {{ message['content'] }}
+    {%- elif message['content'] is not none -%}
+        {% for content in message['content'] -%}
+            {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+                <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+            {% else -%}
                 {{ content['text'] }}
             {%- endif -%}
         {%- endfor -%}
-        {{ '<|im_end|>' }}
     {%- endif -%}
+    <|im_end|>
 {%- endfor -%}
-
 {%- if add_generation_prompt -%}
-    {{ '<|im_assistant|>assistant<|im_middle|>' }}
-{%- endif -%}
\ No newline at end of file
+    <|im_assistant|>assistant<|im_middle|>
+{%- endif -%}

From 73dcb44312074b40b8595f2623e2377c0e060156 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 11:21:35 -0500
Subject: [PATCH 17/21] add kimi-k2 chat template

---
 src/llama-chat.cpp | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index cbc19d3c40c30..d824392964f28 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -65,6 +65,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "llama4",            LLM_CHAT_TEMPLATE_LLAMA4            },
     { "smolvlm",           LLM_CHAT_TEMPLATE_SMOLVLM           },
     { "hunyuan-moe",       LLM_CHAT_TEMPLATE_HUNYUAN_MOE       },
+    { "kimi-k2",           LLM_CHAT_TEMPLATE_KIMI_K2           },
 };
 
 llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -188,6 +189,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_DOTS1;
     } else if (tmpl_contains("<|startoftext|>") && tmpl_contains("<|extra_4|>")) {
         return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
+    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
+        return LLM_CHAT_TEMPLATE_KIMI_K2;
     }
     return LLM_CHAT_TEMPLATE_UNKNOWN;
 }
@@ -680,6 +683,22 @@ int32_t llm_chat_apply_template(
                 ss << "<|startoftext|>" << message->content << "<|extra_0|>";
             }
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
+        // moonshotai/Kimi-K2-Instruct
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << "<|im_system|>system<|im_middle|>";
+            } else if (role == "user") {
+                ss << "<|im_user|>user<|im_middle|>";
+            } else if (role == "assistant") {
+                ss << "<|im_assistant|>assistant<|im_middle|>";
+            } else if (role == "tool") {
+                ss << "<|im_system|>tool<|im_middle|>";
+            }
+
+            ss << message->content << "<|im_end|>";
+        }
     } else {
         // template not supported
         return -1;

From d2211e01cf612f4726e8d192b462f6629c6550c7 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 11:22:58 -0500
Subject: [PATCH 18/21] add kimi-k2

---
 src/llama-chat.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llama-chat.h b/src/llama-chat.h
index b621fda281669..cab0533485652 100644
--- a/src/llama-chat.h
+++ b/src/llama-chat.h
@@ -45,6 +45,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_SMOLVLM,
     LLM_CHAT_TEMPLATE_DOTS1,
     LLM_CHAT_TEMPLATE_HUNYUAN_MOE,
+    LLM_CHAT_TEMPLATE_KIMI_K2,
     LLM_CHAT_TEMPLATE_UNKNOWN,
 };
 

From a57870df6963650ea2252bc6041bcb0828e5e4ae Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 11:55:27 -0500
Subject: [PATCH 19/21] update NotImplementedError
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Sigbjørn Skjæret
---
 convert_hf_to_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index b66eac85d2e2c..15b0e016751b8 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5617,7 +5617,7 @@ def set_vocab(self):
             special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
             special_vocab.add_to_gguf(self.gguf_writer)
         else:
-            raise NotImplementedError(f"{self.dir_model} is not supported yet!")
+            raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")
 
     def set_gguf_parameters(self):

From 0a5704b59d8d3b4846ac4c4a9997cc19de9e06d8 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 11:59:28 -0500
Subject: [PATCH 20/21] except Exception
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Sigbjørn Skjæret
---
 convert_hf_to_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 15b0e016751b8..9b929bf70964d 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5569,7 +5569,7 @@ def set_vocab(self):
         try:
             self._set_vocab_gpt2()
             return
-        except:
+        except Exception:
             pass
 
         from transformers import AutoTokenizer

From 459df19b89b06b19613daad00ef37f9e49a96837 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 13:11:31 -0500
Subject: [PATCH 21/21] LLM_CHAT_TEMPLATE_KIMI_K2 if(add_ass){}

---
 src/llama-chat.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index d824392964f28..11359903078e2 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -698,6 +698,10 @@ int32_t llm_chat_apply_template(
             }
 
             ss << message->content << "<|im_end|>";
+
+            if (add_ass) {
+                ss << "<|im_assistant|>assistant<|im_middle|>";
+            }
         }
     } else {
         // template not supported
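Taken together, patches 17 and 21 make the C++ template path produce the same layout as the bundled jinja file for plain text chats, except that the C++ branch injects no default system message. A standalone re-derivation of that branch (a hypothetical helper, not llama.cpp's API), with the rendered prompt shown in the trailing comment:

    #include <cstdio>
    #include <sstream>
    #include <string>
    #include <vector>

    struct msg { std::string role, content; };

    // Mirrors the LLM_CHAT_TEMPLATE_KIMI_K2 branch above, including the
    // add_ass trailer added in the final patch.
    static std::string render_kimi_k2(const std::vector<msg> & chat, bool add_ass) {
        std::ostringstream ss;
        for (const auto & m : chat) {
            if      (m.role == "system")    ss << "<|im_system|>system<|im_middle|>";
            else if (m.role == "user")      ss << "<|im_user|>user<|im_middle|>";
            else if (m.role == "assistant") ss << "<|im_assistant|>assistant<|im_middle|>";
            else if (m.role == "tool")      ss << "<|im_system|>tool<|im_middle|>";
            ss << m.content << "<|im_end|>";
        }
        if (add_ass) ss << "<|im_assistant|>assistant<|im_middle|>";
        return ss.str();
    }

    int main() {
        std::string p = render_kimi_k2({{"system", "You are a helpful assistant"}, {"user", "Hello"}}, /*add_ass=*/true);
        printf("%s\n", p.c_str());
        // <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello<|im_end|><|im_assistant|>assistant<|im_middle|>
        return 0;
    }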