From 38d591031777b5fb29e1e67371a01002a066d5bf Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:35:45 -0500
Subject: [PATCH 01/21] Kimi-K2 conversion

---
 convert_hf_to_gguf.py | 48 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 8afb425b156f2..0a6b1daf932f7 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5563,7 +5563,53 @@ class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2
 
     def set_vocab(self):
-        self._set_vocab_gpt2()
+        if(self.hparams["vocab_size"]==163840): # Kimi-K2 model
+            from transformers import AutoTokenizer
+            tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+            tokpre = "kimi-k2" # TODO: add identifier hash
+
+            # Build merges list using the approach similar to HunYuanMoE
+            merges = []
+            vocab = {}
+            mergeable_ranks = tokenizer.model._mergeable_ranks
+            for token, rank in mergeable_ranks.items():
+                vocab[QwenModel.token_bytes_to_string(token)] = rank
+                if len(token) == 1:
+                    continue
+                merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+                if len(merged) == 2:
+                    merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+
+            # Build token list
+            vocab_size = self.hparams["vocab_size"]
+            special_tokens = tokenizer.special_tokens
+            reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+            tokens: list[str] = []
+            toktypes: list[int] = []
+
+            for i in range(tokenizer.vocab_size):
+                if i not in reverse_vocab:
+                    tokens.append(f"[PAD{i}]")
+                    toktypes.append(gguf.TokenType.UNUSED)
+                else:
+                    token = reverse_vocab[i]
+                    tokens.append(token)
+                    if i in special_tokens.values():
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
+
+            self.gguf_writer.add_tokenizer_model("gpt2")
+            self.gguf_writer.add_tokenizer_pre(tokpre)
+            self.gguf_writer.add_token_list(tokens)
+            self.gguf_writer.add_token_types(toktypes)
+            self.gguf_writer.add_token_merges(merges)
+
+            special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+            special_vocab.add_to_gguf(self.gguf_writer)
+        else:
+            self._set_vocab_gpt2()
 
     def set_gguf_parameters(self):

From bf674c3663af63b09aa8e10ff18501aa08d1f4b9 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:37:34 -0500
Subject: [PATCH 02/21] add Kimi_K2 pre type

---
 src/llama-vocab.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 46a1ccecb51fc..1ce8fd307e2d3 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -45,6 +45,7 @@ enum llama_vocab_pre_type {
    LLAMA_VOCAB_PRE_TYPE_PIXTRAL        = 34,
    LLAMA_VOCAB_PRE_TYPE_SEED_CODER     = 35,
    LLAMA_VOCAB_PRE_TYPE_HUNYUAN        = 36,
+   LLAMA_VOCAB_PRE_TYPE_KIMI_K2        = 37,
 };
 
 struct LLM_KV;

From d80d7fc1591bc5228418d8f3a26ebf52ec8842c4 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:39:30 -0500
Subject: [PATCH 03/21] Kimi-K2

---
 src/llama-vocab.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index e0e578d6394d8..602e5a2743e6d 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -404,6 +404,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                     "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
                 };
                 break;
+            case LLAMA_VOCAB_PRE_TYPE_KIMI_K2:
+                regex_exprs = {
+                    // K2 trigger pattern - this will activate the custom K2 handler in unicode.cpp
+                    // The custom handler implements all K2 patterns with proper Han character exclusion
+                    "\\p{Han}+",
+                };
+                break;
             case LLAMA_VOCAB_PRE_TYPE_SUPERBPE:
                 regex_exprs = {
                     "\\p{N}+",
@@ -1665,6 +1672,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "hunyuan") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN;
                 clean_spaces = false;
+            } else if (
+                tokenizer_pre == "kimi-k2") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_KIMI_K2;
+                clean_spaces = false;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }

From f8a643061157b2fc714d4c6c7487cebb440c6d9d Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:40:19 -0500
Subject: [PATCH 04/21] Kimi-K2 unicode

---
 src/unicode.cpp | 202 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 202 insertions(+)

diff --git a/src/unicode.cpp b/src/unicode.cpp
index 43a4581b961fe..78ae4bf88af1d 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -557,6 +557,173 @@ static std::vector<size_t> unicode_regex_split_stl(const std::string & text, con
     return bpe_offsets;
 }
 
+// K2 system regex patterns (from tokenization_kimi.py): 
+// [\p{Han}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+
+static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string & text, const std::vector<size_t> & offsets) {
+    std::vector<size_t> bpe_offsets;
+    bpe_offsets.reserve(offsets.size());
+
+    const auto cpts = unicode_cpts_from_utf8(text);
+
+    size_t start = 0;
+    for (auto offset : offsets) {
+        const size_t offset_ini = start;
+        const size_t offset_end = start + offset;
+        assert(offset_end <= cpts.size());
+        start = offset_end;
+
+        static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF;
+        auto _get_cpt = [&] (const size_t pos) -> uint32_t {
+            return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE;
+        };
+
+        auto _get_flags = [&] (const size_t pos) -> unicode_cpt_flags {
+            return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags_from_cpt(cpts[pos]) : unicode_cpt_flags{};
+        };
+
+        size_t _prev_end = offset_ini;
+        auto _add_token = [&] (const size_t end) -> size_t {
+            assert(_prev_end <= end && end <= offset_end);
+            size_t len = end - _prev_end;
+            if (len > 0) {
+                bpe_offsets.push_back(len);
+            }
+            _prev_end = end;
+            return len;
+        };
+
+        for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) {
+            const uint32_t cpt = _get_cpt(pos);
+            const auto flags = _get_flags(pos);
+
+            // Pattern 1: [\p{Han}]+ (Chinese characters)
+            if (unicode_cpt_is_han(cpt)) {
+                while (unicode_cpt_is_han(_get_cpt(pos))) {
+                    pos++;
+                }
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 2 & 3: Letter words excluding Han characters with optional contractions
+            // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?:'s|'t|'re|'ve|'m|'ll|'d)?
+            // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
+            if (flags.is_letter && !unicode_cpt_is_han(cpt)) {
+                // Handle optional leading non-letter/non-number character
+                bool has_leading_char = false;
+                if (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number)) {
+                    has_leading_char = true;
+                    pos++;
+                }
+                
+                // Match letter sequence (excluding Han characters)
+                bool has_letters = false;
+                while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
+                    has_letters = true;
+                    pos++;
+                }
+                
+                // Only proceed if we found letters (after potentially skipping leading char)
+                if (has_letters || (!has_leading_char && _get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos)))) {
+                    if (!has_letters) pos++; // consume the first letter if we didn't already
+                    
+                    // Continue consuming letters
+                    while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
+                        pos++;
+                    }
+                    
+                    // Check for optional contractions (?:'s|'t|'re|'ve|'m|'ll|'d)
+                    if (_get_cpt(pos) == '\'' && pos + 1 < offset_end) {
+                        uint32_t cpt_next = unicode_tolower(_get_cpt(pos + 1));
+                        if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') {
+                            pos += 2;
+                        } else if (pos + 2 < offset_end) {
+                            uint32_t cpt_next_next = unicode_tolower(_get_cpt(pos + 2));
+                            if ((cpt_next == 'r' && cpt_next_next == 'e') ||
+                                (cpt_next == 'v' && cpt_next_next == 'e') ||
+                                (cpt_next == 'l' && cpt_next_next == 'l')) {
+                                pos += 3;
+                            }
+                        }
+                    }
+                    
+                    _add_token(pos);
+                    continue;
+                } else if (has_leading_char) {
+                    // We consumed a leading char but found no letters, backtrack
+                    pos--;
+                }
+            }
+
+            // Pattern 4: \p{N}{1,3} (numbers 1-3 digits)
+            if (flags.is_number) {
+                size_t ini = pos;
+                while (_get_flags(pos).is_number) {
+                    if (++pos - ini >= 3) {
+                        _add_token(pos);
+                        ini = pos;
+                    }
+                }
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 5:  ?[^\s\p{L}\p{N}]+[\r\n]* (optional space + non-word chars + optional newlines)
+            auto flags2 = (cpt == ' ' ? _get_flags(pos + 1) : flags);
+            if (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) {
+                pos += (cpt == ' ');
+                while (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) {
+                    flags2 = _get_flags(++pos);
+                }
+                // Match optional [\r\n]*
+                uint32_t cpt2 = _get_cpt(pos);
+                while (cpt2 == '\r' || cpt2 == '\n') {
+                    cpt2 = _get_cpt(++pos);
+                }
+                _add_token(pos);
+                continue;
+            }
+
+            // Count whitespace characters
+            size_t num_whitespaces = 0;
+            size_t last_end_r_or_n = 0;
+            while (_get_flags(pos + num_whitespaces).is_whitespace) {
+                uint32_t cpt2 = _get_cpt(pos + num_whitespaces);
+                if (cpt2 == '\r' || cpt2 == '\n') {
+                    last_end_r_or_n = pos + num_whitespaces + 1;
+                }
+                num_whitespaces++;
+            }
+
+            // Pattern 6: \s*[\r\n]+ (whitespace with newlines)
+            if (last_end_r_or_n > 0) {
+                pos = last_end_r_or_n;
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 7: \s+(?!\S) (trailing whitespace)
+            if (num_whitespaces > 1 && _get_cpt(pos + num_whitespaces) != OUT_OF_RANGE) {
+                pos += num_whitespaces - 1;
+                _add_token(pos);
+                continue;
+            }
+
+            // Pattern 8: \s+ (general whitespace)
+            if (num_whitespaces > 0) {
+                pos += num_whitespaces;
+                _add_token(pos);
+                continue;
+            }
+
+            // No matches - consume single character
+            _add_token(++pos);
+        }
+    }
+
+    return bpe_offsets;
+}
+
 static std::vector<size_t> unicode_regex_split_custom(const std::string & text, const std::string & regex_expr, const std::vector<size_t> & offsets) {
     std::vector<size_t> bpe_offsets;
 
@@ -567,6 +734,9 @@ static std::vector<size_t> unicode_regex_split_custom(const std::string & text,
             regex_expr == "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+") {
 
         bpe_offsets = unicode_regex_split_custom_llama3(text, offsets);
+    } else if (regex_expr == "\\p{Han}+") {
+        // K2's first pattern - handle all K2 patterns together
+        bpe_offsets = unicode_regex_split_custom_kimi_k2(text, offsets);
     }
 
     return bpe_offsets;
@@ -672,6 +842,38 @@ uint32_t unicode_tolower(uint32_t cpt) {
     return cpt;  // Return the original code point if no lowercase mapping is found
 }
 
+bool unicode_cpt_is_han(uint32_t cpt) {
+    // Han character ranges (Chinese/CJK characters)
+    // CJK Unified Ideographs (most common)
+    if (cpt >= 0x4E00 && cpt <= 0x9FFF) return true;
+    
+    // CJK Extension A
+    if (cpt >= 0x3400 && cpt <= 0x4DBF) return true;
+    
+    // CJK Extension B
+    if (cpt >= 0x20000 && cpt <= 0x2A6DF) return true;
+    
+    // CJK Extension C
+    if (cpt >= 0x2A700 && cpt <= 0x2B73F) return true;
+    
+    // CJK Extension D 
+    if (cpt >= 0x2B740 && cpt <= 0x2B81F) return true;
+    
+    // CJK Extension E
+    if (cpt >= 0x2B820 && cpt <= 0x2CEAF) return true;
+    
+    // CJK Extension F
+    if (cpt >= 0x2CEB0 && cpt <= 0x2EBEF) return true;
+    
+    // CJK Compatibility Ideographs
+    if (cpt >= 0xF900 && cpt <= 0xFAFF) return true;
+    
+    // CJK Compatibility Ideographs Supplement
+    if (cpt >= 0x2F800 && cpt <= 0x2FA1F) return true;
+    
+    return false;
+}
+
 std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs) {
     // unicode categories
     static const std::map<std::string, int> k_ucat_enum = {
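For orientation, the interval tests in unicode_cpt_is_han() above can be exercised in isolation. The following is a self-contained sketch (independent of llama.cpp's headers; is_han is a local stand-in for the new function). It shows why Pattern 1 does not swallow kana or other non-Han CJK scripts: they fall outside these ranges and are left to the letter patterns instead.

    #include <cstdint>
    #include <cstdio>

    // Same interval tests as unicode_cpt_is_han(): CJK Unified Ideographs,
    // Extensions A-F, and the two Compatibility Ideographs blocks.
    static bool is_han(uint32_t cpt) {
        return (cpt >= 0x4E00  && cpt <= 0x9FFF)  ||  // CJK Unified Ideographs
               (cpt >= 0x3400  && cpt <= 0x4DBF)  ||  // Extension A
               (cpt >= 0x20000 && cpt <= 0x2A6DF) ||  // Extension B
               (cpt >= 0x2A700 && cpt <= 0x2B73F) ||  // Extension C
               (cpt >= 0x2B740 && cpt <= 0x2B81F) ||  // Extension D
               (cpt >= 0x2B820 && cpt <= 0x2CEAF) ||  // Extension E
               (cpt >= 0x2CEB0 && cpt <= 0x2EBEF) ||  // Extension F
               (cpt >= 0xF900  && cpt <= 0xFAFF)  ||  // Compatibility Ideographs
               (cpt >= 0x2F800 && cpt <= 0x2FA1F);    // Compatibility Supplement
    }

    int main() {
        printf("%d\n", is_han(0x4F60));  // 1: U+4F60 is a CJK Unified Ideograph
        printf("%d\n", is_han(0x3042));  // 0: U+3042 is hiragana, not Han
        printf("%d\n", is_han('a'));     // 0: ASCII letter
        return 0;
    }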
From 7e5d0ee35faf951d70f28bdc2693520f634321be Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:40:43 -0500
Subject: [PATCH 05/21] Kimi-K2

---
 src/unicode.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/unicode.h b/src/unicode.h
index c27098df7d4be..0a5fa2a78ceff 100644
--- a/src/unicode.h
+++ b/src/unicode.h
@@ -63,4 +63,6 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8);
 
 uint32_t unicode_tolower(uint32_t cpt);
 
+bool unicode_cpt_is_han(uint32_t cpt);
+
 std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);

From 5d10e3ad0b1b83e8fbc2a7812e95a848ca646a23 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:59:26 -0500
Subject: [PATCH 06/21] LLAMA_MAX_EXPERTS 384

---
 src/llama-hparams.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-hparams.h b/src/llama-hparams.h
index d0500e4d0fd77..9116a3743c993 100644
--- a/src/llama-hparams.h
+++ b/src/llama-hparams.h
@@ -6,7 +6,7 @@
 
 // bump if necessary
 #define LLAMA_MAX_LAYERS  512
-#define LLAMA_MAX_EXPERTS 256  // DeepSeekV3
+#define LLAMA_MAX_EXPERTS 384  // Kimi-K2
 
 enum llama_expert_gating_func_type {
     LLAMA_EXPERT_GATING_FUNC_TYPE_NONE   = 0,

From 30c58409caf1fa926d503557b923271a5b53e9d0 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sun, 13 Jul 2025 12:09:03 -0500
Subject: [PATCH 07/21] fix vocab iteration

---
 convert_hf_to_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 0a6b1daf932f7..807e306888991 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5588,7 +5588,7 @@ def set_vocab(self):
             tokens: list[str] = []
             toktypes: list[int] = []
 
-            for i in range(tokenizer.vocab_size):
+            for i in range(vocab_size):
                 if i not in reverse_vocab:
                     tokens.append(f"[PAD{i}]")
                     toktypes.append(gguf.TokenType.UNUSED)

From 273ea092b12a55ebafaf9bd4b31cbeee29325ee9 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sun, 13 Jul 2025 19:08:06 -0500
Subject: [PATCH 08/21] regex space fix

---
 src/unicode.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/unicode.cpp b/src/unicode.cpp
index 78ae4bf88af1d..bf56d6be2e068 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -608,7 +608,12 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
             // Pattern 2 & 3: Letter words excluding Han characters with optional contractions
             // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?:'s|'t|'re|'ve|'m|'ll|'d)?
             // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
-            if (flags.is_letter && !unicode_cpt_is_han(cpt)) {
+            // Check if current char is a letter OR if current char could be a leading char and next char is a letter
+            bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han(cpt)) ||
+                                     (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) && 
+                                      _get_flags(pos + 1).is_letter && !unicode_cpt_is_han(_get_cpt(pos + 1)));
+            
+            if (is_letter_pattern) {
                 // Handle optional leading non-letter/non-number character
                 bool has_leading_char = false;
                 if (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number)) {

From 4def0639a429bacc0c3aaced25852abdc25aa3e8 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 08:55:29 -0500
Subject: [PATCH 09/21] add kimi-k2 to pre_computed_hashes

---
 convert_hf_to_gguf_update.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
index 16f4acfe7834f..c091273c0d724 100755
--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
@@ -146,6 +146,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-1B-Base", "chkhsh": "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86"},
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-7B-Base", "chkhsh": "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896"},
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-34B-Base", "chkhsh": "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b"},
+    {"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"},
 ]
 

From b023e530b061a74610cfd4f8a9e9c656b17cf6dd Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 08:57:38 -0500
Subject: [PATCH 10/21] Updated with kimi-k2 get_vocab_base_pre hash

---
 convert_hf_to_gguf.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 807e306888991..cf5566993ee8c 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -840,6 +840,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "169bf0296a13c4d9b7672313f749eb36501d931022de052aad6e36f2bf34dd51":
             # ref: https://huggingface.co/LiquidAI/LFM2-Tokenizer
             res = "lfm2"
+        if chkhsh == "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890":
+            # ref: https://huggingface.co/moonshotai/Kimi-K2-Base
+            res = "kimi-k2"
 
         if res is None:
             logger.warning("\n")
@@ -5566,7 +5569,7 @@ def set_vocab(self):
         if(self.hparams["vocab_size"]==163840): # Kimi-K2 model
             from transformers import AutoTokenizer
             tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
-            tokpre = "kimi-k2" # TODO: add identifier hash
+            tokpre = self.get_vocab_base_pre(tokenizer)
 
             # Build merges list using the approach similar to HunYuanMoE
             merges = []

From ecb345ff26ddc21a08722c3c2665121cf2313a00 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 15:48:34 -0500
Subject: [PATCH 11/21] fix whitespaces

---
 src/unicode.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/unicode.cpp b/src/unicode.cpp
index bf56d6be2e068..375c9f9b694ee 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -557,7 +557,7 @@ static std::vector<size_t> unicode_regex_split_stl(const std::string & text, con
     return bpe_offsets;
 }
 
-// K2 system regex patterns (from tokenization_kimi.py): 
+// K2 system regex patterns (from tokenization_kimi.py):
 // [\p{Han}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+
 static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string & text, const std::vector<size_t> & offsets) {
     std::vector<size_t> bpe_offsets;
@@ -610,7 +610,7 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
             // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
             // Check if current char is a letter OR if current char could be a leading char and next char is a letter
             bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han(cpt)) ||
-                                     (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) && 
+                                     (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) &&
                                       _get_flags(pos + 1).is_letter && !unicode_cpt_is_han(_get_cpt(pos + 1)));
 
             if (is_letter_pattern) {
@@ -861,7 +861,7 @@ bool unicode_cpt_is_han(uint32_t cpt) {
     // CJK Extension C
     if (cpt >= 0x2A700 && cpt <= 0x2B73F) return true;
 
-    // CJK Extension D 
+    // CJK Extension D
     if (cpt >= 0x2B740 && cpt <= 0x2B81F) return true;
 
     // CJK Extension E

From 6fda4fe0d8511978218dd00de280b6623905d271 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 16:35:45 -0500
Subject: [PATCH 12/21] fix flake errors

---
 convert_hf_to_gguf.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index cf5566993ee8c..ecfc2236336e1 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5582,7 +5582,6 @@ def set_vocab(self):
                 merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
                 if len(merged) == 2:
                     merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
-
 
             # Build token list
             vocab_size = self.hparams["vocab_size"]
@@ -5590,7 +5589,7 @@ def set_vocab(self):
             reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
             tokens: list[str] = []
             toktypes: list[int] = []
-            
+
             for i in range(vocab_size):
                 if i not in reverse_vocab:
                     tokens.append(f"[PAD{i}]")
@@ -5602,13 +5601,13 @@ def set_vocab(self):
                         toktypes.append(gguf.TokenType.CONTROL)
                     else:
                         toktypes.append(gguf.TokenType.NORMAL)
-            
+
             self.gguf_writer.add_tokenizer_model("gpt2")
             self.gguf_writer.add_tokenizer_pre(tokpre)
             self.gguf_writer.add_token_list(tokens)
             self.gguf_writer.add_token_types(toktypes)
             self.gguf_writer.add_token_merges(merges)
-            
+
             special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
             special_vocab.add_to_gguf(self.gguf_writer)
         else:
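The whitespace cleanups in this and the next patch keep touching the leading-character lookahead that patch 08 introduced. For intuition, here is an ASCII-only miniature of the " ?letters" portion of patterns 2/3 (a standalone sketch with hypothetical names, not llama.cpp code; the real matcher also excludes Han characters, handles contractions, and has separate digit/punctuation patterns):

    #include <cctype>
    #include <cstdio>
    #include <string>
    #include <vector>

    // An optional single non-letter/non-digit, non-CR/LF lead character may be
    // glued onto the following letter run - the behavior the lookahead preserves.
    static std::vector<std::string> split_words(const std::string & s) {
        std::vector<std::string> out;
        size_t pos = 0;
        while (pos < s.size()) {
            unsigned char c = s[pos];
            bool lead = !(c == '\r' || c == '\n' || isalpha(c) || isdigit(c));
            size_t start = pos;
            size_t p = pos + (lead ? 1 : 0);
            if (p < s.size() && isalpha((unsigned char) s[p])) {
                while (p < s.size() && isalpha((unsigned char) s[p])) p++;
                out.push_back(s.substr(start, p - start));
                pos = p;
            } else {
                out.push_back(s.substr(pos, 1)); // fallback: single character
                pos++;
            }
        }
        return out;
    }

    int main() {
        for (const auto & t : split_words(" hello world")) printf("[%s]", t.c_str());
        printf("\n"); // prints: [ hello][ world]
        return 0;
    }

Without the lookahead, a chunk beginning with a space never enters the letter branch at all, because the space itself is not a letter; the space would then fall through to the whitespace patterns and " hello" would split as " " + "hello".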
From 29c153243992700f7ad5d6d40230e1be13ca5045 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 16:45:59 -0500
Subject: [PATCH 13/21] remove more unicode.cpp whitespaces

---
 src/unicode.cpp | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/unicode.cpp b/src/unicode.cpp
index 375c9f9b694ee..65f3665171582 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -612,7 +612,7 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
             bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han(cpt)) ||
                                      (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) &&
                                       _get_flags(pos + 1).is_letter && !unicode_cpt_is_han(_get_cpt(pos + 1)));
-            
+
             if (is_letter_pattern) {
                 // Handle optional leading non-letter/non-number character
                 bool has_leading_char = false;
@@ -620,23 +620,23 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
                     has_leading_char = true;
                     pos++;
                 }
-                
+
                 // Match letter sequence (excluding Han characters)
                 bool has_letters = false;
                 while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
                     has_letters = true;
                     pos++;
                 }
-                
+
                 // Only proceed if we found letters (after potentially skipping leading char)
                 if (has_letters || (!has_leading_char && _get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos)))) {
                     if (!has_letters) pos++; // consume the first letter if we didn't already
-                    
+
                     // Continue consuming letters
                     while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
                         pos++;
                     }
-                    
+
                     // Check for optional contractions (?:'s|'t|'re|'ve|'m|'ll|'d)
                     if (_get_cpt(pos) == '\'' && pos + 1 < offset_end) {
                         uint32_t cpt_next = unicode_tolower(_get_cpt(pos + 1));
@@ -651,7 +651,7 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
                             }
                         }
                     }
-                    
+
                     _add_token(pos);
                     continue;
                 } else if (has_leading_char) {
@@ -851,31 +851,31 @@ bool unicode_cpt_is_han(uint32_t cpt) {
     // Han character ranges (Chinese/CJK characters)
     // CJK Unified Ideographs (most common)
     if (cpt >= 0x4E00 && cpt <= 0x9FFF) return true;
-    
+
     // CJK Extension A
     if (cpt >= 0x3400 && cpt <= 0x4DBF) return true;
-    
+
     // CJK Extension B
     if (cpt >= 0x20000 && cpt <= 0x2A6DF) return true;
-    
+
     // CJK Extension C
     if (cpt >= 0x2A700 && cpt <= 0x2B73F) return true;
-    
+
    // CJK Extension D
     if (cpt >= 0x2B740 && cpt <= 0x2B81F) return true;
-    
+
     // CJK Extension E
     if (cpt >= 0x2B820 && cpt <= 0x2CEAF) return true;
-    
+
     // CJK Extension F
     if (cpt >= 0x2CEB0 && cpt <= 0x2EBEF) return true;
-    
+
     // CJK Compatibility Ideographs
     if (cpt >= 0xF900 && cpt <= 0xFAFF) return true;
-    
+
     // CJK Compatibility Ideographs Supplement
     if (cpt >= 0x2F800 && cpt <= 0x2FA1F) return true;
-    
+
     return false;
 }
 

From 6ef6aa1cd341a0dd1fb12630b20f353d7dc3fc9c Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Mon, 14 Jul 2025 18:06:01 -0500
Subject: [PATCH 14/21] change set_vocab() flow

---
 convert_hf_to_gguf.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index ecfc2236336e1..b66eac85d2e2c 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5566,11 +5566,17 @@ class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2
 
     def set_vocab(self):
-        if(self.hparams["vocab_size"]==163840): # Kimi-K2 model
-            from transformers import AutoTokenizer
-            tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
-            tokpre = self.get_vocab_base_pre(tokenizer)
+        try:
+            self._set_vocab_gpt2()
+            return
+        except:
+            pass
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+        tokpre = self.get_vocab_base_pre(tokenizer)
 
+        if tokpre == "kimi-k2":
             # Build merges list using the approach similar to HunYuanMoE
             merges = []
             vocab = {}
@@ -5611,7 +5617,7 @@ def set_vocab(self):
             special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
             special_vocab.add_to_gguf(self.gguf_writer)
         else:
-            self._set_vocab_gpt2()
+            raise NotImplementedError(f"{self.dir_model} is not supported yet!")
 
     def set_gguf_parameters(self):

From 02f4a631678bc435c5cea7a54ab4222ee32e8453 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 00:31:04 -0500
Subject: [PATCH 15/21] add moonshotai-Kimi-K2.jinja to /models/templates/

---
 models/templates/moonshotai-Kimi-K2.jinja | 37 +++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 models/templates/moonshotai-Kimi-K2.jinja

diff --git a/models/templates/moonshotai-Kimi-K2.jinja b/models/templates/moonshotai-Kimi-K2.jinja
new file mode 100644
index 0000000000000..be3627b0a4778
--- /dev/null
+++ b/models/templates/moonshotai-Kimi-K2.jinja
@@ -0,0 +1,37 @@
+{% if tools -%}
+    {{ '<|im_system|>tool_declare<|im_middle|>' -}}
+    {{- tools | tojson -}}
+    {{ '<|im_end|>' -}}
+{%- endif -%}
+
+{%- for message in messages -%}
+    {%- if loop.first and messages[0]['role'] != 'system' -%}
+        {{ '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>' }}
+    {%- endif -%}
+    {%- if message['role'] == 'system' -%}
+        {{ '<|im_system|>system<|im_middle|>' }}
+    {%- elif message['role'] == 'user' -%}
+        {{ '<|im_user|>user<|im_middle|>' }}
+    {%- elif message['role'] == 'assistant' -%}
+        {{ '<|im_assistant|>assistant<|im_middle|>' }}
+    {%- elif message['role'] == 'tool' -%}
+        {{ '<|im_system|>tool<|im_middle|>' }}
+    {%- endif -%}
+
+    {%- if message['content'] is string -%}
+        {{- message['content'] + '<|im_end|>' -}}
+    {%- else -%}
+        {%- for content in message['content'] -%}
+            {%- if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+                {{ '<|media_start|>image<|media_content|><|media_pad|><|media_end|>' }}
+            {%- else -%}
+                {{ content['text'] }}
+            {%- endif -%}
+        {%- endfor -%}
+        {{ '<|im_end|>' }}
+    {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt -%}
+    {{ '<|im_assistant|>assistant<|im_middle|>' }}
+{%- endif -%}
\ No newline at end of file

From 5a730aedf7f485ba44f71030aa9ec72cc86ca120 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 10:07:26 -0500
Subject: [PATCH 16/21] update moonshotai-Kimi-K2.jinja

---
 models/templates/moonshotai-Kimi-K2.jinja | 50 +++++++++++++----------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/models/templates/moonshotai-Kimi-K2.jinja b/models/templates/moonshotai-Kimi-K2.jinja
index be3627b0a4778..ecb49a210852c 100644
--- a/models/templates/moonshotai-Kimi-K2.jinja
+++ b/models/templates/moonshotai-Kimi-K2.jinja
@@ -1,37 +1,43 @@
-{% if tools -%}
-    {{ '<|im_system|>tool_declare<|im_middle|>' -}}
-    {{- tools | tojson -}}
-    {{ '<|im_end|>' -}}
+{%- if tools -%}
+    <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|>
 {%- endif -%}
-
 {%- for message in messages -%}
     {%- if loop.first and messages[0]['role'] != 'system' -%}
-        {{ '<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>' }}
+        <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>
     {%- endif -%}
     {%- if message['role'] == 'system' -%}
-        {{ '<|im_system|>system<|im_middle|>' }}
+        <|im_system|>system<|im_middle|>
     {%- elif message['role'] == 'user' -%}
-        {{ '<|im_user|>user<|im_middle|>' }}
+        <|im_user|>user<|im_middle|>
     {%- elif message['role'] == 'assistant' -%}
-        {{ '<|im_assistant|>assistant<|im_middle|>' }}
+        <|im_assistant|>assistant<|im_middle|>
     {%- elif message['role'] == 'tool' -%}
-        {{ '<|im_system|>tool<|im_middle|>' }}
+        <|im_system|>tool<|im_middle|>
     {%- endif -%}
-
-    {%- if message['content'] is string -%}
-        {{- message['content'] + '<|im_end|>' -}}
-    {%- else -%}
-        {%- for content in message['content'] -%}
-            {%- if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
-                {{ '<|media_start|>image<|media_content|><|media_pad|><|media_end|>' }}
-            {%- else -%}
+    {%- if message['role'] == 'assistant' and message.get('tool_calls') -%}
+        {%- if message['content'] -%}{{ message['content'] }}{%- endif -%}
+        <|tool_calls_section_begin|>
+        {%- for tool_call in message['tool_calls'] -%}
+            {%- set func_name = tool_call['function']['name'] -%}
+            {%- set formatted_id = 'functions.' + func_name + ':' + loop.index0|string -%}
+            <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|>
+        {%- endfor -%}
+        <|tool_calls_section_end|>
+    {%- elif message['role'] == 'tool' -%}
+        ## Return of {{ message.tool_call_id }}\n{{ message['content'] }}
+    {%- elif message['content'] is string -%}
+        {{ message['content'] }}
+    {%- elif message['content'] is not none -%}
+        {% for content in message['content'] -%}
+            {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+                <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+            {% else -%}
                 {{ content['text'] }}
             {%- endif -%}
         {%- endfor -%}
-        {{ '<|im_end|>' }}
     {%- endif -%}
+    <|im_end|>
 {%- endfor -%}
-
 {%- if add_generation_prompt -%}
-    {{ '<|im_assistant|>assistant<|im_middle|>' }}
-{%- endif -%}
\ No newline at end of file
+    <|im_assistant|>assistant<|im_middle|>
+{%- endif -%}

From 73dcb44312074b40b8595f2623e2377c0e060156 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 11:21:35 -0500
Subject: [PATCH 17/21] add kimi-k2 chat template

---
 src/llama-chat.cpp | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index cbc19d3c40c30..d824392964f28 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -65,6 +65,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "llama4",            LLM_CHAT_TEMPLATE_LLAMA4            },
     { "smolvlm",           LLM_CHAT_TEMPLATE_SMOLVLM           },
     { "hunyuan-moe",       LLM_CHAT_TEMPLATE_HUNYUAN_MOE       },
+    { "kimi-k2",           LLM_CHAT_TEMPLATE_KIMI_K2           },
 };
 
 llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -188,6 +189,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_DOTS1;
     } else if (tmpl_contains("<|startoftext|>") && tmpl_contains("<|extra_4|>")) {
         return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
+    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
+        return LLM_CHAT_TEMPLATE_KIMI_K2;
     }
     return LLM_CHAT_TEMPLATE_UNKNOWN;
 }
@@ -680,6 +683,22 @@ int32_t llm_chat_apply_template(
                 ss << "<|startoftext|>" << message->content << "<|extra_0|>";
             }
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
+        // moonshotai/Kimi-K2-Instruct
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << "<|im_system|>system<|im_middle|>";
+            } else if (role == "user") {
+                ss << "<|im_user|>user<|im_middle|>";
+            } else if (role == "assistant") {
+                ss << "<|im_assistant|>assistant<|im_middle|>";
+            } else if (role == "tool") {
+                ss << "<|im_system|>tool<|im_middle|>";
+            }
+
+            ss << message->content << "<|im_end|>";
+        }
     } else {
         // template not supported
         return -1;

From d2211e01cf612f4726e8d192b462f6629c6550c7 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 11:22:58 -0500
Subject: [PATCH 18/21] add kimi-k2

---
 src/llama-chat.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llama-chat.h b/src/llama-chat.h
index b621fda281669..cab0533485652 100644
--- a/src/llama-chat.h
+++ b/src/llama-chat.h
@@ -45,6 +45,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_SMOLVLM,
     LLM_CHAT_TEMPLATE_DOTS1,
     LLM_CHAT_TEMPLATE_HUNYUAN_MOE,
+    LLM_CHAT_TEMPLATE_KIMI_K2,
     LLM_CHAT_TEMPLATE_UNKNOWN,
 };
 

From a57870df6963650ea2252bc6041bcb0828e5e4ae Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 11:55:27 -0500
Subject: [PATCH 19/21] update NotImplementedError
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Sigbjørn Skjæret
---
 convert_hf_to_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index b66eac85d2e2c..15b0e016751b8 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5617,7 +5617,7 @@ def set_vocab(self):
             special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
             special_vocab.add_to_gguf(self.gguf_writer)
         else:
-            raise NotImplementedError(f"{self.dir_model} is not supported yet!")
+            raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")
 
     def set_gguf_parameters(self):

From 0a5704b59d8d3b4846ac4c4a9997cc19de9e06d8 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 11:59:28 -0500
Subject: [PATCH 20/21] except Exception
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Sigbjørn Skjæret
---
 convert_hf_to_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 15b0e016751b8..9b929bf70964d 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -5569,7 +5569,7 @@ def set_vocab(self):
         try:
             self._set_vocab_gpt2()
             return
-        except:
+        except Exception:
             pass
 
         from transformers import AutoTokenizer

From 459df19b89b06b19613daad00ef37f9e49a96837 Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Tue, 15 Jul 2025 13:11:31 -0500
Subject: [PATCH 21/21] LLM_CHAT_TEMPLATE_KIMI_K2 if(add_ass){}

---
 src/llama-chat.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index d824392964f28..11359903078e2 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -698,6 +698,10 @@ int32_t llm_chat_apply_template(
             }
 
             ss << message->content << "<|im_end|>";
+
+            if (add_ass) {
+                ss << "<|im_assistant|>assistant<|im_middle|>";
+            }
         }
     } else {
         // template not supported
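Taken together, patches 17 and 21 make the C++ template path produce the same layout as the bundled jinja file for plain text chats, except that the C++ branch injects no default system message. A standalone re-derivation of that branch (a hypothetical helper, not llama.cpp's API), with the rendered prompt shown in the trailing comment:

    #include <cstdio>
    #include <sstream>
    #include <string>
    #include <vector>

    struct msg { std::string role, content; };

    // Mirrors the LLM_CHAT_TEMPLATE_KIMI_K2 branch above, including the
    // add_ass trailer added in the final patch.
    static std::string render_kimi_k2(const std::vector<msg> & chat, bool add_ass) {
        std::ostringstream ss;
        for (const auto & m : chat) {
            if      (m.role == "system")    ss << "<|im_system|>system<|im_middle|>";
            else if (m.role == "user")      ss << "<|im_user|>user<|im_middle|>";
            else if (m.role == "assistant") ss << "<|im_assistant|>assistant<|im_middle|>";
            else if (m.role == "tool")      ss << "<|im_system|>tool<|im_middle|>";
            ss << m.content << "<|im_end|>";
        }
        if (add_ass) ss << "<|im_assistant|>assistant<|im_middle|>";
        return ss.str();
    }

    int main() {
        std::string p = render_kimi_k2({{"system", "You are a helpful assistant"}, {"user", "Hello"}}, /*add_ass=*/true);
        printf("%s\n", p.c_str());
        // <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|><|im_user|>user<|im_middle|>Hello<|im_end|><|im_assistant|>assistant<|im_middle|>
        return 0;
    }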