Skip to content

Commit 576c82e

Browse files
authored
vocab : add midm-2.0 model pre-tokenizer (#14626)
1 parent 0aedae0 commit 576c82e

File tree

3 files changed: 6 additions and 1 deletion.

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
833833
if chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b":
834834
# ref: https://huggingface.co/tiiuae/Falcon-H1-34B-Base
835835
res = "falcon-h1"
836+
if chkhsh == "f6791d196f87ce6b56a7d234be618e0d58f8cda3549416635b2bebcd22cd95c4":
837+
# ref: https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct
838+
res = "midm-2.0"
836839

837840
if res is None:
838841
logger.warning("\n")

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ class TOKENIZER_TYPE(IntEnum):
129129
{"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
130130
{"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
131131
{"name": "a.x-4.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
132+
{"name": "midm-2.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct", },
132133
]
133134

134135
# some models are known to be broken upstream, so we will skip them as exceptions

src/llama-vocab.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1524,7 +1524,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
15241524
tokenizer_pre == "llama-bpe"||
15251525
tokenizer_pre == "falcon3" ||
15261526
tokenizer_pre == "falcon-h1" ||
1527-
tokenizer_pre == "pixtral") {
1527+
tokenizer_pre == "pixtral" ||
1528+
tokenizer_pre == "midm-2.0") {
15281529
pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
15291530
ignore_merges = true;
15301531
add_bos = true;

0 commit comments

Comments
 (0)