Skip to content

Commit ffd59e7

Browse files
authored
model : add skt/A.X-4.0 model vocabulary (#14589)
1 parent 1055545 commit ffd59e7

File tree

3 files changed, with 6 additions and 1 deletion.

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -818,6 +818,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
818818
if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
819819
# ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
820820
res = "hunyuan"
821+
if chkhsh == "b0a6b1c0bd5998ebd9df08611efde34a4ff03faed45ae09c43e6b31ebd4b94cf":
822+
# ref: https://huggingface.co/skt/A.X-4.0
823+
res = "a.x-4.0"
821824
if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
822825
# ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
823826
res = "falcon-h1"

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,7 @@ class TOKENIZER_TYPE(IntEnum):
128128
{"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
129129
{"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
130130
{"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
131+
{"name": "a.x-4.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", },
131132
]
132133

133134
# some models are known to be broken upstream, so we will skip them as exceptions

src/llama-vocab.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1556,7 +1556,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
15561556
tokenizer_pre == "jina-de" ||
15571557
tokenizer_pre == "gigachat" ||
15581558
tokenizer_pre == "jina-v2-es" ||
1559-
tokenizer_pre == "jina-v2-de") {
1559+
tokenizer_pre == "jina-v2-de" ||
1560+
tokenizer_pre == "a.x-4.0") {
15601561
pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
15611562
} else if (
15621563
tokenizer_pre == "jina-v1-en" ||

0 commit comments

Comments
 (0)