model : add hunyuan moe #14425
convert_hf_to_gguf.py
@@ -607,8 +607,7 @@
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
        vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
        assert max(tokenizer.vocab.values()) < vocab_size

        tokpre = self.get_vocab_base_pre(tokenizer)
@@ -617,7 +616,7 @@
        added_tokens_decoder = tokenizer.added_tokens_decoder

        for i in range(vocab_size):
            if i not in reverse_vocab:
                tokens.append(f"[PAD{i}]")
                toktypes.append(gguf.TokenType.UNUSED)
@@ -815,6 +814,9 @@
        if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
            # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
            res = "minerva-7b"
        if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
            # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
            res = "hunyuan"

        if res is None:
            logger.warning("\n")
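Note: these chkhsh branches are how the converter identifies which pre-tokenizer to use. get_vocab_base_pre encodes a fixed probe string with the model's tokenizer and hashes the resulting token IDs, so any behavioral change in the pre-tokenizer shows up as a new hash. A minimal sketch of the idea, where chktxt is a stand-in for the long multilingual probe string the script actually uses:

import hashlib
from transformers import AutoTokenizer

chktxt = "Hello 🦙, 123 test"  # stand-in; the real script uses a long multilingual probe string

tokenizer = AutoTokenizer.from_pretrained("tencent/Hunyuan-A13B-Instruct", trust_remote_code=True)
chktok = tokenizer.encode(chktxt)                          # token IDs produced by pre-tokenizer + BPE
chkhsh = hashlib.sha256(str(chktok).encode()).hexdigest()  # fingerprint compared against the table
print(chkhsh)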
@@ -6390,6 +6392,114 @@
        super().set_gguf_parameters()
        self.gguf_writer.add_audio_stack_factor(self.global_config["stack_factor"])


@ModelBase.register("HunYuanMoEV1ForCausalLM")
class HunYuanMoEModel(LlamaModel):
    model_arch = gguf.MODEL_ARCH.HUNYUAN_MOE

Review comment: Could you also add the version suffix to the arch name, like the arch name in the model's config.json?

    undo_permute = False
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def set_vocab(self):
        self._set_vocab_gpt2()

    def get_vocab_base(self) -> tuple[list[str], list[int], str]:
        tokens: list[str] = []
        toktypes: list[int] = []

        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
        # merge logic is copied from QwenModel, maybe incorrect
        merges = []
        vocab = {}
        mergeable_ranks = tokenizer.mergeable_ranks
        for token, rank in mergeable_ranks.items():
            vocab[QwenModel.token_bytes_to_string(token)] = rank
            if len(token) == 1:
                continue
            merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
            if len(merged) == 2:
                merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
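On the "merge logic copied from QwenModel" above: tiktoken-style tokenizers ship mergeable_ranks (a bytes-to-rank map) rather than an explicit merges list, so the converter has to re-derive each merge pair. A standalone sketch of that recovery idea, a stand-in for QwenModel.bpe rather than the exact implementation:

def recover_parts(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int) -> list[bytes]:
    # replay byte-level BPE on this token, allowing only merges ranked below max_rank;
    # for an ordinary BPE token the two leftover parts are the pair that formed it
    parts = [bytes([b]) for b in token]
    while True:
        best_idx, best_rank = None, None
        for i in range(len(parts) - 1):
            rank = mergeable_ranks.get(parts[i] + parts[i + 1])
            if rank is not None and (best_rank is None or rank < best_rank):
                best_idx, best_rank = i, rank
        if best_rank is None or best_rank >= max_rank:
            break  # nothing left to merge below this token's own rank
        parts = parts[:best_idx] + [parts[best_idx] + parts[best_idx + 1]] + parts[best_idx + 2:]
    return parts  # length 2 for ordinary BPE tokens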
        self.gguf_writer.add_token_merges(merges)

        reverse_vocab = tokenizer.decoder
        assert max(reverse_vocab.keys()) < tokenizer.vocab_size

        tokpre = self.get_vocab_base_pre(tokenizer)
        added_vocab = tokenizer.get_added_vocab()

        added_tokens_decoder = tokenizer.added_tokens_decoder

        for i in range(tokenizer.vocab_size):
            if i not in reverse_vocab:
                tokens.append(f"[PAD{i}]")
                toktypes.append(gguf.TokenType.UNUSED)
            else:
                token: str = reverse_vocab[i]
                if token in added_vocab:
                    # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
                    # To avoid unexpected issues - we make sure to normalize non-normalized tokens
                    if not added_tokens_decoder[i].normalized:
                        previous_token = token
                        token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
                        if previous_token != token:
                            logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")

                    if added_tokens_decoder[i].special or self.does_token_look_special(token):
                        toktypes.append(gguf.TokenType.CONTROL)
                    else:
                        # NOTE: this was added for Gemma.
                        # Encoding and decoding the tokens above isn't sufficient for this case.
                        token = token.replace(b"\xe2\x96\x81".decode("utf-8"), " ")  # pre-normalize user-defined spaces
                        toktypes.append(gguf.TokenType.USER_DEFINED)
                else:
                    toktypes.append(gguf.TokenType.NORMAL)
                tokens.append(token)

        return tokens, toktypes, tokpre
    def set_gguf_parameters(self):
        super().set_gguf_parameters()

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        # process the experts separately
        if name.find("mlp.experts") != -1:
            n_experts = self.hparams["num_experts"]
            assert bid is not None

            tensors: list[tuple[str, Tensor]] = []

            if self._experts is None:
                self._experts = [{} for _ in range(self.block_count)]

            self._experts[bid][name] = data_torch

            if len(self._experts[bid]) >= n_experts * 3:
                # merge the experts into a single 3d tensor
                for w_name in ["down_proj", "gate_proj", "up_proj"]:
                    datas: list[Tensor] = []

                    for xid in range(n_experts):
                        ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
                        datas.append(self._experts[bid][ename])
                        del self._experts[bid][ename]

                    data_torch = torch.stack(datas, dim=0)

                    merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"

                    new_name = self.map_tensor_name(merged_name)

                    tensors.append((new_name, data_torch))

                return tensors
            else:
                return []

        return [(self.map_tensor_name(name), data_torch)]
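Note on the expert merge above: modify_tensors buffers each layer's per-expert down/gate/up matrices until all n_experts * 3 have arrived, then stacks each projection into one 3-D tensor so the GGUF file stores a single tensor per projection per layer. A toy illustration of the shape change, with made-up sizes rather than Hunyuan-A13B's real dimensions:

import torch

n_experts, n_ff, n_embd = 4, 3072, 1024  # hypothetical sizes, for illustration only
per_expert = [torch.randn(n_ff, n_embd) for _ in range(n_experts)]  # e.g. one gate_proj per expert

merged = torch.stack(per_expert, dim=0)
print(merged.shape)  # torch.Size([4, 3072, 1024]): one [n_expert, n_ff, n_embd] tensor per projection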
###### CONVERSION LOGIC ######
convert_hf_to_gguf_update.py
@@ -137,6 +137,7 @@ class TOKENIZER_TYPE(IntEnum):
    {"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
    {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
    {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
    {"name": "hunyuan", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-A13B-Instruct", "chkhsh": "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664"},
]

Review comment: The name should be hunyuan-a13b; from my source, they will release more LLM models soon, so we'd better add some identifier for the model.
Reply: This is the tokenizer name, not the model name.
gguf-py/gguf/constants.py
@@ -354,6 +354,7 @@ class MODEL_ARCH(IntEnum):
    BAILINGMOE  = auto()
    DOTS1       = auto()
    ARCEE       = auto()
    HUNYUAN_MOE = auto()


class VISION_PROJECTOR_TYPE(IntEnum):
@@ -654,6 +655,7 @@ class MODEL_TENSOR(IntEnum):
    MODEL_ARCH.BAILINGMOE:  "bailingmoe",
    MODEL_ARCH.DOTS1:       "dots1",
    MODEL_ARCH.ARCEE:       "arcee",
    MODEL_ARCH.HUNYUAN_MOE: "hunyuan-moe",
}

Review comment: hunyuan-moe-v1 would be a better name for later model updates.

VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -2177,6 +2179,27 @@ class MODEL_TENSOR(IntEnum):
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.HUNYUAN_MOE: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_Q_NORM,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_K_NORM,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.FFN_GATE_INP,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE_EXP,
        MODEL_TENSOR.FFN_DOWN_EXP,
        MODEL_TENSOR.FFN_UP_EXP,
        MODEL_TENSOR.FFN_GATE_SHEXP,
        MODEL_TENSOR.FFN_DOWN_SHEXP,
        MODEL_TENSOR.FFN_UP_SHEXP,
    ],
    # TODO
}
include/llama.h
@@ -117,6 +117,7 @@ extern "C" {
        LLAMA_VOCAB_PRE_TYPE_LLAMA4     = 33,
        LLAMA_VOCAB_PRE_TYPE_PIXTRAL    = 34,
        LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
        LLAMA_VOCAB_PRE_TYPE_HUNYUAN    = 36,
    };

Review comment: Adding a version suffix to the vocab type would be better.

    enum llama_rope_type {
src/llama-vocab.cpp
@@ -351,6 +351,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                break;
            case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
            case LLAMA_VOCAB_PRE_TYPE_QWEN2:
            case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
                regex_exprs = {
                    // original regex from tokenizer.json
                    // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
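For a sense of what this Qwen2-style pre-tokenizer splits on, here is a small sketch using Python's third-party regex module (the stdlib re module lacks the \p{L}/\p{N} classes); the pattern is the one quoted in the comment above:

import regex  # pip install regex

pat = r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"

print(regex.findall(pat, "We're testing 123 tokens!"))
# ['We', "'re", ' testing', ' ', '1', '2', '3', ' tokens', '!']  -- digits split one at a time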
@@ -1656,6 +1657,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                tokenizer_pre == "seed-coder") {
                pre_type = LLAMA_VOCAB_PRE_TYPE_SEED_CODER;
                clean_spaces = false;
            } else if (
                tokenizer_pre == "hunyuan") {

Review comment: the tokenizer version

                pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN;
                clean_spaces = false;
            } else {
                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
            }
Review comment: The model name would be better as hunyuan A13B.
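For reference, with these patches the usual conversion flow should apply; a hedged example invocation, where the checkpoint path and output name are placeholders:

python convert_hf_to_gguf.py /path/to/Hunyuan-A13B-Instruct --outfile hunyuan-a13b-instruct.gguf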