@@ -819,6 +819,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
             # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
             res = "hunyuan"
+        if chkhsh == "bba3b3366b646dbdded5dbc42d59598b849371afc42f7beafa914afaa5b70aa6":
+            # ref: # TODO: update ref
+            res = "hunyuan-v1-dense"
         if chkhsh == "b0a6b1c0bd5998ebd9df08611efde34a4ff03faed45ae09c43e6b31ebd4b94cf":
             # ref: https://huggingface.co/skt/A.X-4.0
             res = "a.x-4.0"
@@ -7070,6 +7073,105 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@ModelBase.register("HunYuanDenseV1ForCausalLM")
+class HunYuanModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.HUNYUAN_V1_DENSE
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # For handling tied embeddings
+        self._tok_embd = None
+
+    def set_vocab(self):
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+
+        # 1. Get the pre-tokenizer identifier hash
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        # 2. Reverse-engineer the merges list from mergeable_ranks
+        merges = []
+        vocab = {}
+        mergeable_ranks = tokenizer.mergeable_ranks
+        for token, rank in mergeable_ranks.items():
+            vocab[QwenModel.token_bytes_to_string(token)] = rank
+            if len(token) == 1:
+                continue
+            merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+            if len(merged) == 2:  # todo this is an assert in Qwen, why?
+                merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+        # 3. Generate the tokens and toktypes lists
+        vocab_size = self.hparams["vocab_size"]
+        assert tokenizer.vocab_size == vocab_size
+        special_tokens = tokenizer.special_tokens
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+        tokens: list[str] = []
+        toktypes: list[int] = []
+        for i in range(vocab_size):
+            if i not in reverse_vocab:
+                tokens.append(f"[PAD{i}]")
+                toktypes.append(gguf.TokenType.UNUSED)
+            else:
+                token = reverse_vocab[i]
+                tokens.append(token)
+                if i in special_tokens.values():
+                    toktypes.append(gguf.TokenType.CONTROL)
+                else:
+                    toktypes.append(gguf.TokenType.NORMAL)
+
+        # 4. Write all vocab-related fields to the GGUF writer
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+        self.gguf_writer.add_token_merges(merges)
+
+        # 5. Add special tokens and chat templates
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+        special_vocab.add_to_gguf(self.gguf_writer)
+        # FIX for BOS token: Overwrite incorrect id read from config.json
+        self.gguf_writer.add_bos_token_id(127959)  # <|bos|>
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+
+        self.gguf_writer.add_expert_shared_feed_forward_length(hparams["intermediate_size"])
+
+        # Rope
+        rope_scaling = hparams.get("rope_scaling", {})
+        if rope_scaling.get("type") == "dynamic":
+            # HunYuan uses NTK Aware Alpha based scaling. Original implementation: https://www.reddit.com/r/LocalLLaMA/comments/14lz7j5/ntkaware_scaled_rope_allows_llama_models_to_have/
+            # 1000 corresponds to a usable context length of 256k (https://github.com/Tencent-Hunyuan/Hunyuan-A13B/blob/main/report/Hunyuan_A13B_Technical_Report.pdf)
+            alpha = rope_scaling.get("alpha", 50)
+            base = hparams.get("rope_theta", 10000.0)
+            dim = (hparams["hidden_size"] // hparams["num_attention_heads"])  # 128
+            scaled_base = base * (alpha ** (dim / (dim - 2)))  # 10000 * (50 ** (128 / 126)) = 532032.0339
+            self.gguf_writer.add_rope_freq_base(scaled_base)
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+            self.gguf_writer.add_rope_scaling_factor(1)
+            # There is no consistent way to calculate ctx from alpha, and the config is incorrectly set to 32k
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(256 * 1024)  # 256k context length
+            self.gguf_writer.add_context_length(256 * 1024)  # 256k context length
+
+            # if any of our assumptions about the values are wrong, something has changed and this may need to be updated
+            assert alpha == 50 and base == 10000.0 and dim in [96, 128] and self.hparams["max_position_embeddings"] in [32 * 1024, 256 * 1024], \
+                "HunYuan dynamic RoPE scaling assumptions changed, please update the logic or context length manually"
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name == "model.embed_tokens.weight":
+            self._tok_embd = data_torch.clone()
+
+        if name == "lm_head.weight":
+            if self.hparams.get("tie_word_embeddings", False):
+                logger.info("Skipping tied output layer 'lm_head.weight'")
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
 @ModelBase.register("SmolLM3ForCausalLM")
 class SmolLM3Model(LlamaModel):
     model_arch = gguf.MODEL_ARCH.SMOLLM3
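
For reference, a minimal standalone sketch of the NTK-aware alpha scaling that `set_gguf_parameters` applies above; the `alpha`, `rope_theta`, and head-dimension values are assumptions taken from the comments in the diff, not read from a real config:

```python
# NTK-aware alpha scaling: inflate the RoPE frequency base so the unscaled
# rotary embedding covers a longer context (values assumed from the diff above).
alpha = 50     # rope_scaling["alpha"] (assumed)
base = 10000.0 # rope_theta (assumed)
dim = 128      # hidden_size // num_attention_heads (assumed head dim)

scaled_base = base * (alpha ** (dim / (dim - 2)))
print(f"{scaled_base:.4f}")  # ~532032.0339, the value passed to add_rope_freq_base()
```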