@@ -818,6 +818,21 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
             # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
             res = "hunyuan"
+        if chkhsh == "b0a6b1c0bd5998ebd9df08611efde34a4ff03faed45ae09c43e6b31ebd4b94cf":
+            # ref: https://huggingface.co/skt/A.X-4.0
+            res = "a.x-4.0"
+        if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
+            # ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
+            res = "falcon-h1"
+        if chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86":
+            # ref: https://huggingface.co/tiiuae/Falcon-H1-1B-Base
+            res = "falcon-h1"
+        if chkhsh == "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896":
+            # ref: https://huggingface.co/tiiuae/Falcon-H1-7B-Base
+            res = "falcon-h1"
+        if chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b":
+            # ref: https://huggingface.co/tiiuae/Falcon-H1-34B-Base
+            res = "falcon-h1"
 
         if res is None:
             logger.warning("\n")
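# A side sketch, not part of the patch: assuming the chkhsh values registered
# above are produced the way get_vocab_base_pre() does it elsewhere in
# convert_hf_to_gguf.py -- the tokenizer encodes a fixed probe string and the
# stringified token-id list is hashed -- each distinct BPE pre-tokenizer ends up
# with its own digest. The probe text below is a placeholder, not the exact
# string used upstream.
from hashlib import sha256

from transformers import AutoTokenizer


def compute_chkhsh(model_dir: str, probe_text: str = "Hello, y'all! How are you 123?") -> str:
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    token_ids = tokenizer.encode(probe_text)
    # different pre-tokenizers split the probe differently, so the digest differs
    return sha256(str(token_ids).encode()).hexdigest()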
@@ -4876,7 +4891,7 @@ def __init__(self, dir_model: Path, *args, **kwargs):
                 hparams = json.load(f)
         super().__init__(dir_model, *args, hparams=hparams, **kwargs)
         self.d_model = self.find_hparam(["hidden_size", "d_model", "dim"])
-        self.d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * self.d_model
+        self.d_inner = self.find_hparam(["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or 2 * self.d_model
         self.n_group = self.find_hparam(["n_groups"], optional=True) or 1
 
     def set_vocab(self):
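# A side sketch, not part of the patch: a minimal version of the key-fallback
# pattern used above, assuming find_hparam() simply returns the first key found
# in config.json. Falcon-H1 stores the SSM inner size under "mamba_d_ssm",
# older Mamba/Mamba2 checkpoints under "intermediate_size" or "d_inner", and if
# none is present the usual 2 * d_model default applies. The config dict below
# is illustrative, not a real checkpoint.
from typing import Any


def find_hparam(hparams: dict[str, Any], keys: list[str], optional: bool = False) -> Any:
    for key in keys:
        if key in hparams:
            return hparams[key]
    if optional:
        return None
    raise KeyError(f"none of {keys} found in hparams")


hparams = {"hidden_size": 1024, "mamba_d_ssm": 1536}  # hypothetical Falcon-H1-style config
d_model = find_hparam(hparams, ["hidden_size", "d_model", "dim"])
d_inner = find_hparam(hparams, ["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or 2 * d_model
print(d_inner)  # 1536 here; would fall back to 2048 (2 * d_model) without "mamba_d_ssm"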
@@ -4900,16 +4915,18 @@ def set_vocab(self):
             self._set_vocab_builtin("gpt-neox", vocab_size)
 
     def set_gguf_parameters(self):
-        d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
-        d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 128
-        head_dim = self.find_hparam(["head_dim"], optional=True) or 64
+        d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
+        d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 128
+        head_dim = self.find_hparam(["mamba_d_head", "head_dim"], optional=True) or 64
 
         rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
 
         # Fail early for models which don't have a block expansion factor of 2
         # TODO: does this really matter?
-        assert self.d_inner == 2 * self.d_model
-        assert self.d_inner % head_dim == 0
+        # skip the assertions for the Falcon-H1 model
+        if self.model_arch != gguf.MODEL_ARCH.FALCON_H1:
+            assert self.d_inner == 2 * self.d_model
+            assert self.d_inner % head_dim == 0
 
         self.gguf_writer.add_context_length(2**20)  # arbitrary value; for those who use the default
         self.gguf_writer.add_embedding_length(self.d_model)
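# A side sketch, not part of the patch, with made-up numbers: it shows why the
# Mamba2 assertions are skipped for Falcon-H1. There d_inner comes from
# "mamba_d_ssm" and is generally not 2 * d_model, so the block-expansion check
# would fail even though the conversion itself is fine.
d_model = 1024     # hypothetical hidden_size
d_inner = 1536     # hypothetical mamba_d_ssm
head_dim = 64      # hypothetical mamba_d_head
is_falcon_h1 = True

if not is_falcon_h1:
    # Mamba2 path: enforce the usual block expansion factor of 2
    assert d_inner == 2 * d_model
    assert d_inner % head_dim == 0
else:
    # Falcon-H1 path: only per-head divisibility is validated, and that happens
    # later in FalconH1Model.set_gguf_parameters()
    assert d_inner % head_dim == 0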
@@ -6804,6 +6821,113 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_audio_stack_factor(self.global_config["stack_factor"])
 
 
+@ModelBase.register("FalconH1ForCausalLM")
+class FalconH1Model(Mamba2Model):
+    model_arch = gguf.MODEL_ARCH.FALCON_H1
+
+    def __init__(self, *args, **kwargs):
+        # Set the hparam prefixes for Falcon Mamba2
+        self.hparam_prefixes = ["mamba"]
+
+        # Initialize the base Mamba2Model
+        super().__init__(*args, **kwargs)
+
+        # Use Llama conversion for attention
+        self._transformer_model_class = LlamaModel
+
+        # n_group and d_inner are used during reshape_tensors for mamba2
+        self.n_group = self.find_hparam(["n_groups"])
+        self.d_inner = self.find_hparam(["mamba_d_ssm"])
+        self.d_head = self.find_hparam(["d_head"])
+
+        # Initialize any Falcon Mamba2 specific attributes
+        self.has_attention = True  # Falcon Mamba2 has attention components
+
+        # Load Falcon-H1 multipliers from hyperparameters
+        self.attention_in_multiplier = self.find_hparam(["attention_in_multiplier"], optional=True)
+        self.attention_out_multiplier = self.find_hparam(["attention_out_multiplier"], optional=True)
+        self.ssm_in_multiplier = self.find_hparam(["ssm_in_multiplier"], optional=True)
+        self.ssm_out_multiplier = self.find_hparam(["ssm_out_multiplier"], optional=True)
+        self.mlp_multipliers = self.find_hparam(["mlp_multipliers"], optional=True)
+        self.ssm_multipliers = self.find_hparam(["ssm_multipliers"], optional=True)
+        self.intermediate_size = self.find_hparam(["intermediate_size"])
+        self.key_multiplier = self.find_hparam(["key_multiplier"], optional=True)
+
+    def find_hparam(self, keys: Iterable[str], *args, **kwargs) -> Any:
+        prefixed = []
+        for pfx in self.hparam_prefixes:
+            prefixed.extend(
+                "_".join([pfx, k])
+                for k in keys
+            )
+        keys = list(keys) + prefixed
+        return super().find_hparam(keys, *args, **kwargs)
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        tensors = list(super().modify_tensors(data_torch, name, bid))
+        tensor = tensors[0][1]
+
+        if "down_proj" in name:
+            tensor = tensor * self.mlp_multipliers[1]
+        elif "gate_proj" in name:
+            tensor = tensor * self.mlp_multipliers[0]
+        elif "k_proj" in name:
+            tensor = tensor * self.key_multiplier * self.attention_in_multiplier
+        elif "q_proj" in name:
+            tensor = tensor * self.attention_in_multiplier
+        elif "v_proj" in name:
+            tensor = tensor * self.attention_in_multiplier
+        elif "o_proj" in name:
+            tensor = tensor * self.attention_out_multiplier
+        elif "out_proj" in name:
+            tensor = tensor * self.ssm_out_multiplier
+        elif "in_proj" in name:
+            tensor = tensor * self.ssm_in_multiplier
+            zxbcdt_multipliers = self.hparams["ssm_multipliers"]
+            intermediate_size = self.hparams["mamba_d_ssm"]
+            groups_time_state_size = self.hparams["mamba_n_groups"] * self.hparams["mamba_d_state"]
+            tensor[:intermediate_size, :] *= zxbcdt_multipliers[0]
+            tensor[intermediate_size:2 * intermediate_size, :] *= zxbcdt_multipliers[1]
+            tensor[2 * intermediate_size:2 * intermediate_size + groups_time_state_size, :] *= zxbcdt_multipliers[2]
+            tensor[2 * intermediate_size + groups_time_state_size:2 * intermediate_size + 2 * groups_time_state_size, :] *= zxbcdt_multipliers[3]
+            tensor[2 * intermediate_size + 2 * groups_time_state_size:, :] *= zxbcdt_multipliers[4]
+        elif "lm_head" in name:
+            tensor = tensor * self.hparams["lm_head_multiplier"]
+        elif "embed_tokens" in name:
+            tensor = tensor * self.hparams["embedding_multiplier"]
+        elif "mamba.norm" in name:
+            tensor = tensor.reshape(self.n_group, self.d_inner // self.n_group)
+
+        tensors = [(tensors[0][0], tensor)]
+        return tensors
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+
+        ## General Params ##
+        self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
+        # Override some Mamba2 defaults
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_context_length(self.hparams.get("max_position_embeddings", 0))
+        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+
+        ## Attention params ##
+        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])  # Override value 0 from Mamba2
+        self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
+        self.gguf_writer.add_key_length(self.hparams["head_dim"])
+        self.gguf_writer.add_value_length(self.hparams["head_dim"])
+
+        ## Validation ##
+        assert self.hparams.get("hidden_act") in [None, "silu"], "Only SILU activation supported"
+        assert self.d_inner % self.d_head == 0, f"SSM inner size {self.d_inner} not a multiple of head dim {self.d_head}"
+
+        # Add any other Falcon Mamba2 specific configuration
+        self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
+
+
 @ModelBase.register("HunYuanMoEV1ForCausalLM")
 class HunYuanMoEModel(TextModel):
     model_arch = gguf.MODEL_ARCH.HUNYUAN_MOE
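# A side sketch, not part of the patch: the in_proj branch of modify_tensors()
# above scales row blocks of the fused Mamba2 input projection, whose output
# rows are laid out as [z | x | B | C | dt] (hence "zxbcdt_multipliers").
# Standalone version with small, made-up dimensions, not real Falcon-H1 sizes:
import torch

d_ssm = 4                     # hypothetical mamba_d_ssm
n_groups, d_state = 1, 2      # hypothetical mamba_n_groups, mamba_d_state
n_heads = 2                   # hypothetical number of SSM heads (dt rows)
gts = n_groups * d_state      # "groups_time_state_size" in the diff
multipliers = [1.0, 2.0, 3.0, 4.0, 5.0]  # hypothetical ssm_multipliers

rows = 2 * d_ssm + 2 * gts + n_heads     # z + x + B + C + dt
in_proj = torch.ones(rows, 8)            # (out_features, in_features)

in_proj[:d_ssm, :] *= multipliers[0]                               # z block
in_proj[d_ssm:2 * d_ssm, :] *= multipliers[1]                      # x block
in_proj[2 * d_ssm:2 * d_ssm + gts, :] *= multipliers[2]            # B block
in_proj[2 * d_ssm + gts:2 * d_ssm + 2 * gts, :] *= multipliers[3]  # C block
in_proj[2 * d_ssm + 2 * gts:, :] *= multipliers[4]                 # dt block
print(in_proj[:, 0])  # shows the five per-block scales down the rows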
@@ -6957,6 +7081,16 @@ def prepare_tensors(self):
 class SmolLM3Model(LlamaModel):
     model_arch = gguf.MODEL_ARCH.SMOLLM3
 
+    def set_vocab(self):
+        super().set_vocab()
+        # remove unsupported array slicing in chat template
+        # ref: https://huggingface.co/ggml-org/SmolLM3-3B-GGUF/discussions/1
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
+        if tokenizer.chat_template is not None:
+            chat_template = tokenizer.chat_template.replace("[:]", "")
+            self.gguf_writer.add_chat_template(chat_template)
+
 ###### CONVERSION LOGIC ######
 
 
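# A side sketch, not part of the patch: a minimal illustration of the
# chat-template fix above. Some Jinja engines used at inference time reject
# Python-style full slices such as "messages[:]", so the converter strips the
# "[:]" before writing the template into the GGUF metadata. The template string
# here is a toy example, not the real SmolLM3 template.
toy_template = "{%- for message in messages[:] %}{{ message['content'] }}{%- endfor %}"
fixed_template = toy_template.replace("[:]", "")
print(fixed_template)
# {%- for message in messages %}{{ message['content'] }}{%- endfor %}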