@@ -4636,6 +4636,14 @@ def set_gguf_parameters(self):
4636
4636
class MambaModel (TextModel ):
4637
4637
model_arch = gguf .MODEL_ARCH .MAMBA
4638
4638
4639
def __init__(self, dir_model: Path, *args, **kwargs):
    """Initialise the converter, reading hparams from ``config.json`` if needed.

    If the caller passes an ``hparams`` keyword argument that is not ``None``
    it is forwarded unchanged.  Otherwise the hyper-parameters are parsed
    directly from ``<dir_model>/config.json`` so that AutoConfig is not used
    to obtain them.

    Args:
        dir_model: Directory containing the model files (and ``config.json``).
        *args: Positional arguments forwarded to the base-class constructor.
        **kwargs: Keyword arguments forwarded to the base-class constructor;
            ``hparams`` is extracted and re-passed explicitly.
    """
    config = kwargs.pop("hparams", None)
    if config is None:
        # Avoid using AutoConfig for hparams: parse the raw JSON config instead.
        config_text = (dir_model / "config.json").read_text(encoding="utf-8")
        config = json.loads(config_text)
    super().__init__(dir_model, *args, hparams=config, **kwargs)
4646
+
4639
4647
def set_vocab (self ):
4640
4648
vocab_size = self .hparams ["vocab_size" ]
4641
4649
# Round vocab size to next multiple of 8
@@ -4710,6 +4718,100 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
4710
4718
return [(new_name , data_torch )]
4711
4719
4712
4720
4721
@ModelBase.register("Mamba2ForCausalLM")
class Mamba2Model(TextModel):
    """Converter for Mamba-2 checkpoints, registered for ``Mamba2ForCausalLM``."""

    model_arch = gguf.MODEL_ARCH.MAMBA2

    def __init__(self, dir_model: Path, *args, **kwargs):
        """Initialise the converter, reading hparams from ``config.json`` if needed.

        Args:
            dir_model: Directory containing the model files (and ``config.json``).
            *args: Positional arguments forwarded to the base-class constructor.
            **kwargs: Keyword arguments forwarded to the base-class constructor.
                A non-``None`` ``hparams`` entry is used as-is instead of
                reading ``config.json``.
        """
        # Avoid using AutoConfig for hparams
        # It wrongly assumes all Mamba2 models are Mamba-Codestral-7B-v0.1
        hparams = kwargs.pop("hparams", None)
        if hparams is None:
            with open(dir_model / "config.json", "r", encoding="utf-8") as f:
                hparams = json.load(f)
        super().__init__(dir_model, *args, hparams=hparams, **kwargs)

    def _ssm_dims(self) -> tuple[int, int, int]:
        """Return ``(d_model, d_inner, n_group)`` with defaults applied.

        Shared by ``set_gguf_parameters`` and ``modify_tensors`` so that the
        written metadata and the tensor reshapes always agree, including when
        an optional key is present in the config but null/zero.
        """
        d_model = self.find_hparam(["hidden_size", "d_model", "dim"])
        d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
        n_group = self.find_hparam(["n_groups"], optional=True) or 1
        return d_model, d_inner, n_group

    def set_vocab(self):
        """Pad ``vocab_size`` in hparams and load whichever tokenizer is available.

        Raises:
            NotImplementedError: If only ``tokenizer.model.v3`` is present; the
                file has to be renamed to ``tokenizer.model`` first.
        """
        vocab_size = self.hparams["vocab_size"]
        # Round vocab size up to the next multiple of the padding factor (default 16)
        pad_vocab = self.hparams.get("pad_vocab_size_multiple", 16)
        # pad using ceiling division
        # ref: https://stackoverflow.com/a/17511341/22827863
        vocab_size = -(vocab_size // -pad_vocab) * pad_vocab
        self.hparams["vocab_size"] = vocab_size

        if (self.dir_model / "tokenizer.model").is_file():
            self._set_vocab_sentencepiece()
        elif (self.dir_model / "tokenizer.model.v3").is_file():
            # mamba-codestral
            raise NotImplementedError(f"Please rename {self.dir_model / 'tokenizer.model.v3'} to {self.dir_model / 'tokenizer.model'} ")
        elif (self.dir_model / "tokenizer.json").is_file():
            self._set_vocab_gpt2()
        else:
            # Use the GPT-NeoX tokenizer when no tokenizer files are present
            self._set_vocab_builtin("gpt-neox", vocab_size)

    def set_gguf_parameters(self):
        """Write the Mamba-2 hyper-parameters to the GGUF writer.

        Optional config keys fall back to the defaults spelled out below.

        Raises:
            ValueError: If ``d_inner`` is not ``2 * d_model`` or is not a
                multiple of ``head_dim``.
        """
        d_model, d_inner, n_group = self._ssm_dims()
        d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
        d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 128
        head_dim = self.find_hparam(["head_dim"], optional=True) or 64

        rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5

        # Fail early for models which don't have a block expansion factor of 2
        # TODO: does this really matter?
        # Explicit raises (rather than assert) so the checks survive `python -O`.
        if d_inner != 2 * d_model:
            raise ValueError(
                f"unsupported Mamba2 config: expected d_inner == 2 * d_model, "
                f"got d_inner={d_inner}, d_model={d_model}"
            )
        if d_inner % head_dim != 0:
            raise ValueError(
                f"unsupported Mamba2 config: d_inner={d_inner} is not a multiple of head_dim={head_dim}"
            )

        self.gguf_writer.add_context_length(2**20)  # arbitrary value; for those who use the default
        self.gguf_writer.add_embedding_length(d_model)
        self.gguf_writer.add_feed_forward_length(0)  # unused, but seemingly required when loading
        self.gguf_writer.add_head_count(0)  # unused, but seemingly required when loading
        self.gguf_writer.add_block_count(self.block_count)
        self.gguf_writer.add_ssm_conv_kernel(d_conv)
        self.gguf_writer.add_ssm_inner_size(d_inner)
        self.gguf_writer.add_ssm_state_size(d_state)
        self.gguf_writer.add_ssm_time_step_rank(d_inner // head_dim)
        self.gguf_writer.add_ssm_group_count(n_group)
        self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
        self.gguf_writer.add_file_type(self.ftype)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Rename one checkpoint tensor and adjust its shape/values for GGUF.

        Args:
            data_torch: The tensor data as loaded from the checkpoint.
            name: The tensor's name in the checkpoint.
            bid: Block index passed through to ``match_model_tensor_name``.

        Yields:
            A single ``(new_name, tensor)`` pair.
        """
        if name.startswith(("model.backbone", "model.lm_head")):
            # map Mamba-Codestral-7B-v0.1 tensor names to the names used by Mamba-2
            name = name.removeprefix("model.")

        if name.endswith(".dt_bias"):
            name = name.removesuffix(".dt_bias") + ".dt_proj.bias"

        new_name = self.map_tensor_name(name)

        if self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_CONV1D, bid):
            data_torch = data_torch.squeeze()
        elif any(self.match_model_tensor_name(new_name, t, bid, suffix="") for t in [
            gguf.MODEL_TENSOR.SSM_A,
            gguf.MODEL_TENSOR.SSM_D,
        ]):
            # unsqueeze A to use similar shape semantics as Mamba-1
            # (D is also unsqueezed, but for more straightforward broadcast internally)
            data_torch = data_torch.reshape((*data_torch.shape, 1))
        elif self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_NORM, bid):
            # Same lookup as set_gguf_parameters, so a null/zero "n_groups"
            # falls back to 1 here too instead of breaking the division.
            _, d_inner, n_group = self._ssm_dims()
            data_torch = data_torch.reshape((n_group, d_inner // n_group))

        if name.endswith(".A_log"):
            logger.debug("A_log --> A ==> %s", new_name)
            data_torch = -torch.exp(data_torch)

        yield (new_name, data_torch)
4813
+
4814
+
4713
4815
@ModelBase .register ("CohereForCausalLM" )
4714
4816
class CommandR2Model (TextModel ):
4715
4817
model_arch = gguf .MODEL_ARCH .COMMAND_R
@@ -6477,12 +6579,20 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st
6477
6579
# maybe we should fallback to text model's arch in that case, since not many models have both
6478
6580
text_config = hparams .get ("text_config" , {})
6479
6581
vision_config = hparams .get ("vision_config" , {})
6480
- arch = hparams ["architectures" ][0 ]
6582
+ arch = None
6583
+ if (arches := hparams .get ("architectures" )) is not None and len (arches ) > 0 :
6584
+ arch = arches [0 ]
6585
+ elif "ssm_cfg" in hparams :
6586
+ # For non-hf Mamba and Mamba2 models
6587
+ arch = hparams ["ssm_cfg" ].get ("layer" , "Mamba" ) + "ForCausalLM"
6588
+
6481
6589
# if "architectures" is found in the sub-config, use that instead
6482
6590
if model_type == ModelType .TEXT and text_config .get ("architectures" ) is not None :
6483
6591
arch = text_config ["architectures" ][0 ]
6484
6592
elif model_type == ModelType .MMPROJ and vision_config .get ("architectures" ) is not None :
6485
6593
arch = vision_config ["architectures" ][0 ]
6594
+ if arch is None :
6595
+ raise ValueError ("Failed to detect model architecture" )
6486
6596
return arch
6487
6597
6488
6598
0 commit comments