@@ -4586,6 +4586,9 @@ def __init__(self, dir_model: Path, *args, **kwargs):
4586
4586
with open (dir_model / "config.json" , "r" , encoding = "utf-8" ) as f :
4587
4587
hparams = json .load (f )
4588
4588
super ().__init__ (dir_model , * args , hparams = hparams , ** kwargs )
4589
+ self .d_model = self .find_hparam (["hidden_size" , "d_model" , "dim" ])
4590
+ self .d_inner = self .find_hparam (["intermediate_size" , "d_inner" ], optional = True ) or 2 * self .d_model
4591
+ self .n_group = self .hparams .get ("n_groups" , 1 )
4589
4592
4590
4593
def set_vocab (self ):
4591
4594
vocab_size = self .hparams ["vocab_size" ]
@@ -4656,10 +4659,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
4656
4659
# (D is also unsqueezed, but for more straightforward broadcast internally)
4657
4660
data_torch = data_torch .reshape ((* data_torch .shape , 1 ))
4658
4661
elif self .match_model_tensor_name (new_name , gguf .MODEL_TENSOR .SSM_NORM , bid ):
4659
- d_model = self .find_hparam (["hidden_size" , "d_model" , "dim" ])
4660
- d_inner = self .find_hparam (["intermediate_size" , "d_inner" ], optional = True ) or 2 * d_model
4661
- n_group = self .hparams .get ("n_groups" , 1 )
4662
- data_torch = data_torch .reshape ((n_group , d_inner // n_group ))
4662
+ data_torch = data_torch .reshape ((self .n_group , self .d_inner // self .n_group ))
4663
4663
4664
4664
if name .endswith (".A_log" ):
4665
4665
logger .debug ("A_log --> A ==> " + new_name )
@@ -4668,6 +4668,107 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
4668
4668
yield (new_name , data_torch )
4669
4669
4670
4670
4671
@ModelBase.register("BambaForCausalLM")
class BambaModel(Mamba2Model):
    """Bamba is a hybrid SSM + Attention model that uses Mamba2 SSM layers.

    Each block is either an SSM block (converted through the parent class's
    ``modify_tensors``) or an attention block (converted through
    ``LlamaModel.modify_tensors``). Which blocks are attention blocks is read
    from ``attn_layer_indices`` or derived from ``attn_layer_period`` /
    ``attn_layer_offset`` in the model's hparams.
    """
    model_arch = gguf.MODEL_ARCH.BAMBA
    undo_permute = True

    def __init__(self, *args, **kwargs):
        """Set up hparam prefixes, the attention/SSM layer split and SSM sizes.

        All arguments are forwarded unchanged to the parent constructor.

        Raises:
            AssertionError: if neither ``attn_layer_indices`` nor a usable
                ``attn_layer_period`` / ``attn_layer_offset`` pair is present.
        """

        # Hybrid mamba models use a prefix for the mamba-specific params.
        # NOTE: this is assigned before super().__init__() because the
        # overridden find_hparam() below reads it and may be invoked while the
        # base classes are still being constructed.
        # TODO: Extend this if the prefix(es) need to be configurable
        self.hparam_prefixes = ["mamba"]

        super().__init__(*args, **kwargs)

        # Use Llama conversion for attention
        self._transformer_model_class: type[TextModel] = LlamaModel

        # Lists of which layers use ssm vs attention
        self._attn_layers = self.hparams.get("attn_layer_indices", [])
        if not self._attn_layers:
            # No explicit list: derive it from a (period, offset) pair
            attn_period = self.hparams.get("attn_layer_period")
            assert attn_period, "Didn't find attn_layer_indices or attn_layer_period"
            attn_offset = self.hparams.get("attn_layer_offset")
            assert attn_offset is not None, "No attention layer offset set with attn_layer_period"
            self._attn_layers = [
                i for i in range(self.block_count)
                if i % attn_period == attn_offset
            ]
        # Every block that is not an attention block is an SSM block. A local
        # set keeps the membership test constant-time; the attribute itself
        # stays a list.
        attn_layer_set = set(self._attn_layers)
        self._ssm_layers = [
            i for i in range(self.block_count)
            if i not in attn_layer_set
        ]

        # n_group and d_inner are used during reshape_tensors for mamba2
        self.d_model = self.find_hparam(["hidden_size", "d_model"])
        self.n_group = self.find_hparam(["n_groups"])
        self.d_inner = self.find_hparam(["expand"]) * self.d_model

    def find_hparam(self, keys: Iterable[str], *args, **kwargs) -> Any:
        """Look up an hparam by plain key or by any ``<prefix>_<key>`` variant.

        The candidates handed to the parent lookup are the given keys in their
        original order, followed by the prefixed variants grouped by prefix
        (in ``self.hparam_prefixes`` order).

        Args:
            keys: candidate key names; any iterable, including one-shot
                iterators and generators.
            *args, **kwargs: forwarded unchanged to the parent ``find_hparam``.

        Returns:
            Whatever the parent ``find_hparam`` returns for the expanded keys.
        """
        # Materialize once: ``keys`` is walked once per prefix and once more
        # for the un-prefixed names, which would exhaust a one-shot iterator.
        base_keys = list(keys)
        prefixed = [
            "_".join([pfx, k])
            for pfx in self.hparam_prefixes
            for k in base_keys
        ]
        return super().find_hparam(base_keys + prefixed, *args, **kwargs)

    def set_gguf_parameters(self):
        """Write general, SSM, attention and norm hparams to the GGUF writer.

        Raises:
            AssertionError: if ``hidden_act`` is set to something other than
                ``"silu"``, or if the SSM inner size is not a multiple of the
                head dimension.
        """

        ## General Params ##
        self.gguf_writer.add_embedding_length(self.d_model)
        self.gguf_writer.add_block_count(self.block_count)
        self.gguf_writer.add_context_length(self.hparams.get("max_position_embeddings", 0))
        self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])

        ## Mamba mixer params ##
        self.gguf_writer.add_ssm_conv_kernel(self.find_hparam(["conv_kernel", "d_conv"]))
        self.gguf_writer.add_ssm_state_size(self.find_hparam(["state_size", "d_state"]))
        self.gguf_writer.add_ssm_group_count(self.n_group)
        self.gguf_writer.add_ssm_inner_size(self.d_inner)
        # NOTE: The mamba_dt_rank is _not_ the right field for how this is used
        #   in llama.cpp
        self.gguf_writer.add_ssm_time_step_rank(self.find_hparam(["n_heads"]))

        ## Attention params ##
        self.gguf_writer.add_attn_layer_indices(self._attn_layers)
        self.gguf_writer.add_rope_dimension_count(self.hparams["attn_rotary_emb"])
        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
        self.gguf_writer.add_head_count_kv(self.find_hparam(["num_key_value_heads", "n_head_kv"]))

        ## Feed Forward Params ##
        self.gguf_writer.add_layer_norm_rms_eps(
            self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
        )

        ## Validation ##
        # Head dim falls back to 64 when the config does not provide one
        d_head = self.find_hparam(["d_head"], optional=True) or 64
        assert self.hparams.get("hidden_act") in [None, "silu"], "Only SILU activation supported"
        assert self.d_inner % d_head == 0, f"SSM inner size {self.d_inner} not a multiple of head dim {d_head}"

    def modify_tensors(
        self, data_torch: Tensor, name: str, bid: int | None
    ) -> Iterable[tuple[str, Tensor]]:
        """Convert one tensor using the SSM or attention path for its block.

        Args:
            data_torch: the tensor to convert.
            name: the tensor's name in the source checkpoint.
            bid: block index of the tensor, or ``None`` when it does not
                belong to a block.

        Yields:
            ``(new_name, tensor)`` pairs. Tensors whose ``bid`` is in neither
            layer list are yielded unchanged under their mapped name.
        """

        # Determine whether this is a mamba layer or an attention layer
        if bid in self._ssm_layers:
            yield from super().modify_tensors(data_torch, name, bid)
        elif bid in self._attn_layers:
            # Called unbound with this instance as ``self`` so the Llama
            # conversion runs against this model's state.
            yield from self._transformer_model_class.modify_tensors(
                self, data_torch, name, bid
            )
        else:
            yield self.map_tensor_name(name), data_torch
4770
+
4771
+
4671
4772
@ModelBase .register ("CohereForCausalLM" )
4672
4773
class CommandR2Model (TextModel ):
4673
4774
model_arch = gguf .MODEL_ARCH .COMMAND_R
0 commit comments