File tree Expand file tree Collapse file tree 3 files changed +2
-5
lines changed Expand file tree Collapse file tree 3 files changed +2
-5
lines changed Original file line number Diff line number Diff line change @@ -218,7 +218,7 @@ def from_model_architecture(model_architecture):
218
218
return BertModel
219
219
if model_architecture == "NomicBertModel" :
220
220
return NomicBertModel
221
- if model_architecture == "MambaForCausalLM" :
221
+ if model_architecture in ( "MambaForCausalLM" , "MambaLMHeadModel" ) :
222
222
return MambaModel
223
223
return Model
224
224
@@ -279,7 +279,7 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
279
279
return gguf .MODEL_ARCH .BERT
280
280
if arch == "NomicBertModel" :
281
281
return gguf .MODEL_ARCH .NOMIC_BERT
282
- if arch == "MambaForCausalLM" :
282
+ if arch in ( "MambaForCausalLM" , "MambaLMHeadModel" ) :
283
283
return gguf .MODEL_ARCH .MAMBA
284
284
285
285
raise NotImplementedError (f'Architecture "{ arch } " not supported!' )
Original file line number Diff line number Diff line change @@ -205,7 +205,6 @@ class MODEL_TENSOR(IntEnum):
205
205
MODEL_TENSOR .FFN_DOWN_EXP : "blk.{bid}.ffn_down.{xid}" ,
206
206
MODEL_TENSOR .FFN_UP_EXP : "blk.{bid}.ffn_up.{xid}" ,
207
207
MODEL_TENSOR .LAYER_OUT_NORM : "blk.{bid}.layer_output_norm" ,
208
- # FIXME: NAMES FOR MAMBA ARE NOT FINAL
209
208
MODEL_TENSOR .SSM_IN : "blk.{bid}.ssm_in" ,
210
209
MODEL_TENSOR .SSM_CONV1D : "blk.{bid}.ssm_conv1d" ,
211
210
MODEL_TENSOR .SSM_X : "blk.{bid}.ssm_x" ,
Original file line number Diff line number Diff line change @@ -393,8 +393,6 @@ enum llm_tensor {
393
393
LLM_TENSOR_ATTN_Q_NORM,
394
394
LLM_TENSOR_ATTN_K_NORM,
395
395
LLM_TENSOR_LAYER_OUT_NORM,
396
- // TODO: maybe use longer names?
397
- // TODO: can the in_proj and/or the out_proj instead re-use some of the above types?
398
396
LLM_TENSOR_SSM_IN,
399
397
LLM_TENSOR_SSM_CONV1D,
400
398
LLM_TENSOR_SSM_X,
You can’t perform that action at this time.
0 commit comments