
Commit 5a8961a

convert : for Mamba, also consider the "MambaLMHeadModel" arch name
It's the class name used by the official implementation, though they don't use it (yet) in the "architectures" field of config.json.
1 parent: 494d075
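For context, the converter chooses a model handler from the architecture name it reads out of the checkpoint's config.json. A minimal sketch of that lookup, with an assumed file layout and a hypothetical helper name (detect_architecture is not part of this commit):

import json
from pathlib import Path

def detect_architecture(model_dir: str) -> str:
    # Read the Hugging Face config and return the first entry of its
    # "architectures" list, i.e. the model class name.
    config = json.loads((Path(model_dir) / "config.json").read_text())
    # Official Mamba checkpoints don't (yet) set "architectures", so the
    # fallback to the implementation's class name is an assumption made
    # here to mirror the intent of this commit.
    return config.get("architectures", ["MambaLMHeadModel"])[0]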

3 files changed: +2 −5 lines

convert-hf-to-gguf.py

Lines changed: 2 additions & 2 deletions
@@ -218,7 +218,7 @@ def from_model_architecture(model_architecture):
             return BertModel
         if model_architecture == "NomicBertModel":
             return NomicBertModel
-        if model_architecture == "MambaForCausalLM":
+        if model_architecture in ("MambaForCausalLM", "MambaLMHeadModel"):
             return MambaModel
         return Model

@@ -279,7 +279,7 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
             return gguf.MODEL_ARCH.BERT
         if arch == "NomicBertModel":
             return gguf.MODEL_ARCH.NOMIC_BERT
-        if arch == "MambaForCausalLM":
+        if arch in ("MambaForCausalLM", "MambaLMHeadModel"):
             return gguf.MODEL_ARCH.MAMBA

         raise NotImplementedError(f'Architecture "{arch}" not supported!')
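A quick sanity check of the patched dispatch, assuming it runs inside convert-hf-to-gguf.py where Model and MambaModel are defined:

# Both spellings of the Mamba class name should now resolve to the same
# handler (names taken from the diff above).
for arch in ("MambaForCausalLM", "MambaLMHeadModel"):
    assert Model.from_model_architecture(arch) is MambaModel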

gguf-py/gguf/constants.py

Lines changed: 0 additions & 1 deletion
@@ -205,7 +205,6 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.FFN_DOWN_EXP:   "blk.{bid}.ffn_down.{xid}",
     MODEL_TENSOR.FFN_UP_EXP:     "blk.{bid}.ffn_up.{xid}",
     MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
-    # FIXME: NAMES FOR MAMBA ARE NOT FINAL
     MODEL_TENSOR.SSM_IN:         "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D:     "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X:          "blk.{bid}.ssm_x",
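These entries are string templates keyed by tensor type; the converter expands each one with the block index when naming tensors. A small illustration (template copied from the diff; the loop bound is arbitrary):

# Expand the SSM input-projection template for the first two blocks.
for bid in range(2):
    print("blk.{bid}.ssm_in".format(bid=bid))
# -> blk.0.ssm_in
# -> blk.1.ssm_in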

llama.cpp

Lines changed: 0 additions & 2 deletions
@@ -393,8 +393,6 @@ enum llm_tensor {
     LLM_TENSOR_ATTN_Q_NORM,
     LLM_TENSOR_ATTN_K_NORM,
     LLM_TENSOR_LAYER_OUT_NORM,
-    // TODO: maybe use longer names?
-    // TODO: can the in_proj and/or the out_proj instead re-use some of the above types?
     LLM_TENSOR_SSM_IN,
     LLM_TENSOR_SSM_CONV1D,
     LLM_TENSOR_SSM_X,
