Add OPT model support - Add OPT architecture support in C++ code - Im… #13799

Open · wants to merge 1 commit into master
34 changes: 30 additions & 4 deletions convert_hf_to_gguf.py
@@ -803,6 +803,12 @@ def get_vocab_base_pre(self, tokenizer) -> str:
if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec":
# ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base
res = "seed-coder"
if chkhsh == "7f2212c1b7fec62b4b75447509a4ecc8acd82813ce90d715dd99c1460a52d978":
# ref: https://huggingface.co/facebook/opt-13b
res = "gpt-2"
if chkhsh == "2c934e5e1c8275b75011b9942836389a87eaa1a63116104e52424515e7649c46":
# ref: https://huggingface.co/SousChef/OPT-13B-Erebus
res = "gpt-2"

if res is None:
logger.warning("\n")
@@ -3902,7 +3908,7 @@ def set_vocab(self):
def set_gguf_parameters(self):
hparams = self.hparams
block_count = hparams["num_hidden_layers"]

self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
self.gguf_writer.add_embedding_length(hparams["hidden_size"])
self.gguf_writer.add_block_count(block_count)
@@ -4014,7 +4020,7 @@ def set_gguf_parameters(self):

def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
del bid # unused

if name.startswith("language_model."):
name = name.replace("language_model.", "")

@@ -4520,7 +4526,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
data_torch = LlamaModel.permute(data_torch, n_head, n_head)
if name.endswith("k_proj.weight"):
data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)

return [(self.map_tensor_name(name), data_torch)]


@@ -5231,7 +5237,7 @@ def set_gguf_parameters(self):

def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
del bid # unused

# T5 based models contain shared token embeddings tensors saved randomly as either "encoder.embed_tokens.weight",
# "decoder.embed_tokens.weight" or "shared.weight" tensor. In some models there are even multiple of them stored
# in the safetensors files. We use the first tensor from these three as the token embeddings for both encoder
@@ -6124,6 +6130,26 @@ def __torch_function__(cls, func, types, args=(), kwargs=None):
return cls._wrap_fn(func)(*args, **kwargs)


@ModelBase.register("OPTForCausalLM")
class OPTModel(TextModel):
model_arch = gguf.MODEL_ARCH.OPT

def set_vocab(self):
# OPT models use the GPT-2 BPE tokenizer
self._set_vocab_gpt2()

def set_gguf_parameters(self):
super().set_gguf_parameters()
hparams = self.hparams

# OPT-specific parameters that are not handled by the base class
self.gguf_writer.add_vocab_size(hparams["vocab_size"])

def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
# OPT uses the standard tensor name mapping; no per-tensor transforms are needed
return [(self.map_tensor_name(name), data_torch)]


def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Convert a huggingface model to a GGML compatible file")
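The OPTModel class above only registers the architecture, reuses the GPT-2 vocab, and records vocab_size; the new branches in get_vocab_base_pre key the pre-tokenizer choice on a hash of the tokenizer's output over a fixed canary string. A minimal sketch of that hashing scheme, assuming transformers is installed (the canary below is a placeholder, so the digest will not match the ones added above):

```python
# Sketch of the chkhsh derivation in get_vocab_base_pre. The real script
# hashes a long fixed test string, so this placeholder yields a different digest.
from hashlib import sha256

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-13b")
chktxt = "Hello World!\n\n 3.14 üñé"  # placeholder canary string
chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
print(chkhsh)  # compared against the known digests to select res = "gpt-2"
```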
16 changes: 16 additions & 0 deletions gguf-py/gguf/constants.py
@@ -340,6 +340,7 @@ class MODEL_ARCH(IntEnum):
WAVTOKENIZER_DEC = auto()
PLM = auto()
BAILINGMOE = auto()
OPT = auto()


class VISION_PROJECTOR_TYPE(IntEnum):
@@ -620,6 +621,7 @@ class MODEL_TENSOR(IntEnum):
MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
MODEL_ARCH.PLM: "plm",
MODEL_ARCH.BAILINGMOE: "bailingmoe",
MODEL_ARCH.OPT: "opt",
}

VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -2040,6 +2042,20 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.FFN_DOWN_SHEXP,
MODEL_TENSOR.FFN_UP_SHEXP,
],
MODEL_ARCH.OPT: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.POS_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
# TODO
}

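On the Python side, each MODEL_TENSOR in the list registered above pairs with a name template in TENSOR_NAMES, which fixes exactly which tensors an OPT GGUF file will carry. A quick sketch to print them, assuming a gguf-py checkout with this patch applied:

```python
import gguf

# Every tensor the OPT architecture declares, shown as its GGUF name template.
for tensor in gguf.MODEL_TENSORS[gguf.MODEL_ARCH.OPT]:
    print(gguf.TENSOR_NAMES[tensor])  # e.g. "token_embd", "blk.{bid}.attn_q"
```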
13 changes: 12 additions & 1 deletion gguf-py/gguf/tensor_mapping.py
@@ -31,6 +31,7 @@ class TensorNameMap:
"model.embeddings", # rwkv7
"model.word_embeddings", # bailingmoe
"language_model.model.embed_tokens", # llama4
"model.decoder.embed_tokens", # opt
),

# Token type embeddings
@@ -56,6 +57,7 @@ class TensorNameMap:
"transformer.wpe", # gpt2
"embeddings.position_embeddings", # bert
"wpe", # gpt2
"model.decoder.embed_positions", # opt
),

# Output
@@ -68,7 +70,7 @@ class TensorNameMap:
"output_layer", # chatglm
"head", # rwkv
"head.out", # wavtokenizer
"lm_head", # llama4
"lm_head", # llama4 opt
),

# Output norm
@@ -92,6 +94,7 @@ class TensorNameMap:
"model.ln_out", # rwkv7
"backbone.final_layer_norm", # wavtokenizer
"model.norm", # llama4
"model.decoder.final_layer_norm", # opt
),

# Rope frequencies
@@ -134,6 +137,7 @@ class TensorNameMap:
"rwkv.blocks.{bid}.ln1", # rwkv6
"model.layers.{bid}.ln1", # rwkv7
"model.layers.{bid}.input_layernorm", # llama4
"model.decoder.layers.{bid}.self_attn_layer_norm", # opt
),

# Attention norm 2
@@ -174,6 +178,7 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.multi_head_attention.query",# Grok
"transformer.h.{bid}.attn.attention.q_proj", # exaone
"model.layers.{bid}.self_attn.q_proj", # llama4
"model.decoder.layers.{bid}.self_attn.q_proj", # opt
),

# Attention key
@@ -189,6 +194,7 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.multi_head_attention.key",# Grok
"transformer.h.{bid}.attn.attention.k_proj", # exaone
"model.layers.{bid}.self_attn.k_proj", # llama4
"model.decoder.layers.{bid}.self_attn.k_proj", # opt
),

# Attention value
@@ -203,6 +209,7 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.multi_head_attention.value",# Grok
"transformer.h.{bid}.attn.attention.v_proj", # exaone
"model.layers.{bid}.self_attn.v_proj", # llama4
"model.decoder.layers.{bid}.self_attn.v_proj", # opt
),

# Attention output
@@ -230,6 +237,7 @@ class TensorNameMap:
"transformer.layers.{bid}.attn.out_proj", # openelm
"transformer.h.{bid}.attn.attention.out_proj", # exaone
"model.layers.{bid}.self_attn.o_proj", # llama4
"model.decoder.layers.{bid}.self_attn.out_proj", # opt
),

# Attention output norm
@@ -269,6 +277,7 @@ class TensorNameMap:
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
"transformer.layers.{bid}.ffn_norm", # openelm
"model.layers.{bid}.post_attention_layernorm", # llama4
"model.decoder.layers.{bid}.final_layer_norm", # opt
),

# Post feed-forward norm
@@ -330,6 +339,7 @@ class TensorNameMap:
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
"transformer.h.{bid}.mlp.c_fc_1", # exaone
"model.layers.{bid}.feed_forward.up_proj", # llama4
"model.decoder.layers.{bid}.fc1", # opt
),

MODEL_TENSOR.FFN_UP_EXP: (
@@ -411,6 +421,7 @@ class TensorNameMap:
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
"model.layers.h.{bid}.mlp.c_proj", # exaone
"model.layers.{bid}.feed_forward.down_proj", # llama4
"model.decoder.layers.{bid}.fc2", # opt
),

MODEL_TENSOR.FFN_DOWN_EXP: (
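Each "# opt" entry above teaches TensorNameMap to translate Hugging Face names under model.decoder.* into those templates, which is what OPTModel.modify_tensors relies on via map_tensor_name. A sketch of the resolution, assuming gguf-py's get_tensor_name_map helper with this patch applied:

```python
import gguf

# opt-13b has 40 decoder layers; any block count covering {bid} works here.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.OPT, 40)

for hf_name in (
    "model.decoder.embed_tokens.weight",
    "model.decoder.layers.0.self_attn.q_proj.weight",
    "model.decoder.layers.0.fc1.weight",
):
    # try_suffixes strips ".weight"/".bias" before the lookup, as the converter does
    print(hf_name, "->", tmap.get_name(hf_name, try_suffixes=(".weight", ".bias")))

# expected: token_embd.weight, blk.0.attn_q.weight, blk.0.ffn_up.weight
```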
18 changes: 18 additions & 0 deletions src/llama-arch.cpp
@@ -72,6 +72,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
{ LLM_ARCH_PLM, "plm" },
{ LLM_ARCH_BAILINGMOE, "bailingmoe" },
{ LLM_ARCH_OPT, "opt" },
{ LLM_ARCH_UNKNOWN, "(unknown)" },
};

@@ -1530,6 +1531,23 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
{ LLM_TENSOR_POS_NET_ATTN_OUT, "posnet.%d.attn_output" },
},
},
{
LLM_ARCH_OPT,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_POS_EMBD, "position_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
{ LLM_TENSOR_OUTPUT, "output" },
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
},
},
{
LLM_ARCH_BAILINGMOE,
{
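The C++ LLM_TENSOR_NAMES entries above must match the Python-side templates exactly, because llama.cpp looks tensors up by these strings at load time. A hedged post-conversion check (the GGUF file name is an assumption):

```python
from gguf import GGUFReader

# Inspect a converted file and confirm the names the C++ table expects exist.
reader = GGUFReader("opt-13b-f16.gguf")  # hypothetical convert_hf_to_gguf.py output
names = {tensor.name for tensor in reader.tensors}

for expected in ("token_embd.weight", "position_embd.weight",
                 "blk.0.attn_q.weight", "blk.0.ffn_up.weight", "output_norm.weight"):
    print(expected, "present" if expected in names else "MISSING")
```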
1 change: 1 addition & 0 deletions src/llama-arch.h
@@ -76,6 +76,7 @@ enum llm_arch {
LLM_ARCH_WAVTOKENIZER_DEC,
LLM_ARCH_PLM,
LLM_ARCH_BAILINGMOE,
LLM_ARCH_OPT,
LLM_ARCH_UNKNOWN,
};
