
Commit 8d0f632

Add EXAONE 4.0 model architecture
1 parent 576c82e commit 8d0f632

12 files changed (+369, -25 lines)

convert_hf_to_gguf.py

Lines changed: 74 additions & 0 deletions
@@ -761,6 +761,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "4e2b24cc4770243d65a2c9ec19770a72f08cffc161adbb73fcbb6b7dd45a0aae":
             # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct
             res = "exaone"
+        if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
+            # ref: temporary model
+            res = "exaone4"
         if chkhsh == "fcace8b9cac38ce847670c970cd5892031a753a1ef381abd1d9af00f713da085":
             # ref: https://huggingface.co/microsoft/phi-2
             res = "phi-2"
@@ -6388,6 +6391,77 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
                 yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))


+@ModelBase.register("Exaone4ForCausalLM")
+class Exaone4Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.EXAONE4
+
+    def set_vocab(self):
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        special_vocab.chat_template = "exaone4"
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+
+        self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 131072))
+        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+        self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 4 * hparams["hidden_size"]))
+        self.gguf_writer.add_block_count(hparams["num_hidden_layers"])
+        self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(hparams.get("num_key_value_heads", hparams["num_attention_heads"]))
+        self.gguf_writer.add_layer_norm_rms_eps(hparams.get("layer_norm_epsilon", 1e-5))
+        self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1_000_000.0))
+        self.gguf_writer.add_key_length(hparams["head_dim"])
+        self.gguf_writer.add_value_length(hparams["head_dim"])
+        self.gguf_writer.add_file_type(self.ftype)
+
+        if hparams.get("sliding_window") is not None:
+            self.gguf_writer.add_sliding_window(hparams["sliding_window"])
+            # TODO: how should the sliding window pattern be handled?
+
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
+            if rope_scaling.get("rope_type", '').lower() == "llama3":
+                base = self.hparams.get("rope_theta", 1_000_000.0)
+                if (dim := self.hparams.get("head_dim")) is None:
+                    dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+                freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
+
+                factor = rope_scaling.get("factor", 16.0)
+                low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
+                high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
+                old_context_len = self.hparams.get("original_max_position_embeddings", 8192)
+
+                low_freq_wavelen = old_context_len / low_freq_factor
+                high_freq_wavelen = old_context_len / high_freq_factor
+
+                rope_factors = []
+                for freq in freqs:
+                    wavelen = 2 * math.pi / freq
+                    if wavelen < high_freq_wavelen:
+                        rope_factors.append(1)
+                    elif wavelen > low_freq_wavelen:
+                        rope_factors.append(factor)
+                    else:
+                        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
+                        rope_factors.append(1 / ((1 - smooth) / factor + smooth))
+
+                yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
+
+
 @ModelBase.register("GraniteForCausalLM")
 class GraniteModel(LlamaModel):
     """Conversion for IBM's GraniteForCausalLM"""

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
@@ -114,6 +114,7 @@ class TOKENIZER_TYPE(IntEnum):
     {'name': "bloom", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom", },
     {'name': "gpt3-finnish", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/TurkuNLP/gpt3-finnish-small", },
     {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
+    {"name": "exaone4", "tokt": TOKENIZER_TYPE.BPE, "repo": "temporary model", },
     {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
     {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
     {"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},

gguf-py/gguf/constants.py

Lines changed: 19 additions & 0 deletions
@@ -350,6 +350,7 @@ class MODEL_ARCH(IntEnum):
     JAIS = auto()
     NEMOTRON = auto()
     EXAONE = auto()
+    EXAONE4 = auto()
     GRANITE = auto()
     GRANITE_MOE = auto()
     GRANITE_HYBRID = auto()
@@ -660,6 +661,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.JAIS: "jais",
     MODEL_ARCH.NEMOTRON: "nemotron",
     MODEL_ARCH.EXAONE: "exaone",
+    MODEL_ARCH.EXAONE4: "exaone4",
     MODEL_ARCH.GRANITE: "granite",
     MODEL_ARCH.GRANITE_MOE: "granitemoe",
     MODEL_ARCH.GRANITE_HYBRID: "granitehybrid",
@@ -2113,6 +2115,23 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.EXAONE4: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_POST_NORM,
+    ],
     MODEL_ARCH.GRANITE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
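
Once these constants are in place, the registration can be sanity-checked from Python (assuming the gguf-py package from this tree is importable):

import gguf

arch = gguf.MODEL_ARCH.EXAONE4
print(gguf.MODEL_ARCH_NAMES[arch])                 # -> "exaone4"
print([t.name for t in gguf.MODEL_TENSORS[arch]])  # the 15 tensor kinds listed above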

gguf-py/gguf/tensor_mapping.py

Lines changed: 14 additions & 14 deletions
@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte",  # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings",  # falcon
             "word_embeddings",  # bloom
-            "model.embed_tokens",  # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 granite-hybrid
+            "model.embed_tokens",  # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 granite-hybrid exaone4
             "tok_embeddings",  # llama-pth
             "embeddings.word_embeddings",  # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
@@ -62,7 +62,7 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out",  # gptneox
-            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
+            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone exaone4 olmoe olmo2 phimoe
             "output",  # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",  # phi2
@@ -76,7 +76,7 @@ class TensorNameMap:
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm",  # gptneox
             "transformer.ln_f",  # gpt2 gpt-j falcon jais exaone
-            "model.norm",  # llama-hf baichuan internlm2 olmoe olmo2 phimoe
+            "model.norm",  # llama-hf baichuan internlm2 olmoe olmo2 phimoe exaone4
             "norm",  # llama-pth
             "transformer.norm_f",  # mpt dbrx
             "ln_f",  # refact bloom qwen gpt2
@@ -168,7 +168,7 @@ class TensorNameMap:

         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.q_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wq",  # llama-pth
             "encoder.layer.{bid}.attention.self.query",  # bert
@@ -183,7 +183,7 @@ class TensorNameMap:

         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.k_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wk",  # llama-pth
             "encoder.layer.{bid}.attention.self.key",  # bert
@@ -199,7 +199,7 @@ class TensorNameMap:

         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "layers.{bid}.attention.wv",  # llama-pth
             "encoder.layer.{bid}.attention.self.value",  # bert
             "transformer.layer.{bid}.attention.v_lin",  # distillbert
@@ -219,7 +219,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj",  # mpt
             "transformer.h.{bid}.self_attention.dense",  # falcon
             "h.{bid}.self_attention.dense",  # bloom
-            "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.linear_attn",  # deci
             "layers.{bid}.attention.wo",  # llama-pth
             "encoder.layer.{bid}.attention.output.dense",  # bert
@@ -252,7 +252,7 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2 # ge
+            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2 exaone4 # ge
             "model.layers.{bid}.post_self_attn_layernorm",  # glm-4-0414
         ),

@@ -293,7 +293,7 @@ class TensorNameMap:

         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
-            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2
+            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2 exaone4
             "model.layers.{bid}.post_mlp_layernorm",  # glm-4-0414
             "model.layers.{bid}.feed_forward.up_proj",
         ),
@@ -325,7 +325,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.up_proj",  # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h",  # falcon
             "h.{bid}.mlp.dense_h_to_4h",  # bloom
-            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact nemotron olmo2
+            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact nemotron olmo2 exaone4
             "layers.{bid}.feed_forward.w3",  # llama-pth
             "encoder.layer.{bid}.intermediate.dense",  # bert
             "transformer.layer.{bid}.ffn.lin1",  # distillbert
@@ -378,7 +378,7 @@ class TensorNameMap:

         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2
+            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2 exaone4
             "layers.{bid}.feed_forward.w1",  # llama-pth
             "transformer.h.{bid}.mlp.w2",  # qwen
             "transformer.h.{bid}.mlp.c_fc2",  # jais
@@ -415,7 +415,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.down_proj",  # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h",  # falcon
             "h.{bid}.mlp.dense_4h_to_h",  # bloom
-            "model.layers.{bid}.mlp.down_proj",  # llama-hf nemotron olmo2
+            "model.layers.{bid}.mlp.down_proj",  # llama-hf nemotron olmo2 exaone4
             "layers.{bid}.feed_forward.w2",  # llama-pth
             "encoder.layer.{bid}.output.dense",  # bert
             "transformer.layer.{bid}.ffn.lin2",  # distillbert
@@ -462,7 +462,7 @@ class TensorNameMap:
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm",  # persimmon
             "model.layers.{bid}.self_attn.query_layernorm",  # hunyuan
-            "model.layers.{bid}.self_attn.q_norm",  # cohere olmoe chameleon olmo2
+            "model.layers.{bid}.self_attn.q_norm",  # cohere olmoe chameleon olmo2 exaone4
             "transformer.blocks.{bid}.attn.q_ln",  # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm",  # openelm
@@ -472,7 +472,7 @@ class TensorNameMap:
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",  # persimmon
             "model.layers.{bid}.self_attn.key_layernorm",  # hunyuan
-            "model.layers.{bid}.self_attn.k_norm",  # cohere olmoe chameleon olmo2
+            "model.layers.{bid}.self_attn.k_norm",  # cohere olmoe chameleon olmo2 exaone4
             "transformer.blocks.{bid}.attn.k_ln",  # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm",  # openelm

include/llama.h

Lines changed: 12 additions & 11 deletions
@@ -107,17 +107,18 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
         LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
         LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
-        LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
-        LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
-        LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
-        LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
-        LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
-        LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
-        LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
-        LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
-        LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
-        LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
-        LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 36,
+        LLAMA_VOCAB_PRE_TYPE_EXAONE4 = 26,
+        LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 27,
+        LLAMA_VOCAB_PRE_TYPE_MINERVA = 28,
+        LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 29,
+        LLAMA_VOCAB_PRE_TYPE_GPT4O = 30,
+        LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 31,
+        LLAMA_VOCAB_PRE_TYPE_TRILLION = 32,
+        LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 33,
+        LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 34,
+        LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 35,
+        LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 36,
+        LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 37,
     };

     enum llama_rope_type {

src/llama-arch.cpp

Lines changed: 21 additions & 0 deletions
@@ -67,6 +67,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_JAIS, "jais" },
     { LLM_ARCH_NEMOTRON, "nemotron" },
     { LLM_ARCH_EXAONE, "exaone" },
+    { LLM_ARCH_EXAONE4, "exaone4" },
     { LLM_ARCH_RWKV6, "rwkv6" },
     { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
     { LLM_ARCH_RWKV7, "rwkv7" },
@@ -1474,6 +1475,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_EXAONE4,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+        }
+    },
     {
         LLM_ARCH_RWKV6,
         {
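
The C++ LLM_TENSOR_NAMES entries above have to agree with gguf-py's name templates so that tensors written by the converter are found again at load time. A hedged way to eyeball this from the Python side, plus a check that a converted file carries the expected architecture string ("exaone4.gguf" is a hypothetical converter output, and the field decoding reflects the gguf-py reader layout as I understand it):

import gguf
from gguf import GGUFReader

# Python-side name templates for manual comparison with LLM_TENSOR_NAMES
# ("{bid}" on this side corresponds to "%d" on the C++ side).
for t in gguf.MODEL_TENSORS[gguf.MODEL_ARCH.EXAONE4]:
    print(gguf.TENSOR_NAMES[t])

# Assumption: "exaone4.gguf" was produced by convert_hf_to_gguf.py.
reader = GGUFReader("exaone4.gguf")
field = reader.fields["general.architecture"]
print(bytes(field.parts[-1]).decode("utf-8"))  # expected: "exaone4"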

src/llama-arch.h

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ enum llm_arch {
     LLM_ARCH_JAIS,
     LLM_ARCH_NEMOTRON,
     LLM_ARCH_EXAONE,
+    LLM_ARCH_EXAONE4,
     LLM_ARCH_RWKV6,
     LLM_ARCH_RWKV6QWEN2,
     LLM_ARCH_RWKV7,
