
Commit cd722ae

Add EXAONE 4.0 model architecture

1 parent 6efcd65 commit cd722ae

12 files changed: +369 -25 lines changed

convert_hf_to_gguf.py

Lines changed: 74 additions & 0 deletions

@@ -761,6 +761,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "4e2b24cc4770243d65a2c9ec19770a72f08cffc161adbb73fcbb6b7dd45a0aae":
             # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct
             res = "exaone"
+        if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
+            # ref: temporary model
+            res = "exaone4"
         if chkhsh == "fcace8b9cac38ce847670c970cd5892031a753a1ef381abd1d9af00f713da085":
             # ref: https://huggingface.co/microsoft/phi-2
             res = "phi-2"

@@ -6254,6 +6257,77 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
                 yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))


+@ModelBase.register("Exaone4ForCausalLM")
+class Exaone4Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.EXAONE4
+
+    def set_vocab(self):
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        special_vocab.chat_template = "exaone4"
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+
+        self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 131072))
+        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+        self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 4 * hparams["hidden_size"]))
+        self.gguf_writer.add_block_count(hparams["num_hidden_layers"])
+        self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(hparams.get("num_key_value_heads", hparams["num_attention_heads"]))
+        self.gguf_writer.add_layer_norm_rms_eps(hparams.get("layer_norm_epsilon", 1e-5))
+        self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1_000_000.0))
+        self.gguf_writer.add_key_length(hparams["head_dim"])
+        self.gguf_writer.add_value_length(hparams["head_dim"])
+        self.gguf_writer.add_file_type(self.ftype)
+
+        if hparams.get("sliding_window") is not None:
+            self.gguf_writer.add_sliding_window(hparams["sliding_window"])
+            # how should the sliding window pattern be handled?
+
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
+            if rope_scaling.get("rope_type", '').lower() == "llama3":
+                base = self.hparams.get("rope_theta", 1_000_000.0)
+                if (dim := self.hparams.get("head_dim")) is None:
+                    dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+                freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
+
+                factor = rope_scaling.get("factor", 16.0)
+                low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
+                high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
+                old_context_len = self.hparams.get("original_max_position_embeddings", 8192)
+
+                low_freq_wavelen = old_context_len / low_freq_factor
+                high_freq_wavelen = old_context_len / high_freq_factor
+
+                rope_factors = []
+                for freq in freqs:
+                    wavelen = 2 * math.pi / freq
+                    if wavelen < high_freq_wavelen:
+                        rope_factors.append(1)
+                    elif wavelen > low_freq_wavelen:
+                        rope_factors.append(factor)
+                    else:
+                        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
+                        rope_factors.append(1 / ((1 - smooth) / factor + smooth))
+
+                yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
+
+
 @ModelBase.register("GraniteForCausalLM")
 class GraniteModel(LlamaModel):
     """Conversion for IBM's GraniteForCausalLM"""

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions

@@ -114,6 +114,7 @@ class TOKENIZER_TYPE(IntEnum):
     {'name': "bloom", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom", },
     {'name': "gpt3-finnish", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/TurkuNLP/gpt3-finnish-small", },
     {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
+    {"name": "exaone4", "tokt": TOKENIZER_TYPE.BPE, "repo": "temporary model", },
     {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
     {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
     {"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},

gguf-py/gguf/constants.py

Lines changed: 19 additions & 0 deletions

@@ -348,6 +348,7 @@ class MODEL_ARCH(IntEnum):
     JAIS = auto()
     NEMOTRON = auto()
     EXAONE = auto()
+    EXAONE4 = auto()
     GRANITE = auto()
     GRANITE_MOE = auto()
     CHAMELEON = auto()

@@ -653,6 +654,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.JAIS: "jais",
     MODEL_ARCH.NEMOTRON: "nemotron",
     MODEL_ARCH.EXAONE: "exaone",
+    MODEL_ARCH.EXAONE4: "exaone4",
     MODEL_ARCH.GRANITE: "granite",
     MODEL_ARCH.GRANITE_MOE: "granitemoe",
     MODEL_ARCH.CHAMELEON: "chameleon",

@@ -2073,6 +2075,23 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.EXAONE4: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_POST_NORM,
+    ],
     MODEL_ARCH.GRANITE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
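
The per-architecture tensor list above is what gguf-py consults when writing an EXAONE 4.0 file. A small sketch of how these constants resolve to on-disk tensor names, assuming a gguf-py checkout that already contains this commit:

import gguf

arch = gguf.MODEL_ARCH.EXAONE4
print(gguf.MODEL_ARCH_NAMES[arch])                  # -> "exaone4"

# Formatted GGUF tensor names for block 0 of the new architecture.
for tensor in gguf.MODEL_TENSORS[arch]:
    print(gguf.TENSOR_NAMES[tensor].format(bid=0))  # e.g. "blk.0.attn_q_norm"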

gguf-py/gguf/tensor_mapping.py

Lines changed: 14 additions & 14 deletions

@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings", # falcon
             "word_embeddings", # bloom
-            "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
+            "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 exaone4
             "tok_embeddings", # llama-pth
             "embeddings.word_embeddings", # bert nomic-bert
             "language_model.embedding.word_embeddings", # persimmon

@@ -62,7 +62,7 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out", # gptneox
-            "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
+            "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone exaone4 olmoe olmo2 phimoe
             "output", # llama-pth bloom internlm2
             "word_embeddings_for_head", # persimmon
             "lm_head.linear", # phi2

@@ -76,7 +76,7 @@ class TensorNameMap:
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm", # gptneox
             "transformer.ln_f", # gpt2 gpt-j falcon jais exaone
-            "model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe
+            "model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe exaone4
             "norm", # llama-pth
             "transformer.norm_f", # mpt dbrx
             "ln_f", # refact bloom qwen gpt2

@@ -168,7 +168,7 @@ class TensorNameMap:

         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.q_proj_no_perm", # llama-custom
             "layers.{bid}.attention.wq", # llama-pth
             "encoder.layer.{bid}.attention.self.query", # bert

@@ -183,7 +183,7 @@ class TensorNameMap:

         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.k_proj_no_perm", # llama-custom
             "layers.{bid}.attention.wk", # llama-pth
             "encoder.layer.{bid}.attention.self.key", # bert

@@ -199,7 +199,7 @@ class TensorNameMap:

         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "layers.{bid}.attention.wv", # llama-pth
             "encoder.layer.{bid}.attention.self.value", # bert
             "transformer.layer.{bid}.attention.v_lin", # distillbert

@@ -219,7 +219,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj", # mpt
             "transformer.h.{bid}.self_attention.dense", # falcon
             "h.{bid}.self_attention.dense", # bloom
-            "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.linear_attn", # deci
             "layers.{bid}.attention.wo", # llama-pth
             "encoder.layer.{bid}.attention.output.dense", # bert

@@ -252,7 +252,7 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge
+            "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 exaone4 # ge
             "model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414
         ),

@@ -290,7 +290,7 @@ class TensorNameMap:

         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
-            "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
+            "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2 exaone4
             "model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
         ),

@@ -321,7 +321,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.up_proj", # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
             "h.{bid}.mlp.dense_h_to_4h", # bloom
-            "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
+            "model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2 exaone4
             "layers.{bid}.feed_forward.w3", # llama-pth
             "encoder.layer.{bid}.intermediate.dense", # bert
             "transformer.layer.{bid}.ffn.lin1", # distillbert

@@ -373,7 +373,7 @@ class TensorNameMap:

         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
+            "model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2 exaone4
             "layers.{bid}.feed_forward.w1", # llama-pth
             "transformer.h.{bid}.mlp.w2", # qwen
             "transformer.h.{bid}.mlp.c_fc2", # jais

@@ -410,7 +410,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.down_proj", # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
             "h.{bid}.mlp.dense_4h_to_h", # bloom
-            "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
+            "model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2 exaone4
             "layers.{bid}.feed_forward.w2", # llama-pth
             "encoder.layer.{bid}.output.dense", # bert
             "transformer.layer.{bid}.ffn.lin2", # distillbert

@@ -457,7 +457,7 @@ class TensorNameMap:
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm", # persimmon
             "model.layers.{bid}.self_attn.query_layernorm", # hunyuan
-            "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
+            "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2 exaone4
             "transformer.blocks.{bid}.attn.q_ln", # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm", # openelm

@@ -467,7 +467,7 @@ class TensorNameMap:
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm", # persimmon
             "model.layers.{bid}.self_attn.key_layernorm", # hunyuan
-            "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
+            "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2 exaone4
             "transformer.blocks.{bid}.attn.k_ln", # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm", # openelm

include/llama.h

Lines changed: 12 additions & 11 deletions

@@ -107,17 +107,18 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
         LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
         LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
-        LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
-        LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
-        LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
-        LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
-        LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
-        LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
-        LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
-        LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
-        LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
-        LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
-        LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 36,
+        LLAMA_VOCAB_PRE_TYPE_EXAONE4 = 26,
+        LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 27,
+        LLAMA_VOCAB_PRE_TYPE_MINERVA = 28,
+        LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 29,
+        LLAMA_VOCAB_PRE_TYPE_GPT4O = 30,
+        LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 31,
+        LLAMA_VOCAB_PRE_TYPE_TRILLION = 32,
+        LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 33,
+        LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 34,
+        LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 35,
+        LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 36,
+        LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 37,
     };

     enum llama_rope_type {

src/llama-arch.cpp

Lines changed: 21 additions & 0 deletions

@@ -65,6 +65,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_JAIS, "jais" },
     { LLM_ARCH_NEMOTRON, "nemotron" },
     { LLM_ARCH_EXAONE, "exaone" },
+    { LLM_ARCH_EXAONE4, "exaone4" },
     { LLM_ARCH_RWKV6, "rwkv6" },
     { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
     { LLM_ARCH_RWKV7, "rwkv7" },

@@ -1417,6 +1418,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_EXAONE4,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+        }
+    },
     {
         LLM_ARCH_RWKV6,
         {

src/llama-arch.h

Lines changed: 1 addition & 0 deletions

@@ -69,6 +69,7 @@ enum llm_arch {
     LLM_ARCH_JAIS,
     LLM_ARCH_NEMOTRON,
     LLM_ARCH_EXAONE,
+    LLM_ARCH_EXAONE4,
     LLM_ARCH_RWKV6,
     LLM_ARCH_RWKV6QWEN2,
     LLM_ARCH_RWKV7,

0 commit comments
