@@ -843,6 +843,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "169bf0296a13c4d9b7672313f749eb36501d931022de052aad6e36f2bf34dd51":
             # ref: https://huggingface.co/LiquidAI/LFM2-Tokenizer
             res = "lfm2"
+        if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
+            # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B
+            res = "exaone4"
 
         if res is None:
             logger.warning("\n")
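The `chkhsh` strings compared above fingerprint a tokenizer's pre-tokenization behavior: the converter encodes a fixed probe text and hashes the resulting token IDs, so tokenizers that pre-tokenize identically map to the same `res`. A minimal sketch of that idea (the converter's real probe string is much longer; `PROBE_TEXT` here is a stand-in):

```python
from hashlib import sha256
from transformers import AutoTokenizer  # assumed available

# Stand-in for the converter's long multilingual probe string.
PROBE_TEXT = "Hello World 123 \n\t ..."

tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-4.0-32B")
ids = tokenizer.encode(PROBE_TEXT)
# Hash the token-ID sequence; equal pre-tokenizers yield equal fingerprints.
chkhsh = sha256(str(ids).encode()).hexdigest()
print(chkhsh)
```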
@@ -2861,7 +2864,8 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         num_heads = self.hparams["num_attention_heads"]
         num_kv_heads = self.hparams["num_key_value_heads"]
-        head_dim = self.hparams["head_dim"]
+        if (head_dim := self.hparams.get("head_dim")) is None:
+            head_dim = self.hparams["hidden_size"] // num_heads
 
         if "ernie." in name:
             name = name.replace("ernie.", "model.")
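This hunk makes `head_dim` optional: when a config omits it, the conversion derives it from the hidden size. A tiny sketch of the fallback, with made-up hparams:

```python
# Made-up config lacking a "head_dim" key; the fallback derives it.
hparams = {"num_attention_heads": 32, "hidden_size": 4096}

num_heads = hparams["num_attention_heads"]
if (head_dim := hparams.get("head_dim")) is None:
    head_dim = hparams["hidden_size"] // num_heads

assert head_dim == 128  # 4096 // 32
```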
@@ -2894,6 +2898,93 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@ModelBase.register("Ernie4_5_MoeForCausalLM")
+class Ernie4_5MoeModel(Ernie4_5Model):
+    model_arch = gguf.MODEL_ARCH.ERNIE4_5_MOE
+    _experts: list[dict[str, Tensor]] | None = None
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._experts = [{} for _ in range(self.block_count)]
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_expert_count(self.hparams["moe_num_experts"])
+        self.gguf_writer.add_expert_used_count(self.hparams["moe_k"])
+        self.gguf_writer.add_interleave_moe_layer_step(self.hparams["moe_layer_interval"])
+        self.gguf_writer.add_leading_dense_block_count(self.hparams["moe_layer_start_index"])
+        if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+        if (shared_expert_count := self.hparams.get('moe_num_shared_experts')) is not None:
+            self.gguf_writer.add_expert_shared_count(shared_expert_count)
+            if shared_expert_count > 0 and (shared_expert_intermediate_size := self.hparams.get('intermediate_size')) is not None and (num_key_value_heads := self.hparams.get('num_key_value_heads')) is not None:
+                self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size // num_key_value_heads)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # Modify correction bias name as in DeepseekV2
+        if name.endswith("e_score_correction_bias"):
+            name = name.replace("e_score_correction_bias", "e_score_correction.bias")
+
+        # skip Multi-Token Prediction (MTP) layers (again, same as DeepseekV2)
+        match = re.match(r"model.mtp_block.(\d+)", name)
+        if match:
+            return []
+
+        # skip all other MTP tensors for now
+        match = re.match(r"model.mtp_emb_norm.(\d+)", name)
+        if match:
+            return []
+
+        match = re.match(r"model.mtp_hidden_norm.(\d+)", name)
+        if match:
+            return []
+
+        match = re.match(r"model.mtp_linear_proj.(\d+)", name)
+        if match:
+            return []
+
+        # process the experts separately
+        if name.find("mlp.experts") != -1:
+            n_experts = self.hparams["moe_num_experts"]
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["gate_proj", "up_proj", "down_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename_to_retrieve = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename_to_retrieve])
+                        del self._experts[bid][ename_to_retrieve]
+
+                    data_torch = torch.stack(datas, dim=0)
+                    merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+                    new_name = self.map_tensor_name(merged_name)
+                    tensors.append((new_name, data_torch))
+
+                return tensors
+            else:
+                return []
+        return [(self.map_tensor_name(name), data_torch)]
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @ModelBase.register(
     "Qwen2VLModel",
     "Qwen2VLForConditionalGeneration",
@@ -6692,6 +6783,75 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
             yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
 
 
+@ModelBase.register("Exaone4ForCausalLM")
+class Exaone4Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.EXAONE4
+
+    def set_vocab(self):
+        tokens, toktypes, tokpre = self.get_vocab_base()
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+
+        if hparams.get("sliding_window") is not None:
+            self.gguf_writer.add_sliding_window(hparams["sliding_window"])
+            if "layer_types" in hparams:
+                self.gguf_writer.add_sliding_window_pattern([t == "sliding_attention" for t in hparams["layer_types"]])
+            elif "sliding_window_pattern" in hparams:
+                sliding_window_pattern = []
+                if isinstance(hparams["sliding_window_pattern"], str):  # e.g. LLLG
+                    for i in range(hparams["num_hidden_layers"]):
+                        sliding_window_pattern.append(hparams["sliding_window_pattern"][i % len(hparams["sliding_window_pattern"])] == "L")
+                if isinstance(hparams["sliding_window_pattern"], int):  # e.g. 4
+                    for i in range(hparams["num_hidden_layers"]):
+                        sliding_window_pattern.append((i + 1) % hparams["sliding_window_pattern"] != 0)
+                if len(sliding_window_pattern) == hparams["num_hidden_layers"]:
+                    self.gguf_writer.add_sliding_window_pattern(sliding_window_pattern)
+
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
+            if rope_scaling.get("rope_type", '').lower() == "llama3":
+                base = self.hparams.get("rope_theta", 10_000.0)
+                if (dim := self.hparams.get("head_dim")) is None:
+                    dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+                freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
+
+                factor = rope_scaling.get("factor", 16.0)
+                low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
+                high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
+                old_context_len = self.hparams.get("original_max_position_embeddings", 8192)
+
+                low_freq_wavelen = old_context_len / low_freq_factor
+                high_freq_wavelen = old_context_len / high_freq_factor
+
+                rope_factors = []
+                for freq in freqs:
+                    wavelen = 2 * math.pi / freq
+                    if wavelen < high_freq_wavelen:
+                        rope_factors.append(1)
+                    elif wavelen > low_freq_wavelen:
+                        rope_factors.append(factor)
+                    else:
+                        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
+                        rope_factors.append(1 / ((1 - smooth) / factor + smooth))
+
+                yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
+
+
 @ModelBase.register("GraniteForCausalLM")
 class GraniteModel(LlamaModel):
     """Conversion for IBM's GraniteForCausalLM"""
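The `generate_extra_tensors` override emits llama3-style long-context RoPE factors: wavelengths shorter than the original context's high-frequency cutoff keep a factor of 1.0, wavelengths beyond the low-frequency cutoff are scaled by `factor`, and the band in between interpolates smoothly. A standalone sketch of the same computation; the defaults mirror the `.get(...)` fallbacks above, not EXAONE 4.0's actual config values:

```python
import math
import torch

def llama3_rope_factors(dim: int = 128, base: float = 10_000.0, factor: float = 16.0,
                        low_freq_factor: float = 1.0, high_freq_factor: float = 4.0,
                        old_context_len: int = 8192) -> torch.Tensor:
    # Standard RoPE inverse frequencies, one per rotated pair of dimensions.
    freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
    low_freq_wavelen = old_context_len / low_freq_factor
    high_freq_wavelen = old_context_len / high_freq_factor

    out = []
    for freq in freqs:
        wavelen = 2 * math.pi / freq
        if wavelen < high_freq_wavelen:    # short wavelength: no scaling
            out.append(1.0)
        elif wavelen > low_freq_wavelen:   # long wavelength: full scaling
            out.append(factor)
        else:                              # transition band: smooth blend
            smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
            out.append(1.0 / ((1.0 - smooth) / factor + smooth))
    return torch.tensor(out, dtype=torch.float32)

print(llama3_rope_factors()[:4])  # first few factors sit in the high-frequency band (1.0)
```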