@@ -1517,199 +1517,6 @@ def write_tensors(self):
             raise ValueError(f"Unprocessed experts: {experts.keys()}")
 
 
-@Model.register("ArcticForCausalLM")
-class ArcticModel(Model):
-    model_arch = gguf.MODEL_ARCH.ARCTIC
-
-    def set_vocab(self):
-        # The reason for using a custom implementation here is that the
-        # snowflake-arctic-instruct model redefined tokens 31998 and 31999 from
-        # tokenizer.model and used them as BOS and EOS instead of adding new tokens.
-        from sentencepiece import SentencePieceProcessor
-
-        tokenizer_path = self.dir_model / 'tokenizer.model'
-
-        if not tokenizer_path.is_file():
-            print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
-            sys.exit(1)
-
-        # Read the whole vocabulary from the tokenizer.model file
-        tokenizer = SentencePieceProcessor(str(tokenizer_path))
-
-        vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
-
-        tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
-        scores: list[float] = [-10000.0] * vocab_size
-        toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
-
-        for token_id in range(tokenizer.vocab_size()):
-
-            piece = tokenizer.id_to_piece(token_id)
-            text = piece.encode("utf-8")
-            score = tokenizer.get_score(token_id)
-
-            toktype = SentencePieceTokenTypes.NORMAL
-            if tokenizer.is_unknown(token_id):
-                toktype = SentencePieceTokenTypes.UNKNOWN
-            elif tokenizer.is_control(token_id):
-                toktype = SentencePieceTokenTypes.CONTROL
-            elif tokenizer.is_unused(token_id):
-                toktype = SentencePieceTokenTypes.UNUSED
-            elif tokenizer.is_byte(token_id):
-                toktype = SentencePieceTokenTypes.BYTE
-
-            tokens[token_id] = text
-            scores[token_id] = score
-            toktypes[token_id] = toktype
-
-        # Use the added_tokens_decoder field from tokenizer_config.json as the source
-        # of information about added/redefined tokens and modify them accordingly.
-        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
-        if tokenizer_config_file.is_file():
-            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
-                tokenizer_config_json = json.load(f)
-
-                if "added_tokens_decoder" in tokenizer_config_json:
-                    added_tokens_decoder = tokenizer_config_json["added_tokens_decoder"]
-                    for token_id, token_json in added_tokens_decoder.items():
-                        token_id = int(token_id)
-                        if (token_id >= vocab_size):
-                            print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
-                            continue
-
-                        token_content = token_json["content"]
-                        token_type = SentencePieceTokenTypes.USER_DEFINED
-                        token_score = -10000.0
-
-                        # Map unk_token to UNKNOWN, other special tokens to CONTROL
-                        # Set the score to 0.0 as in the original tokenizer.model
-                        if ("special" in token_json) and token_json["special"]:
-                            if token_content == tokenizer_config_json["unk_token"]:
-                                token_type = SentencePieceTokenTypes.UNKNOWN
-                            else:
-                                token_type = SentencePieceTokenTypes.CONTROL
-                                token_score = 0.0
-
-                        print(f"Setting token {token_id} to '{token_content}' (type: {token_type}, score: {token_score:.2f})")
-                        tokens[token_id] = token_content.encode("utf-8")
-                        toktypes[token_id] = token_type
-                        scores[token_id] = token_score
-
-        self.gguf_writer.add_tokenizer_model("llama")
-        self.gguf_writer.add_tokenizer_pre("default")
-        self.gguf_writer.add_token_list(tokens)
-        self.gguf_writer.add_token_scores(scores)
-        self.gguf_writer.add_token_types(toktypes)
-
-        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
-        special_vocab.add_to_gguf(self.gguf_writer)
-
-    def set_gguf_parameters(self):
-        super().set_gguf_parameters()
-        hparams = self.hparams
-        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
-        self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
-
-    # Same as super class, but permuting q_proj, k_proj
-    def write_tensors(self):
-        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
-        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
-        n_head = self.hparams.get("num_attention_heads")
-        n_kv_head = self.hparams.get("num_key_value_heads")
-        n_experts = self.hparams.get("num_local_experts")
-        experts = dict()
-        for name, data_torch in self.get_tensors():
-            # we don't need these
-            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
-                continue
-
-            old_dtype = data_torch.dtype
-
-            # convert any unsupported data types to float32
-            if data_torch.dtype not in (torch.float16, torch.float32):
-                data_torch = data_torch.to(torch.float32)
-
-            data = data_torch.numpy()
-
-            if name.endswith("q_proj.weight"):
-                data = permute(data, n_head, n_head)
-            if name.endswith("k_proj.weight"):
-                data = permute(data, n_head, n_kv_head)
-
-            data = data.squeeze()
-
-            # process the experts separately
-            if name.find("block_sparse_moe.experts") != -1:
-                experts[name] = data
-                if len(experts) >= n_experts:
-                    # merge the experts into a single 3d tensor
-                    for bid in range(block_count):
-                        for wid in range(1, 4):
-                            full = True
-                            for xid in range(n_experts):
-                                ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.w{wid}.weight"
-                                if ename not in experts:
-                                    full = False
-                                    break
-                            if not full:
-                                continue
-
-                            datas = []
-                            for xid in range(n_experts):
-                                ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.w{wid}.weight"
-                                datas.append(experts[ename])
-                                del experts[ename]
-
-                            data = np.stack(datas, axis=0)
-                            data_dtype = data.dtype
-
-                            if self.ftype == 0 and data_dtype == np.float16:
-                                data = data.astype(np.float32)
-
-                            if self.ftype == 1 and data_dtype == np.float32:
-                                data = data.astype(np.float16)
-
-                            merged_name = f"layers.{bid}.feed_forward.experts.w{wid}.weight"
-
-                            new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias"))
-                            if new_name is None:
-                                print(f"Can not map tensor {name!r}")
-                                sys.exit()
-
-                            print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}")
-
-                            self.gguf_writer.add_tensor(new_name, data)
-                continue
-
-            # map tensor names
-            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
-            if new_name is None:
-                print(f"Can not map tensor {name!r}")
-                sys.exit()
-
-            n_dims = len(data.shape)
-            data_dtype = data.dtype
-
-            # if f32 desired, convert any float16 to float32
-            if self.ftype == 0 and data_dtype == np.float16:
-                data = data.astype(np.float32)
-
-            # 1d tensors need to be converted to float32
-            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
-                data = data.astype(np.float32)
-
-            # if f16 desired, convert any float32 2-dim weight tensors to float16
-            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-                data = data.astype(np.float16)
-
-            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
-
-            self.gguf_writer.add_tensor(new_name, data)
-
-        if len(experts) > 0:
-            raise ValueError(f"Unprocessed experts: {experts.keys()}")
-
-
 @Model.register("GrokForCausalLM")
 class GrokModel(Model):
     model_arch = gguf.MODEL_ARCH.GROK
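
The `permute` calls on `q_proj.weight` / `k_proj.weight` in the block above reorder each attention head's rows from the Hugging Face rotary layout to the interleaved layout llama.cpp expects. A minimal NumPy sketch of such a permutation, assuming the usual llama.cpp convention (`permute_sketch` is an illustrative stand-in, not the script's own helper):

    import numpy as np

    def permute_sketch(weights: np.ndarray, n_head: int, n_head_kv: int | None) -> np.ndarray:
        # With grouped-query attention, K has n_head_kv heads, so permute per KV head.
        if n_head_kv is not None and n_head != n_head_kv:
            n_head = n_head_kv
        # Split each head's rows into two half-blocks, swap the block axis with
        # the row axis, and flatten back; this interleaves the two rotary halves.
        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                       .swapaxes(1, 2)
                       .reshape(weights.shape))
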
@@ -3101,6 +2908,199 @@ def write_tensors(self):
             self.gguf_writer.add_tensor(new_name, data)
 
 
+@Model.register("ArcticForCausalLM")
+class ArcticModel(Model):
+    model_arch = gguf.MODEL_ARCH.ARCTIC
+
+    def set_vocab(self):
+        # The reason for using a custom implementation here is that the
+        # snowflake-arctic-instruct model redefined tokens 31998 and 31999 from
+        # tokenizer.model and used them as BOS and EOS instead of adding new tokens.
+        from sentencepiece import SentencePieceProcessor
+
+        tokenizer_path = self.dir_model / 'tokenizer.model'
+
+        if not tokenizer_path.is_file():
+            print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
+            sys.exit(1)
+
+        # Read the whole vocabulary from the tokenizer.model file
+        tokenizer = SentencePieceProcessor(str(tokenizer_path))
+
+        vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+        tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+        scores: list[float] = [-10000.0] * vocab_size
+        toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+
+        for token_id in range(tokenizer.vocab_size()):
+
+            piece = tokenizer.id_to_piece(token_id)
+            text = piece.encode("utf-8")
+            score = tokenizer.get_score(token_id)
+
+            toktype = SentencePieceTokenTypes.NORMAL
+            if tokenizer.is_unknown(token_id):
+                toktype = SentencePieceTokenTypes.UNKNOWN
+            elif tokenizer.is_control(token_id):
+                toktype = SentencePieceTokenTypes.CONTROL
+            elif tokenizer.is_unused(token_id):
+                toktype = SentencePieceTokenTypes.UNUSED
+            elif tokenizer.is_byte(token_id):
+                toktype = SentencePieceTokenTypes.BYTE
+
+            tokens[token_id] = text
+            scores[token_id] = score
+            toktypes[token_id] = toktype
+
+        # Use the added_tokens_decoder field from tokenizer_config.json as the source
+        # of information about added/redefined tokens and modify them accordingly.
+        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+        if tokenizer_config_file.is_file():
+            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+                tokenizer_config_json = json.load(f)
+
+                if "added_tokens_decoder" in tokenizer_config_json:
+                    added_tokens_decoder = tokenizer_config_json["added_tokens_decoder"]
+                    for token_id, token_json in added_tokens_decoder.items():
+                        token_id = int(token_id)
+                        if (token_id >= vocab_size):
+                            print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+                            continue
+
+                        token_content = token_json["content"]
+                        token_type = SentencePieceTokenTypes.USER_DEFINED
+                        token_score = -10000.0
+
+                        # Map unk_token to UNKNOWN, other special tokens to CONTROL
+                        # Set the score to 0.0 as in the original tokenizer.model
+                        if ("special" in token_json) and token_json["special"]:
+                            if token_content == tokenizer_config_json["unk_token"]:
+                                token_type = SentencePieceTokenTypes.UNKNOWN
+                            else:
+                                token_type = SentencePieceTokenTypes.CONTROL
+                                token_score = 0.0
+
+                        print(f"Setting token {token_id} to '{token_content}' (type: {token_type}, score: {token_score:.2f})")
+                        tokens[token_id] = token_content.encode("utf-8")
+                        toktypes[token_id] = token_type
+                        scores[token_id] = token_score
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
+
+    # Same as super class, but permuting q_proj, k_proj
+    def write_tensors(self):
+        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        n_head = self.hparams.get("num_attention_heads")
+        n_kv_head = self.hparams.get("num_key_value_heads")
+        n_experts = self.hparams.get("num_local_experts")
+        experts = dict()
+        for name, data_torch in self.get_tensors():
+            # we don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
+                continue
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            data = data_torch.numpy()
+
+            if name.endswith("q_proj.weight"):
+                data = permute(data, n_head, n_head)
+            if name.endswith("k_proj.weight"):
+                data = permute(data, n_head, n_kv_head)
+
+            data = data.squeeze()
+
+            # process the experts separately
+            if name.find("block_sparse_moe.experts") != -1:
+                experts[name] = data
+                if len(experts) >= n_experts:
+                    # merge the experts into a single 3d tensor
+                    for bid in range(block_count):
+                        for wid in range(1, 4):
+                            full = True
+                            for xid in range(n_experts):
+                                ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.w{wid}.weight"
+                                if ename not in experts:
+                                    full = False
+                                    break
+                            if not full:
+                                continue
+
+                            datas = []
+                            for xid in range(n_experts):
+                                ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.w{wid}.weight"
+                                datas.append(experts[ename])
+                                del experts[ename]
+
+                            data = np.stack(datas, axis=0)
+                            data_dtype = data.dtype
+
+                            if self.ftype == 0 and data_dtype == np.float16:
+                                data = data.astype(np.float32)
+
+                            if self.ftype == 1 and data_dtype == np.float32:
+                                data = data.astype(np.float16)
+
+                            merged_name = f"layers.{bid}.feed_forward.experts.w{wid}.weight"
+
+                            new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias"))
+                            if new_name is None:
+                                print(f"Can not map tensor {name!r}")
+                                sys.exit()
+
+                            print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}")
+
+                            self.gguf_writer.add_tensor(new_name, data)
+                continue
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # 1d tensors need to be converted to float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+
+            self.gguf_writer.add_tensor(new_name, data)
+
+        if len(experts) > 0:
+            raise ValueError(f"Unprocessed experts: {experts.keys()}")
+
+
 ###### CONVERSION LOGIC ######
 
 
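
The expert-merging branch in `write_tensors` gathers each layer's per-expert 2-D matrices (`w1`, `w2`, `w3`) and stacks them into one 3-D tensor per projection before writing it under the merged name. A self-contained toy example of the stacking step (all sizes invented for illustration):

    import numpy as np

    n_experts, n_ff, n_embd = 4, 16, 8

    # One 2-D weight matrix per expert, as stored in the HF checkpoint.
    expert_weights = [np.ones((n_ff, n_embd), dtype=np.float32) * xid
                      for xid in range(n_experts)]

    # np.stack along a new leading axis produces the single tensor that ends up
    # in the GGUF file as e.g. "layers.0.feed_forward.experts.w1.weight".
    merged = np.stack(expert_weights, axis=0)
    assert merged.shape == (n_experts, n_ff, n_embd)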
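
Likewise for `set_vocab`: the base vocabulary is read from `tokenizer.model`, and entries from `added_tokens_decoder` in `tokenizer_config.json` overwrite existing token ids rather than appending new ones, which is the snowflake-arctic-instruct quirk the comment describes. A toy sketch of that overlay (token contents invented, not Arctic's real tokens):

    # Base vocab slots as read from tokenizer.model (id -> bytes).
    base_vocab = {31998: b"<unused98>", 31999: b"<unused99>"}

    # Hypothetical added_tokens_decoder payload redefining existing ids.
    added_tokens_decoder = {
        "31998": {"content": "<bos-replacement>", "special": True},
        "31999": {"content": "<eos-replacement>", "special": True},
    }

    for token_id, token_json in added_tokens_decoder.items():
        token_id = int(token_id)
        if token_id in base_vocab:  # redefines an existing slot, no append
            base_vocab[token_id] = token_json["content"].encode("utf-8")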