Commit 0cffda8

Moved ArcticModel to the end of the file.
1 parent c6f15a7

1 file changed

convert-hf-to-gguf.py

Lines changed: 193 additions & 193 deletions
@@ -1517,199 +1517,6 @@ def write_tensors(self):
             raise ValueError(f"Unprocessed experts: {experts.keys()}")
 
 
-@Model.register("ArcticForCausalLM")
-class ArcticModel(Model):
-    model_arch = gguf.MODEL_ARCH.ARCTIC
-
-    def set_vocab(self):
-        # The reason for using a custom implementation here is that the
-        # snowflake-arctic-instruct model redefined tokens 31998 and 31999 from
-        # tokenizer.model and used them as BOS and EOS instead of adding new tokens.
-        from sentencepiece import SentencePieceProcessor
-
-        tokenizer_path = self.dir_model / 'tokenizer.model'
-
-        if not tokenizer_path.is_file():
-            print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
-            sys.exit(1)
-
-        # Read the whole vocabulary from the tokenizer.model file
-        tokenizer = SentencePieceProcessor(str(tokenizer_path))
-
-        vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
-
-        tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
-        scores: list[float] = [-10000.0] * vocab_size
-        toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
-
-        for token_id in range(tokenizer.vocab_size()):
-
-            piece = tokenizer.id_to_piece(token_id)
-            text = piece.encode("utf-8")
-            score = tokenizer.get_score(token_id)
-
-            toktype = SentencePieceTokenTypes.NORMAL
-            if tokenizer.is_unknown(token_id):
-                toktype = SentencePieceTokenTypes.UNKNOWN
-            elif tokenizer.is_control(token_id):
-                toktype = SentencePieceTokenTypes.CONTROL
-            elif tokenizer.is_unused(token_id):
-                toktype = SentencePieceTokenTypes.UNUSED
-            elif tokenizer.is_byte(token_id):
-                toktype = SentencePieceTokenTypes.BYTE
-
-            tokens[token_id] = text
-            scores[token_id] = score
-            toktypes[token_id] = toktype
-
-        # Use the added_tokens_decoder field from tokeniser_config.json as the source
-        # of information about added/redefined tokens and modify them accordingly.
-        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
-        if tokenizer_config_file.is_file():
-            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
-                tokenizer_config_json = json.load(f)
-
-                if "added_tokens_decoder" in tokenizer_config_json:
-                    added_tokens_decoder = tokenizer_config_json["added_tokens_decoder"]
-                    for token_id, token_json in added_tokens_decoder.items():
-                        token_id = int(token_id)
-                        if (token_id >= vocab_size):
-                            print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
-                            continue
-
-                        token_content = token_json["content"]
-                        token_type = SentencePieceTokenTypes.USER_DEFINED
-                        token_score = -10000.0
-
-                        # Map unk_token to UNKNOWN, other special tokens to CONTROL
-                        # Set the score to 0.0 as in the original tokenizer.model
-                        if ("special" in token_json) and token_json["special"]:
-                            if token_content == tokenizer_config_json["unk_token"]:
-                                token_type = SentencePieceTokenTypes.UNKNOWN
-                            else:
-                                token_type = SentencePieceTokenTypes.CONTROL
-                                token_score = 0.0
-
-                        print(f"Setting token {token_id} to '{token_content}' (type: {token_type}, score: {token_score:.2f})")
-                        tokens[token_id] = token_content.encode("utf-8")
-                        toktypes[token_id] = token_type
-                        scores[token_id] = token_score
-
-        self.gguf_writer.add_tokenizer_model("llama")
-        self.gguf_writer.add_tokenizer_pre("default")
-        self.gguf_writer.add_token_list(tokens)
-        self.gguf_writer.add_token_scores(scores)
-        self.gguf_writer.add_token_types(toktypes)
-
-        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
-        special_vocab.add_to_gguf(self.gguf_writer)
-
-    def set_gguf_parameters(self):
-        super().set_gguf_parameters()
-        hparams = self.hparams
-        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
-        self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
-
-    # Same as super class, but permuting q_proj, k_proj
-    def write_tensors(self):
-        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
-        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
-        n_head = self.hparams.get("num_attention_heads")
-        n_kv_head = self.hparams.get("num_key_value_heads")
-        n_experts = self.hparams.get("num_local_experts")
-        experts = dict()
-        for name, data_torch in self.get_tensors():
-            # we don't need these
-            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
-                continue
-
-            old_dtype = data_torch.dtype
-
-            # convert any unsupported data types to float32
-            if data_torch.dtype not in (torch.float16, torch.float32):
-                data_torch = data_torch.to(torch.float32)
-
-            data = data_torch.numpy()
-
-            if name.endswith("q_proj.weight"):
-                data = permute(data, n_head, n_head)
-            if name.endswith("k_proj.weight"):
-                data = permute(data, n_head, n_kv_head)
-
-            data = data.squeeze()
-
-            # process the experts separately
-            if name.find("block_sparse_moe.experts") != -1:
-                experts[name] = data
-                if len(experts) >= n_experts:
-                    # merge the experts into a single 3d tensor
-                    for bid in range(block_count):
-                        for wid in range(1, 4):
-                            full = True
-                            for xid in range(n_experts):
-                                ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.w{wid}.weight"
-                                if ename not in experts:
-                                    full = False
-                                    break
-                            if not full:
-                                continue
-
-                            datas = []
-                            for xid in range(n_experts):
-                                ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.w{wid}.weight"
-                                datas.append(experts[ename])
-                                del experts[ename]
-
-                            data = np.stack(datas, axis=0)
-                            data_dtype = data.dtype
-
-                            if self.ftype == 0 and data_dtype == np.float16:
-                                data = data.astype(np.float32)
-
-                            if self.ftype == 1 and data_dtype == np.float32:
-                                data = data.astype(np.float16)
-
-                            merged_name = f"layers.{bid}.feed_forward.experts.w{wid}.weight"
-
-                            new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias"))
-                            if new_name is None:
-                                print(f"Can not map tensor {name!r}")
-                                sys.exit()
-
-                            print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}")
-
-                            self.gguf_writer.add_tensor(new_name, data)
-                continue
-
-            # map tensor names
-            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
-            if new_name is None:
-                print(f"Can not map tensor {name!r}")
-                sys.exit()
-
-            n_dims = len(data.shape)
-            data_dtype = data.dtype
-
-            # if f32 desired, convert any float16 to float32
-            if self.ftype == 0 and data_dtype == np.float16:
-                data = data.astype(np.float32)
-
-            # 1d tensors need to be converted to float32
-            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
-                data = data.astype(np.float32)
-
-            # if f16 desired, convert any float32 2-dim weight tensors to float16
-            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-                data = data.astype(np.float16)
-
-            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
-
-            self.gguf_writer.add_tensor(new_name, data)
-
-        if len(experts) > 0:
-            raise ValueError(f"Unprocessed experts: {experts.keys()}")
-
-
 @Model.register("GrokForCausalLM")
 class GrokModel(Model):
     model_arch = gguf.MODEL_ARCH.GROK
@@ -3101,6 +2908,199 @@ def write_tensors(self):
             self.gguf_writer.add_tensor(new_name, data)
 
 
+@Model.register("ArcticForCausalLM")
+class ArcticModel(Model):
+    model_arch = gguf.MODEL_ARCH.ARCTIC
+
+    def set_vocab(self):
+        # The reason for using a custom implementation here is that the
+        # snowflake-arctic-instruct model redefined tokens 31998 and 31999 from
+        # tokenizer.model and used them as BOS and EOS instead of adding new tokens.
+        from sentencepiece import SentencePieceProcessor
+
+        tokenizer_path = self.dir_model / 'tokenizer.model'
+
+        if not tokenizer_path.is_file():
+            print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
+            sys.exit(1)
+
+        # Read the whole vocabulary from the tokenizer.model file
+        tokenizer = SentencePieceProcessor(str(tokenizer_path))
+
+        vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+        tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+        scores: list[float] = [-10000.0] * vocab_size
+        toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+
+        for token_id in range(tokenizer.vocab_size()):
+
+            piece = tokenizer.id_to_piece(token_id)
+            text = piece.encode("utf-8")
+            score = tokenizer.get_score(token_id)
+
+            toktype = SentencePieceTokenTypes.NORMAL
+            if tokenizer.is_unknown(token_id):
+                toktype = SentencePieceTokenTypes.UNKNOWN
+            elif tokenizer.is_control(token_id):
+                toktype = SentencePieceTokenTypes.CONTROL
+            elif tokenizer.is_unused(token_id):
+                toktype = SentencePieceTokenTypes.UNUSED
+            elif tokenizer.is_byte(token_id):
+                toktype = SentencePieceTokenTypes.BYTE
+
+            tokens[token_id] = text
+            scores[token_id] = score
+            toktypes[token_id] = toktype
+
+        # Use the added_tokens_decoder field from tokeniser_config.json as the source
+        # of information about added/redefined tokens and modify them accordingly.
+        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+        if tokenizer_config_file.is_file():
+            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+                tokenizer_config_json = json.load(f)
+
+                if "added_tokens_decoder" in tokenizer_config_json:
+                    added_tokens_decoder = tokenizer_config_json["added_tokens_decoder"]
+                    for token_id, token_json in added_tokens_decoder.items():
+                        token_id = int(token_id)
+                        if (token_id >= vocab_size):
+                            print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+                            continue
+
+                        token_content = token_json["content"]
+                        token_type = SentencePieceTokenTypes.USER_DEFINED
+                        token_score = -10000.0
+
+                        # Map unk_token to UNKNOWN, other special tokens to CONTROL
+                        # Set the score to 0.0 as in the original tokenizer.model
+                        if ("special" in token_json) and token_json["special"]:
+                            if token_content == tokenizer_config_json["unk_token"]:
+                                token_type = SentencePieceTokenTypes.UNKNOWN
+                            else:
+                                token_type = SentencePieceTokenTypes.CONTROL
+                                token_score = 0.0
+
+                        print(f"Setting token {token_id} to '{token_content}' (type: {token_type}, score: {token_score:.2f})")
+                        tokens[token_id] = token_content.encode("utf-8")
+                        toktypes[token_id] = token_type
+                        scores[token_id] = token_score
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
+
+    # Same as super class, but permuting q_proj, k_proj
+    def write_tensors(self):
+        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        n_head = self.hparams.get("num_attention_heads")
+        n_kv_head = self.hparams.get("num_key_value_heads")
+        n_experts = self.hparams.get("num_local_experts")
+        experts = dict()
+        for name, data_torch in self.get_tensors():
+            # we don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
+                continue
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            data = data_torch.numpy()
+
+            if name.endswith("q_proj.weight"):
+                data = permute(data, n_head, n_head)
+            if name.endswith("k_proj.weight"):
+                data = permute(data, n_head, n_kv_head)
+
+            data = data.squeeze()
+
+            # process the experts separately
+            if name.find("block_sparse_moe.experts") != -1:
+                experts[name] = data
+                if len(experts) >= n_experts:
+                    # merge the experts into a single 3d tensor
+                    for bid in range(block_count):
+                        for wid in range(1, 4):
+                            full = True
+                            for xid in range(n_experts):
+                                ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.w{wid}.weight"
+                                if ename not in experts:
+                                    full = False
+                                    break
+                            if not full:
+                                continue
+
+                            datas = []
+                            for xid in range(n_experts):
+                                ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.w{wid}.weight"
+                                datas.append(experts[ename])
+                                del experts[ename]
+
+                            data = np.stack(datas, axis=0)
+                            data_dtype = data.dtype
+
+                            if self.ftype == 0 and data_dtype == np.float16:
+                                data = data.astype(np.float32)
+
+                            if self.ftype == 1 and data_dtype == np.float32:
+                                data = data.astype(np.float16)
+
+                            merged_name = f"layers.{bid}.feed_forward.experts.w{wid}.weight"
+
+                            new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias"))
+                            if new_name is None:
+                                print(f"Can not map tensor {name!r}")
+                                sys.exit()
+
+                            print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}")
+
+                            self.gguf_writer.add_tensor(new_name, data)
+                continue
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # 1d tensors need to be converted to float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+
+            self.gguf_writer.add_tensor(new_name, data)
+
+        if len(experts) > 0:
+            raise ValueError(f"Unprocessed experts: {experts.keys()}")
+
+
 ###### CONVERSION LOGIC ######
 
 
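Since the commit only moves ArcticModel within the file, no behavior changes: @Model.register adds the class to a name-to-class registry at import time, so lookup by architecture string does not depend on where the class body sits in the file. Below is a minimal sketch of that registration pattern, with simplified names and signatures rather than the exact convert-hf-to-gguf.py source:

# Minimal sketch of the registry pattern; names and signatures are
# illustrative, not the exact convert-hf-to-gguf.py implementation.
from __future__ import annotations


class Model:
    # Registry mapping HF architecture names to converter classes,
    # populated at import time by the decorator below.
    _model_classes: dict[str, type[Model]] = {}

    @classmethod
    def register(cls, *names: str):
        def func(modelcls: type[Model]) -> type[Model]:
            for name in names:
                cls._model_classes[name] = modelcls
            return modelcls
        return func

    @classmethod
    def from_model_architecture(cls, arch: str) -> type[Model]:
        try:
            return cls._model_classes[arch]
        except KeyError:
            raise NotImplementedError(f"Architecture {arch!r} not supported!") from None


@Model.register("ArcticForCausalLM")
class ArcticModel(Model):
    pass


# Lookup succeeds no matter where in the file the class is defined,
# which is why moving ArcticModel to the end of the file is behavior-neutral.
assert Model.from_model_architecture("ArcticForCausalLM") is ArcticModel

In the actual script the architecture string is read from the checkpoint's config.json, so the converter class is found by name rather than by position in the file.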