@@ -380,6 +380,41 @@ def load(model_plus: ModelPlus) -> "Params":
         return params


+@dataclass
+class Metadata:
+    name: Optional[str] = None
+    author: Optional[str] = None
+    version: Optional[str] = None
+    url: Optional[str] = None
+    description: Optional[str] = None
+    licence: Optional[str] = None
+    source_url: Optional[str] = None
+    source_hf_repo: Optional[str] = None
+
+    @staticmethod
+    def load(metadata_path: Path) -> "Metadata":
+        if metadata_path is None or not metadata_path.exists():
+            return Metadata()
+
+        with open(metadata_path, 'r') as file:
+            data = json.load(file)
+
+        # Create a new Metadata instance
+        metadata = Metadata()
+
+        # Assign values to Metadata attributes if the corresponding keys exist in the JSON file (key names per the gguf.md general-metadata spec)
+        metadata.name = data.get("general.name")
+        metadata.author = data.get("general.author")
+        metadata.version = data.get("general.version")
+        metadata.url = data.get("general.url")
+        metadata.description = data.get("general.description")
+        metadata.licence = data.get("general.license")
+        metadata.source_url = data.get("general.source.url")
+        metadata.source_hf_repo = data.get("general.source.huggingface.repository")
+
+        return metadata
+
+
 class BpeVocab:  # GPT
     def __init__(
         self, fname_tokenizer: Path, fname_added_tokens: Optional[Path]
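For reference, `Metadata.load()` above reads a flat JSON object keyed by the GGUF general-metadata names (see the gguf.md link next to the `--metadata` argument further down). A minimal sketch of producing such a file follows; the file name and every field value here are illustrative, not taken from the PR:

```python
# Sketch: write a metadata override file that Metadata.load() can consume.
# All values below are invented examples; the keys mirror the GGUF
# general-metadata names used by the loader above.
import json

metadata = {
    "general.name": "TinyLLama",
    "general.author": "Example Author",
    "general.version": "v0.1",
    "general.url": "https://example.com/model-card",
    "general.description": "A small model used here only for illustration.",
    "general.license": "apache-2.0",
    "general.source.url": "https://example.com/source",
}

with open("metadata.json", "w") as f:
    json.dump(metadata, f, indent=4)
```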
@@ -1026,16 +1061,37 @@ def __init__(
             fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess
         )

-    def add_meta_arch(self, params: Params) -> None:
+    def add_meta_model(self, params: Params, metadata: Metadata) -> None:
+        # Metadata about the model and its provenance
         name = "LLaMA"
-
-        # TODO: better logic to determine model name
-        if params.n_ctx == 4096:
-            name = "LLaMA v2"
+        if metadata is not None and metadata.name is not None:
+            name = metadata.name
         elif params.path_model is not None:
             name = str(params.path_model.parent).split("/")[-1]
+        elif params.n_ctx == 4096:
+            # Heuristic detection of a LLaMA v2 model
+            name = "LLaMA v2"

         self.gguf.add_name(name)
+
+        if metadata is not None:
+            if metadata.author is not None:
+                self.gguf.add_author(metadata.author)
+            if metadata.version is not None:
+                self.gguf.add_version(metadata.version)
+            if metadata.url is not None:
+                self.gguf.add_url(metadata.url)
+            if metadata.description is not None:
+                self.gguf.add_description(metadata.description)
+            if metadata.licence is not None:
+                self.gguf.add_licence(metadata.licence)
+            if metadata.source_url is not None:
+                self.gguf.add_source_url(metadata.source_url)
+            if metadata.source_hf_repo is not None:
+                self.gguf.add_source_hf_repo(metadata.source_hf_repo)
+
+    def add_meta_arch(self, params: Params) -> None:
+        # Metadata about the neural architecture itself
         self.gguf.add_context_length(params.n_ctx)
         self.gguf.add_embedding_length(params.n_embd)
         self.gguf.add_block_count(params.n_layer)
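With this split, `add_meta_model` resolves the model name in a fixed precedence order: an explicit `metadata.name` from the `--metadata` file wins, then the name of the model's parent directory, then the `n_ctx == 4096` heuristic for LLaMA v2, and finally the bare `"LLaMA"` default. Note that this also reorders the old logic: the directory-derived name now takes precedence over the LLaMA v2 heuristic, which previously fired first. Each provenance key (`author`, `version`, `url`, and so on) is written only when the corresponding metadata field is present.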
@@ -1146,12 +1202,14 @@ def write_vocab_only(
     svocab: gguf.SpecialVocab,
     endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
     pad_vocab: bool = False,
+    metadata: Optional[Metadata] = None,
 ) -> None:
     check_vocab_size(params, vocab, pad_vocab=pad_vocab)

     of = OutputFile(fname_out, endianess=endianess)

     # meta data
+    of.add_meta_model(params, metadata)
     of.add_meta_arch(params)
     of.add_meta_vocab(vocab)
     of.add_meta_special_vocab(svocab)
@@ -1184,12 +1242,14 @@ def write_all(
     concurrency: int = DEFAULT_CONCURRENCY,
     endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
     pad_vocab: bool = False,
+    metadata: Optional[Metadata] = None,
 ) -> None:
     check_vocab_size(params, vocab, pad_vocab=pad_vocab)

     of = OutputFile(fname_out, endianess=endianess)

     # meta data
+    of.add_meta_model(params, metadata)
     of.add_meta_arch(params)
     of.add_meta_vocab(vocab)
     of.add_meta_special_vocab(svocab)
@@ -1463,7 +1523,7 @@ def load_vocab(
     return vocab, special_vocab


-def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int) -> Path:
+def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
     quantization = {
         GGMLFileType.AllF32: "f32",
         GGMLFileType.MostlyF16: "f16",
@@ -1472,11 +1532,17 @@ def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Pa

     parameters = model_parameter_count_rounded_notation(model_params_count)

+    version = ""
+    if metadata is not None and metadata.version is not None:
+        version = f"-{metadata.version}"
+
     name = "ggml-model"
-    if params.path_model is not None:
+    if metadata is not None and metadata.name is not None:
+        name = metadata.name
+    elif params.path_model is not None:
         name = params.path_model.name

-    ret = model_paths[0].parent / f"{name}-{parameters}-{quantization}.gguf"
+    ret = model_paths[0].parent / f"{name}{version}-{parameters}-{quantization}.gguf"
     if ret in model_paths:
         sys.stderr.write(
             f"Error: Default output path ({ret}) would overwrite the input. "
@@ -1585,13 +1651,22 @@ def get_argument_parser() -> ArgumentParser:
         help="Indicate that the model is executed on a big-endian machine",
     )

+    # https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#general-metadata
+    parser.add_argument(
+        "--metadata",
+        type=Path,
+        help="Specify the path for a metadata file",
+    )
+
     return parser


 def main(argv: Optional[list[str]] = None) -> None:
     parser = get_argument_parser()
     args = parser.parse_args(argv)

+    metadata = Metadata.load(args.metadata)
+
     if args.awq_path:
         sys.path.insert(1, str(Path(__file__).resolve().parent / "awq-py"))
         from awq.apply_awq import add_scale_weights
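Since `main()` takes an optional argv list, the new flag can be exercised either from the shell or programmatically. A minimal sketch, assuming a model directory `models/tinyllama/` and the `metadata.json` produced earlier (both names are illustrative):

```python
# Sketch: drive convert.py's main() directly with the new --metadata flag.
# Equivalent shell invocation:
#   python convert.py models/tinyllama --outtype f16 --metadata metadata.json
main([
    "models/tinyllama",             # model directory to convert
    "--outtype", "f16",             # existing output-type flag
    "--metadata", "metadata.json",  # new: authorship metadata override
])
```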
@@ -1665,6 +1740,7 @@ def main(argv: Optional[list[str]] = None) -> None:
             special_vocab,
             endianess=endianess,
             pad_vocab=args.pad_vocab,
+            metadata=metadata,
         )
         print(f"Wrote {outfile}")
         return
@@ -1683,7 +1759,7 @@ def main(argv: Optional[list[str]] = None) -> None:
     model = convert_model_names(model, params)
     ftype = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count)
+    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)

     params.ftype = ftype
     print(f"Writing {outfile}, format {ftype}")
@@ -1698,6 +1774,7 @@ def main(argv: Optional[list[str]] = None) -> None:
         concurrency=args.concurrency,
         endianess=endianess,
         pad_vocab=args.pad_vocab,
+        metadata=metadata,
     )
     print(f"Wrote {outfile}")