@@ -343,6 +343,54 @@ def prepare_tensors(self):
                         # TODO: use Q4_K and Q6_K
                         data_qtype = gguf.GGMLQuantizationType.Q6_0

+                if data_qtype is False and any(
+                    self.match_model_tensor_name(new_name, key, bid)
+                    for key in (
+                        gguf.MODEL_TENSOR.ATTN_Q,
+                    )
+                ):
+                    if self.ftype in (
+                        gguf.LlamaFileType.MOSTLY_Q4_1_S,
+                        gguf.LlamaFileType.MOSTLY_Q5_0_S,
+                        gguf.LlamaFileType.MOSTLY_Q4_1_XS,
+                        gguf.LlamaFileType.MOSTLY_Q5_0_XS,
+                    ):
+                        data_qtype = gguf.GGMLQuantizationType.Q4_0
+                    elif self.ftype in (
+                        gguf.LlamaFileType.MOSTLY_Q5_1_S,
+                        gguf.LlamaFileType.MOSTLY_Q6_0_S,
+                        gguf.LlamaFileType.MOSTLY_Q5_1_XS,
+                        gguf.LlamaFileType.MOSTLY_Q6_0_XS,
+                    ):
+                        data_qtype = gguf.GGMLQuantizationType.Q5_0
+                    elif self.ftype in (
+                        gguf.LlamaFileType.MOSTLY_Q8_0_S,
+                        gguf.LlamaFileType.MOSTLY_Q8_0_XS,
+                    ):
+                        data_qtype = gguf.GGMLQuantizationType.Q6_0
+
+                if data_qtype is False and any(
+                    self.match_model_tensor_name(new_name, key, bid)
+                    for key in (
+                        gguf.MODEL_TENSOR.FFN_UP,
+                        gguf.MODEL_TENSOR.FFN_GATE,
+                    )
+                ):
+                    if self.ftype in (
+                        gguf.LlamaFileType.MOSTLY_Q4_1_XS,
+                        gguf.LlamaFileType.MOSTLY_Q5_0_XS,
+                    ):
+                        data_qtype = gguf.GGMLQuantizationType.Q4_0
+                    elif self.ftype in (
+                        gguf.LlamaFileType.MOSTLY_Q5_1_XS,
+                        gguf.LlamaFileType.MOSTLY_Q6_0_XS,
+                    ):
+                        data_qtype = gguf.GGMLQuantizationType.Q5_0
+                    elif self.ftype in (
+                        gguf.LlamaFileType.MOSTLY_Q8_0_XS,
+                    ):
+                        data_qtype = gguf.GGMLQuantizationType.Q6_0
+
                 # No override (data_qtype is False), or wants to be quantized (data_qtype is True)
                 if isinstance(data_qtype, bool):
                     if self.ftype == gguf.LlamaFileType.ALL_F32:
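For readers skimming the hunk above: the two new blocks store specific tensors one quantization tier below the requested base type. The `_S` file types do this for the attention Q projection only; the `_XS` file types additionally cover `ffn_up` and `ffn_gate`. Below is a condensed, table-driven sketch of the same mapping. It is not part of the patch, and the `MOSTLY_*_S`/`MOSTLY_*_XS` enum members are the ones this PR adds to gguf-py, so the sketch only runs on top of the patch:

```python
import gguf  # the same gguf-py package the converter imports

# ATTN_Q is stored one tier below the base type for both _S and _XS ftypes
# (values copied from the elif chains in the hunk above).
ATTN_Q_QTYPE = {
    gguf.LlamaFileType.MOSTLY_Q4_1_S:  gguf.GGMLQuantizationType.Q4_0,
    gguf.LlamaFileType.MOSTLY_Q5_0_S:  gguf.GGMLQuantizationType.Q4_0,
    gguf.LlamaFileType.MOSTLY_Q5_1_S:  gguf.GGMLQuantizationType.Q5_0,
    gguf.LlamaFileType.MOSTLY_Q6_0_S:  gguf.GGMLQuantizationType.Q5_0,
    gguf.LlamaFileType.MOSTLY_Q8_0_S:  gguf.GGMLQuantizationType.Q6_0,
    gguf.LlamaFileType.MOSTLY_Q4_1_XS: gguf.GGMLQuantizationType.Q4_0,
    gguf.LlamaFileType.MOSTLY_Q5_0_XS: gguf.GGMLQuantizationType.Q4_0,
    gguf.LlamaFileType.MOSTLY_Q5_1_XS: gguf.GGMLQuantizationType.Q5_0,
    gguf.LlamaFileType.MOSTLY_Q6_0_XS: gguf.GGMLQuantizationType.Q5_0,
    gguf.LlamaFileType.MOSTLY_Q8_0_XS: gguf.GGMLQuantizationType.Q6_0,
}

# FFN_UP / FFN_GATE get the same treatment, but only for the _XS ftypes.
FFN_UP_GATE_QTYPE = {
    ft: qt for ft, qt in ATTN_Q_QTYPE.items() if ft.name.endswith("_XS")
}
```

With these tables, each elif chain collapses to a lookup such as `data_qtype = ATTN_Q_QTYPE.get(self.ftype, data_qtype)` behind the same tensor-name match; the patch keeps the explicit chains, which is closer to the file's existing style.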
@@ -363,6 +411,26 @@ def prepare_tensors(self):
                         data_qtype = gguf.GGMLQuantizationType.Q6_0
                     elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0:
                         data_qtype = gguf.GGMLQuantizationType.Q8_0
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_1_S:
+                        data_qtype = gguf.GGMLQuantizationType.Q4_1
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_Q5_0_S:
+                        data_qtype = gguf.GGMLQuantizationType.Q5_0
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_Q5_1_S:
+                        data_qtype = gguf.GGMLQuantizationType.Q5_1
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_Q6_0_S:
+                        data_qtype = gguf.GGMLQuantizationType.Q6_0
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0_S:
+                        data_qtype = gguf.GGMLQuantizationType.Q8_0
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_1_XS:
+                        data_qtype = gguf.GGMLQuantizationType.Q4_1
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_Q5_0_XS:
+                        data_qtype = gguf.GGMLQuantizationType.Q5_0
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_Q5_1_XS:
+                        data_qtype = gguf.GGMLQuantizationType.Q5_1
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_Q6_0_XS:
+                        data_qtype = gguf.GGMLQuantizationType.Q6_0
+                    elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0_XS:
+                        data_qtype = gguf.GGMLQuantizationType.Q8_0
                     elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ1_0:
                         data_qtype = gguf.GGMLQuantizationType.TQ1_0
                     elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ2_0:
@@ -6373,7 +6441,7 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "q5_0", "q5_1", "q6_0", "tq1_0", "tq2_0", "auto"], default="f16",
+        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "q5_0", "q5_1", "q6_0", "q4_1_s", "q5_0_s", "q5_1_s", "q6_0_s", "q8_0_s", "q4_1_xs", "q5_0_xs", "q5_1_xs", "q6_0_xs", "q8_0_xs", "tq1_0", "tq2_0", "auto"], default="f16",
         help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, q4_0, q4_1, q5_0, q5_1, q6_0 for a smaller conversion to then create an iMatrix file for example, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
     )
     parser.add_argument(
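Assuming the file being patched is llama.cpp's convert_hf_to_gguf.py (the parse_args options and prepare_tensors logic match it), a hypothetical invocation of one of the new output types would look like the line below. The model path is illustrative, and the `{ftype}` substitution is the behavior stated in the `--outfile` help above:

```sh
# hypothetical paths; q5_0_xs stores most tensors at Q5_0 and, per the
# hunks above, drops attn_q / ffn_up / ffn_gate to Q4_0
python convert_hf_to_gguf.py ./My-Model-HF --outtype q5_0_xs --outfile My-Model-{ftype}.gguf
```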
@@ -6509,6 +6577,16 @@ def main() -> None:
         "q5_1": gguf.LlamaFileType.MOSTLY_Q5_1,
         "q6_0": gguf.LlamaFileType.MOSTLY_Q6_0,
         "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
+        "q4_1_s": gguf.LlamaFileType.MOSTLY_Q4_1_S,
+        "q5_0_s": gguf.LlamaFileType.MOSTLY_Q5_0_S,
+        "q5_1_s": gguf.LlamaFileType.MOSTLY_Q5_1_S,
+        "q6_0_s": gguf.LlamaFileType.MOSTLY_Q6_0_S,
+        "q8_0_s": gguf.LlamaFileType.MOSTLY_Q8_0_S,
+        "q4_1_xs": gguf.LlamaFileType.MOSTLY_Q4_1_XS,
+        "q5_0_xs": gguf.LlamaFileType.MOSTLY_Q5_0_XS,
+        "q5_1_xs": gguf.LlamaFileType.MOSTLY_Q5_1_XS,
+        "q6_0_xs": gguf.LlamaFileType.MOSTLY_Q6_0_XS,
+        "q8_0_xs": gguf.LlamaFileType.MOSTLY_Q8_0_XS,
         "tq1_0": gguf.LlamaFileType.MOSTLY_TQ1_0,
         "tq2_0": gguf.LlamaFileType.MOSTLY_TQ2_0,
         "auto": gguf.LlamaFileType.GUESSED,