@@ -17,6 +17,7 @@ struct quant_option {
17
17
18
18
static const std::vector<struct quant_option > QUANT_OPTIONS = {
19
19
{ " Q4_0" , LLAMA_FTYPE_MOSTLY_Q4_0, " 3.56G, +0.2166 ppl @ LLaMA-v1-7B" , },
20
+ { " Q4_0_B16" , LLAMA_FTYPE_MOSTLY_Q4_0_B16, " 3.56G, 5.9624 +/- 0.03348 ppl @ LLaMA-v2-7B" , },
20
21
{ " Q4_1" , LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1585 ppl @ LLaMA-v1-7B" , },
21
22
{ " Q5_0" , LLAMA_FTYPE_MOSTLY_Q5_0, " 4.33G, +0.0683 ppl @ LLaMA-v1-7B" , },
22
23
{ " Q5_1" , LLAMA_FTYPE_MOSTLY_Q5_1, " 4.70G, +0.0349 ppl @ LLaMA-v1-7B" , },
@@ -46,6 +47,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
46
47
{ " Q5_K_M" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G, +0.0122 ppl @ LLaMA-v1-7B" , },
47
48
{ " Q6_K" , LLAMA_FTYPE_MOSTLY_Q6_K, " 5.15G, +0.0008 ppl @ LLaMA-v1-7B" , },
48
49
{ " Q8_0" , LLAMA_FTYPE_MOSTLY_Q8_0, " 6.70G, +0.0004 ppl @ LLaMA-v1-7B" , },
50
+ { " Q8_0_B16" , LLAMA_FTYPE_MOSTLY_Q8_0_B16, " 6.70G, 5.8011 +/- 0.03239 ppl @ LLaMA-v1-7B" , },
49
51
{ " F16" , LLAMA_FTYPE_MOSTLY_F16, " 14.00G, -0.0020 ppl @ Mistral-7B" , },
50
52
{ " BF16" , LLAMA_FTYPE_MOSTLY_BF16, " 14.00G, -0.0050 ppl @ Mistral-7B" , },
51
53
{ " F32" , LLAMA_FTYPE_ALL_F32, " 26.00G @ 7B" , },
0 commit comments