@@ -17,6 +17,7 @@ struct quant_option {
17
17
18
18
static const std::vector<struct quant_option > QUANT_OPTIONS = {
19
19
{ " Q4_0" , LLAMA_FTYPE_MOSTLY_Q4_0, " 4.34G, +0.4685 ppl @ Llama-3-8B" , },
20
+ { " Q4_0_B16" , LLAMA_FTYPE_MOSTLY_Q4_0_B16, " 3.56G, 5.9624 +/- 0.03348 ppl @ LLaMA-v2-7B" , },
20
21
{ " Q4_1" , LLAMA_FTYPE_MOSTLY_Q4_1, " 4.78G, +0.4511 ppl @ Llama-3-8B" , },
21
22
{ " Q5_0" , LLAMA_FTYPE_MOSTLY_Q5_0, " 5.21G, +0.1316 ppl @ Llama-3-8B" , },
22
23
{ " Q5_1" , LLAMA_FTYPE_MOSTLY_Q5_1, " 5.65G, +0.1062 ppl @ Llama-3-8B" , },
@@ -46,6 +47,7 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
46
47
{ " Q5_K_M" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " 5.33G, +0.0569 ppl @ Llama-3-8B" , },
47
48
{ " Q6_K" , LLAMA_FTYPE_MOSTLY_Q6_K, " 6.14G, +0.0217 ppl @ Llama-3-8B" , },
48
49
{ " Q8_0" , LLAMA_FTYPE_MOSTLY_Q8_0, " 7.96G, +0.0026 ppl @ Llama-3-8B" , },
50
+ { " Q8_0_B16" , LLAMA_FTYPE_MOSTLY_Q8_0_B16, " 6.70G, 5.8011 +/- 0.03239 ppl @ LLaMA-v1-7B" , },
49
51
{ " F16" , LLAMA_FTYPE_MOSTLY_F16, " 14.00G, +0.0020 ppl @ Mistral-7B" , },
50
52
{ " BF16" , LLAMA_FTYPE_MOSTLY_BF16, " 14.00G, -0.0050 ppl @ Mistral-7B" , },
51
53
{ " F32" , LLAMA_FTYPE_ALL_F32, " 26.00G @ 7B" , },
0 commit comments