Skip to content

Commit 63fbbca

Browse files
committed
Update llama-quant.cpp
1 parent 32c2df2 commit 63fbbca

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

src/llama-quant.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,10 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_t
213213
// 3, 4, 5 left as Q2_K
214214
if (is_one_bit) {
215215
// 3, 4, 5, 6, 7, 8 left as 2.06 bpw
216-
if (i_layer < 9) new_type = GGML_TYPE_IQ2_XXS; // 2.06 bpw
216+
if (i_layer < 6) new_type = GGML_TYPE_Q4_K; // 4.5 bpw
217+
else if (i_layer < 12) new_type = GGML_TYPE_Q3_K; // 3.5 bpw
218+
else if (i_layer < 18) new_type = GGML_TYPE_IQ2_XXS; // 2.06 bpw
219+
else if (i_layer > 58) new_type = GGML_TYPE_IQ2_XXS; // 2.06 bpw
217220
}
218221
else {
219222
if (i_layer < 6) new_type = GGML_TYPE_Q4_K;

0 commit comments

Comments
 (0)