Skip to content

Commit 32c2df2

Browse files
committed
Update llama-quant.cpp
1 parent 4d87133 commit 32c2df2

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

src/llama-quant.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -216,8 +216,8 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_t
216216
if (i_layer < 9) new_type = GGML_TYPE_IQ2_XXS; // 2.06 bpw
217217
}
218218
else {
219-
if (i_layer < 6) new_type = GGML_TYPE_Q3_K;
220-
else new_type = GGML_TYPE_Q2_K;
219+
if (i_layer < 6) new_type = GGML_TYPE_Q4_K;
220+
else new_type = GGML_TYPE_Q3_K;
221221
}
222222
++qs.i_ffn_down;
223223
}
@@ -338,7 +338,8 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_t
338338
if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) {
339339
// Layers 0, 1, 2 are Dense so Q4_K
340340
// 3, 4, 5 left as Q3_K
341-
new_type = GGML_TYPE_Q3_K;
341+
if (i_layer < 6) new_type = GGML_TYPE_Q4_K;
342+
else new_type = GGML_TYPE_Q3_K;
342343
}
343344
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S) {
344345
if (i_layer < n_layer/8) new_type = GGML_TYPE_Q4_K;

0 commit comments

Comments
 (0)