Skip to content

Commit 85fb55a

Browse files
committed
correct settings
1 parent cfc9544 commit 85fb55a

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

src/llama.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16654,7 +16654,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             new_type = (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) ? GGML_TYPE_Q4_K : GGML_TYPE_IQ3_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
-            if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K;
+            if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K;
             else if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
             else new_type = GGML_TYPE_IQ3_S;
         }
@@ -16859,24 +16859,24 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             else new_type = difquant_half_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ2_S : GGML_TYPE_IQ2_XS;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
-            if (qs.model.hparams.n_gqa() >= 8 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
+            if (qs.model.hparams.n_gqa() >= 8 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ4_XS;
             else if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_IQ3_S;
             else new_type = GGML_TYPE_IQ3_XXS;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
-            if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K;
+            if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K;
             else if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2)
                 new_type = difquant_first_last_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
             else new_type = difquant_fl_more_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
-            if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K;
+            if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K;
             else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
                 new_type = difquant_five_eights_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
             else new_type = GGML_TYPE_IQ3_S;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
-            if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q6_K;
+            if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K;
             else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
                 new_type = difquant_first_last_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
             else new_type = difquant_fl_more_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;

0 commit comments

Comments
 (0)