Skip to content

Commit 8b8b88f

Browse files
committed
ggml-quants : restore Q2_K use of make_qp_quants
Surprisingly, replacing this use of make_qp_quants with make_qkxh_quants does not give better results in practice, so the original call is restored. This is probably because of how it interacts with make_qkx3_quants.
1 parent a411397 commit 8b8b88f

File tree

1 file changed

+3
-10
lines changed

1 file changed

+3
-10
lines changed

ggml/src/ggml-quants.c

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1519,18 +1519,11 @@ static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * G
 
     uint8_t L[QK_K];
     uint8_t Laux[16];
-    int8_t Lsaux[16];
     float mins[QK_K/16];
     float scales[QK_K/16];
     float sw[QK_K/16];
     float weight[16];
-    int8_t Ls[QK_K/16], Lm[QK_K/16];
-
-    struct k_heap_cell heap_cells_s[QK_K/16];
-    float odd_s[16];
-    struct k_heap k_heap_s;
-
-    k_heap_init_linear(&k_heap_s, 0, 15, heap_cells_s, odd_s);
+    uint8_t Ls[QK_K/16], Lm[QK_K/16];
 
     for (int i = 0; i < nb; i++) {
         memset(sw, 0, QK_K/16*sizeof(float));
@@ -1545,8 +1538,8 @@ static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * G
         }
 
         float dm, mm;
-        dm = make_qkxh_quants(QK_K/16, scales, sw, Ls, Lsaux, &k_heap_s, false);
-        mm = make_qkxh_quants(QK_K/16, mins, sw, Lm, Lsaux, &k_heap_s, false);
+        dm = make_qp_quants(QK_K/16, 15, scales, Ls, sw);
+        mm = make_qp_quants(QK_K/16, 15, mins, Lm, sw);
 
         y[i].d = GGML_FP32_TO_FP16(dm);
         y[i].dmin = GGML_FP32_TO_FP16(mm);

0 commit comments

Comments
 (0)