@@ -1519,18 +1519,11 @@ static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * G
1519
1519
1520
1520
uint8_t L [QK_K ];
1521
1521
uint8_t Laux [16 ];
1522
- int8_t Lsaux [16 ];
1523
1522
float mins [QK_K /16 ];
1524
1523
float scales [QK_K /16 ];
1525
1524
float sw [QK_K /16 ];
1526
1525
float weight [16 ];
1527
- int8_t Ls [QK_K /16 ], Lm [QK_K /16 ];
1528
-
1529
- struct k_heap_cell heap_cells_s [QK_K /16 ];
1530
- float odd_s [16 ];
1531
- struct k_heap k_heap_s ;
1532
-
1533
- k_heap_init_linear (& k_heap_s , 0 , 15 , heap_cells_s , odd_s );
1526
+ uint8_t Ls [QK_K /16 ], Lm [QK_K /16 ];
1534
1527
1535
1528
for (int i = 0 ; i < nb ; i ++ ) {
1536
1529
memset (sw , 0 , QK_K /16 * sizeof (float ));
@@ -1545,8 +1538,8 @@ static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * G
1545
1538
}
1546
1539
1547
1540
float dm , mm ;
1548
- dm = make_qkxh_quants (QK_K /16 , scales , sw , Ls , Lsaux , & k_heap_s , false );
1549
- mm = make_qkxh_quants (QK_K /16 , mins , sw , Lm , Lsaux , & k_heap_s , false );
1541
+ dm = make_qp_quants (QK_K /16 , 15 , scales , Ls , sw );
1542
+ mm = make_qp_quants (QK_K /16 , 15 , mins , Lm , sw );
1550
1543
1551
1544
y [i ].d = GGML_FP32_TO_FP16 (dm );
1552
1545
y [i ].dmin = GGML_FP32_TO_FP16 (mm );
0 commit comments