@@ -676,14 +676,14 @@ typedef struct {
676
676
} block_q4_3 ;
677
677
static_assert (sizeof (block_q4_3 ) == 2 * sizeof (ggml_fp16_t ) + QK4_3 / 2 , "wrong q4_3 block size/padding" );
678
678
679
- #define QK5_0 32
679
+ #define QK5_1 32
680
680
// 5-bit quantization block (renamed from q5_0 to q5_1 by this change).
// One block holds QK5_1 quantized values. Each value is split into a low
// nibble stored in qs and a single high ("5-th") bit stored in qh.
// The quantizer writes the block minimum into m, and the dequantizer
// reconstructs each value as q * d + m.
typedef struct {
681
681
ggml_fp16_t d ; // delta: per-block scale, stored as fp16
682
682
ggml_fp16_t m ; // min: per-block offset added back on dequantization, stored as fp16
683
683
uint32_t qh ; // 5-th bit of quants: one bit per value in the block
684
- uint8_t qs [QK5_0 / 2 ]; // nibbles / quants
685
- } block_q5_0 ;
686
- static_assert (sizeof (block_q5_0 ) == 2 * sizeof (ggml_fp16_t ) + sizeof (uint32_t ) + QK5_0 / 2 , "wrong q5_0 block size/padding" );
684
+ uint8_t qs [QK5_1 / 2 ]; // nibbles / quants: QK5_1 / 2 bytes for QK5_1 values
685
+ } block_q5_1 ;
686
// Compile-time check that the struct size equals the sum of its fields (no padding).
+ static_assert (sizeof (block_q5_1 ) == 2 * sizeof (ggml_fp16_t ) + sizeof (uint32_t ) + QK5_1 / 2 , "wrong q5_1 block size/padding" );
687
687
688
688
#define QK8_0 32
689
689
typedef struct {
@@ -1300,16 +1300,16 @@ static void quantize_row_q4_3(const float * restrict x, void * restrict vy, int
1300
1300
quantize_row_q4_3_reference (x , y , k );
1301
1301
}
1302
1302
1303
- static void quantize_row_q5_0_reference (const float * restrict x , block_q5_0 * restrict y , int k ) {
1304
- assert (k % QK5_0 == 0 );
1305
- const int nb = k / QK5_0 ;
1303
+ static void quantize_row_q5_1_reference (const float * restrict x , block_q5_1 * restrict y , int k ) {
1304
+ assert (k % QK5_1 == 0 );
1305
+ const int nb = k / QK5_1 ;
1306
1306
1307
1307
for (int i = 0 ; i < nb ; i ++ ) {
1308
1308
float min = FLT_MAX ;
1309
1309
float max = - FLT_MAX ;
1310
1310
1311
- for (int l = 0 ; l < QK5_0 ; l ++ ) {
1312
- const float v = x [i * QK5_0 + l ];
1311
+ for (int l = 0 ; l < QK5_1 ; l ++ ) {
1312
+ const float v = x [i * QK5_1 + l ];
1313
1313
if (v < min ) min = v ;
1314
1314
if (v > max ) max = v ;
1315
1315
}
@@ -1321,9 +1321,9 @@ static void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * r
1321
1321
y [i ].m = GGML_FP32_TO_FP16 (min );
1322
1322
y [i ].qh = 0 ;
1323
1323
1324
- for (int l = 0 ; l < QK5_0 ; l += 2 ) {
1325
- const float v0 = (x [i * QK5_0 + l + 0 ] - min )* id ;
1326
- const float v1 = (x [i * QK5_0 + l + 1 ] - min )* id ;
1324
+ for (int l = 0 ; l < QK5_1 ; l += 2 ) {
1325
+ const float v0 = (x [i * QK5_1 + l + 0 ] - min )* id ;
1326
+ const float v1 = (x [i * QK5_1 + l + 1 ] - min )* id ;
1327
1327
1328
1328
const uint32_t vi0 = (int ) (v0 + 0.5f );
1329
1329
const uint32_t vi1 = (int ) (v1 + 0.5f );
@@ -1337,12 +1337,12 @@ static void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * r
1337
1337
}
1338
1338
}
1339
1339
1340
// Type-erased entry point for q5_1 row quantization (renamed from q5_0 by this change).
//   x  - input floats; k of them are consumed
//   vy - destination buffer, reinterpreted as an array of block_q5_1
//   k  - element count; asserted to be a multiple of QK5_1
// All work is delegated to quantize_row_q5_1_reference.
- static void quantize_row_q5_0 (const float * restrict x , void * restrict vy , int k ) {
1341
- assert (k % QK5_0 == 0 );
1340
+ static void quantize_row_q5_1 (const float * restrict x , void * restrict vy , int k ) {
1341
+ assert (k % QK5_1 == 0 );
1342
1342
1343
- block_q5_0 * restrict y = vy ;
1343
// implicit void* -> block_q5_1* conversion; no data is copied
+ block_q5_1 * restrict y = vy ;
1344
1344
1345
- quantize_row_q5_0_reference (x , y , k );
1345
+ quantize_row_q5_1_reference (x , y , k );
1346
1346
}
1347
1347
1348
1348
// reference implementation for deterministic creation of model files
@@ -1861,11 +1861,11 @@ static void dequantize_row_q4_3(const void * restrict vx, float * restrict y, in
1861
1861
}
1862
1862
}
1863
1863
1864
- static void dequantize_row_q5_0 (const void * restrict vx , float * restrict y , int k ) {
1865
- assert (k % QK5_0 == 0 );
1866
- const int nb = k / QK5_0 ;
1864
+ static void dequantize_row_q5_1 (const void * restrict vx , float * restrict y , int k ) {
1865
+ assert (k % QK5_1 == 0 );
1866
+ const int nb = k / QK5_1 ;
1867
1867
1868
- const block_q5_0 * restrict x = vx ;
1868
+ const block_q5_1 * restrict x = vx ;
1869
1869
1870
1870
for (int i = 0 ; i < nb ; i ++ ) {
1871
1871
const float d = GGML_FP16_TO_FP32 (x [i ].d );
@@ -1875,7 +1875,7 @@ static void dequantize_row_q5_0(const void * restrict vx, float * restrict y, in
1875
1875
1876
1876
const uint32_t qh = x [i ].qh ;
1877
1877
1878
- for (int l = 0 ; l < QK5_0 ; l += 2 ) {
1878
+ for (int l = 0 ; l < QK5_1 ; l += 2 ) {
1879
1879
const uint8_t vi = pp [l /2 ];
1880
1880
1881
1881
// extract the 5-th bit from qh
@@ -1888,11 +1888,11 @@ static void dequantize_row_q5_0(const void * restrict vx, float * restrict y, in
1888
1888
const float v0 = vi0 * d + m ;
1889
1889
const float v1 = vi1 * d + m ;
1890
1890
1891
- y [i * QK5_0 + l + 0 ] = v0 ;
1892
- y [i * QK5_0 + l + 1 ] = v1 ;
1891
+ y [i * QK5_1 + l + 0 ] = v0 ;
1892
+ y [i * QK5_1 + l + 1 ] = v1 ;
1893
1893
1894
- assert (!isnan (y [i * QK5_0 + l + 0 ]));
1895
- assert (!isnan (y [i * QK5_0 + l + 1 ]));
1894
+ assert (!isnan (y [i * QK5_1 + l + 0 ]));
1895
+ assert (!isnan (y [i * QK5_1 + l + 1 ]));
1896
1896
}
1897
1897
}
1898
1898
}
@@ -1918,7 +1918,7 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
1918
1918
// Forward declarations of the quantized dot-product kernels. They are declared
// here so the quantize_fns table that follows can reference them before their
// definitions, which appear later in the file.
static void ggml_vec_dot_q4_1_q8_1 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
1919
1919
static void ggml_vec_dot_q4_2_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
1920
1920
static void ggml_vec_dot_q4_3_q8_1 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
1921
- static void ggml_vec_dot_q5_0_q8_1 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
1921
+ static void ggml_vec_dot_q5_1_q8_1 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
1922
1922
static void ggml_vec_dot_q8_0_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
1923
1923
1924
1924
static const quantize_fns_t quantize_fns [GGML_TYPE_COUNT ] = {
@@ -1954,12 +1954,12 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
1954
1954
.vec_dot_q = ggml_vec_dot_q4_3_q8_1 ,
1955
1955
.vec_dot_type = GGML_TYPE_Q8_1 ,
1956
1956
},
1957
- [GGML_TYPE_Q5_0 ] = {
1958
- .dequantize_row_q = dequantize_row_q5_0 ,
1959
- .quantize_row_q = quantize_row_q5_0 ,
1960
- .quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q5_0_reference ,
1957
+ [GGML_TYPE_Q5_1 ] = {
1958
+ .dequantize_row_q = dequantize_row_q5_1 ,
1959
+ .quantize_row_q = quantize_row_q5_1 ,
1960
+ .quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q5_1_reference ,
1961
1961
.quantize_row_q_dot = quantize_row_q8_1 ,
1962
- .vec_dot_q = ggml_vec_dot_q5_0_q8_1 ,
1962
+ .vec_dot_q = ggml_vec_dot_q5_1_q8_1 ,
1963
1963
.vec_dot_type = GGML_TYPE_Q8_1 ,
1964
1964
},
1965
1965
[GGML_TYPE_Q8_0 ] = {
@@ -3169,14 +3169,14 @@ static void ggml_vec_dot_q4_3_q8_1(const int n, float * restrict s, const void *
3169
3169
#endif
3170
3170
}
3171
3171
3172
- static void ggml_vec_dot_q5_0_q8_1 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy ) {
3172
+ static void ggml_vec_dot_q5_1_q8_1 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy ) {
3173
3173
const int nb = n / QK8_1 ;
3174
3174
3175
3175
assert (n % QK8_1 == 0 );
3176
3176
assert (nb % 2 == 0 );
3177
- assert (QK8_1 == QK5_0 );
3177
+ assert (QK8_1 == QK5_1 );
3178
3178
3179
- const block_q5_0 * restrict x = vx ;
3179
+ const block_q5_1 * restrict x = vx ;
3180
3180
const block_q8_1 * restrict y = vy ;
3181
3181
3182
3182
#if defined(__ARM_NEON )
@@ -3187,7 +3187,7 @@ static void ggml_vec_dot_q5_0_q8_1(const int n, float * restrict s, const void *
3187
3187
uint64_t tmp [4 ];
3188
3188
3189
3189
for (int i = 0 ; i < nb ; ++ i ) {
3190
- const block_q5_0 * restrict x0 = & x [i ];
3190
+ const block_q5_1 * restrict x0 = & x [i ];
3191
3191
const block_q8_1 * restrict y0 = & y [i ];
3192
3192
3193
3193
summs += GGML_FP16_TO_FP32 (x0 -> m ) * (y0 -> s0 + y0 -> s1 );
@@ -3646,7 +3646,7 @@ static const int GGML_BLCK_SIZE[GGML_TYPE_COUNT] = {
3646
3646
[GGML_TYPE_Q4_1 ] = QK4_1 ,
3647
3647
[GGML_TYPE_Q4_2 ] = QK4_2 ,
3648
3648
[GGML_TYPE_Q4_3 ] = QK4_3 ,
3649
- [GGML_TYPE_Q5_0 ] = QK5_0 ,
3649
+ [GGML_TYPE_Q5_1 ] = QK5_1 ,
3650
3650
[GGML_TYPE_Q8_0 ] = QK8_0 ,
3651
3651
[GGML_TYPE_Q8_1 ] = QK8_1 ,
3652
3652
[GGML_TYPE_I8 ] = 1 ,
@@ -3662,7 +3662,7 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
3662
3662
[GGML_TYPE_Q4_1 ] = sizeof (block_q4_1 ),
3663
3663
[GGML_TYPE_Q4_2 ] = sizeof (block_q4_2 ),
3664
3664
[GGML_TYPE_Q4_3 ] = sizeof (block_q4_3 ),
3665
- [GGML_TYPE_Q5_0 ] = sizeof (block_q5_0 ),
3665
+ [GGML_TYPE_Q5_1 ] = sizeof (block_q5_1 ),
3666
3666
[GGML_TYPE_Q8_0 ] = sizeof (block_q8_0 ),
3667
3667
[GGML_TYPE_Q8_1 ] = sizeof (block_q8_1 ),
3668
3668
[GGML_TYPE_I8 ] = sizeof (int8_t ),
@@ -3679,7 +3679,7 @@ static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
3679
3679
[GGML_TYPE_Q4_1 ] = "q4_1" ,
3680
3680
[GGML_TYPE_Q4_2 ] = "q4_2" ,
3681
3681
[GGML_TYPE_Q4_3 ] = "q4_3" ,
3682
- [GGML_TYPE_Q5_0 ] = "q5_0 " ,
3682
+ [GGML_TYPE_Q5_1 ] = "q5_1 " ,
3683
3683
[GGML_TYPE_Q8_0 ] = "q8_0" ,
3684
3684
[GGML_TYPE_Q8_1 ] = "q8_1" ,
3685
3685
[GGML_TYPE_I8 ] = "i8" ,
@@ -3695,7 +3695,7 @@ static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = {
3695
3695
[GGML_TYPE_Q4_1 ] = true,
3696
3696
[GGML_TYPE_Q4_2 ] = true,
3697
3697
[GGML_TYPE_Q4_3 ] = true,
3698
- [GGML_TYPE_Q5_0 ] = true,
3698
+ [GGML_TYPE_Q5_1 ] = true,
3699
3699
[GGML_TYPE_Q8_0 ] = true,
3700
3700
[GGML_TYPE_Q8_1 ] = true,
3701
3701
[GGML_TYPE_I8 ] = false,
@@ -6923,7 +6923,7 @@ static void ggml_compute_forward_add(
6923
6923
case GGML_TYPE_Q4_1 :
6924
6924
case GGML_TYPE_Q4_2 :
6925
6925
case GGML_TYPE_Q4_3 :
6926
- case GGML_TYPE_Q5_0 :
6926
+ case GGML_TYPE_Q5_1 :
6927
6927
case GGML_TYPE_Q8_0 :
6928
6928
{
6929
6929
ggml_compute_forward_add_q_f32 (params , src0 , src1 , dst );
@@ -8412,8 +8412,8 @@ static void ggml_compute_forward_mul_mat_q_f32(
8412
8412
else if (type == GGML_TYPE_Q4_3 ) {
8413
8413
dequantize_row_q_cuda = dequantize_row_q4_3_cuda ;
8414
8414
}
8415
- else if (type == GGML_TYPE_Q5_0 ) {
8416
- dequantize_row_q_cuda = dequantize_row_q5_0_cuda ;
8415
+ else if (type == GGML_TYPE_Q5_1 ) {
8416
+ dequantize_row_q_cuda = dequantize_row_q5_1_cuda ;
8417
8417
}
8418
8418
else if (type == GGML_TYPE_Q8_0 ) {
8419
8419
dequantize_row_q_cuda = dequantize_row_q8_0_cuda ;
@@ -8573,7 +8573,7 @@ static void ggml_compute_forward_mul_mat(
8573
8573
case GGML_TYPE_Q4_1 :
8574
8574
case GGML_TYPE_Q4_2 :
8575
8575
case GGML_TYPE_Q4_3 :
8576
- case GGML_TYPE_Q5_0 :
8576
+ case GGML_TYPE_Q5_1 :
8577
8577
case GGML_TYPE_Q8_0 :
8578
8578
case GGML_TYPE_Q8_1 :
8579
8579
{
@@ -8804,7 +8804,7 @@ static void ggml_compute_forward_get_rows(
8804
8804
case GGML_TYPE_Q4_1 :
8805
8805
case GGML_TYPE_Q4_2 :
8806
8806
case GGML_TYPE_Q4_3 :
8807
- case GGML_TYPE_Q5_0 :
8807
+ case GGML_TYPE_Q5_1 :
8808
8808
case GGML_TYPE_Q8_0 :
8809
8809
case GGML_TYPE_Q8_1 :
8810
8810
{
@@ -12598,17 +12598,17 @@ size_t ggml_quantize_q4_3(const float * src, void * dst, int n, int k, int64_t *
12598
12598
return (n /QK4_3 * sizeof (block_q4_3 ));
12599
12599
}
12600
12600
12601
- size_t ggml_quantize_q5_0 (const float * src , void * dst , int n , int k , int64_t * hist ) {
12602
- assert (k % QK5_0 == 0 );
12603
- const int nb = k / QK5_0 ;
12601
+ size_t ggml_quantize_q5_1 (const float * src , void * dst , int n , int k , int64_t * hist ) {
12602
+ assert (k % QK5_1 == 0 );
12603
+ const int nb = k / QK5_1 ;
12604
12604
12605
12605
for (int j = 0 ; j < n ; j += k ) {
12606
- block_q5_0 * restrict y = (block_q5_0 * )dst + j /QK5_0 ;
12606
+ block_q5_1 * restrict y = (block_q5_1 * )dst + j /QK5_1 ;
12607
12607
12608
- quantize_row_q5_0_reference (src + j , y , k );
12608
+ quantize_row_q5_1_reference (src + j , y , k );
12609
12609
12610
12610
for (int i = 0 ; i < nb ; i ++ ) {
12611
- for (int l = 0 ; l < QK5_0 ; l += 2 ) {
12611
+ for (int l = 0 ; l < QK5_1 ; l += 2 ) {
12612
12612
const uint8_t vh0 = ((y [i ].qh & (1 << (l + 0 ))) >> (l + 0 )) << 4 ;
12613
12613
const uint8_t vh1 = ((y [i ].qh & (1 << (l + 1 ))) >> (l + 1 )) << 4 ;
12614
12614
@@ -12622,7 +12622,7 @@ size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t *
12622
12622
}
12623
12623
}
12624
12624
12625
- return (n /QK5_0 * sizeof (block_q5_0 ));
12625
+ return (n /QK5_1 * sizeof (block_q5_1 ));
12626
12626
}
12627
12627
12628
12628
size_t ggml_quantize_q8_0 (const float * src , void * dst , int n , int k , int64_t * hist ) {
@@ -12673,11 +12673,11 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
12673
12673
block_q4_3 * block = (block_q4_3 * )dst + start / QK4_3 ;
12674
12674
result = ggml_quantize_q4_3 (src + start , block , n , n , hist );
12675
12675
} break ;
12676
- case GGML_TYPE_Q5_0 :
12676
+ case GGML_TYPE_Q5_1 :
12677
12677
{
12678
- GGML_ASSERT (start % QK5_0 == 0 );
12679
- block_q5_0 * block = (block_q5_0 * )dst + start / QK5_0 ;
12680
- result = ggml_quantize_q5_0 (src + start , block , n , n , hist );
12678
+ GGML_ASSERT (start % QK5_1 == 0 );
12679
+ block_q5_1 * block = (block_q5_1 * )dst + start / QK5_1 ;
12680
+ result = ggml_quantize_q5_1 (src + start , block , n , n , hist );
12681
12681
} break ;
12682
12682
case GGML_TYPE_Q8_0 :
12683
12683
{
0 commit comments