@@ -1028,7 +1028,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
1028
1028
.is_quantized = true,
1029
1029
.to_float = (ggml_to_float_t) dequantize_row_q4_0_b16,
1030
1030
.from_float = quantize_row_q4_0_b16,
1031
- .from_float_reference = (ggml_from_float_t) quantize_row_q4_0_b16_ref,
1031
+ .from_float_ref = (ggml_from_float_t) quantize_row_q4_0_b16_ref,
1032
1032
.vec_dot = ggml_vec_dot_q4_0_b16_q8_0_b16,
1033
1033
.vec_dot_type = GGML_TYPE_Q8_0_B16,
1034
1034
.nrows = 1,
@@ -1040,7 +1040,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
1040
1040
.is_quantized = true,
1041
1041
.to_float = (ggml_to_float_t) dequantize_row_q8_0_b16,
1042
1042
.from_float = quantize_row_q8_0_b16,
1043
- .from_float_reference = (ggml_from_float_t) quantize_row_q8_0_b16_ref,
1043
+ .from_float_ref = (ggml_from_float_t) quantize_row_q8_0_b16_ref,
1044
1044
.vec_dot = ggml_vec_dot_q8_0_b16_q8_0_b16,
1045
1045
.vec_dot_type = GGML_TYPE_Q8_0_B16,
1046
1046
.nrows = 1,
@@ -3321,15 +3321,16 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
3321
3321
enum ggml_type wtype = GGML_TYPE_COUNT;
3322
3322
3323
3323
switch (ftype) {
3324
- <<<<<<< HEAD
3325
3324
case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break;
3326
3325
case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break;
3327
3326
case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break;
3328
3327
case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break;
3328
+ case GGML_FTYPE_MOSTLY_Q4_0_B16: wtype = GGML_TYPE_Q4_0_B16; break;
3329
3329
case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break;
3330
3330
case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
3331
3331
case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
3332
3332
case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
3333
+ case GGML_FTYPE_MOSTLY_Q8_0_B16: wtype = GGML_TYPE_Q8_0_B16; break;
3333
3334
case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
3334
3335
case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
3335
3336
case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
@@ -3349,34 +3350,6 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
3349
3350
case GGML_FTYPE_MOSTLY_Q4_0_8_8: wtype = GGML_TYPE_Q4_0_8_8; break;
3350
3351
case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
3351
3352
case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
3352
- =======
3353
- case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break;
3354
- case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break;
3355
- case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break;
3356
- case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break;
3357
- case GGML_FTYPE_MOSTLY_Q4_0_B16: wtype = GGML_TYPE_Q4_0_B16; break;
3358
- case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break;
3359
- case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
3360
- case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
3361
- case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
3362
- case GGML_FTYPE_MOSTLY_Q8_0_B16: wtype = GGML_TYPE_Q8_0_B16; break;
3363
- case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
3364
- case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
3365
- case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
3366
- case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break;
3367
- case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break;
3368
- case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
3369
- case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break;
3370
- case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
3371
- case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break;
3372
- case GGML_FTYPE_MOSTLY_IQ1_M: wtype = GGML_TYPE_IQ1_M; break;
3373
- case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
3374
- case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
3375
- case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break;
3376
- case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break;
3377
- case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
3378
- case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
3379
- >>>>>>> ed837022 (Introduce Q4_0 and Q8_0 quantizations with BF16 delta values)
3380
3353
}
3381
3354
3382
3355
GGML_ASSERT(wtype != GGML_TYPE_COUNT);
0 commit comments