@@ -1041,7 +1041,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
1041
1041
.is_quantized = true,
1042
1042
.to_float = (ggml_to_float_t) dequantize_row_q4_0_b16,
1043
1043
.from_float = quantize_row_q4_0_b16,
1044
- .from_float_reference = (ggml_from_float_t) quantize_row_q4_0_b16_ref,
1044
+ .from_float_ref = (ggml_from_float_t) quantize_row_q4_0_b16_ref,
1045
1045
.vec_dot = ggml_vec_dot_q4_0_b16_q8_0_b16,
1046
1046
.vec_dot_type = GGML_TYPE_Q8_0_B16,
1047
1047
.nrows = 1,
@@ -1053,7 +1053,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
1053
1053
.is_quantized = true,
1054
1054
.to_float = (ggml_to_float_t) dequantize_row_q8_0_b16,
1055
1055
.from_float = quantize_row_q8_0_b16,
1056
- .from_float_reference = (ggml_from_float_t) quantize_row_q8_0_b16_ref,
1056
+ .from_float_ref = (ggml_from_float_t) quantize_row_q8_0_b16_ref,
1057
1057
.vec_dot = ggml_vec_dot_q8_0_b16_q8_0_b16,
1058
1058
.vec_dot_type = GGML_TYPE_Q8_0_B16,
1059
1059
.nrows = 1,
@@ -3334,15 +3334,16 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
3334
3334
enum ggml_type wtype = GGML_TYPE_COUNT;
3335
3335
3336
3336
switch (ftype) {
3337
- <<<<<<< HEAD
3338
3337
case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break;
3339
3338
case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break;
3340
3339
case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break;
3341
3340
case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break;
3341
+ case GGML_FTYPE_MOSTLY_Q4_0_B16: wtype = GGML_TYPE_Q4_0_B16; break;
3342
3342
case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break;
3343
3343
case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
3344
3344
case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
3345
3345
case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
3346
+ case GGML_FTYPE_MOSTLY_Q8_0_B16: wtype = GGML_TYPE_Q8_0_B16; break;
3346
3347
case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
3347
3348
case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
3348
3349
case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
@@ -3362,34 +3363,6 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
3362
3363
case GGML_FTYPE_MOSTLY_Q4_0_8_8: wtype = GGML_TYPE_Q4_0_8_8; break;
3363
3364
case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
3364
3365
case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
3365
- =======
3366
- case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break;
3367
- case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break;
3368
- case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break;
3369
- case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break;
3370
- case GGML_FTYPE_MOSTLY_Q4_0_B16: wtype = GGML_TYPE_Q4_0_B16; break;
3371
- case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break;
3372
- case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
3373
- case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
3374
- case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
3375
- case GGML_FTYPE_MOSTLY_Q8_0_B16: wtype = GGML_TYPE_Q8_0_B16; break;
3376
- case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
3377
- case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
3378
- case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
3379
- case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break;
3380
- case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break;
3381
- case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
3382
- case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break;
3383
- case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
3384
- case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break;
3385
- case GGML_FTYPE_MOSTLY_IQ1_M: wtype = GGML_TYPE_IQ1_M; break;
3386
- case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
3387
- case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
3388
- case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break;
3389
- case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break;
3390
- case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
3391
- case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
3392
- >>>>>>> ed837022 (Introduce Q4_0 and Q8_0 quantizations with BF16 delta values)
3393
3366
}
3394
3367
3395
3368
GGML_ASSERT(wtype != GGML_TYPE_COUNT);
0 commit comments