@@ -952,7 +952,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
952
952
.is_quantized = true,
953
953
.to_float = (ggml_to_float_t) dequantize_row_q4_0_b16,
954
954
.from_float = quantize_row_q4_0_b16,
955
- .from_float_reference = (ggml_from_float_t) quantize_row_q4_0_b16_ref,
955
+ .from_float_ref = (ggml_from_float_t) quantize_row_q4_0_b16_ref,
956
956
.vec_dot = ggml_vec_dot_q4_0_b16_q8_0_b16,
957
957
.vec_dot_type = GGML_TYPE_Q8_0_B16,
958
958
.nrows = 1,
@@ -964,7 +964,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
964
964
.is_quantized = true,
965
965
.to_float = (ggml_to_float_t) dequantize_row_q8_0_b16,
966
966
.from_float = quantize_row_q8_0_b16,
967
- .from_float_reference = (ggml_from_float_t) quantize_row_q8_0_b16_ref,
967
+ .from_float_ref = (ggml_from_float_t) quantize_row_q8_0_b16_ref,
968
968
.vec_dot = ggml_vec_dot_q8_0_b16_q8_0_b16,
969
969
.vec_dot_type = GGML_TYPE_Q8_0_B16,
970
970
.nrows = 1,
@@ -3245,15 +3245,16 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
3245
3245
enum ggml_type wtype = GGML_TYPE_COUNT;
3246
3246
3247
3247
switch (ftype) {
3248
- <<<<<<< HEAD
3249
3248
case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break;
3250
3249
case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break;
3251
3250
case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break;
3252
3251
case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break;
3252
+ case GGML_FTYPE_MOSTLY_Q4_0_B16: wtype = GGML_TYPE_Q4_0_B16; break;
3253
3253
case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break;
3254
3254
case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
3255
3255
case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
3256
3256
case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
3257
+ case GGML_FTYPE_MOSTLY_Q8_0_B16: wtype = GGML_TYPE_Q8_0_B16; break;
3257
3258
case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
3258
3259
case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
3259
3260
case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
@@ -3273,34 +3274,6 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
3273
3274
case GGML_FTYPE_MOSTLY_Q4_0_8_8: wtype = GGML_TYPE_Q4_0_8_8; break;
3274
3275
case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
3275
3276
case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
3276
- =======
3277
- case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break;
3278
- case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break;
3279
- case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break;
3280
- case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break;
3281
- case GGML_FTYPE_MOSTLY_Q4_0_B16: wtype = GGML_TYPE_Q4_0_B16; break;
3282
- case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break;
3283
- case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
3284
- case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
3285
- case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
3286
- case GGML_FTYPE_MOSTLY_Q8_0_B16: wtype = GGML_TYPE_Q8_0_B16; break;
3287
- case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
3288
- case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
3289
- case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
3290
- case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break;
3291
- case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break;
3292
- case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
3293
- case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break;
3294
- case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
3295
- case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break;
3296
- case GGML_FTYPE_MOSTLY_IQ1_M: wtype = GGML_TYPE_IQ1_M; break;
3297
- case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
3298
- case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
3299
- case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break;
3300
- case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break;
3301
- case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
3302
- case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
3303
- >>>>>>> ed837022 (Introduce Q4_0 and Q8_0 quantizations with BF16 delta values)
3304
3277
}
3305
3278
3306
3279
GGML_ASSERT(wtype != GGML_TYPE_COUNT);
0 commit comments