Skip to content

Commit 4bca666

Browse files
committed
Introduce Q4_0 and Q8_0 quantizations with BF16 delta values
1 parent 090fca7 commit 4bca666

File tree

8 files changed

+1109
-70
lines changed

8 files changed

+1109
-70
lines changed

ggml/include/ggml.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -342,6 +342,7 @@ extern "C" {
342342

343343
// google brain half-precision bfloat16
344344
typedef struct { uint16_t bits; } ggml_bf16_t;
345+
GGML_API ggml_bf16_t ggml_make_bf16(uint16_t val);
345346
GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
346347
GGML_API float ggml_bf16_to_fp32(ggml_bf16_t); // consider just doing << 16
347348
GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
@@ -427,9 +428,14 @@ extern "C" {
427428
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
428429
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
429430
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
431+
<<<<<<< HEAD
430432
GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
431433
GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
432434
GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
435+
=======
436+
GGML_FTYPE_MOSTLY_Q4_0_B16 = 25, // except 1d tensors
437+
GGML_FTYPE_MOSTLY_Q8_0_B16 = 26, // except 1d tensors
438+
>>>>>>> ed837022 (Introduce Q4_0 and Q8_0 quantizations with BF16 delta values)
433439
};
434440

435441
// available tensor operations:

ggml/src/ggml-impl.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,11 +20,13 @@
2020
#if defined(_MSC_VER)
2121

2222
#define m512bh(p) p
23+
#define m128bh(p) p
2324
#define m512i(p) p
2425

2526
#else
2627

2728
#define m512bh(p) (__m512bh)(p)
29+
#define m128bh(p) (__m128bh)(p)
2830
#define m512i(p) (__m512i)(p)
2931

3032
#endif

0 commit comments

Comments
 (0)