Skip to content

Commit 2f13a1e

Browse files
committed
Introduce Q4_0 and Q8_0 quantizations with BF16 delta values
1 parent 7e72aa7 commit 2f13a1e

File tree

8 files changed

+1105
-69
lines changed

8 files changed

+1105
-69
lines changed

ggml/include/ggml.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -346,6 +346,7 @@ extern "C" {
346346

347347
// google brain half-precision bfloat16
348348
typedef struct { uint16_t bits; } ggml_bf16_t;
349+
GGML_API ggml_bf16_t ggml_make_bf16(uint16_t val);
349350
GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
350351
GGML_API float ggml_bf16_to_fp32(ggml_bf16_t); // consider just doing << 16
351352
GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
@@ -431,9 +432,14 @@ extern "C" {
431432
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
432433
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
433434
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
435+
<<<<<<< HEAD
434436
GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
435437
GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
436438
GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
439+
=======
440+
GGML_FTYPE_MOSTLY_Q4_0_B16 = 25, // except 1d tensors
441+
GGML_FTYPE_MOSTLY_Q8_0_B16 = 26, // except 1d tensors
442+
>>>>>>> ed837022 (Introduce Q4_0 and Q8_0 quantizations with BF16 delta values)
437443
};
438444

439445
// available tensor operations:

ggml/src/ggml-impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020
#if defined(_MSC_VER)
2121

2222
#define m512bh(p) p
23+
#define m128bh(p) p
2324
#define m512i(p) p
2425

2526
#else
2627

2728
#define m512bh(p) (__m512bh)(p)
29+
#define m128bh(p) (__m128bh)(p)
2830
#define m512i(p) (__m512i)(p)
2931

3032
#endif

0 commit comments

Comments
 (0)