Skip to content

Commit ed83702

Browse files
committed
Introduce Q4_0 and Q8_0 quantizations with BF16 delta values
1 parent 86e7299 commit ed83702

File tree

9 files changed

+1357
-190
lines changed

9 files changed

+1357
-190
lines changed

ggml/include/ggml.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ extern "C" {
342342

343343
// google brain half-precision bfloat16
344344
typedef struct { uint16_t bits; } ggml_bf16_t;
345+
GGML_API ggml_bf16_t ggml_make_bf16(uint16_t val);
345346
GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
346347
GGML_API float ggml_bf16_to_fp32(ggml_bf16_t); // consider just doing << 16
347348
GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
@@ -383,6 +384,8 @@ extern "C" {
383384
GGML_TYPE_F64 = 28,
384385
GGML_TYPE_IQ1_M = 29,
385386
GGML_TYPE_BF16 = 30,
387+
GGML_TYPE_Q4_0_B16 = 31,
388+
GGML_TYPE_Q8_0_B16 = 32,
386389
GGML_TYPE_COUNT,
387390
};
388391

@@ -424,6 +427,8 @@ extern "C" {
424427
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
425428
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
426429
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
430+
GGML_FTYPE_MOSTLY_Q4_0_B16 = 25, // except 1d tensors
431+
GGML_FTYPE_MOSTLY_Q8_0_B16 = 26, // except 1d tensors
427432
};
428433

429434
// available tensor operations:

ggml/src/ggml-impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020
#if defined(_MSC_VER)
2121

2222
#define m512bh(p) p
23+
#define m128bh(p) p
2324
#define m512i(p) p
2425

2526
#else
2627

2728
#define m512bh(p) (__m512bh)(p)
29+
#define m128bh(p) (__m128bh)(p)
2830
#define m512i(p) (__m512i)(p)
2931

3032
#endif

0 commit comments

Comments (0)