From 2ef86e7213c20bff9a217d87fda92d06d445c473 Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Wed, 24 Apr 2024 16:59:30 -0700
Subject: [PATCH] Clamp out of range values in K quantizer

This assertion fails when quantizing Mixtral 8x7b as Q5_K_M, because I
used `convert.py --outtype f32` and the Mixtral weights use bf16 which
has a much larger exponent range than the K quantizer is expecting. If
--outtype f16 is used then the assert doesn't fail.

See ggerganov/llama.cpp#2982
---
 ggml-quants.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-quants.c b/ggml-quants.c
index 11e11c2196c2c..66c3e7a572f01 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -1023,7 +1023,7 @@ void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int6
 // ===================== Helper functions
 //
 static inline int nearest_int(float fval) {
-    assert(fval <= 4194303.f);
+    fval = fminf(fval, 4194303.f);
     float val = fval + 12582912.f;
     int i; memcpy(&i, &val, sizeof(int));
     return (i & 0x007fffff) - 0x00400000;