From 2ef86e7213c20bff9a217d87fda92d06d445c473 Mon Sep 17 00:00:00 2001
From: Justine Tunney <jtunney@mozilla.com>
Date: Wed, 24 Apr 2024 16:59:30 -0700
Subject: [PATCH] Clamp out of range values in K quantizer

This assertion fails when quantizing Mixtral 8x7b as Q5_K_M, because I
used `convert.py --outtype f32` and the Mixtral weights use bf16 which
has a much larger exponent range than the K quantizer is expecting. If
--outtype f16 is used then the assert doesn't fail.

See ggerganov/llama.cpp#2982
---
 ggml-quants.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-quants.c b/ggml-quants.c
index 11e11c2196c2c..66c3e7a572f01 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -1023,7 +1023,7 @@ void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int6
 // ===================== Helper functions
 //
 static inline int nearest_int(float fval) {
-    assert(fval <= 4194303.f);
+    fval = fminf(fval, 4194303.f);
     float val = fval + 12582912.f;
     int i; memcpy(&i, &val, sizeof(int));
     return (i & 0x007fffff) - 0x00400000;