From 6a52bfe33292e23e5dc501c6b71f692ca45e277b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Mon, 10 Jun 2024 04:26:55 +0200
Subject: [PATCH 1/8] add truncate_bf16

---
 gguf-py/gguf/quants.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index b22eec1661ce7..31f689dd773fd 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -35,6 +35,12 @@ def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
     return n.astype(np.int16)
 
 
+# for fp32 values that are just extended bf16
+def __truncate_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
+    n = n.astype(np.float32, copy=False).view(np.uint32) >> 16
+    return n.astype(np.uint16)
+
+
 # This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
 def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
     rows = arr.reshape((-1, arr.shape[-1]))
@@ -62,6 +68,20 @@ def quantize_bf16(n: np.ndarray):
         return __quantize_bf16_array(n)
 
 
+def __truncate_bf16_array(n: np.ndarray) -> np.ndarray:
+    return __apply_over_grouped_rows(__truncate_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape)
+
+
+__truncate_bf16_lazy = LazyNumpyTensor._wrap_fn(__truncate_bf16_array, meta_noop=np.uint16)
+
+
+def truncate_bf16(n: np.ndarray):
+    if type(n) is LazyNumpyTensor:
+        return __truncate_bf16_lazy(n)
+    else:
+        return __truncate_bf16_array(n)
+
+
 __q8_block_size, __q8_type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q8_0]

From 46054d1aab479bd2b90516572fa895dc9703b055 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Mon, 10 Jun 2024 04:30:47 +0200
Subject: [PATCH 2/8] truncate intermediate fp32 if converting bf16 to bf16

---
 convert-hf-to-gguf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 025405a2c6ce1..ac6363a482a8e 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -295,8 +295,8 @@ def write_tensors(self):
 
                 if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                     if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
-                        data = gguf.quantize_bf16(data)
-                        assert data.dtype == np.int16
+                        data = gguf.truncate_bf16(data) if old_dtype == torch.bfloat16 else gguf.quantize_bf16(data)
+                        assert data.dtype in (np.int16, np.uint16)
                         data_qtype = gguf.GGMLQuantizationType.BF16
 
                     elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data):

From 069369f3fe6591d5073bd19c67e4eef23d9dafc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Fri, 14 Jun 2024 11:06:21 +0200
Subject: [PATCH 3/8] fix masking in __compute_fp32_to_bf16

---
 gguf-py/gguf/quants.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index 31f689dd773fd..861e87daa9170 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -25,14 +25,14 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
 
 # same as ggml_compute_fp32_to_bf16 in ggml-impl.h
 def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
-    n = n.astype(np.float32, copy=False).view(np.int32)
+    n = n.astype(np.float32, copy=False).view(np.uint32)
     # force nan to quiet
-    n = np.where((n & 0x7fffffff) > 0x7f800000, (n & 0xffff0000) | (64 << 16), n)
+    n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | (64 << 16), n)
     # flush subnormals to zero
-    n = np.where((n & 0x7f800000) == 0, n & 0x80000000, n)
+    n = np.where((n & 0x7f800000) == 0, n & np.uint32(0x80000000), n)
     # round to nearest even
     n = (n + (0x7fff + ((n >> 16) & 1))) >> 16
-    return n.astype(np.int16)
+    return n.astype(np.uint16)
 
 
@@ -55,10 +55,10 @@ def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np
 
 
 def __quantize_bf16_array(n: np.ndarray) -> np.ndarray:
-    return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.int16, oshape=n.shape)
+    return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape)
 
 
-__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(__quantize_bf16_array, meta_noop=np.int16)
+__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(__quantize_bf16_array, meta_noop=np.uint16)
 
 
 def quantize_bf16(n: np.ndarray):

From 225ec48fe579337efb8fd931e19dc5c6f6078ae4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Fri, 14 Jun 2024 13:32:48 +0200
Subject: [PATCH 4/8] np.int16 no longer used

---
 convert-hf-to-gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index ac6363a482a8e..6a3fe2867bf05 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -296,7 +296,7 @@ def write_tensors(self):
                 if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                     if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
                         data = gguf.truncate_bf16(data) if old_dtype == torch.bfloat16 else gguf.quantize_bf16(data)
-                        assert data.dtype in (np.int16, np.uint16)
+                        assert data.dtype == np.uint16
                         data_qtype = gguf.GGMLQuantizationType.BF16
 
                     elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data):

From e8e2b7e03fa1ae22ccca3179988748b27e7c8ef6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Thu, 27 Jun 2024 19:04:49 +0200
Subject: [PATCH 5/8] missing cast and additional numpy 2.x fix

---
 gguf-py/gguf/quants.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index 861e87daa9170..8feec2958780a 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -27,11 +27,11 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
 def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
     n = n.astype(np.float32, copy=False).view(np.uint32)
     # force nan to quiet
-    n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | (64 << 16), n)
+    n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
     # flush subnormals to zero
     n = np.where((n & 0x7f800000) == 0, n & np.uint32(0x80000000), n)
     # round to nearest even
-    n = (n + (0x7fff + ((n >> 16) & 1))) >> 16
+    n = (np.uint64(n) + (0x7fff + ((n >> 16) & 1))) >> 16
     return n.astype(np.uint16)

From 5b67a6cfbfd363e3bd6ecf0373fb8491a6f857e2 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Fri, 28 Jun 2024 16:47:55 -0400
Subject: [PATCH 6/8] ggml-impl : do not flush bf16 subnormals to zero

* ggml : add reference fp32 to bf16 conversion

The fast version is no longer equivalent for all platforms
because of the handling of subnormal values.

* gguf-py : remove flush to zero for bf16 subnormals

* gguf-py : remove float32 truncation to bf16

Rounding achieves the same thing in the cases where this was used.
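
For illustration only (a numpy sketch mirroring __compute_fp32_to_bf16
in gguf-py/gguf/quants.py, not part of the change): whenever the low 16
bits of an fp32 value are already zero, round-to-nearest-even reduces
to the removed truncation, because adding 0x7fff plus the round bit
cannot carry into the upper half:

    import numpy as np

    x = np.array([1.5], dtype=np.float32)  # 0x3fc00000, exactly representable as bf16
    u = x.view(np.uint32)
    rounded   = ((u.astype(np.uint64) + (0x7fff + ((u >> 16) & 1))) >> 16).astype(np.uint16)
    truncated = (u >> 16).astype(np.uint16)
    assert rounded == truncated  # both 0x3fc0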
---
 convert-hf-to-gguf.py  |  2 +-
 ggml-impl.h            |  9 +++------
 ggml.c                 | 11 +++++++++--
 ggml.h                 |  1 +
 gguf-py/gguf/quants.py | 22 ----------------------
 5 files changed, 14 insertions(+), 31 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 6a3fe2867bf05..8a4c523e3160a 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -295,7 +295,7 @@ def write_tensors(self):
 
                 if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                     if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
-                        data = gguf.truncate_bf16(data) if old_dtype == torch.bfloat16 else gguf.quantize_bf16(data)
+                        data = gguf.quantize_bf16(data)
                         assert data.dtype == np.uint16
                         data_qtype = gguf.GGMLQuantizationType.BF16
 
diff --git a/ggml-impl.h b/ggml-impl.h
index 5e77471f332f4..397e22a6df0f8 100644
--- a/ggml-impl.h
+++ b/ggml-impl.h
@@ -80,8 +80,9 @@ static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
 /**
  * Converts float32 to brain16.
  *
- * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
- * Subnormals shall be flushed to zero, and NANs will be quiet.
+ * This is binary identical with Google Brain float conversion.
+ * Floats shall round to nearest even, and NANs shall be quiet.
+ * Subnormals aren't flushed to zero, except perhaps when used.
  * This code should vectorize nicely if using modern compilers.
  */
 static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
@@ -95,10 +96,6 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
         h.bits = (u.i >> 16) | 64; /* force to quiet */
         return h;
     }
-    if (!(u.i & 0x7f800000)) { /* subnormal */
-        h.bits = (u.i & 0x80000000) >> 16; /* flush to zero */
-        return h;
-    }
     h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
     return h;
 }
diff --git a/ggml.c b/ggml.c
index 1fc77743bc7b9..009a28c7f97ec 100644
--- a/ggml.c
+++ b/ggml.c
@@ -411,9 +411,16 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
     }
 }
 
+void ggml_fp32_to_bf16_row_reference(const float * x, ggml_bf16_t * y, int64_t n) {
+    for (int i = 0; i < n; i++) {
+        y[i] = ggml_compute_fp32_to_bf16(x[i]);
+    }
+}
+
 void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
     int i = 0;
 #if defined(__AVX512BF16__)
+    // subnormals are flushed to zero on this platform
     for (; i + 32 <= n; i += 32) {
         _mm512_storeu_si512(
             (__m512i *)(y + i),
@@ -904,7 +911,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .is_quantized             = false,
         .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
         .from_float               = (ggml_from_float_t) ggml_fp32_to_bf16_row,
-        .from_float_reference     = (ggml_from_float_t) ggml_fp32_to_bf16_row,
+        .from_float_reference     = (ggml_from_float_t) ggml_fp32_to_bf16_row_reference,
         .vec_dot                  = (ggml_vec_dot_t) ggml_vec_dot_bf16,
         .vec_dot_type             = GGML_TYPE_BF16,
         .nrows                    = 1,
@@ -21334,7 +21341,7 @@ size_t ggml_quantize_chunk(
         case GGML_TYPE_BF16:
             {
                 size_t elemsize = sizeof(ggml_bf16_t);
-                ggml_fp32_to_bf16_row(src + start, (ggml_bf16_t *)dst + start, n);
+                ggml_fp32_to_bf16_row_reference(src + start, (ggml_bf16_t *)dst + start, n);
                 result = n * elemsize;
             } break;
         case GGML_TYPE_F32:
diff --git a/ggml.h b/ggml.h
index 13502a3622fc4..07e73481357cc 100644
--- a/ggml.h
+++ b/ggml.h
@@ -339,6 +339,7 @@ extern "C" {
     GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
     GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t);  // consider just doing << 16
     GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
+    GGML_API void        ggml_fp32_to_bf16_row_reference(const float *, ggml_bf16_t *, int64_t);
     GGML_API void        ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
 
     struct ggml_object;
diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index 8feec2958780a..bf365fd3d5c4f 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -28,19 +28,11 @@ def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
     n = n.astype(np.float32, copy=False).view(np.uint32)
     # force nan to quiet
     n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
-    # flush subnormals to zero
-    n = np.where((n & 0x7f800000) == 0, n & np.uint32(0x80000000), n)
     # round to nearest even
     n = (np.uint64(n) + (0x7fff + ((n >> 16) & 1))) >> 16
     return n.astype(np.uint16)
 
 
-# for fp32 values that are just extended bf16
-def __truncate_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
-    n = n.astype(np.float32, copy=False).view(np.uint32) >> 16
-    return n.astype(np.uint16)
-
-
 # This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
 def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
     rows = arr.reshape((-1, arr.shape[-1]))
@@ -68,20 +60,6 @@ def quantize_bf16(n: np.ndarray):
         return __quantize_bf16_array(n)
 
 
-def __truncate_bf16_array(n: np.ndarray) -> np.ndarray:
-    return __apply_over_grouped_rows(__truncate_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape)
-
-
-__truncate_bf16_lazy = LazyNumpyTensor._wrap_fn(__truncate_bf16_array, meta_noop=np.uint16)
-
-
-def truncate_bf16(n: np.ndarray):
-    if type(n) is LazyNumpyTensor:
-        return __truncate_bf16_lazy(n)
-    else:
-        return __truncate_bf16_array(n)
-
-
 __q8_block_size, __q8_type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q8_0]

From dc051541ff81418b8eed230afa20aa12abdad501 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Thu, 1 Aug 2024 10:00:21 +0200
Subject: [PATCH 7/8] missed prototype update in merge

---
 ggml/include/ggml.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 7d355d581c92c..d8d3dceef8cac 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -349,7 +349,7 @@ extern "C" {
     GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
     GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t);  // consider just doing << 16
     GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
-    GGML_API void        ggml_fp32_to_bf16_row_reference(const float *, ggml_bf16_t *, int64_t);
+    GGML_API void        ggml_fp32_to_bf16_row_ref(const float *, ggml_bf16_t *, int64_t);
     GGML_API void        ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
 
     struct ggml_object;

From 3a3a7528cd392b1647093281dd0ba49d50f09ab0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Thu, 1 Aug 2024 10:24:14 +0200
Subject: [PATCH 8/8] merge cleanup

---
 ggml/src/ggml.c | 172 ++++++++++++++++++++++++------------------------
 1 file changed, 86 insertions(+), 86 deletions(-)

diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index adeb67282ab77..2607f927d5e27 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -225,7 +225,7 @@ static void ggml_print_backtrace(void) {
             if (WEXITSTATUS(wstatus) == EXIT_FAILURE) {
                 // gdb failed, fallback to backtrace_symbols
                 ggml_print_backtrace_symbols();
-                }
+            }
         }
     }
 }
@@ -3963,7 +3963,7 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value)
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     return tensor;
@@ -4022,7 +4022,7 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) {
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     return tensor;
@@ -4137,7 +4137,7 @@ void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -4191,7 +4191,7 @@ void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -4268,7 +4268,7 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -4322,7 +4322,7 @@ void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -9091,7 +9091,7 @@ static void ggml_compute_forward_dup(
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -9571,7 +9571,7 @@ static void ggml_compute_forward_add(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -9949,7 +9949,7 @@ static void ggml_compute_forward_add1(
        default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10074,7 +10074,7 @@ static void ggml_compute_forward_acc(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10155,7 +10155,7 @@ static void ggml_compute_forward_sub(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10249,7 +10249,7 @@ static void ggml_compute_forward_mul(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10340,7 +10340,7 @@ static void ggml_compute_forward_div(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10385,7 +10385,7 @@ static void ggml_compute_forward_sqr(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10430,7 +10430,7 @@ static void ggml_compute_forward_sqrt(
        default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10475,7 +10475,7 @@ static void ggml_compute_forward_log(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10604,7 +10604,7 @@ static void ggml_compute_forward_sum(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10657,7 +10657,7 @@ static void ggml_compute_forward_sum_rows(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10714,7 +10714,7 @@ static void ggml_compute_forward_mean(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10762,7 +10762,7 @@ static void ggml_compute_forward_argmax(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10880,7 +10880,7 @@ static void ggml_compute_forward_repeat(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10958,7 +10958,7 @@ static void ggml_compute_forward_repeat_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11027,7 +11027,7 @@ static void ggml_compute_forward_concat(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11071,7 +11071,7 @@ static void ggml_compute_forward_abs(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11115,7 +11115,7 @@ static void ggml_compute_forward_sgn(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11159,7 +11159,7 @@ static void ggml_compute_forward_neg(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11203,7 +11203,7 @@ static void ggml_compute_forward_step(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11247,7 +11247,7 @@ static void ggml_compute_forward_tanh(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11291,7 +11291,7 @@ static void ggml_compute_forward_elu(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11335,7 +11335,7 @@ static void ggml_compute_forward_relu(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11379,7 +11379,7 @@ static void ggml_compute_forward_sigmoid(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11438,7 +11438,7 @@ static void ggml_compute_forward_gelu(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11497,7 +11497,7 @@ static void ggml_compute_forward_gelu_quick(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11556,7 +11556,7 @@ static void ggml_compute_forward_silu(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
 // ggml_compute_forward_leaky_relu
@@ -11605,7 +11605,7 @@ static void ggml_compute_forward_leaky_relu(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11668,7 +11668,7 @@ static void ggml_compute_forward_silu_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11710,7 +11710,7 @@ static void ggml_compute_forward_hardswish(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11752,7 +11752,7 @@ static void ggml_compute_forward_hardsigmoid(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11824,7 +11824,7 @@ static void ggml_compute_forward_norm(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11892,7 +11892,7 @@ static void ggml_compute_forward_rms_norm(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -12065,7 +12065,7 @@ static void ggml_compute_forward_rms_norm_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -12159,7 +12159,7 @@ static void ggml_compute_forward_group_norm(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -12270,10 +12270,10 @@ static void ggml_compute_forward_mul_mat(
 
     const enum ggml_type type = src0->type;
 
-    enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
+    enum ggml_type           const vec_dot_type         = type_traits[type].vec_dot_type;
     ggml_from_float_t        const from_float           = type_traits[vec_dot_type].from_float;
     ggml_from_float_to_mat_t const from_float_to_mat    = type_traits[vec_dot_type].from_float_to_mat;
-    int64_t const vec_dot_num_rows = type_traits[type].nrows;
+    int64_t                  const vec_dot_num_rows     = type_traits[type].nrows;
     int64_t                  const matmul_num_cols      = type_traits[type].ncols;
     int64_t                  const blck_size_interleave = type_traits[type].blck_size_interleave;
     ggml_gemv_t              const gemv                 = type_traits[type].gemv;
@@ -12348,7 +12348,7 @@ UseGgmlGemm1:;
             for (int64_t i11 = i11_processed + ith; i11 < ne11; i11 += nth) {
                 from_float((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11),
                            (void *) (wdata + i13*nbw3 + i12*nbw2 + i11*nbw1),
-                       ne10);
+                           ne10);
             }
         }
     }
@@ -12490,8 +12490,8 @@ static void ggml_compute_forward_mul_mat_id(
 
     const bool src1_cont = ggml_is_contiguous(src1);
 
-    ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
-    enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
+    ggml_vec_dot_t    const vec_dot         = type_traits[type].vec_dot;
+    enum ggml_type    const vec_dot_type    = type_traits[type].vec_dot_type;
     ggml_from_float_t const from_float      = type_traits[vec_dot_type].from_float;
     int64_t           const matmul_num_cols = type_traits[type].ncols;
     ggml_gemv_t       const gemv            = type_traits[type].gemv;
@@ -12536,8 +12536,8 @@ static void ggml_compute_forward_mul_mat_id(
         for (int64_t i12 = 0; i12 < ne12; ++i12) {
             for (int64_t i11 = ith; i11 < ne11; i11 += nth) {
                 from_float((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11),
-                       (void *) (wdata + i13*nbw3 + i12*nbw2 + i11*nbw1),
-                       ne10);
+                           (void *) (wdata + i13*nbw3 + i12*nbw2 + i11*nbw1),
+                           ne10);
             }
         }
     }
@@ -12927,7 +12927,7 @@ static void ggml_compute_forward_out_prod(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -12987,7 +12987,7 @@ static void ggml_compute_forward_scale(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13103,7 +13103,7 @@ static void ggml_compute_forward_set(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13381,7 +13381,7 @@ static void ggml_compute_forward_get_rows(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     //static bool first = true;
@@ -13489,7 +13489,7 @@ static void ggml_compute_forward_get_rows_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     //static bool first = true;
@@ -13567,7 +13567,7 @@ static void ggml_compute_forward_diag(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13637,7 +13637,7 @@ static void ggml_compute_forward_diag_mask_inf(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13655,7 +13655,7 @@ static void ggml_compute_forward_diag_mask_zero(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13773,7 +13773,7 @@ static void ggml_compute_forward_soft_max(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13869,7 +13869,7 @@ static void ggml_compute_forward_soft_max_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13960,7 +13960,7 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_COUNT:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -14290,7 +14290,7 @@ static void ggml_compute_forward_rope(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -14314,7 +14314,7 @@ static void ggml_compute_forward_rope_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -14514,7 +14514,7 @@ static void ggml_compute_forward_conv_transpose_1d(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -14686,7 +14686,7 @@ static void ggml_compute_forward_im2col(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -14987,7 +14987,7 @@ static void ggml_compute_forward_upscale(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15045,7 +15045,7 @@ static void ggml_compute_forward_pad(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15086,7 +15086,7 @@ static void ggml_compute_forward_arange(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15137,7 +15137,7 @@ static void ggml_compute_forward_timestep_embedding(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15196,7 +15196,7 @@ static void ggml_compute_forward_argsort(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15419,7 +15419,7 @@ static void ggml_compute_forward_flash_attn_ext(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15755,7 +15755,7 @@ static void ggml_compute_forward_flash_attn_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15877,7 +15877,7 @@ static void ggml_compute_forward_ssm_conv(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15998,7 +15998,7 @@ static void ggml_compute_forward_ssm_scan(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16061,7 +16061,7 @@ static void ggml_compute_forward_win_part(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16122,7 +16122,7 @@ static void ggml_compute_forward_win_unpart(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16190,7 +16190,7 @@ static void ggml_compute_forward_unary(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16237,7 +16237,7 @@ static void ggml_compute_forward_get_rel_pos(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16318,7 +16318,7 @@ static void ggml_compute_forward_add_rel_pos(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16364,7 +16364,7 @@ static void ggml_compute_forward_map_unary(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16413,7 +16413,7 @@ static void ggml_compute_forward_map_binary(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16612,7 +16612,7 @@ static void ggml_compute_forward_cross_entropy_loss(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16699,7 +16699,7 @@ static void ggml_compute_forward_cross_entropy_loss_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -17035,7 +17035,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
         case GGML_OP_COUNT:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -18063,7 +18063,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
         case GGML_OP_COUNT:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     for (int i = 0; i < GGML_MAX_SRC; ++i) {
@@ -18657,7 +18657,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                     fprintf(stderr, "%d\n", node->op);
                 }
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     assert(n_tasks > 0);
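
A quick end-to-end check of the conversion behaviour at the tip of this
series (a sketch only, assuming the gguf-py package from this tree is on
the Python path):

    import numpy as np
    import gguf

    data = np.array([1.0, -2.5, np.nan, 1e-40], dtype=np.float32)
    bits = gguf.quantize_bf16(data)  # uint16 view of the bf16 bit patterns
    assert bits.dtype == np.uint16
    assert (bits[2] & 0x7f80) == 0x7f80 and (bits[2] & 0x007f) != 0  # NaN stays a quiet NaN
    assert bits[3] != 0  # fp32 subnormal is rounded, no longer flushed to zero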