Don't flush bf16 subnormals to zero

jart · jart · commit 7fd91011ecbe · 2024-06-29T01:31:08.000-07:00
See ggml-org/llama.cpp#7843
diff --git a/llama.cpp/ggml-impl.h b/llama.cpp/ggml-impl.h
@@ -81,10 +81,6 @@ static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
 
 /**
  * Converts float32 to brain16.
- *
- * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
- * Subnormals shall be flushed to zero, and NANs will be quiet.
- * This code should vectorize nicely if using modern compilers.
  */
 static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
     ggml_bf16_t h;
@@ -97,10 +93,6 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
         h.bits = (u.i >> 16) | 64; /* force to quiet */
         return h;
     }
-    if (!(u.i & 0x7f800000)) { /* subnormal */
-        h.bits = (u.i & 0x80000000) >> 16; /* flush to zero */
-        return h;
-    }
     h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
     return h;
 }
diff --git a/llama.cpp/ggml-vector.inc b/llama.cpp/ggml-vector.inc
@@ -859,12 +859,12 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
 
 void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
   int i = 0;
-#if defined(__AVX512BF16__)
+#if defined(__AVX512BF16__) && 0 // [jart] it kills subnormals
   for (; i + 32 <= n; i += 32) {
         _mm512_storeu_si512(
             (__m512i *)(y + i),
             m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
-                                _mm512_loadu_ps(x + i))));
+                                      _mm512_loadu_ps(x + i))));
   }
 #endif
     for (; i < n; i++) {