From 6a52bfe33292e23e5dc501c6b71f692ca45e277b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Mon, 10 Jun 2024 04:26:55 +0200
Subject: [PATCH 1/8] add truncate_bf16

---
 gguf-py/gguf/quants.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index b22eec1661ce7..31f689dd773fd 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -35,6 +35,12 @@ def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
     return n.astype(np.int16)
 
 
+# for fp32 values that are just extended bf16
+def __truncate_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
+    n = n.astype(np.float32, copy=False).view(np.uint32) >> 16
+    return n.astype(np.uint16)
+
+
 # This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
 def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
     rows = arr.reshape((-1, arr.shape[-1]))
@@ -62,6 +68,20 @@ def quantize_bf16(n: np.ndarray):
         return __quantize_bf16_array(n)
 
 
+def __truncate_bf16_array(n: np.ndarray) -> np.ndarray:
+    return __apply_over_grouped_rows(__truncate_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape)
+
+
+__truncate_bf16_lazy = LazyNumpyTensor._wrap_fn(__truncate_bf16_array, meta_noop=np.uint16)
+
+
+def truncate_bf16(n: np.ndarray):
+    if type(n) is LazyNumpyTensor:
+        return __truncate_bf16_lazy(n)
+    else:
+        return __truncate_bf16_array(n)
+
+
 __q8_block_size, __q8_type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q8_0]

From 46054d1aab479bd2b90516572fa895dc9703b055 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Mon, 10 Jun 2024 04:30:47 +0200
Subject: [PATCH 2/8] truncate intermediate fp32 if converting bf16 to bf16

---
 convert-hf-to-gguf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 025405a2c6ce1..ac6363a482a8e 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -295,8 +295,8 @@ def write_tensors(self):
 
                 if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                     if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
-                        data = gguf.quantize_bf16(data)
-                        assert data.dtype == np.int16
+                        data = gguf.truncate_bf16(data) if old_dtype == torch.bfloat16 else gguf.quantize_bf16(data)
+                        assert data.dtype in (np.int16, np.uint16)
                         data_qtype = gguf.GGMLQuantizationType.BF16
 
                     elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data):

From 069369f3fe6591d5073bd19c67e4eef23d9dafc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Fri, 14 Jun 2024 11:06:21 +0200
Subject: [PATCH 3/8] fix masking in __compute_fp32_to_bf16

---
 gguf-py/gguf/quants.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index 31f689dd773fd..861e87daa9170 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -25,14 +25,14 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
 
 # same as ggml_compute_fp32_to_bf16 in ggml-impl.h
 def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
-    n = n.astype(np.float32, copy=False).view(np.int32)
+    n = n.astype(np.float32, copy=False).view(np.uint32)
     # force nan to quiet
-    n = np.where((n & 0x7fffffff) > 0x7f800000, (n & 0xffff0000) | (64 << 16), n)
+    n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | (64 << 16), n)
     # flush subnormals to zero
-    n = np.where((n & 0x7f800000) == 0, n & 0x80000000, n)
+    n = np.where((n & 0x7f800000) == 0, n & np.uint32(0x80000000), n)
     # round to nearest even
     n = (n + (0x7fff + ((n >> 16) & 1))) >> 16
-    return n.astype(np.int16)
+    return n.astype(np.uint16)
 
 
@@ -55,10 +55,10 @@ def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np
 
 
 def __quantize_bf16_array(n: np.ndarray) -> np.ndarray:
-    return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.int16, oshape=n.shape)
+    return __apply_over_grouped_rows(__compute_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape)
 
 
-__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(__quantize_bf16_array, meta_noop=np.int16)
+__quantize_bf16_lazy = LazyNumpyTensor._wrap_fn(__quantize_bf16_array, meta_noop=np.uint16)
 
 
 def quantize_bf16(n: np.ndarray):

From 225ec48fe579337efb8fd931e19dc5c6f6078ae4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Fri, 14 Jun 2024 13:32:48 +0200
Subject: [PATCH 4/8] np.int16 no longer used

---
 convert-hf-to-gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index ac6363a482a8e..6a3fe2867bf05 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -296,7 +296,7 @@ def write_tensors(self):
                 if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                     if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
                         data = gguf.truncate_bf16(data) if old_dtype == torch.bfloat16 else gguf.quantize_bf16(data)
-                        assert data.dtype in (np.int16, np.uint16)
+                        assert data.dtype == np.uint16
                         data_qtype = gguf.GGMLQuantizationType.BF16
 
                     elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data):

From e8e2b7e03fa1ae22ccca3179988748b27e7c8ef6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Thu, 27 Jun 2024 19:04:49 +0200
Subject: [PATCH 5/8] missing cast and additional numpy 2.x fix

---
 gguf-py/gguf/quants.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index 861e87daa9170..8feec2958780a 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -27,11 +27,11 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
 def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
     n = n.astype(np.float32, copy=False).view(np.uint32)
     # force nan to quiet
-    n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | (64 << 16), n)
+    n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
     # flush subnormals to zero
     n = np.where((n & 0x7f800000) == 0, n & np.uint32(0x80000000), n)
     # round to nearest even
-    n = (n + (0x7fff + ((n >> 16) & 1))) >> 16
+    n = (np.uint64(n) + (0x7fff + ((n >> 16) & 1))) >> 16
     return n.astype(np.uint16)

From 5b67a6cfbfd363e3bd6ecf0373fb8491a6f857e2 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Fri, 28 Jun 2024 16:47:55 -0400
Subject: [PATCH 6/8] ggml-impl : do not flush bf16 subnormals to zero

* ggml : add reference fp32 to bf16 conversion

The fast version is no longer equivalent for all platforms
because of the handling of subnormal values.

* gguf-py : remove flush to zero for bf16 subnormals

* gguf-py : remove float32 truncation to bf16

Rounding achieves the same thing in the cases where this was used.
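
For illustration only (a numpy sketch mirroring __compute_fp32_to_bf16
in gguf-py/gguf/quants.py, not part of the change): whenever the low 16
bits of an fp32 value are already zero, round-to-nearest-even reduces
to the removed truncation, because adding 0x7fff plus the round bit
cannot carry into the upper half:

    import numpy as np

    x = np.array([1.5], dtype=np.float32)  # 0x3fc00000, exactly representable as bf16
    u = x.view(np.uint32)
    rounded   = ((u.astype(np.uint64) + (0x7fff + ((u >> 16) & 1))) >> 16).astype(np.uint16)
    truncated = (u >> 16).astype(np.uint16)
    assert rounded == truncated  # both 0x3fc0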
---
 convert-hf-to-gguf.py  |  2 +-
 ggml-impl.h            |  9 +++------
 ggml.c                 | 11 +++++++++--
 ggml.h                 |  1 +
 gguf-py/gguf/quants.py | 22 ----------------------
 5 files changed, 14 insertions(+), 31 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 6a3fe2867bf05..8a4c523e3160a 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -295,7 +295,7 @@ def write_tensors(self):
 
                 if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                     if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
-                        data = gguf.truncate_bf16(data) if old_dtype == torch.bfloat16 else gguf.quantize_bf16(data)
+                        data = gguf.quantize_bf16(data)
                         assert data.dtype == np.uint16
                         data_qtype = gguf.GGMLQuantizationType.BF16
 
diff --git a/ggml-impl.h b/ggml-impl.h
index 5e77471f332f4..397e22a6df0f8 100644
--- a/ggml-impl.h
+++ b/ggml-impl.h
@@ -80,8 +80,9 @@ static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
 /**
  * Converts float32 to brain16.
  *
- * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
- * Subnormals shall be flushed to zero, and NANs will be quiet.
+ * This is binary identical with Google Brain float conversion.
+ * Floats shall round to nearest even, and NANs shall be quiet.
+ * Subnormals aren't flushed to zero, except perhaps when used.
  * This code should vectorize nicely if using modern compilers.
  */
 static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
@@ -95,10 +96,6 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
         h.bits = (u.i >> 16) | 64; /* force to quiet */
         return h;
     }
-    if (!(u.i & 0x7f800000)) { /* subnormal */
-        h.bits = (u.i & 0x80000000) >> 16; /* flush to zero */
-        return h;
-    }
     h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
     return h;
 }
diff --git a/ggml.c b/ggml.c
index 1fc77743bc7b9..009a28c7f97ec 100644
--- a/ggml.c
+++ b/ggml.c
@@ -411,9 +411,16 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
     }
 }
 
+void ggml_fp32_to_bf16_row_reference(const float * x, ggml_bf16_t * y, int64_t n) {
+    for (int i = 0; i < n; i++) {
+        y[i] = ggml_compute_fp32_to_bf16(x[i]);
+    }
+}
+
 void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
     int i = 0;
 #if defined(__AVX512BF16__)
+    // subnormals are flushed to zero on this platform
     for (; i + 32 <= n; i += 32) {
         _mm512_storeu_si512(
             (__m512i *)(y + i),
@@ -904,7 +911,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .is_quantized             = false,
         .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
         .from_float               = (ggml_from_float_t) ggml_fp32_to_bf16_row,
-        .from_float_reference     = (ggml_from_float_t) ggml_fp32_to_bf16_row,
+        .from_float_reference     = (ggml_from_float_t) ggml_fp32_to_bf16_row_reference,
         .vec_dot                  = (ggml_vec_dot_t) ggml_vec_dot_bf16,
         .vec_dot_type             = GGML_TYPE_BF16,
         .nrows                    = 1,
@@ -21334,7 +21341,7 @@ size_t ggml_quantize_chunk(
         case GGML_TYPE_BF16:
             {
                 size_t elemsize = sizeof(ggml_bf16_t);
-                ggml_fp32_to_bf16_row(src + start, (ggml_bf16_t *)dst + start, n);
+                ggml_fp32_to_bf16_row_reference(src + start, (ggml_bf16_t *)dst + start, n);
                 result = n * elemsize;
             } break;
         case GGML_TYPE_F32:
diff --git a/ggml.h b/ggml.h
index 13502a3622fc4..07e73481357cc 100644
--- a/ggml.h
+++ b/ggml.h
@@ -339,6 +339,7 @@ extern "C" {
     GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
     GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t);  // consider just doing << 16
     GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
+    GGML_API void        ggml_fp32_to_bf16_row_reference(const float *, ggml_bf16_t *, int64_t);
     GGML_API void        ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
 
     struct ggml_object;
diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index 8feec2958780a..bf365fd3d5c4f 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -28,19 +28,11 @@ def __compute_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
     n = n.astype(np.float32, copy=False).view(np.uint32)
     # force nan to quiet
     n = np.where((n & 0x7fffffff) > 0x7f800000, (n & np.uint32(0xffff0000)) | np.uint32(64 << 16), n)
-    # flush subnormals to zero
-    n = np.where((n & 0x7f800000) == 0, n & np.uint32(0x80000000), n)
     # round to nearest even
     n = (np.uint64(n) + (0x7fff + ((n >> 16) & 1))) >> 16
     return n.astype(np.uint16)
 
 
-# for fp32 values that are just extended bf16
-def __truncate_fp32_to_bf16(n: np.ndarray) -> np.ndarray:
-    n = n.astype(np.float32, copy=False).view(np.uint32) >> 16
-    return n.astype(np.uint16)
-
-
 # This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
 def __apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
     rows = arr.reshape((-1, arr.shape[-1]))
@@ -68,20 +60,6 @@ def quantize_bf16(n: np.ndarray):
         return __quantize_bf16_array(n)
 
 
-def __truncate_bf16_array(n: np.ndarray) -> np.ndarray:
-    return __apply_over_grouped_rows(__truncate_fp32_to_bf16, arr=n, otype=np.uint16, oshape=n.shape)
-
-
-__truncate_bf16_lazy = LazyNumpyTensor._wrap_fn(__truncate_bf16_array, meta_noop=np.uint16)
-
-
-def truncate_bf16(n: np.ndarray):
-    if type(n) is LazyNumpyTensor:
-        return __truncate_bf16_lazy(n)
-    else:
-        return __truncate_bf16_array(n)
-
-
 __q8_block_size, __q8_type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q8_0]

From dc051541ff81418b8eed230afa20aa12abdad501 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Thu, 1 Aug 2024 10:00:21 +0200
Subject: [PATCH 7/8] missed prototype update in merge

---
 ggml/include/ggml.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 7d355d581c92c..d8d3dceef8cac 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -349,7 +349,7 @@ extern "C" {
     GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
     GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t);  // consider just doing << 16
     GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
-    GGML_API void        ggml_fp32_to_bf16_row_reference(const float *, ggml_bf16_t *, int64_t);
+    GGML_API void        ggml_fp32_to_bf16_row_ref(const float *, ggml_bf16_t *, int64_t);
     GGML_API void        ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
 
     struct ggml_object;

From 3a3a7528cd392b1647093281dd0ba49d50f09ab0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?=
Date: Thu, 1 Aug 2024 10:24:14 +0200
Subject: [PATCH 8/8] merge cleanup

---
 ggml/src/ggml.c | 172 ++++++++++++++++++++++++------------------------
 1 file changed, 86 insertions(+), 86 deletions(-)

diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index adeb67282ab77..2607f927d5e27 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -225,7 +225,7 @@ static void ggml_print_backtrace(void) {
             if (WEXITSTATUS(wstatus) == EXIT_FAILURE) {
                 // gdb failed, fallback to backtrace_symbols
                 ggml_print_backtrace_symbols();
-                }
+            }
         }
     }
 }
@@ -3963,7 +3963,7 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value)
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     return tensor;
@@ -4022,7 +4022,7 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) {
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     return tensor;
@@ -4137,7 +4137,7 @@ void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -4191,7 +4191,7 @@ void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -4268,7 +4268,7 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -4322,7 +4322,7 @@ void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -9091,7 +9091,7 @@ static void ggml_compute_forward_dup(
         default:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -9571,7 +9571,7 @@ static void ggml_compute_forward_add(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -9949,7 +9949,7 @@ static void ggml_compute_forward_add1(
        default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10074,7 +10074,7 @@ static void ggml_compute_forward_acc(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10155,7 +10155,7 @@ static void ggml_compute_forward_sub(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10249,7 +10249,7 @@ static void ggml_compute_forward_mul(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10340,7 +10340,7 @@ static void ggml_compute_forward_div(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10385,7 +10385,7 @@ static void ggml_compute_forward_sqr(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10430,7 +10430,7 @@ static void ggml_compute_forward_sqrt(
        default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10475,7 +10475,7 @@ static void ggml_compute_forward_log(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10604,7 +10604,7 @@ static void ggml_compute_forward_sum(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10657,7 +10657,7 @@ static void ggml_compute_forward_sum_rows(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10714,7 +10714,7 @@ static void ggml_compute_forward_mean(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10762,7 +10762,7 @@ static void ggml_compute_forward_argmax(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10880,7 +10880,7 @@ static void ggml_compute_forward_repeat(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -10958,7 +10958,7 @@ static void ggml_compute_forward_repeat_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11027,7 +11027,7 @@ static void ggml_compute_forward_concat(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11071,7 +11071,7 @@ static void ggml_compute_forward_abs(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11115,7 +11115,7 @@ static void ggml_compute_forward_sgn(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11159,7 +11159,7 @@ static void ggml_compute_forward_neg(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11203,7 +11203,7 @@ static void ggml_compute_forward_step(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11247,7 +11247,7 @@ static void ggml_compute_forward_tanh(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11291,7 +11291,7 @@ static void ggml_compute_forward_elu(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11335,7 +11335,7 @@ static void ggml_compute_forward_relu(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11379,7 +11379,7 @@ static void ggml_compute_forward_sigmoid(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11438,7 +11438,7 @@ static void ggml_compute_forward_gelu(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11497,7 +11497,7 @@ static void ggml_compute_forward_gelu_quick(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11556,7 +11556,7 @@ static void ggml_compute_forward_silu(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
 // ggml_compute_forward_leaky_relu
@@ -11605,7 +11605,7 @@ static void ggml_compute_forward_leaky_relu(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11668,7 +11668,7 @@ static void ggml_compute_forward_silu_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11710,7 +11710,7 @@ static void ggml_compute_forward_hardswish(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11752,7 +11752,7 @@ static void ggml_compute_forward_hardsigmoid(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11824,7 +11824,7 @@ static void ggml_compute_forward_norm(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -11892,7 +11892,7 @@ static void ggml_compute_forward_rms_norm(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -12065,7 +12065,7 @@ static void ggml_compute_forward_rms_norm_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -12159,7 +12159,7 @@ static void ggml_compute_forward_group_norm(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -12270,10 +12270,10 @@ static void ggml_compute_forward_mul_mat(
 
     const enum ggml_type type = src0->type;
 
-    enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
+    enum ggml_type           const vec_dot_type         = type_traits[type].vec_dot_type;
     ggml_from_float_t        const from_float           = type_traits[vec_dot_type].from_float;
     ggml_from_float_to_mat_t const from_float_to_mat    = type_traits[vec_dot_type].from_float_to_mat;
-    int64_t const vec_dot_num_rows = type_traits[type].nrows;
+    int64_t                  const vec_dot_num_rows     = type_traits[type].nrows;
     int64_t                  const matmul_num_cols      = type_traits[type].ncols;
     int64_t                  const blck_size_interleave = type_traits[type].blck_size_interleave;
     ggml_gemv_t              const gemv                 = type_traits[type].gemv;
@@ -12348,7 +12348,7 @@ UseGgmlGemm1:;
             for (int64_t i11 = i11_processed + ith; i11 < ne11; i11 += nth) {
                 from_float((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11),
                            (void *) (wdata + i13*nbw3 + i12*nbw2 + i11*nbw1),
-                       ne10);
+                           ne10);
             }
         }
     }
@@ -12490,8 +12490,8 @@ static void ggml_compute_forward_mul_mat_id(
 
     const bool src1_cont = ggml_is_contiguous(src1);
 
-    ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
-    enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
+    ggml_vec_dot_t    const vec_dot         = type_traits[type].vec_dot;
+    enum ggml_type    const vec_dot_type    = type_traits[type].vec_dot_type;
     ggml_from_float_t const from_float      = type_traits[vec_dot_type].from_float;
     int64_t           const matmul_num_cols = type_traits[type].ncols;
     ggml_gemv_t       const gemv            = type_traits[type].gemv;
@@ -12536,8 +12536,8 @@ static void ggml_compute_forward_mul_mat_id(
         for (int64_t i12 = 0; i12 < ne12; ++i12) {
             for (int64_t i11 = ith; i11 < ne11; i11 += nth) {
                 from_float((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11),
-                       (void *) (wdata + i13*nbw3 + i12*nbw2 + i11*nbw1),
-                       ne10);
+                           (void *) (wdata + i13*nbw3 + i12*nbw2 + i11*nbw1),
+                           ne10);
             }
         }
     }
@@ -12927,7 +12927,7 @@ static void ggml_compute_forward_out_prod(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -12987,7 +12987,7 @@ static void ggml_compute_forward_scale(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13103,7 +13103,7 @@ static void ggml_compute_forward_set(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13381,7 +13381,7 @@ static void ggml_compute_forward_get_rows(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     //static bool first = true;
@@ -13489,7 +13489,7 @@ static void ggml_compute_forward_get_rows_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     //static bool first = true;
@@ -13567,7 +13567,7 @@ static void ggml_compute_forward_diag(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13637,7 +13637,7 @@ static void ggml_compute_forward_diag_mask_inf(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13655,7 +13655,7 @@ static void ggml_compute_forward_diag_mask_zero(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13773,7 +13773,7 @@ static void ggml_compute_forward_soft_max(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13869,7 +13869,7 @@ static void ggml_compute_forward_soft_max_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -13960,7 +13960,7 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_COUNT:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -14290,7 +14290,7 @@ static void ggml_compute_forward_rope(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -14314,7 +14314,7 @@ static void ggml_compute_forward_rope_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -14514,7 +14514,7 @@ static void ggml_compute_forward_conv_transpose_1d(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -14686,7 +14686,7 @@ static void ggml_compute_forward_im2col(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -14987,7 +14987,7 @@ static void ggml_compute_forward_upscale(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15045,7 +15045,7 @@ static void ggml_compute_forward_pad(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15086,7 +15086,7 @@ static void ggml_compute_forward_arange(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15137,7 +15137,7 @@ static void ggml_compute_forward_timestep_embedding(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15196,7 +15196,7 @@ static void ggml_compute_forward_argsort(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15419,7 +15419,7 @@ static void ggml_compute_forward_flash_attn_ext(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15755,7 +15755,7 @@ static void ggml_compute_forward_flash_attn_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15877,7 +15877,7 @@ static void ggml_compute_forward_ssm_conv(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -15998,7 +15998,7 @@ static void ggml_compute_forward_ssm_scan(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16061,7 +16061,7 @@ static void ggml_compute_forward_win_part(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16122,7 +16122,7 @@ static void ggml_compute_forward_win_unpart(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16190,7 +16190,7 @@ static void ggml_compute_forward_unary(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16237,7 +16237,7 @@ static void ggml_compute_forward_get_rel_pos(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16318,7 +16318,7 @@ static void ggml_compute_forward_add_rel_pos(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16364,7 +16364,7 @@ static void ggml_compute_forward_map_unary(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16413,7 +16413,7 @@ static void ggml_compute_forward_map_binary(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16612,7 +16612,7 @@ static void ggml_compute_forward_cross_entropy_loss(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -16699,7 +16699,7 @@ static void ggml_compute_forward_cross_entropy_loss_back(
         default:
            {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -17035,7 +17035,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
         case GGML_OP_COUNT:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 }
 
@@ -18063,7 +18063,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
         case GGML_OP_COUNT:
             {
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     for (int i = 0; i < GGML_MAX_SRC; ++i) {
@@ -18657,7 +18657,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                     fprintf(stderr, "%d\n", node->op);
                 }
                 GGML_ABORT("fatal error");
-                }
+            }
     }
 
     assert(n_tasks > 0);
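
A quick end-to-end check of the conversion behaviour at the tip of this
series (a sketch only, assuming the gguf-py package from this tree is on
the Python path):

    import numpy as np
    import gguf

    data = np.array([1.0, -2.5, np.nan, 1e-40], dtype=np.float32)
    bits = gguf.quantize_bf16(data)  # uint16 view of the bf16 bit patterns
    assert bits.dtype == np.uint16
    assert (bits[2] & 0x7f80) == 0x7f80 and (bits[2] & 0x007f) != 0  # NaN stays a quiet NaN
    assert bits[3] != 0  # fp32 subnormal is rounded, no longer flushed to zero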