Skip to content

Commit 8772658

Browse files
committed
ggml : add I32 <-> F32 conversion
ggml-ci
1 parent fc77536 commit 8772658

File tree

4 files changed

+53
-36
lines changed

4 files changed

+53
-36
lines changed

ggml.c

Lines changed: 44 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,18 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
355355
}
356356
}
357357

358+
// Widen a row of n int32 values to float, element by element.
// The conversion is exact for |x| <= 2^24; larger magnitudes are rounded
// to the nearest representable float (24-bit significand).
static void ggml_i32_to_f32_row(const int32_t * x, float * y, int n) {
    const int32_t * src = x;
    float         * dst = y;
    for (int k = 0; k < n; ++k) {
        *dst++ = (float) *src++;
    }
}
363+
364+
// Narrow a row of n float values to int32 via a plain C cast, which
// truncates toward zero (e.g. -2.75f -> -2).
// NOTE(review): a value outside the int32 range makes the cast undefined
// behavior (C11 6.3.1.4); callers are presumably expected to pass
// in-range data — confirm at the call sites.
static void ggml_f32_to_i32_row(const float * x, int32_t * y, int n) {
    int k = 0;
    while (k < n) {
        y[k] = (int32_t) x[k];
        ++k;
    }
}
369+
358370
//
359371
// timing
360372
//
@@ -454,6 +466,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
454466
.blck_size = 1,
455467
.type_size = sizeof(int32_t),
456468
.is_quantized = false,
469+
.to_float = (ggml_to_float_t) ggml_i32_to_f32_row,
470+
.from_float = (ggml_from_float_t) ggml_f32_to_i32_row,
471+
.from_float_reference = (ggml_from_float_t) ggml_f32_to_i32_row,
457472
},
458473
[GGML_TYPE_F32] = {
459474
.type_name = "f32",
@@ -469,10 +484,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
469484
.blck_size = 1,
470485
.type_size = sizeof(ggml_fp16_t),
471486
.is_quantized = false,
472-
.to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
487+
.to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
473488
.from_float = (ggml_from_float_t) ggml_fp32_to_fp16_row,
474489
.from_float_reference = (ggml_from_float_t) ggml_fp32_to_fp16_row,
475-
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16,
490+
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16,
476491
.vec_dot_type = GGML_TYPE_F16,
477492
.nrows = 1,
478493
},
@@ -481,8 +496,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
481496
.blck_size = QK4_0,
482497
.type_size = sizeof(block_q4_0),
483498
.is_quantized = true,
484-
.to_float = (ggml_to_float_t) dequantize_row_q4_0,
485-
.from_float = quantize_row_q4_0,
499+
.to_float = (ggml_to_float_t) dequantize_row_q4_0,
500+
.from_float = (ggml_from_float_t) quantize_row_q4_0,
486501
.from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference,
487502
.vec_dot = ggml_vec_dot_q4_0_q8_0,
488503
.vec_dot_type = GGML_TYPE_Q8_0,
@@ -497,8 +512,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
497512
.blck_size = QK4_1,
498513
.type_size = sizeof(block_q4_1),
499514
.is_quantized = true,
500-
.to_float = (ggml_to_float_t) dequantize_row_q4_1,
501-
.from_float = quantize_row_q4_1,
515+
.to_float = (ggml_to_float_t) dequantize_row_q4_1,
516+
.from_float = (ggml_from_float_t) quantize_row_q4_1,
502517
.from_float_reference = (ggml_from_float_t) quantize_row_q4_1_reference,
503518
.vec_dot = ggml_vec_dot_q4_1_q8_1,
504519
.vec_dot_type = GGML_TYPE_Q8_1,
@@ -537,8 +552,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
537552
.blck_size = QK5_0,
538553
.type_size = sizeof(block_q5_0),
539554
.is_quantized = true,
540-
.to_float = (ggml_to_float_t) dequantize_row_q5_0,
541-
.from_float = quantize_row_q5_0,
555+
.to_float = (ggml_to_float_t) dequantize_row_q5_0,
556+
.from_float = (ggml_from_float_t) quantize_row_q5_0,
542557
.from_float_reference = (ggml_from_float_t) quantize_row_q5_0_reference,
543558
.vec_dot = ggml_vec_dot_q5_0_q8_0,
544559
.vec_dot_type = GGML_TYPE_Q8_0,
@@ -549,8 +564,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
549564
.blck_size = QK5_1,
550565
.type_size = sizeof(block_q5_1),
551566
.is_quantized = true,
552-
.to_float = (ggml_to_float_t) dequantize_row_q5_1,
553-
.from_float = quantize_row_q5_1,
567+
.to_float = (ggml_to_float_t) dequantize_row_q5_1,
568+
.from_float = (ggml_from_float_t) quantize_row_q5_1,
554569
.from_float_reference = (ggml_from_float_t) quantize_row_q5_1_reference,
555570
.vec_dot = ggml_vec_dot_q5_1_q8_1,
556571
.vec_dot_type = GGML_TYPE_Q8_1,
@@ -561,8 +576,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
561576
.blck_size = QK8_0,
562577
.type_size = sizeof(block_q8_0),
563578
.is_quantized = true,
564-
.to_float = (ggml_to_float_t) dequantize_row_q8_0,
565-
.from_float = quantize_row_q8_0,
579+
.to_float = (ggml_to_float_t) dequantize_row_q8_0,
580+
.from_float = (ggml_from_float_t) quantize_row_q8_0,
566581
.from_float_reference = (ggml_from_float_t) quantize_row_q8_0_reference,
567582
.vec_dot = ggml_vec_dot_q8_0_q8_0,
568583
.vec_dot_type = GGML_TYPE_Q8_0,
@@ -577,7 +592,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
577592
.blck_size = QK8_1,
578593
.type_size = sizeof(block_q8_1),
579594
.is_quantized = true,
580-
.from_float = quantize_row_q8_1,
595+
.from_float = (ggml_from_float_t) quantize_row_q8_1,
581596
.from_float_reference = (ggml_from_float_t) quantize_row_q8_1_reference,
582597
.vec_dot_type = GGML_TYPE_Q8_1,
583598
.nrows = 1,
@@ -587,8 +602,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
587602
.blck_size = QK_K,
588603
.type_size = sizeof(block_q2_K),
589604
.is_quantized = true,
590-
.to_float = (ggml_to_float_t) dequantize_row_q2_K,
591-
.from_float = quantize_row_q2_K,
605+
.to_float = (ggml_to_float_t) dequantize_row_q2_K,
606+
.from_float = (ggml_from_float_t) quantize_row_q2_K,
592607
.from_float_reference = (ggml_from_float_t) quantize_row_q2_K_reference,
593608
.vec_dot = ggml_vec_dot_q2_K_q8_K,
594609
.vec_dot_type = GGML_TYPE_Q8_K,
@@ -599,8 +614,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
599614
.blck_size = QK_K,
600615
.type_size = sizeof(block_q3_K),
601616
.is_quantized = true,
602-
.to_float = (ggml_to_float_t) dequantize_row_q3_K,
603-
.from_float = quantize_row_q3_K,
617+
.to_float = (ggml_to_float_t) dequantize_row_q3_K,
618+
.from_float = (ggml_from_float_t) quantize_row_q3_K,
604619
.from_float_reference = (ggml_from_float_t) quantize_row_q3_K_reference,
605620
.vec_dot = ggml_vec_dot_q3_K_q8_K,
606621
.vec_dot_type = GGML_TYPE_Q8_K,
@@ -611,8 +626,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
611626
.blck_size = QK_K,
612627
.type_size = sizeof(block_q4_K),
613628
.is_quantized = true,
614-
.to_float = (ggml_to_float_t) dequantize_row_q4_K,
615-
.from_float = quantize_row_q4_K,
629+
.to_float = (ggml_to_float_t) dequantize_row_q4_K,
630+
.from_float = (ggml_from_float_t) quantize_row_q4_K,
616631
.from_float_reference = (ggml_from_float_t) quantize_row_q4_K_reference,
617632
.vec_dot = ggml_vec_dot_q4_K_q8_K,
618633
.vec_dot_type = GGML_TYPE_Q8_K,
@@ -623,8 +638,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
623638
.blck_size = QK_K,
624639
.type_size = sizeof(block_q5_K),
625640
.is_quantized = true,
626-
.to_float = (ggml_to_float_t) dequantize_row_q5_K,
627-
.from_float = quantize_row_q5_K,
641+
.to_float = (ggml_to_float_t) dequantize_row_q5_K,
642+
.from_float = (ggml_from_float_t) quantize_row_q5_K,
628643
.from_float_reference = (ggml_from_float_t) quantize_row_q5_K_reference,
629644
.vec_dot = ggml_vec_dot_q5_K_q8_K,
630645
.vec_dot_type = GGML_TYPE_Q8_K,
@@ -635,8 +650,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
635650
.blck_size = QK_K,
636651
.type_size = sizeof(block_q6_K),
637652
.is_quantized = true,
638-
.to_float = (ggml_to_float_t) dequantize_row_q6_K,
639-
.from_float = quantize_row_q6_K,
653+
.to_float = (ggml_to_float_t) dequantize_row_q6_K,
654+
.from_float = (ggml_from_float_t) quantize_row_q6_K,
640655
.from_float_reference = (ggml_from_float_t) quantize_row_q6_K_reference,
641656
.vec_dot = ggml_vec_dot_q6_K_q8_K,
642657
.vec_dot_type = GGML_TYPE_Q8_K,
@@ -671,9 +686,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
671686
.blck_size = QK_K,
672687
.type_size = sizeof(block_iq3_xxs),
673688
.is_quantized = true,
674-
.to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
675-
.from_float = quantize_row_iq3_xxs,
676-
.from_float_reference = (ggml_from_float_t)quantize_row_iq3_xxs_reference,
689+
.to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
690+
.from_float = (ggml_from_float_t) quantize_row_iq3_xxs,
691+
.from_float_reference = (ggml_from_float_t) quantize_row_iq3_xxs_reference,
677692
.vec_dot = ggml_vec_dot_iq3_xxs_q8_K,
678693
.vec_dot_type = GGML_TYPE_Q8_K,
679694
.nrows = 1,
@@ -695,9 +710,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
695710
.blck_size = QK4_NL,
696711
.type_size = sizeof(block_iq4_nl),
697712
.is_quantized = true,
698-
.to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
699-
.from_float = quantize_row_iq4_nl,
700-
.from_float_reference = (ggml_from_float_t)quantize_row_iq4_nl_reference,
713+
.to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
714+
.from_float = (ggml_from_float_t) quantize_row_iq4_nl,
715+
.from_float_reference = (ggml_from_float_t) quantize_row_iq4_nl_reference,
701716
.vec_dot = ggml_vec_dot_iq4_nl_q8_0,
702717
.vec_dot_type = GGML_TYPE_Q8_0,
703718
.nrows = 1,

llama.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5928,18 +5928,20 @@ struct llm_build_context {
59285928

59295929
// get input vectors with right size
59305930
const size_t stride1 = n_tokens * ggml_type_size(lctx.inp_tokens->type);
5931-
struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0);
5931+
5932+
struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0);
59325933
struct ggml_tensor * inp_mean = ggml_view_2d(ctx0, lctx.inp_mean, n_tokens, n_tokens, stride1, 0);
5933-
struct ggml_tensor * inp_cls = ggml_view_1d(ctx0, lctx.inp_cls, n_tokens, 0);
5934+
struct ggml_tensor * inp_cls = ggml_view_1d(ctx0, lctx.inp_cls, n_tokens, 0);
59345935

59355936
// construct input embeddings (token, type, position)
59365937
inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb);
59375938

59385939
// token types are hardcoded to zero ("Sentence A")
59395940
struct ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0);
59405941
inpL = ggml_add(ctx0, inpL, type_row0);
5942+
59415943
if (model.arch == LLM_ARCH_BERT) {
5942-
inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, inp_pos), inpL);
5944+
inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, ggml_cast(ctx0, inp_pos, GGML_TYPE_I32)), inpL);
59435945
}
59445946
cb(inpL, "inp_embd", -1);
59455947

tests/test-quantize-fns.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,10 @@ int main(int argc, char * argv[]) {
143143
continue;
144144
}
145145

146-
printf("Testing %s\n", ggml_type_name((ggml_type) i));
147-
ggml_quantize_init(ei);
146+
if (qfns.from_float && qfns.to_float && qfns.vec_dot) {
147+
printf("Testing %s\n", ggml_type_name((ggml_type) i));
148+
ggml_quantize_init(ei);
148149

149-
if (qfns.from_float && qfns.to_float) {
150150
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
151151
const float max_quantization_error =
152152
type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :

tests/test-quantize-perf.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ int main(int argc, char * argv[]) {
275275
continue;
276276
}
277277

278-
if (qfns.from_float && qfns.to_float) {
278+
if (qfns.from_float && qfns.to_float && qfns.vec_dot) {
279279
printf("%s\n", ggml_type_name(type));
280280

281281
ggml_quantize_init(type);

0 commit comments

Comments (0)