Skip to content

Commit 8772658

Browse files
committed
ggml : add I32 <-> F32 conversion
ggml-ci
1 parent fc77536 commit 8772658

File tree

4 files changed

+53
-36
lines changed

4 files changed

+53
-36
lines changed

ggml.c

Lines changed: 44 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,18 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
355355
}
356356
}
357357

358+
// Widen a row of n int32 values to float, element by element.
// The conversion is exact for |x| <= 2^24; larger magnitudes are rounded
// to the nearest representable float (24-bit significand).
static void ggml_i32_to_f32_row(const int32_t * x, float * y, int n) {
    const int32_t * src = x;
    float         * dst = y;
    for (int k = 0; k < n; ++k) {
        *dst++ = (float) *src++;
    }
}
363+
364+
// Narrow a row of n float values to int32 via a plain C cast, which
// truncates toward zero (e.g. -2.75f -> -2).
// NOTE(review): a value outside the int32 range makes the cast undefined
// behavior (C11 6.3.1.4); callers are presumably expected to pass
// in-range data — confirm at the call sites.
static void ggml_f32_to_i32_row(const float * x, int32_t * y, int n) {
    int k = 0;
    while (k < n) {
        y[k] = (int32_t) x[k];
        ++k;
    }
}
369+
358370
//
359371
// timing
360372
//
@@ -454,6 +466,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
454466
.blck_size = 1,
455467
.type_size = sizeof(int32_t),
456468
.is_quantized = false,
469+
.to_float = (ggml_to_float_t) ggml_i32_to_f32_row,
470+
.from_float = (ggml_from_float_t) ggml_f32_to_i32_row,
471+
.from_float_reference = (ggml_from_float_t) ggml_f32_to_i32_row,
457472
},
458473
[GGML_TYPE_F32] = {
459474
.type_name = "f32",
@@ -469,10 +484,10 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
469484
.blck_size = 1,
470485
.type_size = sizeof(ggml_fp16_t),
471486
.is_quantized = false,
472-
.to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
487+
.to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
473488
.from_float = (ggml_from_float_t) ggml_fp32_to_fp16_row,
474489
.from_float_reference = (ggml_from_float_t) ggml_fp32_to_fp16_row,
475-
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16,
490+
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16,
476491
.vec_dot_type = GGML_TYPE_F16,
477492
.nrows = 1,
478493
},
@@ -481,8 +496,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
481496
.blck_size = QK4_0,
482497
.type_size = sizeof(block_q4_0),
483498
.is_quantized = true,
484-
.to_float = (ggml_to_float_t) dequantize_row_q4_0,
485-
.from_float = quantize_row_q4_0,
499+
.to_float = (ggml_to_float_t) dequantize_row_q4_0,
500+
.from_float = (ggml_from_float_t) quantize_row_q4_0,
486501
.from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference,
487502
.vec_dot = ggml_vec_dot_q4_0_q8_0,
488503
.vec_dot_type = GGML_TYPE_Q8_0,
@@ -497,8 +512,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
497512
.blck_size = QK4_1,
498513
.type_size = sizeof(block_q4_1),
499514
.is_quantized = true,
500-
.to_float = (ggml_to_float_t) dequantize_row_q4_1,
501-
.from_float = quantize_row_q4_1,
515+
.to_float = (ggml_to_float_t) dequantize_row_q4_1,
516+
.from_float = (ggml_from_float_t) quantize_row_q4_1,
502517
.from_float_reference = (ggml_from_float_t) quantize_row_q4_1_reference,
503518
.vec_dot = ggml_vec_dot_q4_1_q8_1,
504519
.vec_dot_type = GGML_TYPE_Q8_1,
@@ -537,8 +552,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
537552
.blck_size = QK5_0,
538553
.type_size = sizeof(block_q5_0),
539554
.is_quantized = true,
540-
.to_float = (ggml_to_float_t) dequantize_row_q5_0,
541-
.from_float = quantize_row_q5_0,
555+
.to_float = (ggml_to_float_t) dequantize_row_q5_0,
556+
.from_float = (ggml_from_float_t) quantize_row_q5_0,
542557
.from_float_reference = (ggml_from_float_t) quantize_row_q5_0_reference,
543558
.vec_dot = ggml_vec_dot_q5_0_q8_0,
544559
.vec_dot_type = GGML_TYPE_Q8_0,
@@ -549,8 +564,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
549564
.blck_size = QK5_1,
550565
.type_size = sizeof(block_q5_1),
551566
.is_quantized = true,
552-
.to_float = (ggml_to_float_t) dequantize_row_q5_1,
553-
.from_float = quantize_row_q5_1,
567+
.to_float = (ggml_to_float_t) dequantize_row_q5_1,
568+
.from_float = (ggml_from_float_t) quantize_row_q5_1,
554569
.from_float_reference = (ggml_from_float_t) quantize_row_q5_1_reference,
555570
.vec_dot = ggml_vec_dot_q5_1_q8_1,
556571
.vec_dot_type = GGML_TYPE_Q8_1,
@@ -561,8 +576,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
561576
.blck_size = QK8_0,
562577
.type_size = sizeof(block_q8_0),
563578
.is_quantized = true,
564-
.to_float = (ggml_to_float_t) dequantize_row_q8_0,
565-
.from_float = quantize_row_q8_0,
579+
.to_float = (ggml_to_float_t) dequantize_row_q8_0,
580+
.from_float = (ggml_from_float_t) quantize_row_q8_0,
566581
.from_float_reference = (ggml_from_float_t) quantize_row_q8_0_reference,
567582
.vec_dot = ggml_vec_dot_q8_0_q8_0,
568583
.vec_dot_type = GGML_TYPE_Q8_0,
@@ -577,7 +592,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
577592
.blck_size = QK8_1,
578593
.type_size = sizeof(block_q8_1),
579594
.is_quantized = true,
580-
.from_float = quantize_row_q8_1,
595+
.from_float = (ggml_from_float_t) quantize_row_q8_1,
581596
.from_float_reference = (ggml_from_float_t) quantize_row_q8_1_reference,
582597
.vec_dot_type = GGML_TYPE_Q8_1,
583598
.nrows = 1,
@@ -587,8 +602,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
587602
.blck_size = QK_K,
588603
.type_size = sizeof(block_q2_K),
589604
.is_quantized = true,
590-
.to_float = (ggml_to_float_t) dequantize_row_q2_K,
591-
.from_float = quantize_row_q2_K,
605+
.to_float = (ggml_to_float_t) dequantize_row_q2_K,
606+
.from_float = (ggml_from_float_t) quantize_row_q2_K,
592607
.from_float_reference = (ggml_from_float_t) quantize_row_q2_K_reference,
593608
.vec_dot = ggml_vec_dot_q2_K_q8_K,
594609
.vec_dot_type = GGML_TYPE_Q8_K,
@@ -599,8 +614,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
599614
.blck_size = QK_K,
600615
.type_size = sizeof(block_q3_K),
601616
.is_quantized = true,
602-
.to_float = (ggml_to_float_t) dequantize_row_q3_K,
603-
.from_float = quantize_row_q3_K,
617+
.to_float = (ggml_to_float_t) dequantize_row_q3_K,
618+
.from_float = (ggml_from_float_t) quantize_row_q3_K,
604619
.from_float_reference = (ggml_from_float_t) quantize_row_q3_K_reference,
605620
.vec_dot = ggml_vec_dot_q3_K_q8_K,
606621
.vec_dot_type = GGML_TYPE_Q8_K,
@@ -611,8 +626,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
611626
.blck_size = QK_K,
612627
.type_size = sizeof(block_q4_K),
613628
.is_quantized = true,
614-
.to_float = (ggml_to_float_t) dequantize_row_q4_K,
615-
.from_float = quantize_row_q4_K,
629+
.to_float = (ggml_to_float_t) dequantize_row_q4_K,
630+
.from_float = (ggml_from_float_t) quantize_row_q4_K,
616631
.from_float_reference = (ggml_from_float_t) quantize_row_q4_K_reference,
617632
.vec_dot = ggml_vec_dot_q4_K_q8_K,
618633
.vec_dot_type = GGML_TYPE_Q8_K,
@@ -623,8 +638,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
623638
.blck_size = QK_K,
624639
.type_size = sizeof(block_q5_K),
625640
.is_quantized = true,
626-
.to_float = (ggml_to_float_t) dequantize_row_q5_K,
627-
.from_float = quantize_row_q5_K,
641+
.to_float = (ggml_to_float_t) dequantize_row_q5_K,
642+
.from_float = (ggml_from_float_t) quantize_row_q5_K,
628643
.from_float_reference = (ggml_from_float_t) quantize_row_q5_K_reference,
629644
.vec_dot = ggml_vec_dot_q5_K_q8_K,
630645
.vec_dot_type = GGML_TYPE_Q8_K,
@@ -635,8 +650,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
635650
.blck_size = QK_K,
636651
.type_size = sizeof(block_q6_K),
637652
.is_quantized = true,
638-
.to_float = (ggml_to_float_t) dequantize_row_q6_K,
639-
.from_float = quantize_row_q6_K,
653+
.to_float = (ggml_to_float_t) dequantize_row_q6_K,
654+
.from_float = (ggml_from_float_t) quantize_row_q6_K,
640655
.from_float_reference = (ggml_from_float_t) quantize_row_q6_K_reference,
641656
.vec_dot = ggml_vec_dot_q6_K_q8_K,
642657
.vec_dot_type = GGML_TYPE_Q8_K,
@@ -671,9 +686,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
671686
.blck_size = QK_K,
672687
.type_size = sizeof(block_iq3_xxs),
673688
.is_quantized = true,
674-
.to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
675-
.from_float = quantize_row_iq3_xxs,
676-
.from_float_reference = (ggml_from_float_t)quantize_row_iq3_xxs_reference,
689+
.to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
690+
.from_float = (ggml_from_float_t) quantize_row_iq3_xxs,
691+
.from_float_reference = (ggml_from_float_t) quantize_row_iq3_xxs_reference,
677692
.vec_dot = ggml_vec_dot_iq3_xxs_q8_K,
678693
.vec_dot_type = GGML_TYPE_Q8_K,
679694
.nrows = 1,
@@ -695,9 +710,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
695710
.blck_size = QK4_NL,
696711
.type_size = sizeof(block_iq4_nl),
697712
.is_quantized = true,
698-
.to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
699-
.from_float = quantize_row_iq4_nl,
700-
.from_float_reference = (ggml_from_float_t)quantize_row_iq4_nl_reference,
713+
.to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
714+
.from_float = (ggml_from_float_t) quantize_row_iq4_nl,
715+
.from_float_reference = (ggml_from_float_t) quantize_row_iq4_nl_reference,
701716
.vec_dot = ggml_vec_dot_iq4_nl_q8_0,
702717
.vec_dot_type = GGML_TYPE_Q8_0,
703718
.nrows = 1,

llama.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5928,18 +5928,20 @@ struct llm_build_context {
59285928

59295929
// get input vectors with right size
59305930
const size_t stride1 = n_tokens * ggml_type_size(lctx.inp_tokens->type);
5931-
struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0);
5931+
5932+
struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0);
59325933
struct ggml_tensor * inp_mean = ggml_view_2d(ctx0, lctx.inp_mean, n_tokens, n_tokens, stride1, 0);
5933-
struct ggml_tensor * inp_cls = ggml_view_1d(ctx0, lctx.inp_cls, n_tokens, 0);
5934+
struct ggml_tensor * inp_cls = ggml_view_1d(ctx0, lctx.inp_cls, n_tokens, 0);
59345935

59355936
// construct input embeddings (token, type, position)
59365937
inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb);
59375938

59385939
// token types are hardcoded to zero ("Sentence A")
59395940
struct ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0);
59405941
inpL = ggml_add(ctx0, inpL, type_row0);
5942+
59415943
if (model.arch == LLM_ARCH_BERT) {
5942-
inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, inp_pos), inpL);
5944+
inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, ggml_cast(ctx0, inp_pos, GGML_TYPE_I32)), inpL);
59435945
}
59445946
cb(inpL, "inp_embd", -1);
59455947

tests/test-quantize-fns.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,10 @@ int main(int argc, char * argv[]) {
143143
continue;
144144
}
145145

146-
printf("Testing %s\n", ggml_type_name((ggml_type) i));
147-
ggml_quantize_init(ei);
146+
if (qfns.from_float && qfns.to_float && qfns.vec_dot) {
147+
printf("Testing %s\n", ggml_type_name((ggml_type) i));
148+
ggml_quantize_init(ei);
148149

149-
if (qfns.from_float && qfns.to_float) {
150150
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
151151
const float max_quantization_error =
152152
type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :

tests/test-quantize-perf.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ int main(int argc, char * argv[]) {
275275
continue;
276276
}
277277

278-
if (qfns.from_float && qfns.to_float) {
278+
if (qfns.from_float && qfns.to_float && qfns.vec_dot) {
279279
printf("%s\n", ggml_type_name(type));
280280

281281
ggml_quantize_init(type);

0 commit comments

Comments (0)