@@ -6546,11 +6546,11 @@ void ggml_compute_forward_im2col_back_f32(
6546
6546
}
6547
6547
}
6548
6548
6549
- static void ggml_call_mul_mat (ggml_type T , const ggml_compute_params * params, int64_t m, int64_t n, int64_t k,
6550
- void * a, void * b, void * c) {
6551
- const ggml_type_traits * traits = ggml_get_type_traits (T );
6549
+ static void ggml_call_mul_mat (ggml_type type , const ggml_compute_params * params, int64_t m, int64_t n, int64_t k,
6550
+ void * a, void * b, float * c) {
6551
+ const ggml_type_traits * traits = ggml_get_type_traits (type );
6552
6552
struct ggml_tensor src1 = {};
6553
- src1.type = T ;
6553
+ src1.type = type ;
6554
6554
src1.ne [0 ] = k;
6555
6555
src1.ne [1 ] = m;
6556
6556
src1.ne [2 ] = 1 ;
@@ -6562,7 +6562,7 @@ static void ggml_call_mul_mat(ggml_type T, const ggml_compute_params * params, i
6562
6562
src1.data = a;
6563
6563
6564
6564
struct ggml_tensor src0 = {};
6565
- src0.type = T ;
6565
+ src0.type = type ;
6566
6566
src0.ne [0 ] = k;
6567
6567
src0.ne [1 ] = n;
6568
6568
src0.ne [2 ] = 1 ;
@@ -6598,6 +6598,7 @@ static void ggml_compute_forward_conv_2d_impl(const ggml_compute_params * params
6598
6598
ggml_type kernel_type) {
6599
6599
6600
6600
GGML_ASSERT (ggml_is_contiguous (kernel));
6601
+ GGML_ASSERT (kernel_type == GGML_TYPE_F16 || kernel_type == GGML_TYPE_F32);
6601
6602
GGML_ASSERT (kernel->type == kernel_type);
6602
6603
6603
6604
const ggml_type_traits * traits = ggml_get_type_traits (kernel_type);
@@ -6620,9 +6621,9 @@ static void ggml_compute_forward_conv_2d_impl(const ggml_compute_params * params
6620
6621
const int64_t dst_w = dst->ne [0 ];
6621
6622
const int64_t dst_h = dst->ne [1 ];
6622
6623
6623
- float * src_data = (float *) src->data ;
6624
- void * knl_data = kernel->data ;
6625
- float * dst_data = (float *) dst->data ;
6624
+ const float * src_data = (float *) src->data ;
6625
+ void * knl_data = kernel->data ;
6626
+ float * dst_data = (float *) dst->data ;
6626
6627
6627
6628
const int64_t knl_n = knl_w * knl_h * c_in;
6628
6629
const int64_t patch_total = dst->ne [3 ] * dst_w * dst_h;
@@ -6653,8 +6654,8 @@ static void ggml_compute_forward_conv_2d_impl(const ggml_compute_params * params
6653
6654
const int64_t src_x = (p / dst_w) % dst_h;
6654
6655
const int64_t src_y = p % dst_w;
6655
6656
6656
- float * src_base = (float *)((char *)src_data + batch_n * src->nb [3 ]);
6657
- char * dst_row = (char *) tmp + (p % patches_per_batch) * knl_n * traits->type_size ;
6657
+ const float * src_base = (const float *)((const char *)src_data + batch_n * src->nb [3 ]);
6658
+ char * dst_row = (char *) tmp + (p % patches_per_batch) * knl_n * traits->type_size ;
6658
6659
6659
6660
for (int64_t ic = 0 ; ic < c_in; ++ic) {
6660
6661
for (int64_t ky = 0 ; ky < knl_h; ++ky) {
@@ -6668,15 +6669,15 @@ static void ggml_compute_forward_conv_2d_impl(const ggml_compute_params * params
6668
6669
if (sy < 0 || sy >= src_h || sx < 0 || sx >= src_w) {
6669
6670
src_val = 0 .0f ;
6670
6671
} else {
6671
- float * src_ptr = (float *)((char *)src_base + sx * src->nb [0 ] + sy * src->nb [1 ] + ic * src->nb [2 ]);
6672
- src_val = *src_ptr;
6672
+ const float * src_ptr = (const float *)((const char *)src_base + sx * src->nb [0 ] + sy * src->nb [1 ] + ic * src->nb [2 ]);
6673
+ src_val = *src_ptr;
6673
6674
}
6674
6675
6675
6676
char * element_ptr = dst_row + dst_idx * traits->type_size ;
6676
6677
if (kernel_type == GGML_TYPE_F32) {
6677
6678
*(float *) element_ptr = src_val;
6678
6679
} else if (kernel_type == GGML_TYPE_F16) {
6679
- *(ggml_fp16_t *) element_ptr = GGML_FP32_TO_FP16 (src_val);
6680
+ *(ggml_fp16_t *) element_ptr = GGML_CPU_FP32_TO_FP16 (src_val);
6680
6681
}
6681
6682
}
6682
6683
}
0 commit comments