@@ -4190,15 +4190,18 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
4190
4190
sumf = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3);
4191
4191
#endif
4192
4192
for (; ib < nb; ++ib) {
4193
- int sumi = 0;
4193
+ int sumi0 = 0;
4194
+ int sumi1 = 0;
4194
4195
4195
4196
for (int j = 0; j < qk/2; ++j) {
4196
4197
const int v0 = (x[ib].qs[j] & 0x0F) - 8;
4197
4198
const int v1 = (x[ib].qs[j] >> 4) - 8;
4198
4199
4199
- sumi += (v0 * y[ib].qs[j]) + (v1 * y[ib].qs[j + qk/2]);
4200
+ sumi0 += (v0 * y[ib].qs[j]);
4201
+ sumi1 += (v1 * y[ib].qs[j + qk/2]);
4200
4202
}
4201
4203
4204
+ int sumi = sumi0 + sumi1;
4202
4205
sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d);
4203
4206
}
4204
4207
@@ -4474,15 +4477,18 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
4474
4477
sumf = hsum_float_8(acc) + summs;
4475
4478
#endif
4476
4479
for (; ib < nb; ++ib) {
4477
- int sumi = 0;
4480
+ int sumi0 = 0;
4481
+ int sumi1 = 0;
4478
4482
4479
4483
for (int j = 0; j < qk/2; ++j) {
4480
4484
const int v0 = (x[ib].qs[j] & 0x0F);
4481
4485
const int v1 = (x[ib].qs[j] >> 4);
4482
4486
4483
- sumi += (v0 * y[ib].qs[j]) + (v1 * y[ib].qs[j + qk/2]);
4487
+ sumi0 += (v0 * y[ib].qs[j]);
4488
+ sumi1 += (v1 * y[ib].qs[j + qk/2]);
4484
4489
}
4485
4490
4491
+ int sumi = sumi0 + sumi1;
4486
4492
sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s);
4487
4493
}
4488
4494
@@ -4823,18 +4829,21 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r
4823
4829
uint32_t qh;
4824
4830
memcpy(&qh, x[ib].qh, sizeof(qh));
4825
4831
4826
- int sumi = 0;
4832
+ int sumi0 = 0;
4833
+ int sumi1 = 0;
4827
4834
4828
4835
for (int j = 0; j < qk/2; ++j) {
4829
4836
const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
4830
4837
const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12));
4831
4838
4832
- const int32_t x0 = (( x[ib].qs[j] & 0x0F) | xh_0) - 16;
4833
- const int32_t x1 = (( x[ib].qs[j] >> 4) | xh_1) - 16;
4839
+ const int32_t x0 = (int8_t)((( x[ib].qs[j] & 0x0F) | xh_0) - 16) ;
4840
+ const int32_t x1 = (int8_t)((( x[ib].qs[j] >> 4) | xh_1) - 16) ;
4834
4841
4835
- sumi += (x0 * y[ib].qs[j]) + (x1 * y[ib].qs[j + qk/2]);
4842
+ sumi0 += (x0 * y[ib].qs[j]);
4843
+ sumi1 += (x1 * y[ib].qs[j + qk/2]);
4836
4844
}
4837
4845
4846
+ int sumi = sumi0 + sumi1;
4838
4847
sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi;
4839
4848
}
4840
4849
@@ -5194,7 +5203,8 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
5194
5203
uint32_t qh;
5195
5204
memcpy(&qh, x[ib].qh, sizeof(qh));
5196
5205
5197
- int sumi = 0;
5206
+ int sumi0 = 0;
5207
+ int sumi1 = 0;
5198
5208
5199
5209
for (int j = 0; j < qk/2; ++j) {
5200
5210
const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
@@ -5203,9 +5213,11 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
5203
5213
const int32_t x0 = (x[ib].qs[j] & 0xF) | xh_0;
5204
5214
const int32_t x1 = (x[ib].qs[j] >> 4) | xh_1;
5205
5215
5206
- sumi += (x0 * y[ib].qs[j]) + (x1 * y[ib].qs[j + qk/2]);
5216
+ sumi0 += (x0 * y[ib].qs[j]);
5217
+ sumi1 += (x1 * y[ib].qs[j + qk/2]);
5207
5218
}
5208
5219
5220
+ int sumi = sumi0 + sumi1;
5209
5221
sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s);
5210
5222
}
5211
5223
0 commit comments