@@ -13812,7 +13812,6 @@ void ggml_vec_dot_iq2_bn_q8_K64(int n, float * s, size_t bs, const void * vx, si
13812
13812
}
13813
13813
#endif */
13814
13814
13815
-
13816
13815
/* void ggml_vec_dot_iq2_k_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
13817
13816
assert(n % QK_K == 0);
13818
13817
assert(nrc == 1);
@@ -13829,60 +13828,6 @@ void ggml_vec_dot_iq2_bn_q8_K64(int n, float * s, size_t bs, const void * vx, si
13829
13828
13830
13829
}
13831
13830
13832
// ggml_vec_dot_iq2_ks_q8_K: dot product of an IQ2_KS-quantized row (vx) with a
// Q8_K-quantized row (vy). The single float result is written to *s.
//   n          - element count; asserted to be a multiple of QK_K.
//   s          - output; receives d * sumf on the scalar path below.
//   vx         - one ggml_half row scale followed by n/QK_K block_iq2_ks blocks.
//   vy         - n/QK_K block_q8_K blocks.
//   bs, bx, by - not used by this function (marked UNUSED).
//   nrc        - asserted to be 1.
- void ggml_vec_dot_iq2_ks_q8_K(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
13833
- assert(n % QK_K == 0);
13834
- assert(nrc == 1);
13835
- UNUSED(nrc);
13836
- UNUSED(bx);
13837
- UNUSED(by);
13838
- UNUSED(bs);
13839
-
13840
// Optional fast path, compiled only when GGML_USE_IQK_MULMAT is set: a non-zero
// return from iqk_mul_mat ends the function immediately.
// NOTE(review): presumably that means *s was already written - confirm against iqk_mul_mat.
- #if GGML_USE_IQK_MULMAT
13841
- if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_KS, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
13842
- return;
13843
- }
13844
- #endif
13845
-
13846
// Scalar path. The row starts with one fp16 scale; the blocks follow right after it.
- const ggml_half * dptr = (const ggml_half *)vx;
13847
- const float d = GGML_FP16_TO_FP32(*dptr);
13848
- const block_iq2_ks * x = (const block_iq2_ks *)(dptr + 1);
13849
- const block_q8_K * y = (const block_q8_K *)vy;
13850
-
13851
- const int nb = n / QK_K;
13852
- float sumf = 0;
13853
- for (int i = 0; i < nb; i++) {
13854
- const uint8_t * qs = x[i].qs;
13855
- const int8_t * q8 = y[i].qs;
13856
// Local copy of the block's extra bits; it is consumed 4 bits at a time below.
- uint16_t extra = x[i].extra;
13857
- int sumi = 0;
13858
// Each iteration handles 128 values, split into four sub-blocks of 32.
- for (int ib128 = 0; ib128 < QK_K/128; ++ib128) {
13859
// Sub-block scales: a 4-bit nibble from scales[] plus a fifth bit (0x10) taken
// from bits 8..11 of the current extra value, then shifted down by 16.
- int d1 = (((x[i].scales[2*ib128+0] & 0xf) | ((extra >> 4) & 0x10)) - 16);
13860
- int d2 = (((x[i].scales[2*ib128+0] >> 4) | ((extra >> 5) & 0x10)) - 16);
13861
- int d3 = (((x[i].scales[2*ib128+1] & 0xf) | ((extra >> 6) & 0x10)) - 16);
13862
- int d4 = (((x[i].scales[2*ib128+1] >> 4) | ((extra >> 7) & 0x10)) - 16);
13863
// Bits 0..3 of extra pick, per sub-block, either iq2nl_values or the table
// starting 4 entries further in.
- const int8_t * values1 = extra & 1 ? iq2nl_values + 4 : iq2nl_values;
13864
- const int8_t * values2 = extra & 2 ? iq2nl_values + 4 : iq2nl_values;
13865
- const int8_t * values3 = extra & 4 ? iq2nl_values + 4 : iq2nl_values;
13866
- const int8_t * values4 = extra & 8 ? iq2nl_values + 4 : iq2nl_values;
13867
// Move the next 128-group's selector bits and scale high bits into position.
- extra >>= 4;
13868
- int sumi1 = 0, sumi2 = 0, sumi3 = 0, sumi4 = 0;
13869
// Every qs byte holds four 2-bit indices, one for each of the four sub-blocks;
// the matching q8 entries are 32 apart.
- for (int j = 0; j < 32; ++j) {
13870
- sumi1 += q8[j+ 0] * values1[(qs[j] >> 0) & 3];
13871
- sumi2 += q8[j+32] * values2[(qs[j] >> 2) & 3];
13872
- sumi3 += q8[j+64] * values3[(qs[j] >> 4) & 3];
13873
- sumi4 += q8[j+96] * values4[(qs[j] >> 6) & 3];
13874
- }
13875
- sumi += d1*sumi1 + d2*sumi2 + d3*sumi3 + d4*sumi4;
13876
// Advance to the next 128 q8 values and the next 32 packed bytes.
- q8 += 128;
13877
- qs += 32;
13878
- }
13879
// Apply the Q8_K block scale to this block's integer sum.
- sumf += y[i].d * sumi;
13880
- }
13881
-
13882
// The row-level fp16 scale is applied once, at the end.
- *s = d * sumf;
13883
-
13884
- }
13885
-
13886
13831
void ggml_vec_dot_iq3_k_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
13887
13832
assert(n % QK_K == 0);
13888
13833
assert(nrc == 1);
@@ -14080,113 +14025,3 @@ void ggml_vec_dot_iq6_k_q8_K(int n, float * s, size_t bs, const void * vx, size_
14080
14025
*s = sumf;
14081
14026
14082
14027
} */
14083
-
14084
- /* void ggml_vec_dot_iq4_ks_q8_K(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
14085
- constexpr int kBlockSize = 32;
14086
- #if GGML_USE_IQK_MULMAT
14087
- if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ4_KS, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
14088
- return;
14089
- }
14090
- #endif
14091
- assert(n%QK_K == 0);
14092
- assert(nrc == 1);
14093
- UNUSED(bs);
14094
- UNUSED(bx);
14095
- UNUSED(by);
14096
- const float * dptr = (const float *)vx;
14097
- const float d = *dptr;
14098
- //printf("%s: n = %d, d = %g\n", __func__, n, d);
14099
- const block_iq4_ks * x = (const block_iq4_ks *)(dptr + 1);
14100
- const block_q8_K * y = (const block_q8_K *)vy;
14101
- int nblock = n/QK_K;
14102
- float sumf = 0;
14103
- for (int ibl = 0; ibl < nblock; ++ibl) {
14104
- //int sumi = 0;
14105
- auto qy = y[ibl].qs;
14106
- auto qx = x[ibl].qs;
14107
- float db = d * y[ibl].d;
14108
- for (int ib = 0; ib < QK_K/kBlockSize; ++ib) {
14109
- float dl = db * ((x[ibl].scales[ib] & 254) - 127);
14110
- //int ls = (x[ibl].scales[ib] & 254) - 127;
14111
- const int8_t * values = iq4k_values + ((x[ibl].scales[ib] & 1) << 4);
14112
- int suml = 0;
14113
- for (int j = 0; j < kBlockSize/2; ++j) {
14114
- suml += qy[j ] * values[qx[j] & 0xf]
14115
- + qy[j + kBlockSize/2] * values[qx[j] >> 4];
14116
- }
14117
- sumf += dl * suml;
14118
- //sumi += ls * suml;
14119
- qy += kBlockSize;
14120
- qx += kBlockSize/2;
14121
- }
14122
- //sumf += d * y[ibl].d * sumi;
14123
- }
14124
- *s = sumf;
14125
- } */
14126
-
14127
- /* void ggml_vec_dot_iq4_kss_q8_K(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
14128
- #if GGML_USE_IQK_MULMAT
14129
- if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ4_KSS, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
14130
- return;
14131
- }
14132
- #endif
14133
- assert(n%QK_K == 0);
14134
- assert(nrc == 1);
14135
- UNUSED(bs);
14136
- UNUSED(bx);
14137
- UNUSED(by);
14138
- } */
14139
-
14140
- // ======================================= iq2_kt
14141
-
14142
- /* void ggml_vec_dot_iq2_kt_q8_K(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
14143
- assert(n % QK_K == 0);
14144
- assert(nrc == 1);
14145
- UNUSED(nrc);
14146
- UNUSED(bx);
14147
- UNUSED(by);
14148
- UNUSED(bs);
14149
-
14150
- #if GGML_USE_IQK_MULMAT
14151
- if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ2_KT, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
14152
- return;
14153
- }
14154
- #endif
14155
-
14156
- }
14157
-
14158
- // ======================================== iq3_kt
14159
-
14160
- void ggml_vec_dot_iq3_kt_q8_K(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
14161
- assert(n % QK_K == 0);
14162
- assert(nrc == 1);
14163
- UNUSED(nrc);
14164
- UNUSED(bx);
14165
- UNUSED(by);
14166
- UNUSED(bs);
14167
-
14168
- #if GGML_USE_IQK_MULMAT
14169
- if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ3_KT, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
14170
- return;
14171
- }
14172
- #endif
14173
-
14174
- }
14175
-
14176
- // ======================================== iq4_kt
14177
-
14178
- void ggml_vec_dot_iq4_kt_q8_K(int n, float * s, size_t bs, const void * vx, size_t bx, const void * vy, size_t by, int nrc) {
14179
- assert(n % QK_K == 0);
14180
- assert(nrc == 1);
14181
- UNUSED(nrc);
14182
- UNUSED(bx);
14183
- UNUSED(by);
14184
- UNUSED(bs);
14185
-
14186
- #if GGML_USE_IQK_MULMAT
14187
- if (iqk_mul_mat(1, 1, n, GGML_TYPE_IQ4_KT, vx, 0, GGML_TYPE_Q8_K, vy, 0, s, 0, 0, 1)) {
14188
- return;
14189
- }
14190
- #endif
14191
-
14192
- } */
0 commit comments