@@ -13085,6 +13085,12 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
13085
13085
}
13086
13086
}
13087
13087
13088
+ static int iq1_sort_helper(const void * left, const void * right) { // qsort comparator: orders elements ascending by the float stored at the start of each element
13089
+ const float * l = left; // only the leading float of each element is read, so the sorted records may be wider than one float
13090
+ const float * r = right;
13091
+ return *l < *r ? -1 : *l > *r ? 1 : 0; // -1 if *l < *r, 1 if *l > *r, otherwise 0 (equal values, or either operand is NaN)
13092
+ }
13093
+
13088
13094
static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {
13089
13095
13090
13096
const int gindex = iq2_data_index(GGML_TYPE_IQ2_XS);
@@ -13114,6 +13120,9 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
13114
13120
bool is_on_grid_aux[2];
13115
13121
uint8_t block_signs[2];
13116
13122
uint16_t q2[2*(QK_K/16)];
13123
+ uint16_t index[2], aux_index[2];
13124
+ float sumx[17], sumw[17], pairs[32];
13125
+ int * int_pairs = (int *)(pairs + 1);
13117
13126
13118
13127
for (int ibl = 0; ibl < nbl; ++ibl) {
13119
13128
@@ -13166,11 +13175,35 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
13166
13175
memset(L, 0, 16);
13167
13176
continue;
13168
13177
}
13169
- float best = 0;
13170
- float scale = max/(2*kMaxQ-1);
13178
+ for (int j = 0; j < 16; ++j) {
13179
+ pairs[2*j] = xval[j];
13180
+ int_pairs[2*j] = j;
13181
+ }
13182
+ qsort(pairs, 16, 2*sizeof(float), iq1_sort_helper);
13183
+ {
13184
+ sumx[0] = sumw[0] = 0;
13185
+ for (int j = 0; j < 16; ++j) {
13186
+ int i = int_pairs[2*j];
13187
+ sumx[j+1] = sumx[j] + weight[i]*xval[i];
13188
+ sumw[j+1] = sumw[j] + weight[i];
13189
+ }
13190
+ }
13191
+ float best = 0, scale = 0;
13192
+ for (int i1 = 0; i1 <= 16; ++i1) {
13193
+ for (int i2 = i1; i2 <= 16; ++i2) {
13194
+ float sumqx = (sumx[i1] - sumx[0])*1 + (sumx[i2] - sumx[i1])*3 + (sumx[16] - sumx[i2])*5;
13195
+ float sumq2 = (sumw[i1] - sumw[0])*1 + (sumw[i2] - sumw[i1])*9 + (sumw[16] - sumw[i2])*25;
13196
+ if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
13197
+ scale = sumqx/sumq2; best = scale*sumqx;
13198
+ }
13199
+ }
13200
+ }
13201
+ best = 0;
13202
+ float eff_max = scale*(2*kMaxQ - 1);
13171
13203
is_on_grid[0] = is_on_grid[1] = true;
13172
- for (int is = -9; is <= 9; ++is) {
13173
- float id = (2*kMaxQ-1+is*0.1f)/max;
13204
+ index[0] = index[1] = 0;
13205
+ for (int is = -7; is <= 7; ++is) {
13206
+ float id = (2*kMaxQ-1+is*0.1f)/eff_max;
13174
13207
float this_scale = 1/id;
13175
13208
for (int k = 0; k < 2; ++k) {
13176
13209
for (int i = 0; i < 8; ++i) {
@@ -13186,6 +13219,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
13186
13219
const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
13187
13220
grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, this_scale, Laux + 8*k);
13188
13221
}
13222
+ aux_index[k] = grid_index;
13189
13223
}
13190
13224
float sumqx = 0, sumq2 = 0;
13191
13225
for (int i = 0; i < 16; ++i) {
@@ -13198,35 +13232,45 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
13198
13232
scale = sumqx/sumq2; best = scale*sumqx;
13199
13233
for (int i = 0; i < 16; ++i) L[i] = Laux[i];
13200
13234
for (int k = 0; k < 2; ++k) is_on_grid[k] = is_on_grid_aux[k];
13235
+ for (int k = 0; k < 2; ++k) index[k] = aux_index[k];
13201
13236
}
13202
13237
}
13203
- int n_not_ongrid = 0;
13204
- for (int k = 0; k < 2; ++k) if (!is_on_grid[k]) ++n_not_ongrid;
13205
- if (n_not_ongrid > 0 && scale > 0) {
13206
- float id = 1/scale;
13207
- for (int k = 0; k < 2; ++k) {
13208
- if (is_on_grid[k]) continue;
13209
- uint16_t u = 0;
13210
- for (int i = 0; i < 8; ++i) {
13211
- int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
13212
- l = MAX(0, MIN(kMaxQ-1, l));
13213
- u |= (l << 2*i);
13214
- L[8*k + i] = l;
13238
+ if (scale) {
13239
+ for (int iter = 0; iter < 3; ++iter) {
13240
+ float id = 1/scale;
13241
+ bool changed = false;
13242
+ for (int k = 0; k < 2; ++k) {
13243
+ uint16_t u = 0;
13244
+ for (int i = 0; i < 8; ++i) {
13245
+ int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
13246
+ l = MAX(0, MIN(kMaxQ-1, l));
13247
+ u |= (l << 2*i);
13248
+ Laux[8*k + i] = l;
13249
+ }
13250
+ int grid_index = kmap_q2xs[u];
13251
+ if (grid_index < 0) {
13252
+ const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
13253
+ grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, scale, Laux + 8*k);
13254
+ }
13255
+ aux_index[k] = grid_index;
13256
+ if (grid_index != index[k]) changed = true;
13215
13257
}
13216
- int grid_index = kmap_q2xs[u];
13217
- if (grid_index < 0) {
13218
- const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
13219
- grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, scale, L + 8*k);
13258
+ if (!changed) break;
13259
+ float sumqx = 0, sumq2 = 0;
13260
+ for (int i = 0; i < 16; ++i) {
13261
+ float w = weight[i];
13262
+ float q = 2*Laux[i] + 1;
13263
+ sumqx += w*xval[i]*q;
13264
+ sumq2 += w*q*q;
13220
13265
}
13266
+ if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
13267
+ scale = sumqx/sumq2;
13268
+ best = scale*sumqx;
13269
+ memcpy(L, Laux, 16);
13270
+ for (int k = 0; k < 2; ++k) index[k] = aux_index[k];
13271
+ }
13272
+ else break;
13221
13273
}
13222
- float sumqx = 0, sumq2 = 0;
13223
- for (int i = 0; i < 16; ++i) {
13224
- float w = weight[i];
13225
- float q = 2*L[i] + 1;
13226
- sumqx += w*xval[i]*q;
13227
- sumq2 += w*q*q;
13228
- }
13229
- if (sumq2 > 0) scale = sumqx/sumq2;
13230
13274
}
13231
13275
if (scale < 0) {
13232
13276
scale = -scale;
@@ -13257,13 +13301,34 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
13257
13301
float d = max_scale/31;
13258
13302
y[ibl].d = GGML_FP32_TO_FP16(d);
13259
13303
float id = 1/d;
13304
+ float sumqx = 0, sumq2 = 0;
13260
13305
for (int ib = 0; ib < QK_K/16; ++ib) {
13261
13306
int l = nearest_int(0.5f*(id*scales[ib]-1));
13262
13307
l = MAX(0, MIN(15, l));
13263
13308
if (ib%2 == 0) y[ibl].scales[ib/2] = l;
13264
13309
else y[ibl].scales[ib/2] |= (l << 4);
13310
+ l = 2*l + 1;
13311
+ const float * xb = xbl + 16*ib;
13312
+ if (quant_weights) {
13313
+ const float * qw = quant_weights + QK_K*ibl + 16*ib;
13314
+ for (int i = 0; i < 16; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
13315
+ } else {
13316
+ for (int i = 0; i < 16; ++i) weight[i] = 0.25f*sigma2 + xb[i]*xb[i];
13317
+ }
13318
+ for (int k = 0; k < 2; ++k) {
13319
+ int grid_index = q2[2*ib+k] & 511;
13320
+ const int8_t * grid = (const int8_t *)(iq2xs_grid + grid_index);
13321
+ const uint8_t signs = ksigns_iq2xs[q2[2*ib+k] >> 9];
13322
+ for (int j = 0; j < 8; ++j) {
13323
+ float w = weight[8*k+j];
13324
+ float q = 0.125f*l*grid[j]*(signs & kmask_iq2xs[j] ? -1.f : 1.f);
13325
+ sumqx += w*q*xb[8*k+j];
13326
+ sumq2 += w*q*q;
13327
+ }
13328
+ }
13265
13329
}
13266
13330
memcpy(y[ibl].qs, q2, QK_K/4);
13331
+ if (sumq2 > 0) y[ibl].d = GGML_FP32_TO_FP16(1.05f*sumqx/sumq2);
13267
13332
13268
13333
}
13269
13334
}
@@ -14103,12 +14168,6 @@ static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const
14103
14168
return grid_index;
14104
14169
}
14105
14170
14106
- static int iq1_sort_helper(const void * left, const void * right) { // comparator with the (const void *, const void *) -> int shape that qsort takes; keys on the leading float
14107
- const float * l = left; // only the first float of each element is read
14108
- const float * r = right;
14109
- return *l < *r ? -1 : *l > *r ? 1 : 0; // -1 if *l < *r, 1 if *l > *r, otherwise 0 (equal values, or either operand is NaN)
14110
- }
14111
-
14112
14171
void iq1s_process_1block(int block_size, const float * xb, const float * weight, int8_t * L, float * the_scale, uint16_t * the_index, int * the_shift,
14113
14172
float * pairs, float * sumx, float * sumw) {
14114
14173
float max = fabsf(xb[0]);
0 commit comments