Skip to content

Commit 2309ecd

Browse files
ikawrakow and Iwan Kawrakow authored
Better iq2_xs quantization (ikawrakow#312)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
1 parent a051f08 commit 2309ecd

File tree

1 file changed

+93
-34
lines changed

1 file changed

+93
-34
lines changed

ggml/src/ggml-quants.c

Lines changed: 93 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -13085,6 +13085,12 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
1308513085
}
1308613086
}
1308713087

13088+
/*
 * qsort() comparator that orders elements by the float stored at their start.
 *
 * Only the first float of each element is read, so the elements handed to
 * qsort() may be wider than a single float as long as the sort key comes first.
 *
 * Returns -1 if *left < *right, 1 if *left > *right, and 0 if they are equal.
 * NaN keys compare equal to each other and greater than every number: qsort()
 * requires a consistent ordering, and treating NaN as "equal to everything"
 * would not be one.
 */
static int iq1_sort_helper(const void * left, const void * right) {
    const float l = *(const float *)left;
    const float r = *(const float *)right;
    const int l_is_nan = l != l; /* a value differs from itself only if it is NaN */
    const int r_is_nan = r != r;
    if (l_is_nan || r_is_nan) {
        return l_is_nan - r_is_nan;
    }
    return l < r ? -1 : l > r ? 1 : 0;
}
13093+
1308813094
static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {
1308913095

1309013096
const int gindex = iq2_data_index(GGML_TYPE_IQ2_XS);
@@ -13114,6 +13120,9 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
1311413120
bool is_on_grid_aux[2];
1311513121
uint8_t block_signs[2];
1311613122
uint16_t q2[2*(QK_K/16)];
13123+
uint16_t index[2], aux_index[2];
13124+
float sumx[17], sumw[17], pairs[32];
13125+
int * int_pairs = (int *)(pairs + 1);
1311713126

1311813127
for (int ibl = 0; ibl < nbl; ++ibl) {
1311913128

@@ -13166,11 +13175,35 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
1316613175
memset(L, 0, 16);
1316713176
continue;
1316813177
}
13169-
float best = 0;
13170-
float scale = max/(2*kMaxQ-1);
13178+
for (int j = 0; j < 16; ++j) {
13179+
pairs[2*j] = xval[j];
13180+
int_pairs[2*j] = j;
13181+
}
13182+
qsort(pairs, 16, 2*sizeof(float), iq1_sort_helper);
13183+
{
13184+
sumx[0] = sumw[0] = 0;
13185+
for (int j = 0; j < 16; ++j) {
13186+
int i = int_pairs[2*j];
13187+
sumx[j+1] = sumx[j] + weight[i]*xval[i];
13188+
sumw[j+1] = sumw[j] + weight[i];
13189+
}
13190+
}
13191+
float best = 0, scale = 0;
13192+
for (int i1 = 0; i1 <= 16; ++i1) {
13193+
for (int i2 = i1; i2 <= 16; ++i2) {
13194+
float sumqx = (sumx[i1] - sumx[0])*1 + (sumx[i2] - sumx[i1])*3 + (sumx[16] - sumx[i2])*5;
13195+
float sumq2 = (sumw[i1] - sumw[0])*1 + (sumw[i2] - sumw[i1])*9 + (sumw[16] - sumw[i2])*25;
13196+
if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
13197+
scale = sumqx/sumq2; best = scale*sumqx;
13198+
}
13199+
}
13200+
}
13201+
best = 0;
13202+
float eff_max = scale*(2*kMaxQ - 1);
1317113203
is_on_grid[0] = is_on_grid[1] = true;
13172-
for (int is = -9; is <= 9; ++is) {
13173-
float id = (2*kMaxQ-1+is*0.1f)/max;
13204+
index[0] = index[1] = 0;
13205+
for (int is = -7; is <= 7; ++is) {
13206+
float id = (2*kMaxQ-1+is*0.1f)/eff_max;
1317413207
float this_scale = 1/id;
1317513208
for (int k = 0; k < 2; ++k) {
1317613209
for (int i = 0; i < 8; ++i) {
@@ -13186,6 +13219,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
1318613219
const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
1318713220
grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, this_scale, Laux + 8*k);
1318813221
}
13222+
aux_index[k] = grid_index;
1318913223
}
1319013224
float sumqx = 0, sumq2 = 0;
1319113225
for (int i = 0; i < 16; ++i) {
@@ -13198,35 +13232,45 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
1319813232
scale = sumqx/sumq2; best = scale*sumqx;
1319913233
for (int i = 0; i < 16; ++i) L[i] = Laux[i];
1320013234
for (int k = 0; k < 2; ++k) is_on_grid[k] = is_on_grid_aux[k];
13235+
for (int k = 0; k < 2; ++k) index[k] = aux_index[k];
1320113236
}
1320213237
}
13203-
int n_not_ongrid = 0;
13204-
for (int k = 0; k < 2; ++k) if (!is_on_grid[k]) ++n_not_ongrid;
13205-
if (n_not_ongrid > 0 && scale > 0) {
13206-
float id = 1/scale;
13207-
for (int k = 0; k < 2; ++k) {
13208-
if (is_on_grid[k]) continue;
13209-
uint16_t u = 0;
13210-
for (int i = 0; i < 8; ++i) {
13211-
int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
13212-
l = MAX(0, MIN(kMaxQ-1, l));
13213-
u |= (l << 2*i);
13214-
L[8*k + i] = l;
13238+
if (scale) {
13239+
for (int iter = 0; iter < 3; ++iter) {
13240+
float id = 1/scale;
13241+
bool changed = false;
13242+
for (int k = 0; k < 2; ++k) {
13243+
uint16_t u = 0;
13244+
for (int i = 0; i < 8; ++i) {
13245+
int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
13246+
l = MAX(0, MIN(kMaxQ-1, l));
13247+
u |= (l << 2*i);
13248+
Laux[8*k + i] = l;
13249+
}
13250+
int grid_index = kmap_q2xs[u];
13251+
if (grid_index < 0) {
13252+
const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
13253+
grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, scale, Laux + 8*k);
13254+
}
13255+
aux_index[k] = grid_index;
13256+
if (grid_index != index[k]) changed = true;
1321513257
}
13216-
int grid_index = kmap_q2xs[u];
13217-
if (grid_index < 0) {
13218-
const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
13219-
grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, scale, L + 8*k);
13258+
if (!changed) break;
13259+
float sumqx = 0, sumq2 = 0;
13260+
for (int i = 0; i < 16; ++i) {
13261+
float w = weight[i];
13262+
float q = 2*Laux[i] + 1;
13263+
sumqx += w*xval[i]*q;
13264+
sumq2 += w*q*q;
1322013265
}
13266+
if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
13267+
scale = sumqx/sumq2;
13268+
best = scale*sumqx;
13269+
memcpy(L, Laux, 16);
13270+
for (int k = 0; k < 2; ++k) index[k] = aux_index[k];
13271+
}
13272+
else break;
1322113273
}
13222-
float sumqx = 0, sumq2 = 0;
13223-
for (int i = 0; i < 16; ++i) {
13224-
float w = weight[i];
13225-
float q = 2*L[i] + 1;
13226-
sumqx += w*xval[i]*q;
13227-
sumq2 += w*q*q;
13228-
}
13229-
if (sumq2 > 0) scale = sumqx/sumq2;
1323013274
}
1323113275
if (scale < 0) {
1323213276
scale = -scale;
@@ -13257,13 +13301,34 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
1325713301
float d = max_scale/31;
1325813302
y[ibl].d = GGML_FP32_TO_FP16(d);
1325913303
float id = 1/d;
13304+
float sumqx = 0, sumq2 = 0;
1326013305
for (int ib = 0; ib < QK_K/16; ++ib) {
1326113306
int l = nearest_int(0.5f*(id*scales[ib]-1));
1326213307
l = MAX(0, MIN(15, l));
1326313308
if (ib%2 == 0) y[ibl].scales[ib/2] = l;
1326413309
else y[ibl].scales[ib/2] |= (l << 4);
13310+
l = 2*l + 1;
13311+
const float * xb = xbl + 16*ib;
13312+
if (quant_weights) {
13313+
const float * qw = quant_weights + QK_K*ibl + 16*ib;
13314+
for (int i = 0; i < 16; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
13315+
} else {
13316+
for (int i = 0; i < 16; ++i) weight[i] = 0.25f*sigma2 + xb[i]*xb[i];
13317+
}
13318+
for (int k = 0; k < 2; ++k) {
13319+
int grid_index = q2[2*ib+k] & 511;
13320+
const int8_t * grid = (const int8_t *)(iq2xs_grid + grid_index);
13321+
const uint8_t signs = ksigns_iq2xs[q2[2*ib+k] >> 9];
13322+
for (int j = 0; j < 8; ++j) {
13323+
float w = weight[8*k+j];
13324+
float q = 0.125f*l*grid[j]*(signs & kmask_iq2xs[j] ? -1.f : 1.f);
13325+
sumqx += w*q*xb[8*k+j];
13326+
sumq2 += w*q*q;
13327+
}
13328+
}
1326513329
}
1326613330
memcpy(y[ibl].qs, q2, QK_K/4);
13331+
if (sumq2 > 0) y[ibl].d = GGML_FP32_TO_FP16(1.05f*sumqx/sumq2);
1326713332

1326813333
}
1326913334
}
@@ -14103,12 +14168,6 @@ static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const
1410314168
return grid_index;
1410414169
}
1410514170

14106-
/*
 * qsort() comparator that orders elements by the float stored at their start.
 *
 * Only the first float of each element is read, so the elements handed to
 * qsort() may be wider than a single float as long as the sort key comes first.
 *
 * Returns -1 if *left < *right, 1 if *left > *right, and 0 if they are equal.
 * NaN keys compare equal to each other and greater than every number: qsort()
 * requires a consistent ordering, and treating NaN as "equal to everything"
 * would not be one.
 */
static int iq1_sort_helper(const void * left, const void * right) {
    const float l = *(const float *)left;
    const float r = *(const float *)right;
    const int l_is_nan = l != l; /* a value differs from itself only if it is NaN */
    const int r_is_nan = r != r;
    if (l_is_nan || r_is_nan) {
        return l_is_nan - r_is_nan;
    }
    return l < r ? -1 : l > r ? 1 : 0;
}
14111-
1411214171
void iq1s_process_1block(int block_size, const float * xb, const float * weight, int8_t * L, float * the_scale, uint16_t * the_index, int * the_shift,
1411314172
float * pairs, float * sumx, float * sumw) {
1411414173
float max = fabsf(xb[0]);

0 commit comments

Comments
 (0)