Commit 51b6f81

Direct conversion from fp16 to Q6_0

1 parent 75bb7f4, commit 51b6f81

4 files changed (+56 -11 lines):

convert_hf_to_gguf.py
gguf-py/gguf/constants.py
gguf-py/gguf/quants.py
gguf-py/tests/test_quants.py

convert_hf_to_gguf.py

Lines changed: 9 additions & 9 deletions
@@ -323,6 +323,7 @@ def prepare_tensors(self):
                     gguf.MODEL_TENSOR.OUTPUT,
                     gguf.MODEL_TENSOR.ATTN_V,
                     gguf.MODEL_TENSOR.ATTN_K,
+                    gguf.MODEL_TENSOR.ATTN_QKV,
                 )
             ):
                 if self.ftype in (
@@ -333,15 +334,14 @@ def prepare_tensors(self):
                 elif self.ftype in (
                     gguf.LlamaFileType.MOSTLY_Q5_0,
                     gguf.LlamaFileType.MOSTLY_Q5_1,
-                    # gguf.LlamaFileType.MOSTLY_Q6_0,
                 ):
-                    data_qtype = gguf.GGMLQuantizationType.Q8_0
+                    data_qtype = gguf.GGMLQuantizationType.Q6_0
                 elif self.ftype in (
                     gguf.LlamaFileType.MOSTLY_TQ1_0,
                     gguf.LlamaFileType.MOSTLY_TQ2_0,
                 ):
                     # TODO: use Q4_K and Q6_K
-                    data_qtype = gguf.GGMLQuantizationType.F16
+                    data_qtype = gguf.GGMLQuantizationType.Q6_0

         # No override (data_qtype is False), or wants to be quantized (data_qtype is True)
         if isinstance(data_qtype, bool):
@@ -359,8 +359,8 @@ def prepare_tensors(self):
                 data_qtype = gguf.GGMLQuantizationType.Q5_0
             elif self.ftype == gguf.LlamaFileType.MOSTLY_Q5_1:
                 data_qtype = gguf.GGMLQuantizationType.Q5_1
-            # elif self.ftype == gguf.LlamaFileType.MOSTLY_Q6_0: // To be implemented?
-            #     data_qtype = gguf.GGMLQuantizationType.Q6_0
+            elif self.ftype == gguf.LlamaFileType.MOSTLY_Q6_0:
+                data_qtype = gguf.GGMLQuantizationType.Q6_0
             elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0:
                 data_qtype = gguf.GGMLQuantizationType.Q8_0
             elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ1_0:
@@ -545,7 +545,7 @@ def set_gguf_parameters(self):
         logger.info("****************************************************************************************")
         logger.info("** quantizing to `Q4_0`,`Q4_1`,`Q5_0`, or `Q5_1`is not equiv to using `llama-quantize`")
         logger.info("** `Q4_0`,`Q4_1` are here using embeddings, output, attn_k and attn_v in q5_0")
-        logger.info("** `Q5_0`,`Q5_1` are here using embeddings, output, attn_k and attn_v in q8_0")
+        logger.info("** `Q5_0`,`Q5_1` are here using embeddings, output, attn_k and attn_v in q6_0")
         logger.info("** This, in order to generate a small but reliable conversion to create an iMatrix file.")
         logger.info("****************************************************************************************")

@@ -6373,8 +6373,8 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "q5_0", "q5_1", "tq1_0", "tq2_0", "auto"], default="f16",
-        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, q4_0, q4_1, q5_0, q5_1 for a smaller conversion to then create an iMatrix file for example, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
+        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "q5_0", "q5_1", "q6_0", "tq1_0", "tq2_0", "auto"], default="f16",
+        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, q4_0, q4_1, q5_0, q5_1, q6_0 for a smaller conversion to then create an iMatrix file for example, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
     )
     parser.add_argument(
         "--bigendian", action="store_true",
@@ -6507,7 +6507,7 @@ def main() -> None:
         "q4_1": gguf.LlamaFileType.MOSTLY_Q4_1,
         "q5_0": gguf.LlamaFileType.MOSTLY_Q5_0,
         "q5_1": gguf.LlamaFileType.MOSTLY_Q5_1,
-        # "q6_0": gguf.LlamaFileType.MOSTLY_Q6_0,
+        "q6_0": gguf.LlamaFileType.MOSTLY_Q6_0,
         "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
         "tq1_0": gguf.LlamaFileType.MOSTLY_TQ1_0,
         "tq2_0": gguf.LlamaFileType.MOSTLY_TQ2_0,

gguf-py/gguf/constants.py

Lines changed: 2 additions & 1 deletion
@@ -2401,6 +2401,7 @@ class VisionProjectorType:
     GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
     GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
     GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
+    GGMLQuantizationType.Q6_0: (32, 2 + 8 + 16),
     GGMLQuantizationType.Q8_0: (32, 2 + 32),
     GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
     GGMLQuantizationType.Q2_K: (256, 2 + 2 + QK_K // 16 + QK_K // 4),
@@ -2433,7 +2434,7 @@ class VisionProjectorType:
     GGMLQuantizationType.Q8_0_X4 : ( 32, 34),
     GGMLQuantizationType.Q8_1_X4 : ( 32, 36),
     GGMLQuantizationType.Q8_2_X4 : ( 32, 36),
-    GGMLQuantizationType.Q6_0   : ( 32, 26),
+    # GGMLQuantizationType.Q6_0 : ( 32, 26),
     GGMLQuantizationType.IQ1_BN : ( 64, 13),
     GGMLQuantizationType.IQ2_BN : ( 64, 16),
     GGMLQuantizationType.Q8_K64 : ( 64, 68),
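The new entry encodes the Q6_0 block layout: 32 weights per block, stored as a 2-byte fp16 scale, 8 bytes of high bits (2 bits x 32 weights) and 16 bytes of low nibbles (4 bits x 32 weights), i.e. 26 bytes per block, or 6.5 bits per weight. A quick sanity check of that arithmetic (standalone snippet, not part of the commit):

    block_size, type_size = 32, 2 + 8 + 16   # weights per block, bytes per block
    assert type_size == 26
    assert 32 * 2 // 8 == 8                  # high 2 bits of 32 weights -> 8 bytes
    assert 32 * 4 // 8 == 16                 # low 4 bits of 32 weights -> 16 bytes
    print(8 * type_size / block_size)        # 6.5 bits per weight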

gguf-py/gguf/quants.py

Lines changed: 44 additions & 0 deletions
@@ -377,6 +377,50 @@ def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
         return (d * qs) + m


+class Q6_0(__Quant, qtype=GGMLQuantizationType.Q6_0):
+    @classmethod
+    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
+        n_blocks = blocks.shape[0]
+
+        imax = abs(blocks).argmax(axis=-1, keepdims=True)
+        max = np.take_along_axis(blocks, imax, axis=-1)
+
+        d = max / -32
+        with np.errstate(divide="ignore"):
+            id = np.where(d == 0, 0, 1 / d)
+        # Adapted from Q5_0
+        q = np.trunc((np.float64(blocks) * np.float64(id)) + np.float64(32.5), dtype=np.float32).astype(np.uint8).clip(0, 63)
+
+        qs = q.reshape((n_blocks, 2, cls.block_size // 2))
+        qs = (qs[..., 0, :] & np.uint8(0x0F)) | (qs[..., 1, :] << np.uint8(4))
+
+        qh = np.zeros((n_blocks, cls.block_size // 4), dtype=np.uint8)
+        for j in range(cls.block_size // 2):
+            h = ((q[:, j] >> 4) | ((q[:, j + cls.block_size // 2] >> 4) << 2)).astype(np.uint8)
+            qh[:, j % (cls.block_size // 4)] |= (h << 4 * (j // (cls.block_size // 4)))
+
+        d = d.astype(np.float16).view(np.uint8)
+
+        return np.concatenate([d, qh, qs], axis=-1)
+
+    # @classmethod
+    # def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
+    #     n_blocks = blocks.shape[0]
+
+    #     d, rest = np.hsplit(blocks, [2])                 # (n_blocks, 2)
+    #     qh, qs = np.hsplit(rest, [cls.block_size // 4])  # (n_blocks, block_size//4), (n_blocks, block_size//2*2)
+
+    #     d = d.view(np.float16).astype(np.float32)
+
+    #     qh = qh.reshape((n_blocks, 1)) >> np.array([i for i in range(32)], dtype=np.uint32).reshape((1, 32))
+    #     ql = qs.reshape((n_blocks, -1, 1, cls.block_size // 2)) >> np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2, 1))
+    #     qh = (qh & np.uint32(0x03)).astype(np.uint8)
+    #     ql = (ql & np.uint8(0x0F)).reshape((n_blocks, -1))
+
+    #     qs = (ql | (qh << np.uint8(4))).astype(np.int8) - np.int8(32)
+
+    #     return (d * qs.astype(np.float32))
+
+
 class Q8_0(__Quant, qtype=GGMLQuantizationType.Q8_0):
     @classmethod
     # Implementation of Q8_0 with bit-exact same results as reference implementation in ggml-quants.c
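quantize_blocks scales each block by d = max / -32 (so the largest-magnitude weight maps to an end of the 6-bit range), packs the low nibble of each 6-bit value into qs (weight j and weight j+16 share a byte), and scatters the top 2 bits of each value into the 8-byte qh array. dequantize_blocks is left commented out, and as written it still carries Q5_0's single-uint32 qh unpacking, which does not match the 8-byte qh that quantize_blocks writes. A minimal NumPy sketch of the inverse, assuming exactly the layout produced above (q6_0_dequantize_blocks is a hypothetical standalone helper, not part of the commit):

    import numpy as np

    def q6_0_dequantize_blocks(blocks: np.ndarray, block_size: int = 32) -> np.ndarray:
        # blocks: (n_blocks, 26) uint8, laid out as [d: 2B fp16][qh: 8B][qs: 16B]
        n_blocks = blocks.shape[0]
        d, rest = np.hsplit(blocks, [2])
        qh, qs = np.hsplit(rest, [block_size // 4])

        d = d.view(np.float16).astype(np.float32)  # (n_blocks, 1)

        # Low 4 bits: byte j of qs holds weight j in its low nibble, weight j+16 in its high nibble.
        ql = np.concatenate([qs & np.uint8(0x0F), qs >> np.uint8(4)], axis=-1)

        # High 2 bits: byte k of qh packs weight k at bit 0, weight k+16 at bit 2,
        # weight k+8 at bit 4, weight k+24 at bit 6 -- the inverse of the pack loop above.
        shifts = np.array([0, 4, 2, 6], dtype=np.uint8).reshape(1, 4, 1)
        qhi = (qh.reshape(n_blocks, 1, block_size // 4) >> shifts) & np.uint8(0x03)
        qhi = qhi.reshape(n_blocks, block_size)

        q = (ql | (qhi << np.uint8(4))).astype(np.int8) - np.int8(32)
        return d * q.astype(np.float32)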

gguf-py/tests/test_quants.py

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ def __init__(self, libggml: Path):
         self.libggml.ggml_quantize_requires_imatrix.argtypes = (ctypes.c_int,)

         for t in (
-            "q4_0", "q4_1", "q5_0", "q5_1", "q8_0",
+            "q4_0", "q4_1", "q5_0", "q5_1", "q8_0", "q6_0",
             "q2_K", "q3_K", "q4_K", "q5_K", "q6_K",
             "tq1_0", "tq2_0",
             "iq2_xxs", "iq2_xs", "iq2_s", "iq3_xxs", "iq3_s", "iq1_s", "iq1_m",
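Even without the C reference library the new class can be smoke-tested from pure Python; a hedged round-trip sketch, assuming gguf-py is on PYTHONPATH (the dequantizer sketched in the previous section would close the loop):

    import numpy as np
    from gguf import GGMLQuantizationType
    from gguf.quants import quantize

    # One row of 64 fp32 weights -> two Q6_0 blocks of 26 bytes each.
    data = np.random.uniform(-1.0, 1.0, (1, 64)).astype(np.float32)
    packed = quantize(data, GGMLQuantizationType.Q6_0)
    print(packed.shape, packed.dtype)  # expected: (1, 52) uint8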
