Skip to content

Commit 31afbae

Browse files
authored
Merge pull request #4 from viraatdas/master
bitnetmodel: directly use scale instead of inverting
2 parents ed8ec73 + 2c712e0 commit 31afbae

File tree

1 file changed

+3
-5
lines changed

1 file changed

+3
-5
lines changed

convert_hf_to_gguf.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
def weight_quant(self, weight: Tensor) -> Tensor:
    """Fake-quantize *weight* to ternary values {-scale, 0, +scale}.

    Implements the BitNet b1.58 absmean quantization: the per-tensor scale
    is the mean absolute value of the weights, each element is rounded to
    the nearest of -1/0/+1 in scale units, then rescaled back.

    Args:
        weight: input weight tensor (any float dtype).

    Returns:
        Tensor with the same shape and dtype as *weight*, holding the
        quantized-then-dequantized values.
    """
    # Remember the caller's dtype so the result round-trips exactly.
    dtype = weight.dtype
    weight = weight.float()
    # Absmean scale; clamp avoids division by zero for an all-zero tensor.
    scale = weight.abs().mean().clamp(min=1e-5)
    # Use the scale directly instead of inverting it twice:
    # round and clamp to {-1, 0, 1} in scale units, then multiply by
    # scale to map back to the original range.
    result = weight.div(scale).round().clamp(-1, 1).mul(scale)
    return result.type(dtype)
24002398

24012399
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:

0 commit comments

Comments
 (0)