Skip to content

Commit d80ab26

Browse files
authored
Fix export compressed model OOM (#1682)
Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
1 parent a42cc02 commit d80ab26

File tree

1 file changed, with 3 additions and 3 deletions.

neural_compressor/adaptor/torch_utils/weight_only.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def quantize_4bit(tensor, quantile=1.0, data_type="nf4", return_int=False):
 93  93   q_tensor += torch.where((mid_data[i - 1] < tensor) & (tensor <= mid_data[i]), data, 0)
 94  94   tensor.copy_(q_tensor)
 95  95   if return_int:
 96     - return tensor.type(torch.int8), scale.type(torch.float), None
     96 + return tensor, scale, None
 97  97   return tensor.mul_(scale)
 98  98
 99  99
@@ -128,7 +128,7 @@ def qdq_weight_asym(weight, num_bits=4, quantile=1.0, return_int=False):
128 128   weight.add_(zp)
129 129   weight.clamp_(0, maxq)
130 130   if return_int:
131     - return weight.type(torch.uint8), scale.type(torch.float), zp.type(torch.uint8)
    131 + return weight, scale, zp
132 132   weight.sub_(zp)
133 133   return weight.mul_(scale)
134 134
@@ -176,7 +176,7 @@ def qdq_weight_sym(weight, num_bits=4, quantile=1.0, return_int=False, full_rang
176 176   weight.round_()
177 177   weight.clamp_(minq, maxq)
178 178   if return_int:
179     - return weight.type(torch.int8), scale.type(torch.float), None
    179 + return weight, scale.type(torch.float), None
180 180   return weight.mul_(scale)
181 181
182 182
0 commit comments

Comments
 (0)