Commit 35def7b

wenhuach21 authored and chensuyue committed
fix SQ auto bug (#1294)
Signed-off-by: wenhuach21 <wenhua.cheng@intel.com> (cherry picked from commit 1730de0)
1 parent e9c14a5 commit 35def7b

File tree

1 file changed (+24, -20 lines)


neural_compressor/adaptor/torch_utils/smooth_quant.py

Lines changed: 24 additions & 20 deletions
@@ -794,20 +794,20 @@ def dict_to_list(dic):
             raise NotImplementedError
         return best_alpha
 
-    def _auto_tune_alpha_new(
+    def _auto_tune_alpha(
         self, input_maxes, calib_sample_num=32, alpha_min=0.3, alpha_max=0.7, alpha_step=0.05, shared_criterion="min"
     ):
         """Perform alpha-tuning to obtain layer-wise optimal alpha values and adjust parameters accordingly.
 
         This function takes quantization of the former layers into consideration when qdq one layer
         Also, it reduces the memory usage at the cost of increasingtuning time
-        TODO may have compatibility issue when setting folding=True
-        :param input_maxes:
-        :param calib_sample_num:
-        :param alpha_min:
-        :param alpha_max:
-        :param alpha_step:
-        :param shared_criterion:
+        TODO may have compatibility issue when setting folding=True, check whether having issues when bs!=1
+        :param input_maxes: calibration data, input max
+        :param calib_sample_num: sample count used to auto tuning alpha
+        :param alpha_min: the min value of alpha
+        :param alpha_max: the max value of alpha
+        :param alpha_step: the alpha step in search space
+        :param shared_criterion: the criterion to choose alpha when multiple layers must share one same alpha
         :return:
         """
         logger.info("start sq auto tuning")
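
The new docstring spells out the search space: alpha candidates run from alpha_min to alpha_max in steps of alpha_step. A minimal sketch of the grid those defaults imply follows; the inclusive endpoint, the rounding, and the helper name are assumptions for illustration, not taken from the diff.

# Hypothetical helper showing the candidate alphas implied by the documented
# defaults; not the library's own construction.
def alpha_grid(alpha_min=0.3, alpha_max=0.7, alpha_step=0.05):
    steps = int(round((alpha_max - alpha_min) / alpha_step))
    return [round(alpha_min + i * alpha_step, 2) for i in range(steps + 1)]

print(alpha_grid())  # [0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7]
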
@@ -830,13 +830,16 @@ def _auto_tune_alpha_new(
             self.absorb_to_layer, input_maxes, default_alpha, tuning=True
         )
         self._update_scales_for_auto(absorb_input_scales, weight_scales)
-        cnt = 0
+        total_cnt = 0
+        tmp_cnt = 0
         alpha_update_iter = 0
         # multiply_factor is used to combine samples to calib_sample_num // 4 before summarizing the best alpha
-        multiply_factor = calib_sample_num // 4 if calib_sample_num >= 4 else calib_sample_num
+        tune_cnt = 4
+        multiply_factor = calib_sample_num // tune_cnt if calib_sample_num >= tune_cnt else calib_sample_num
 
         best_alphas = default_alpha
         if not self.dataloader:
+            logger.info(f"Auto-tuning failed due to no dataloader, using {best_alphas} instead.")
             self._qdq_model_unwrapper_for_auto()
             return best_alphas
         try:
@@ -857,18 +860,19 @@ def _auto_tune_alpha_new(
                     cur_loss = loss_alphas[key]
                     for alpha_key in cur_loss.keys():
                         cur_loss[alpha_key] += loss_tmp[key][alpha_key]
-                cnt += self.dataloader.batch_size
-                if cnt // multiply_factor >= 1:
+                total_cnt += self.dataloader.batch_size
+                tmp_cnt += self.dataloader.batch_size
+                if tmp_cnt // multiply_factor >= 1:
                     alpha_update_iter += 1
-                    cnt = 0
+                    tmp_cnt = 0
                     best_alphas = self._get_best_alpha(self.absorb_to_layer, loss_alphas, shared_criterion)
                     for key in best_alphas.keys():
                         logger.info(f"Auto alpha update iter: {alpha_update_iter}, {key}: {best_alphas[key]}")
                     absorb_input_scales, weight_scales = self._cal_scales(
                         self.absorb_to_layer, input_maxes, best_alphas, tuning=True
                     )
                     self._update_scales_for_auto(absorb_input_scales, weight_scales)
-                if cnt >= calib_sample_num:
+                if total_cnt >= calib_sample_num:
                     break
         except:
             for input in self.dataloader:
@@ -888,10 +892,11 @@ def _auto_tune_alpha_new(
                     cur_loss = loss_alphas[key]
                     for alpha_key in cur_loss.keys():
                         cur_loss[alpha_key] += loss_tmp[key][alpha_key]
-                cnt += self.dataloader.batch_size
-                if cnt // multiply_factor >= 1:
+                total_cnt += self.dataloader.batch_size
+                tmp_cnt += self.dataloader.batch_size
+                if tmp_cnt // multiply_factor >= 1:
                     alpha_update_iter += 1
-                    cnt = 0
+                    tmp_cnt = 0
 
                     best_alphas = self._get_best_alpha(self.absorb_to_layer, loss_alphas, shared_criterion)
                     for key in best_alphas.keys():
@@ -900,7 +905,7 @@ def _auto_tune_alpha_new(
                         self.absorb_to_layer, input_maxes, best_alphas, tuning=True
                     )
                     self._update_scales_for_auto(absorb_input_scales, weight_scales)
-                if cnt >= calib_sample_num:
+                if total_cnt >= calib_sample_num:
                     break
 
         best_alphas = self._get_best_alpha(self.absorb_to_layer, loss_alphas, shared_criterion)
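
The core of the fix in the hunks above is splitting the single cnt counter in two: total_cnt accumulates every calibration sample and drives the calib_sample_num stop condition, while tmp_cnt only counts samples since the last alpha refresh and is the one reset to zero. With one shared counter, the reset inside the update branch also wiped the running total, so the cnt >= calib_sample_num break could fail to fire. A minimal, self-contained sketch of the corrected loop; the batch-size list is a stand-in for the real dataloader.

def tune_loop_sketch(batch_sizes, calib_sample_num=32, tune_cnt=4):
    # multiply_factor: how many samples to combine before each best-alpha refresh
    multiply_factor = calib_sample_num // tune_cnt if calib_sample_num >= tune_cnt else calib_sample_num
    total_cnt = 0  # all samples consumed so far -> controls the final break
    tmp_cnt = 0    # samples since the last refresh -> the only counter that resets
    alpha_update_iter = 0
    for batch_size in batch_sizes:
        total_cnt += batch_size
        tmp_cnt += batch_size
        if tmp_cnt // multiply_factor >= 1:
            alpha_update_iter += 1
            tmp_cnt = 0  # resetting tmp_cnt leaves total_cnt intact
        if total_cnt >= calib_sample_num:
            break  # reachable now that total_cnt is never reset
    return alpha_update_iter, total_cnt

print(tune_loop_sketch([4] * 16))  # (4, 32): four refreshes, stop after 32 samples
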
@@ -934,7 +939,6 @@ def transform(
             logger.warning("smooth quant is ignored since the model is not a torch module")
             return self.model
 
-        logger.info("call new sq") ##TODO need to remove later
         if folding:
             self.insert_mul, self.allow_absorb = False, True
         else:
@@ -994,7 +998,7 @@ def transform(
                 del self.absorb_to_layer[d]
 
         if alpha == "auto":
-            self.alpha_per_layer = self._auto_tune_alpha_new(
+            self.alpha_per_layer = self._auto_tune_alpha(
                 input_maxes_abs, calib_sample_num=32, **auto_alpha_args
             ) ##save the alpha
 
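For reference, a hedged sketch of the call path the last hunk renames: transform(alpha="auto") forwards auto_alpha_args into the keyword parameters of _auto_tune_alpha, with calib_sample_num pinned to 32. The class, method body, and return value below are illustrative stand-ins, not the library's real implementation.

class SmoothQuantCallSketch:
    # Stand-in with the same signature as the renamed helper above.
    def _auto_tune_alpha(self, input_maxes, calib_sample_num=32, alpha_min=0.3,
                         alpha_max=0.7, alpha_step=0.05, shared_criterion="min"):
        # The real method searches per-layer alphas; here we just echo the config.
        return {"calib_sample_num": calib_sample_num, "alpha_min": alpha_min,
                "alpha_max": alpha_max, "alpha_step": alpha_step,
                "shared_criterion": shared_criterion}

    def transform_auto(self, input_maxes_abs, auto_alpha_args):
        # Mirrors the updated call site: calib_sample_num fixed at 32,
        # remaining keywords forwarded from auto_alpha_args.
        return self._auto_tune_alpha(input_maxes_abs, calib_sample_num=32, **auto_alpha_args)

sq = SmoothQuantCallSketch()
print(sq.transform_auto({}, {"alpha_step": 0.1, "shared_criterion": "mean"}))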