@@ -219,7 +219,10 @@ def smooth_quant(
219
219
self .sq = ORTSmoothQuant (self .pre_optimized_model , dataloader , self .reduce_range , self .backend )
220
220
self .sq .record_max_info = record_max_info
221
221
self .smooth_quant_model = self .sq .transform (** self .cur_sq_args )
222
- logger .info ("Updated the pre-optimized model with smooth quant model." )
222
+ if not record_max_info : # pragma: no cover
223
+ logger .info ("Updated the pre-optimized model with smooth quant model." )
224
+ else :
225
+ logger .info ("Collected scale information for smooth quant." )
223
226
# TODO double-check the smooth_quant_model and pre_optimized_model to make sure there are no two fp32 model replicas
224
227
self .pre_optimized_model = self .smooth_quant_model
225
228
return self .smooth_quant_model
@@ -305,6 +308,7 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
305
308
self .sq .model = tmp_model
306
309
self .sq .record_max_info = False
307
310
tmp_model = self .sq .transform (** self .cur_sq_args )
311
+ logger .info ("Model is smooth quantized." )
308
312
309
313
iterations = tune_cfg .get ("calib_iteration" , 1 )
310
314
calib_sampling_size = tune_cfg .get ("calib_sampling_size" , 1 )
@@ -1129,7 +1133,7 @@ def _replace_gemm_with_matmul(model):
1129
1133
from onnx import numpy_helper
1130
1134
1131
1135
if not isinstance (model , ONNXModel ):
1132
- model = ONNXModel (model )
1136
+ model = ONNXModel (model , ignore_warning = True )
1133
1137
1134
1138
for node in model .nodes ():
1135
1139
if node .op_type == "Gemm" :
0 commit comments