Commit 1f236d5

Enhance ONNXRT log info (#1454)

Signed-off-by: yuwenzho <yuwen.zhou@intel.com>

1 parent 778d1f1 commit 1f236d5
File tree

1 file changed: +6 −2 lines changed
neural_compressor/adaptor/onnxrt.py

Lines changed: 6 additions & 2 deletions
@@ -219,7 +219,10 @@ def smooth_quant(
         self.sq = ORTSmoothQuant(self.pre_optimized_model, dataloader, self.reduce_range, self.backend)
         self.sq.record_max_info = record_max_info
         self.smooth_quant_model = self.sq.transform(**self.cur_sq_args)
-        logger.info("Updated the pre-optimized model with smooth quant model.")
+        if not record_max_info:  # pragma: no cover
+            logger.info("Updated the pre-optimized model with smooth quant model.")
+        else:
+            logger.info("Collected scale information for smooth quant.")
         # TODO double-check the smooth_quant_model and pre_optimized_model to make sure there no two fp32 model replicas
         self.pre_optimized_model = self.smooth_quant_model
         return self.smooth_quant_model
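For readers skimming the first hunk: the new branch keys its message off record_max_info, which distinguishes a record-only pass (collect scale statistics) from an actual rewrite of the pre-optimized model. A minimal, self-contained sketch of that logging logic — apply_smooth_quant is a hypothetical stand-in for the adaptor's sq.transform() call, not Neural Compressor's real code:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("onnxrt")

def apply_smooth_quant(model, record_max_info=False):
    # Hypothetical stand-in for the sq.transform(**self.cur_sq_args) call:
    # with record_max_info=True the pass would only gather per-tensor scale
    # statistics; with False it would fold smoothing scales into the model.
    if not record_max_info:
        logger.info("Updated the pre-optimized model with smooth quant model.")
    else:
        logger.info("Collected scale information for smooth quant.")
    return model

apply_smooth_quant(model=None, record_max_info=True)
# INFO:onnxrt:Collected scale information for smooth quant.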
@@ -305,6 +308,7 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
         self.sq.model = tmp_model
         self.sq.record_max_info = False
         tmp_model = self.sq.transform(**self.cur_sq_args)
+        logger.info("Model is smooth quantized.")
 
         iterations = tune_cfg.get("calib_iteration", 1)
         calib_sampling_size = tune_cfg.get("calib_sampling_size", 1)
@@ -1129,7 +1133,7 @@ def _replace_gemm_with_matmul(model):
         from onnx import numpy_helper
 
         if not isinstance(model, ONNXModel):
-            model = ONNXModel(model)
+            model = ONNXModel(model, ignore_warning=True)
 
         for node in model.nodes():
             if node.op_type == "Gemm":
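The last hunk passes ignore_warning=True when _replace_gemm_with_matmul re-wraps its input, presumably to keep ONNXModel from repeating its load-time warning on a model that has already been wrapped once (an inference from the parameter name, not stated in the commit). As background on the surrounding function: replacing Gemm with MatMul is valid because ONNX Gemm with alpha=1, beta=1, and no transposes computes exactly MatMul followed by Add. A quick numpy check of that identity (illustrative only, not the library's code):

import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((4, 8)).astype(np.float32)  # Gemm input A
B = rng.standard_normal((8, 3)).astype(np.float32)  # Gemm input B (weights)
C = rng.standard_normal((3,)).astype(np.float32)    # Gemm input C (bias)

# ONNX Gemm computes Y = alpha * A' @ B' + beta * C; with the default
# alpha = beta = 1 and no transposes this is MatMul followed by Add.
gemm_out = 1.0 * (A @ B) + 1.0 * C
matmul_add_out = np.matmul(A, B) + C

assert np.allclose(gemm_out, matmul_add_out)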

0 commit comments