Skip to content

Commit 7bf8107

Browse files
authored
Remove ONNX RT Quantizer dependency (#1087)
1 parent: 98d829a · commit: 7bf8107

File tree

22 files changed: 135 additions (+135), 67 deletions (-67)

22 files changed: 135 additions (+135), 67 deletions (-67)

neural_compressor/adaptor/onnxrt.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ def quantize(self, tune_cfg, model, data_loader, q_func=None):
100100
return model
101101
if model.model.opset_import[0].version < 11: # pragma: no cover
102102
logger.warning("Quantize input needs model opset 11 or newer.")
103-
from onnxruntime.quantization.quant_utils import QuantizationMode
103+
from neural_compressor.adaptor.ox_utils.util import QuantizationMode
104104
if self.backend in ["qlinearops", "qoperator"]:
105105
backend = QuantizationMode.QLinearOps
106106
if self.backend == "qlinearops":
@@ -218,7 +218,7 @@ def recover(self, model, q_config):
218218
if model.model.opset_import[0].version < 11: # pragma: no cover
219219
logger.warning("Quantize input needs model opset 11 or newer.")
220220

221-
from onnxruntime.quantization.quant_utils import QuantizationMode
221+
from neural_compressor.adaptor.ox_utils.util import QuantizationMode
222222
if self.backend in ["qlinearops", "qoperator"]:
223223
backend = QuantizationMode.QLinearOps
224224
elif self.backend == "qdq":
@@ -471,7 +471,7 @@ def _pre_optimize(self, model, level=1):
471471
self.pre_optimized_model = model
472472

473473
def _revert_fusedconv(self, model):
474-
from onnxruntime.quantization.quant_utils import attribute_to_kwarg
474+
from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg
475475
from onnx import onnx_pb as onnx_proto
476476
new_nodes = []
477477
remove_nodes = []
@@ -813,7 +813,7 @@ def eval_func(dataloader):
813813

814814
def diagnosis_helper(self, fp32_model, int8_model, tune_cfg=None, save_path=None):
815815
from neural_compressor.utils.utility import dump_data_to_local
816-
from neural_compressor.model.onnx_model import find_by_name
816+
from neural_compressor.adaptor.ox_utils.util import find_by_name
817817
if self.backend in ["qlinearops", "qoperator"]:
818818
supported_optype = ['Conv', 'MatMul', 'Concat', 'Attention', 'FusedConv',
819819
'Add', 'Mul', 'LeakyRelu', 'Sigmoid', 'GlobalAveragePool', 'AveragePool']

neural_compressor/adaptor/ox_utils/operators/activation.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
import onnx
2020
from .base_operator import QuantOperatorBase
2121
from .qdq_base_operator import QDQOperatorBase
22-
from onnxruntime.quantization.quant_utils import QuantizedValueType, \
22+
from neural_compressor.adaptor.ox_utils.util import QuantizedValueType, \
2323
attribute_to_kwarg, ms_domain
2424
from onnx import onnx_pb as onnx_proto
2525
from neural_compressor.adaptor.ox_utils.util import QuantizedValue

neural_compressor/adaptor/ox_utils/operators/attention.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
import onnx
2020
from .base_operator import QuantOperatorBase
2121
from .qdq_base_operator import QDQOperatorBase
22-
from onnxruntime.quantization.quant_utils import attribute_to_kwarg, ms_domain
22+
from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain
2323
from onnx import onnx_pb as onnx_proto
2424
'''
2525
Quantize Attention

neural_compressor/adaptor/ox_utils/operators/binary_op.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818

1919
import onnx
2020
from .base_operator import QuantOperatorBase
21-
from onnxruntime.quantization.quant_utils import attribute_to_kwarg, ms_domain, \
21+
from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain, \
2222
QuantizedValueType
2323
from onnx import onnx_pb as onnx_proto
2424
from neural_compressor.adaptor.ox_utils.util import QuantizedValue

neural_compressor/adaptor/ox_utils/operators/concat.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818

1919
import onnx
2020
from .base_operator import QuantOperatorBase
21-
from onnxruntime.quantization.quant_utils import QuantizedValueType, \
21+
from neural_compressor.adaptor.ox_utils.util import QuantizedValueType, \
2222
attribute_to_kwarg, ms_domain
2323
from onnx import onnx_pb as onnx_proto
2424
from neural_compressor.adaptor.ox_utils.util import QuantizedValue

neural_compressor/adaptor/ox_utils/operators/conv.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
import onnx
2020
from .base_operator import QuantOperatorBase
2121
from .qdq_base_operator import QDQOperatorBase
22-
from onnxruntime.quantization.quant_utils import find_by_name, get_mul_node, \
22+
from neural_compressor.adaptor.ox_utils.util import find_by_name, \
2323
QuantizedValueType, attribute_to_kwarg
2424
from onnx import onnx_pb as onnx_proto
2525
from neural_compressor.adaptor.ox_utils.util import QuantizedValue
@@ -88,17 +88,17 @@ def convert(self):
8888

8989
scales_mul_node = find_by_name(scales_mul_op, self.quantizer.new_nodes)
9090
if scales_mul_node is None:
91-
scales_mul_node = get_mul_node([scale_0, scale_1],
92-
scales_mul_op + ":0", scales_mul_op)
91+
scales_mul_node = onnx.helper.make_node("Mul", [scale_0, scale_1],
92+
[scales_mul_op + ":0"], scales_mul_op)
9393
self.quantizer.new_nodes.append(scales_mul_node)
9494

9595
scales_mul_op_output = scales_mul_node.output[0]
9696

9797
# Add mul operation to multiply mul_scales_op result with output of ConvInteger
9898
# and make the output of this node the same as output of original conv node.
9999
output_scale_mul_op = node.name + "_output_scale_mul"
100-
self.quantizer.new_nodes.append(get_mul_node([cast_op_output, scales_mul_op_output],
101-
node.output[0], output_scale_mul_op))
100+
self.quantizer.new_nodes.append(onnx.helper.make_node("Mul",
101+
[cast_op_output, scales_mul_op_output], [node.output[0]], output_scale_mul_op))
102102
self.quantizer.remove_nodes.extend(parents[1:])
103103
self.quantizer.remove_nodes.append(node)
104104

neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
from .base_operator import QuantOperatorBase
2121
from .qdq_base_operator import QDQOperatorBase
2222
from onnx import onnx_pb as onnx_proto
23-
from onnxruntime.quantization.quant_utils import QuantizedValueType, \
23+
from neural_compressor.adaptor.ox_utils.util import QuantizedValueType, \
2424
attribute_to_kwarg, ms_domain
2525
'''
2626
Quantize EmbedLayerNormalization

neural_compressor/adaptor/ox_utils/operators/gather.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818

1919
import onnx
2020
from .base_operator import QuantOperatorBase
21-
from onnxruntime.quantization.quant_utils import QuantizedValueType, attribute_to_kwarg
21+
from neural_compressor.adaptor.ox_utils.util import QuantizedValueType, attribute_to_kwarg
2222
from onnx import onnx_pb as onnx_proto
2323
from neural_compressor.adaptor.ox_utils.util import QuantizedValue
2424
'''

neural_compressor/adaptor/ox_utils/operators/gavgpool.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818

1919
import onnx
2020
from .base_operator import QuantOperatorBase
21-
from onnxruntime.quantization.quant_utils import attribute_to_kwarg, ms_domain, \
21+
from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain, \
2222
QuantizedValueType
2323
from neural_compressor.adaptor.ox_utils.util import QuantizedValue
2424
class QGlobalAveragePool(QuantOperatorBase):

neural_compressor/adaptor/ox_utils/operators/lstm.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
import numpy
2121
from .base_operator import QuantOperatorBase
2222
from .qdq_base_operator import QDQOperatorBase
23-
from onnxruntime.quantization.quant_utils import attribute_to_kwarg, ms_domain, QuantType
23+
from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain, QuantType
2424
from onnx import onnx_pb as onnx_proto
2525
'''
2626
Quantize LSTM

0 commit comments

Comments
 (0)