
Commit fcb66f0

fix strategy refinement (#1283)

1 parent e383f2e

5 files changed: +36 −42 lines

neural_compressor/adaptor/pytorch.py

Lines changed: 25 additions & 13 deletions
@@ -992,31 +992,43 @@ def _get_quantizable_ops(self, model):
             (self.query_handler.get_quantization_capability()['int8'], 'static'),
             (self.query_handler.get_quantization_capability()['dynamic'], 'dynamic')]
         fp32_config = {'activation': {'dtype': 'fp32'}, 'weight': {'dtype': 'fp32'}}
+        # Ignore LayerNorm, InstanceNorm3d and Embedding quantizable ops,
+        # due to huge accuracy regression in PyTorch.
+        if isinstance(self, PyTorch_IPEXAdaptor):
+            additional_skipped_module_classes = {}
+        else:
+            additional_skipped_module_classes = {'LayerNorm', 'InstanceNorm3d', 'Dropout'}
+        no_fp32_ops = {'QuantStub'}
         for pair in capability_pair:
             capability, mode = pair
             for q_op in quantizable_ops:
+                if q_op not in q_capability['opwise']:
+                    q_capability['opwise'][q_op] = []
+                if q_op[1] not in q_capability['optypewise']:
+                    q_capability['optypewise'][q_op[1]] = []
+
                 if mode == 'static' and self.approach != "quant_aware_training" and \
                     q_op[1] in ['LSTM', 'GRU', 'LSTMCell', 'GRUCell', 'RNNCell']:
                     continue
                 op_cfg = copy.deepcopy(capability[q_op[1]]) if q_op[1] in capability \
                     else copy.deepcopy(capability['default'])
+
                 op_cfg['activation']['quant_mode'] = mode if q_op[1] not in \
                     ['LSTM', 'GRU', 'LSTMCell', 'GRUCell', 'RNNCell'] else 'dynamic'
 
-                if q_op not in q_capability['opwise']:
-                    q_capability['opwise'][q_op] = [op_cfg]
-                elif op_cfg not in q_capability['opwise'][q_op]:
-                    q_capability['opwise'][q_op].append(op_cfg)
+                # skip the op that only include fp32
+                if q_op[1] not in additional_skipped_module_classes:
+                    if op_cfg not in q_capability['opwise'][q_op]:
+                        q_capability['opwise'][q_op].append(op_cfg)
+                    if op_cfg not in q_capability['optypewise'][q_op[1]]:
+                        q_capability['optypewise'][q_op[1]].append(op_cfg)
+
+                if q_op[1] not in no_fp32_ops:
+                    if fp32_config not in q_capability['opwise'][q_op]:
+                        q_capability['opwise'][q_op].append(fp32_config)
+                    if fp32_config not in q_capability['optypewise'][q_op[1]]:
+                        q_capability['optypewise'][q_op[1]].append(fp32_config)
 
-                if q_op[1] not in q_capability['optypewise']:
-                    q_capability['optypewise'][q_op[1]] = [op_cfg]
-                elif op_cfg not in q_capability['optypewise'][q_op[1]]:
-                    q_capability['optypewise'][q_op[1]].append(op_cfg)
-
-                if fp32_config not in q_capability['opwise'][q_op]:
-                    q_capability['opwise'][q_op].append(fp32_config)
-                if fp32_config not in q_capability['optypewise'][q_op[1]]:
-                    q_capability['optypewise'][q_op[1]].append(fp32_config)
 
         # get bf16 capability
         if self.use_bf16 and (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1') and \
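Read as a whole, the rewrite pre-registers every op in q_capability, withholds int8 configs for the skipped module classes, and appends the fp32 fallback for everything except QuantStub. Below is a minimal standalone sketch of that logic, assuming simplified inputs and omitting the RNN special-casing; the build_capability name and its argument shapes are illustrative, not part of the adaptor:

    import copy

    # Hypothetical standalone rendering of the new capability assembly;
    # `quantizable_ops` items are (op_name, op_type) tuples as in the diff.
    def build_capability(quantizable_ops, capability_pair, fp32_config,
                         additional_skipped_module_classes, no_fp32_ops):
        q_capability = {'opwise': {}, 'optypewise': {}}
        for capability, mode in capability_pair:
            for q_op in quantizable_ops:
                # Every op gets an entry up front, so ops whose int8 configs
                # are skipped still end up with the fp32 fallback below.
                q_capability['opwise'].setdefault(q_op, [])
                q_capability['optypewise'].setdefault(q_op[1], [])
                op_cfg = copy.deepcopy(capability.get(q_op[1], capability['default']))
                op_cfg['activation']['quant_mode'] = mode
                if q_op[1] not in additional_skipped_module_classes:
                    if op_cfg not in q_capability['opwise'][q_op]:
                        q_capability['opwise'][q_op].append(op_cfg)
                    if op_cfg not in q_capability['optypewise'][q_op[1]]:
                        q_capability['optypewise'][q_op[1]].append(op_cfg)
                if q_op[1] not in no_fp32_ops:
                    if fp32_config not in q_capability['opwise'][q_op]:
                        q_capability['opwise'][q_op].append(fp32_config)
                    if fp32_config not in q_capability['optypewise'][q_op[1]]:
                        q_capability['optypewise'][q_op[1]].append(fp32_config)
        return q_capability

With these defaults, a LayerNorm op surfaces only the fp32 config (tuning leaves it unquantized), while a QuantStub surfaces only its quantized configs.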

neural_compressor/strategy/basic.py

Lines changed: 1 addition & 6 deletions
@@ -98,15 +98,10 @@ def next_tune_cfg(self):
         """
         from copy import deepcopy
         tuning_space = self.tuning_space
-        initial_op_tuning_cfg = {}
-        for item in tuning_space.root_item.options:
-            if item.item_type == 'op':
-                op_name, op_type = item.name
-                initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space)
         calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options
         for calib_sampling_size in calib_sampling_size_lst:
             # Initialize the tuning config for each op according to the quantization approach
-            op_item_dtype_dict, quant_mode_wise_items = self.initial_tuning_cfg()
+            op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
             # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight)
             early_stop_tuning = False
             stage1_cnt = 0
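The deleted loop had pinned every op's starting config to fp32; the strategy now unpacks a ready-made initial config from initial_tuning_cfg(), seeded with each op's detected quant mode (bayesian.py below receives the identical change). A hedged sketch of the shift at the call site, with the op key purely illustrative:

    # Before: every op began at fp32, regardless of its capabilities.
    # initial_op_tuning_cfg[('layer1.0.conv1', 'Conv2d')] = \
    #     OpTuningConfig('layer1.0.conv1', 'Conv2d', 'fp32', tuning_space)

    # After: one call returns the per-op dtype map, the quant-mode-wise
    # item grouping, and the initial per-op tuning config together.
    op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = \
        self.initial_tuning_cfg()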

neural_compressor/strategy/bayesian.py

Lines changed: 1 addition & 6 deletions
@@ -117,13 +117,8 @@ def next_tune_cfg(self):
         pbounds = {}
         from copy import deepcopy
         tuning_space = self.tuning_space
-        initial_op_tuning_cfg = {}
-        for item in tuning_space.root_item.options:
-            if item.item_type == 'op':
-                op_name, op_type = item.name
-                initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space)
         calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options
-        op_item_dtype_dict, quant_mode_wise_items = self.initial_tuning_cfg()
+        op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
         op_wise_pool = OpWiseTuningSampler(tuning_space, [], [],
                                            op_item_dtype_dict, initial_op_tuning_cfg)
         self.op_configs = op_wise_pool.get_opwise_candidate()

neural_compressor/strategy/strategy.py

Lines changed: 6 additions & 1 deletion
@@ -291,7 +291,12 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict):
         op_item_dtype_dict = OrderedDict()
         for quant_mode, quant_mode_items in quant_mode_wise_items.items():
             initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict)
-        return op_item_dtype_dict, quant_mode_wise_items
+
+        initial_op_tuning_cfg = {}
+        for op_name_dtype, quant_mode in op_item_dtype_dict.items():
+            initial_op_tuning_cfg[op_name_dtype] = OpTuningConfig(op_name_dtype[0], op_name_dtype[1],
+                                                                  quant_mode, self.tuning_space)
+        return op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg
 
     def show_baseline_info(self):
         if self.baseline:
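The helper itself now finishes by materializing one OpTuningConfig per (op_name, op_type) key before returning. A compact equivalent of the added tail, assuming op_item_dtype_dict maps those keys to quant modes such as 'static' or 'dynamic':

    initial_op_tuning_cfg = {
        op_name_dtype: OpTuningConfig(op_name_dtype[0], op_name_dtype[1],
                                      quant_mode, self.tuning_space)
        for op_name_dtype, quant_mode in op_item_dtype_dict.items()
    }
    return op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg

Centralizing the seeding here is what lets basic.py and bayesian.py drop their duplicated fp32 initialization loops above.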

test/adaptor/pytorch_adaptor/test_adaptor_pytorch.py

Lines changed: 3 additions & 16 deletions
@@ -67,10 +67,7 @@
 
           quantization:
             op_wise: {
-                    'quant': {
-                      'activation': {'dtype': ['fp32']},
-                      'weight': {'dtype': ['fp32']}
-                    },
+
                     'layer1.0.conv1': {
                       'activation': {'dtype': ['fp32']},
                       'weight': {'dtype': ['fp32']}
@@ -143,11 +140,8 @@
           framework: pytorch_fx
 
           quantization:
+            approach: post_training_auto_quant
             op_wise: {
-                    'quant': {
-                      'activation': {'dtype': ['fp32']},
-                      'weight': {'dtype': ['fp32']}
-                    },
                     'layer1.0.conv1': {
                       'activation': {'dtype': ['fp32']},
                       'weight': {'dtype': ['fp32']}
@@ -213,10 +207,6 @@
             CrossEntropyLoss:
               reduction: mean
           op_wise: {
-                  'quant': {
-                    'activation': {'dtype': ['fp32']},
-                    'weight': {'dtype': ['fp32']}
-                  },
                   'layer1.0.conv1': {
                     'activation': {'dtype': ['fp32']},
                     'weight': {'dtype': ['fp32']}
@@ -976,10 +966,7 @@ def q_func(model):
         quantizer.model = common.Model(model_origin)
         q_model = quantizer.fit()
         self.assertTrue('quantize' in str(type(q_model.model.encoder)))
-        if fake_yaml == 'fx_ptq_yaml.yaml':
-            self.assertTrue('quantize' not in str(type(q_model.model.rnn)))
-        else:
-            self.assertTrue('quantize' in str(type(q_model.model.rnn)))
+        self.assertTrue('quantize' in str(type(q_model.model.rnn)))
 
     def test_fx_sub_module_quant(self):
         for fake_yaml in ['fx_qat_yaml.yaml', 'fx_ptq_yaml.yaml', 'fx_dynamic_yaml.yaml']:
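These test edits track the adaptor change above: 'quant' presumably names a QuantStub module, which now sits in no_fp32_ops and therefore no longer accepts a per-op fp32 override in the yaml, and the rnn submodule is now expected to quantize under fx_ptq_yaml.yaml just as under the other configs.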
