
Commit fcb66f0

fix strategy refinement (#1283)

1 parent e383f2e

5 files changed: +36 −42 lines

neural_compressor/adaptor/pytorch.py

Lines changed: 25 additions & 13 deletions
@@ -992,31 +992,43 @@ def _get_quantizable_ops(self, model):
             (self.query_handler.get_quantization_capability()['int8'], 'static'),
             (self.query_handler.get_quantization_capability()['dynamic'], 'dynamic')]
         fp32_config = {'activation': {'dtype': 'fp32'}, 'weight': {'dtype': 'fp32'}}
+        # Ignore LayerNorm, InstanceNorm3d and Embedding quantizable ops,
+        # due to huge accuracy regression in PyTorch.
+        if isinstance(self, PyTorch_IPEXAdaptor):
+            additional_skipped_module_classes = {}
+        else:
+            additional_skipped_module_classes = {'LayerNorm', 'InstanceNorm3d', 'Dropout'}
+        no_fp32_ops = {'QuantStub'}
         for pair in capability_pair:
             capability, mode = pair
             for q_op in quantizable_ops:
+                if q_op not in q_capability['opwise']:
+                    q_capability['opwise'][q_op] = []
+                if q_op[1] not in q_capability['optypewise']:
+                    q_capability['optypewise'][q_op[1]] = []
+
                 if mode == 'static' and self.approach != "quant_aware_training" and \
                     q_op[1] in ['LSTM', 'GRU', 'LSTMCell', 'GRUCell', 'RNNCell']:
                     continue
                 op_cfg = copy.deepcopy(capability[q_op[1]]) if q_op[1] in capability \
                     else copy.deepcopy(capability['default'])
+
                 op_cfg['activation']['quant_mode'] = mode if q_op[1] not in \
                     ['LSTM', 'GRU', 'LSTMCell', 'GRUCell', 'RNNCell'] else 'dynamic'
 
-                if q_op not in q_capability['opwise']:
-                    q_capability['opwise'][q_op] = [op_cfg]
-                elif op_cfg not in q_capability['opwise'][q_op]:
-                    q_capability['opwise'][q_op].append(op_cfg)
+                # skip the op that only include fp32
+                if q_op[1] not in additional_skipped_module_classes:
+                    if op_cfg not in q_capability['opwise'][q_op]:
+                        q_capability['opwise'][q_op].append(op_cfg)
+                    if op_cfg not in q_capability['optypewise'][q_op[1]]:
+                        q_capability['optypewise'][q_op[1]].append(op_cfg)
+
+                if q_op[1] not in no_fp32_ops:
+                    if fp32_config not in q_capability['opwise'][q_op]:
+                        q_capability['opwise'][q_op].append(fp32_config)
+                    if fp32_config not in q_capability['optypewise'][q_op[1]]:
+                        q_capability['optypewise'][q_op[1]].append(fp32_config)
 
-                if q_op[1] not in q_capability['optypewise']:
-                    q_capability['optypewise'][q_op[1]] = [op_cfg]
-                elif op_cfg not in q_capability['optypewise'][q_op[1]]:
-                    q_capability['optypewise'][q_op[1]].append(op_cfg)
-
-                if fp32_config not in q_capability['opwise'][q_op]:
-                    q_capability['opwise'][q_op].append(fp32_config)
-                if fp32_config not in q_capability['optypewise'][q_op[1]]:
-                    q_capability['optypewise'][q_op[1]].append(fp32_config)
 
         # get bf16 capability
         if self.use_bf16 and (CpuInfo().bf16 or os.getenv('FORCE_BF16') == '1') and \
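Read as a whole, the rewrite pre-registers every op in q_capability, withholds int8 configs for the skipped module classes, and appends the fp32 fallback for everything except QuantStub. Below is a minimal standalone sketch of that logic, assuming simplified inputs and omitting the RNN special-casing; the build_capability name and its argument shapes are illustrative, not part of the adaptor:

    import copy

    # Hypothetical standalone rendering of the new capability assembly;
    # `quantizable_ops` items are (op_name, op_type) tuples as in the diff.
    def build_capability(quantizable_ops, capability_pair, fp32_config,
                         additional_skipped_module_classes, no_fp32_ops):
        q_capability = {'opwise': {}, 'optypewise': {}}
        for capability, mode in capability_pair:
            for q_op in quantizable_ops:
                # Every op gets an entry up front, so ops whose int8 configs
                # are skipped still end up with the fp32 fallback below.
                q_capability['opwise'].setdefault(q_op, [])
                q_capability['optypewise'].setdefault(q_op[1], [])
                op_cfg = copy.deepcopy(capability.get(q_op[1], capability['default']))
                op_cfg['activation']['quant_mode'] = mode
                if q_op[1] not in additional_skipped_module_classes:
                    if op_cfg not in q_capability['opwise'][q_op]:
                        q_capability['opwise'][q_op].append(op_cfg)
                    if op_cfg not in q_capability['optypewise'][q_op[1]]:
                        q_capability['optypewise'][q_op[1]].append(op_cfg)
                if q_op[1] not in no_fp32_ops:
                    if fp32_config not in q_capability['opwise'][q_op]:
                        q_capability['opwise'][q_op].append(fp32_config)
                    if fp32_config not in q_capability['optypewise'][q_op[1]]:
                        q_capability['optypewise'][q_op[1]].append(fp32_config)
        return q_capability

With these defaults, a LayerNorm op surfaces only the fp32 config (tuning leaves it unquantized), while a QuantStub surfaces only its quantized configs.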

neural_compressor/strategy/basic.py

Lines changed: 1 addition & 6 deletions
@@ -98,15 +98,10 @@ def next_tune_cfg(self):
         """
         from copy import deepcopy
         tuning_space = self.tuning_space
-        initial_op_tuning_cfg = {}
-        for item in tuning_space.root_item.options:
-            if item.item_type == 'op':
-                op_name, op_type = item.name
-                initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space)
         calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options
         for calib_sampling_size in calib_sampling_size_lst:
             # Initialize the tuning config for each op according to the quantization approach
-            op_item_dtype_dict, quant_mode_wise_items = self.initial_tuning_cfg()
+            op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
             # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight)
             early_stop_tuning = False
             stage1_cnt = 0
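The deleted loop had pinned every op's starting config to fp32; the strategy now unpacks a ready-made initial config from initial_tuning_cfg(), seeded with each op's detected quant mode (bayesian.py below receives the identical change). A hedged sketch of the shift at the call site, with the op key purely illustrative:

    # Before: every op began at fp32, regardless of its capabilities.
    # initial_op_tuning_cfg[('layer1.0.conv1', 'Conv2d')] = \
    #     OpTuningConfig('layer1.0.conv1', 'Conv2d', 'fp32', tuning_space)

    # After: one call returns the per-op dtype map, the quant-mode-wise
    # item grouping, and the initial per-op tuning config together.
    op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = \
        self.initial_tuning_cfg()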

neural_compressor/strategy/bayesian.py

Lines changed: 1 addition & 6 deletions
@@ -117,13 +117,8 @@ def next_tune_cfg(self):
         pbounds = {}
         from copy import deepcopy
         tuning_space = self.tuning_space
-        initial_op_tuning_cfg = {}
-        for item in tuning_space.root_item.options:
-            if item.item_type == 'op':
-                op_name, op_type = item.name
-                initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space)
         calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options
-        op_item_dtype_dict, quant_mode_wise_items = self.initial_tuning_cfg()
+        op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
         op_wise_pool = OpWiseTuningSampler(tuning_space, [], [],
                                            op_item_dtype_dict, initial_op_tuning_cfg)
         self.op_configs = op_wise_pool.get_opwise_candidate()

neural_compressor/strategy/strategy.py

Lines changed: 6 additions & 1 deletion
@@ -291,7 +291,12 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict):
         op_item_dtype_dict = OrderedDict()
         for quant_mode, quant_mode_items in quant_mode_wise_items.items():
             initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict)
-        return op_item_dtype_dict, quant_mode_wise_items
+
+        initial_op_tuning_cfg = {}
+        for op_name_dtype, quant_mode in op_item_dtype_dict.items():
+            initial_op_tuning_cfg[op_name_dtype] = OpTuningConfig(op_name_dtype[0], op_name_dtype[1],
+                                                                  quant_mode, self.tuning_space)
+        return op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg
 
     def show_baseline_info(self):
         if self.baseline:
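The helper itself now finishes by materializing one OpTuningConfig per (op_name, op_type) key before returning. A compact equivalent of the added tail, assuming op_item_dtype_dict maps those keys to quant modes such as 'static' or 'dynamic':

    initial_op_tuning_cfg = {
        op_name_dtype: OpTuningConfig(op_name_dtype[0], op_name_dtype[1],
                                      quant_mode, self.tuning_space)
        for op_name_dtype, quant_mode in op_item_dtype_dict.items()
    }
    return op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg

Centralizing the seeding here is what lets basic.py and bayesian.py drop their duplicated fp32 initialization loops above.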

test/adaptor/pytorch_adaptor/test_adaptor_pytorch.py

Lines changed: 3 additions & 16 deletions
@@ -67,10 +67,7 @@
 
           quantization:
             op_wise: {
-                    'quant': {
-                      'activation': {'dtype': ['fp32']},
-                      'weight': {'dtype': ['fp32']}
-                    },
+
                     'layer1.0.conv1': {
                       'activation': {'dtype': ['fp32']},
                       'weight': {'dtype': ['fp32']}
@@ -143,11 +140,8 @@
           framework: pytorch_fx
 
           quantization:
+            approach: post_training_auto_quant
             op_wise: {
-                    'quant': {
-                      'activation': {'dtype': ['fp32']},
-                      'weight': {'dtype': ['fp32']}
-                    },
                     'layer1.0.conv1': {
                       'activation': {'dtype': ['fp32']},
                       'weight': {'dtype': ['fp32']}
@@ -213,10 +207,6 @@
             CrossEntropyLoss:
               reduction: mean
           op_wise: {
-                  'quant': {
-                    'activation': {'dtype': ['fp32']},
-                    'weight': {'dtype': ['fp32']}
-                  },
                   'layer1.0.conv1': {
                     'activation': {'dtype': ['fp32']},
                     'weight': {'dtype': ['fp32']}
@@ -976,10 +966,7 @@ def q_func(model):
         quantizer.model = common.Model(model_origin)
         q_model = quantizer.fit()
         self.assertTrue('quantize' in str(type(q_model.model.encoder)))
-        if fake_yaml == 'fx_ptq_yaml.yaml':
-            self.assertTrue('quantize' not in str(type(q_model.model.rnn)))
-        else:
-            self.assertTrue('quantize' in str(type(q_model.model.rnn)))
+        self.assertTrue('quantize' in str(type(q_model.model.rnn)))
 
     def test_fx_sub_module_quant(self):
         for fake_yaml in ['fx_qat_yaml.yaml', 'fx_ptq_yaml.yaml', 'fx_dynamic_yaml.yaml']:
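These test edits track the adaptor change above: 'quant' presumably names a QuantStub module, which now sits in no_fp32_ops and therefore no longer accepts a per-op fp32 override in the yaml, and the rnn submodule is now expected to quantize under fx_ptq_yaml.yaml just as under the other configs.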
