Commit fd2adde (1 parent: 9bba089)

update quantizer and model relationship

Signed-off-by: xin3he <xin3.he@intel.com>

File tree: 2 files changed (+13 -9 lines)

neural_compressor/torch/algorithms/weight_only/gptq.py (3 additions, 4 deletions)

@@ -183,7 +183,7 @@ def quantize(x, scale, zero, maxq):
     return scale * (q - zero)


-class GPTQuantizer(object):
+class RAWGPTQuantizer(object):
     """Main API for GPTQ algorithm.

     Please refer to:
@@ -1121,7 +1121,7 @@ def ready(self):
 from neural_compressor.torch.algorithms import Quantizer as INCQuantizer


-class INCGPTQQuantizer(INCQuantizer):
+class GPTQuantizer(INCQuantizer):
     def __init__(self, quant_config={}):
         """Init a RTNQuantizer object.

@@ -1149,9 +1149,8 @@ def prepare(
         assert isinstance(model, torch.nn.Module), "only support torch module"
         if use_layer_wise:
             assert model_path is not None, "model_path should not be None when use layer wise mode"
-        from .gptq import GPTQuantizer

-        self.gptq_quantizer = GPTQuantizer(
+        self.gptq_quantizer = RAWGPTQuantizer(
             model,
             weight_config=self.quant_config,
             nsamples=nsamples,
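This commit swaps the two names: the low-level GPTQ engine becomes RAWGPTQuantizer, and GPTQuantizer now denotes the framework-facing wrapper derived from neural_compressor.torch.algorithms.Quantizer. A minimal sketch of the resulting relationship, with simplified constructor arguments (the real classes take many more):

    # Illustrative sketch only; argument lists are trimmed for brevity.
    import torch

    class RAWGPTQuantizer:
        """Low-level GPTQ engine (formerly named GPTQuantizer)."""
        def __init__(self, model, weight_config=None, nsamples=128):
            self.model = model
            self.weight_config = weight_config or {}
            self.nsamples = nsamples

    class Quantizer:
        """Stand-in for neural_compressor.torch.algorithms.Quantizer."""
        def __init__(self, quant_config=None):
            self.quant_config = quant_config or {}

    class GPTQuantizer(Quantizer):
        """Framework wrapper (formerly named INCGPTQQuantizer)."""
        def prepare(self, model, nsamples=128):
            assert isinstance(model, torch.nn.Module), "only support torch module"
            # As in the diff above, the wrapper delegates to the raw engine,
            # which now lives in the same module, so no import is needed.
            self.gptq_quantizer = RAWGPTQuantizer(
                model, weight_config=self.quant_config, nsamples=nsamples
            )
            return self.gptq_quantizer.model

The wrapper keeps the public name stable while the numerics stay in the raw class, which is also why the redundant "from .gptq import GPTQuantizer" inside prepare could be dropped.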

neural_compressor/torch/quantization/algorithm_entry.py (10 additions, 5 deletions)

@@ -87,7 +87,7 @@ def gptq_entry(
     **kwargs,
 ) -> torch.nn.Module:
     logger.info("Quantize model with the GPTQ algorithm.")
-    from neural_compressor.torch.algorithms.weight_only.gptq import INCGPTQQuantizer
+    from neural_compressor.torch.algorithms.weight_only.gptq import GPTQuantizer

     # rebuild weight_config for gptq_quantize function
     weight_config = {}
@@ -119,10 +119,15 @@
     )
     kwargs.pop("example_inputs")
     logger.warning("lm_head in transformer model is skipped by GPTQ")
-
-    if CurrentQuantizer.quantizer is None or mode in [Mode.PREPARE, Mode.QUANTIZE]:
-        CurrentQuantizer.quantizer = INCGPTQQuantizer(quant_config=weight_config)
-    model = CurrentQuantizer.quantizer.execute(model, mode=mode, *args, **kwargs)
+    if getattr(model, "quantizer", False):
+        quantizer = model.quantizer
+    else:
+        quantizer = GPTQuantizer(quant_config=weight_config)
+    model = quantizer.execute(model, mode=mode, *args, **kwargs)
+    if getattr(model, "quantizer", False):
+        del model.quantizer
+    else:
+        model.quantizer = quantizer
     return model

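The second hunk replaces the global CurrentQuantizer singleton with state carried on the model itself: the PREPARE pass creates a GPTQuantizer and stashes it as model.quantizer, the CONVERT pass finds it there, reuses its calibration state, and deletes the attribute when done. A runnable sketch of that handoff, assuming simplified stand-ins for Mode, execute, and the model:

    # Sketch of the quantizer/model handoff; Mode, execute, and Toy are
    # simplified stand-ins for the real INC types.
    from enum import Enum

    class Mode(Enum):
        PREPARE = "prepare"
        CONVERT = "convert"

    class GPTQuantizer:
        def __init__(self, quant_config=None):
            self.quant_config = quant_config or {}

        def execute(self, model, mode):
            # Real code calibrates on PREPARE and quantizes on CONVERT.
            return model

    def gptq_entry(model, weight_config, mode):
        # Reuse the quantizer stashed by a previous PREPARE pass, otherwise
        # build a fresh one (first PREPARE, or a one-shot QUANTIZE).
        if getattr(model, "quantizer", False):
            quantizer = model.quantizer
        else:
            quantizer = GPTQuantizer(quant_config=weight_config)
        model = quantizer.execute(model, mode=mode)
        if getattr(model, "quantizer", False):
            del model.quantizer          # CONVERT: state consumed, drop it
        else:
            model.quantizer = quantizer  # PREPARE: keep it for CONVERT
        return model

    class Toy:  # stand-in for a torch.nn.Module
        pass

    m = gptq_entry(Toy(), {}, Mode.PREPARE)
    assert hasattr(m, "quantizer")       # quantizer rides on the model
    m = gptq_entry(m, {}, Mode.CONVERT)
    assert not hasattr(m, "quantizer")   # removed after conversion

Tying the quantizer to the model instead of a module-level singleton lets several models be prepared and converted independently without clobbering each other's state.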
