Commit 7b5a097

Fix import versions for GPTQ (#105)
1 parent: a8704f8

3 files changed: +9 −6 lines changed

test/quantization/model.py

Lines changed: 1 addition & 0 deletions

@@ -12,6 +12,7 @@
 from torch.nn import functional as F

 def prepare_inputs_for_model(inps):
+    inps = inps.squeeze(0)
     # setup inputs in correct format
     max_new_tokens = 1
     T = inps.size(0)

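The added squeeze(0) pairs with the unsqueeze(0) introduced in GPTQ.py below: the quantizer now hands inputs to the prep function with an explicit batch dimension, and prepare_inputs_for_model strips it again before building per-token inputs. A minimal sketch of the assumed shape contract (tensor contents are illustrative, not from the repo):

    import torch

    inps = torch.arange(8)           # [T] token ids fed to the calibration loop
    batched = inps.unsqueeze(0)      # [1, T], as _model_call now produces
    restored = batched.squeeze(0)    # [T], first step of prepare_inputs_for_model
    assert torch.equal(restored, inps)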
torchao/quantization/GPTQ.py

Lines changed: 4 additions & 3 deletions

@@ -20,7 +20,7 @@
 # from model import Transformer  # pyre-ignore[21]
 from torch.utils._pytree import tree_flatten, tree_unflatten

-from .utils import TORCH_VERSION_AFTER_2_4
+from .utils import TORCH_VERSION_AFTER_2_3
 from typing import Any, Dict, Tuple, Optional
 from .unified import Quantizer
 from functools import reduce

@@ -89,7 +89,7 @@ def __init__(
         # for model
         self.input_prep_func = (
             input_prep_func if input_prep_func is not None
-            else lambda x: x
+            else lambda x: (x,)
         )

         self.pad_calibration_inputs = pad_calibration_inputs

@@ -180,6 +180,7 @@ def _model_call(self, inps):
         else:
             inps = F.pad(inps, (self.pad_token, self.calibration_seq_length - T))

+        inps = inps.unsqueeze(0)
         model_in = self.input_prep_func(inps)

         self.add_input(model_in)

@@ -546,7 +547,7 @@ def faster_quant(self, H, W):
         return Q, DQ.to(orig_dtype), all_qparams


-if TORCH_VERSION_AFTER_2_4:
+if TORCH_VERSION_AFTER_2_3:
     from .quant_primitives import (
         get_group_qparams_symmetric,
         group_quantize_tensor_symmetric,

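The default input_prep_func changing from lambda x: x to lambda x: (x,) matters because the prepared value is treated as the model's full positional-argument tuple, so a bare tensor must be wrapped. A hedged sketch of that calling convention (the stand-in model and the *-unpacking are assumptions, not the library's exact internals):

    import torch

    def default_input_prep(x):
        # new default: wrap the tensor so it unpacks as one positional arg
        return (x,)

    def model(tokens):
        # stand-in for the model being calibrated
        return tokens.float().mean()

    inps = torch.randint(0, 100, (1, 32))   # [1, T] after the new unsqueeze(0)
    model_in = default_input_prep(inps)     # -> (tensor,)
    out = model(*model_in)                  # unpacks cleanly as positional args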
torchao/quantization/quant_api.py

Lines changed: 4 additions & 3 deletions

@@ -46,10 +46,14 @@
 from .GPTQ import (
     Int8DynActInt4WeightQuantizer,
     Int8DynActInt4WeightGPTQQuantizer,
+    Int4WeightQuantizer,
+    Int4WeightGPTQQuantizer,
 )
 __all__ += [
     "Int8DynActInt4WeightQuantizer",
     "Int8DynActInt4WeightGPTQQuantizer",
+    "Int4WeightQuantizer",
+    "Int4WeightGPTQQuantizer",
 ]


@@ -196,6 +200,3 @@ def replace_conv2d_1x1(conv):
     _replace_with_custom_fn_if_matches_filter(
         model, replace_conv2d_1x1, filter_fn=filter_fn
     )
-
-if TORCH_VERSION_AFTER_2_3:
-    from .GPTQ import Int8DynActInt4WeightQuantizer, Int8DynActInt4WeightGPTQQuantizer

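With these exports in place, the Int4 quantizer classes can be imported from quant_api alongside the Int8DynAct ones; the version gate now lives in GPTQ.py (relaxed to TORCH_VERSION_AFTER_2_3) rather than at the bottom of this file. A hedged usage sketch, assuming a PyTorch build new enough to pass that gate:

    from torchao.quantization.quant_api import (
        Int4WeightQuantizer,
        Int4WeightGPTQQuantizer,
        Int8DynActInt4WeightQuantizer,
        Int8DynActInt4WeightGPTQQuantizer,
    )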