Skip to content

Commit 9aaf3ec

Browse files
committed
fixes for sdxl
Summary: added filtering to the API, and added an API to convert 1x1 convolutions to linear layers so they can be quantized. Also fixed the tensor subclass to avoid situations where the weight-only quantized weight isn't contiguous. Test Plan: python test/test.py Reviewers: Subscribers: Tasks: Tags: ghstack-source-id: e22d744 Pull Request resolved: #26
1 parent 793fa01 commit 9aaf3ec

File tree

3 files changed

+54
-20
lines changed

3 files changed

+54
-20
lines changed

torchao/quantization/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
"change_linear_weights_to_int8_dqtensors",
1919
"change_linear_weights_to_int8_woqtensors",
2020
"change_linear_weights_to_int4_woqtensors",
21-
"insert_subclass",
21+
"swap_conv2d_1x1_to_linear"
2222
"safe_int_mm",
2323
"dynamically_quantize_per_tensor",
2424
"quantize_activation_per_token_absmax",

torchao/quantization/quant_api.py

Lines changed: 51 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
"change_linear_weights_to_int8_dqtensors",
3636
"change_linear_weights_to_int8_woqtensors",
3737
"change_linear_weights_to_int4_woqtensors",
38+
"swap_conv2d_1x1_to_linear"
3839
]
3940

4041

@@ -45,19 +46,17 @@ def _replace_with_custom_fn_if_matches_filter(
4546
For each `child` in `model`, replaces it with `replacement_fn(child)`
4647
if `filter_fn(child)` is `True`
4748
"""
48-
name_to_child = dict(model.named_children())
49-
for name, child in name_to_child.items():
50-
if cur_fqn == "":
51-
new_fqn = name
52-
else:
53-
new_fqn = f"{cur_fqn}.{name}"
54-
if filter_fn(child, new_fqn):
55-
new_child = replacement_fn(child)
56-
setattr(model, name, new_child)
57-
else:
58-
_replace_with_custom_fn_if_matches_filter(
59-
child, replacement_fn, filter_fn, new_fqn
49+
if filter_fn(model, cur_fqn[:-1]):
50+
model = replacement_fn(model)
51+
return model
52+
else:
53+
for name, child in model.named_children():
54+
new_child = _replace_with_custom_fn_if_matches_filter(
55+
child, replacement_fn, filter_fn, f"{cur_fqn}{name}."
6056
)
57+
setattr(model, name, new_child)
58+
return model
59+
6160

6261
def _is_linear(mod, *args):
6362
return (
@@ -81,7 +80,7 @@ def apply_weight_only_int8_quant(model):
8180
)
8281

8382

84-
def apply_dynamic_quant(model):
83+
def apply_dynamic_quant(model, filter_fn=None):
8584
"""
8685
Applies dynamic symmetric per-token activation and per-channel weight
8786
quantization to all linear layers in the given model using
@@ -90,7 +89,7 @@ def apply_dynamic_quant(model):
9089
_replace_with_custom_fn_if_matches_filter(
9190
model,
9291
lambda mod: DynamicallyPerAxisQuantizedLinear.from_float(mod),
93-
_is_linear,
92+
_is_linear if filter_fn is None else filter_fn,
9493
)
9594

9695

@@ -104,18 +103,23 @@ def insert_subclass(lin):
104103
return insert_subclass
105104

106105

107-
def change_linear_weights_to_int8_dqtensors(model, filter_fn=None):
    """
    Converts all linear weight tensors to the `Int8DynamicallyQuantizedLinearWeight`
    Tensor subclass, effectively applying the same form of quantization
    as apply_dynamic_quant while not modifying the linear modules.

    Args:
        model: the model whose linear weights will be swapped in place.
        filter_fn: optional `(module, fqn) -> bool` predicate selecting which
            modules to convert. Defaults to linear layers with more than 16
            in_features (int8 dynamic quant kernels need sufficiently large
            inner dims to be profitable — TODO confirm exact rationale).
    """
    def _default_filter(*args):
        # Fix: was a multi-line lambda assigned to a name (PEP 8 E731);
        # a named helper is clearer and easier to debug.
        return _is_linear(*args) and _in_features_greater_than_16(*args)

    if filter_fn is None:
        filter_fn = _default_filter

    _replace_with_custom_fn_if_matches_filter(
        model,
        _get_subclass_inserter(Int8DynamicallyQuantizedLinearWeight),
        filter_fn,
    )
120124

121125

@@ -140,8 +144,36 @@ def change_linear_weights_to_int4_woqtensors(model, **kwargs):
140144
effectively applying the same form of quantization
141145
as apply_dynamic_quant while not modifying the linear modules.
142146
"""
147+
filter_fn = kwargs.pop("filter_fn", _is_linear)
148+
143149
_replace_with_custom_fn_if_matches_filter(
144150
model,
145151
_get_subclass_inserter(Int4WeightOnlyQuantizedLinearWeight, **kwargs),
146-
_is_linear,
152+
filter_fn,
153+
)
154+
155+
def swap_conv2d_1x1_to_linear(model):
    """
    Changes all conv2d 1x1 modules to equivalent linear modules so that
    they can then be quantized.

    Args:
        model: the model whose 1x1 Conv2d modules are replaced in place.
    """
    class PermuteSandwich(torch.nn.Module):
        # Adapts a Linear to consume NCHW conv activations: permute to
        # channels-last (N, H, W, C), apply the linear over C, permute back.
        def __init__(self, mod):
            super().__init__()
            self.mod = mod

        def forward(self, *args):
            # Fix: original used permute(-0, 3, 1, 2); -0 == 0 but is confusing.
            return self.mod(args[0].permute(0, 2, 3, 1)).permute(0, 3, 1, 2)

    def replace_conv2d_1x1(conv):
        assert conv.kernel_size == (1, 1)
        # Fix: original passed bias=(conv.bias is None), which is inverted —
        # it allocated a bias parameter exactly when the conv had none. The
        # linear should have a bias iff the conv does.
        lin = torch.nn.Linear(
            conv.in_channels, conv.out_channels, bias=(conv.bias is not None)
        )
        # 1x1 conv weight has shape (out, in, 1, 1); drop the trailing
        # spatial dims to get the (out, in) linear weight.
        lin.weight = torch.nn.Parameter(conv.weight.squeeze(-1, -2))
        lin.bias = conv.bias
        return PermuteSandwich(lin)

    _replace_with_custom_fn_if_matches_filter(
        model,
        replace_conv2d_1x1,
        filter_fn=lambda mod, *args: isinstance(mod, torch.nn.Conv2d)
        and mod.kernel_size == (1, 1),
    )

torchao/quantization/subclass.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ def from_float(cls, input_float, qmin=-128, qmax=127):
269269
# however the external representation of our tensor will maintain the correct
270270
# shape attribute which needs to be tracked directly.
271271
int_data = w_int_repr.contiguous().t()
272+
if cls is not Int8DynamicallyQuantizedLinearWeight:
273+
int_data = int_data.contiguous()
272274
return cls(
273275
int_data, w_scales, False, input_float.shape, dtype=input_float.dtype
274276
)

0 commit comments

Comments
 (0)