[Transform] Factory classes with shared memory and offloading (#316)

kylesayrs · web-flow · commit 4b81ac7e9d70 · 2025-06-10T11:24:36.000-04:00
* add utilities

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* add tests

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* add additional tests

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* add utils and tests

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* Implement transform factories

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* add delete_offload_module

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* key inverses by weight

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* fix tests

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* standardize random hadamard

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* prepend input hooks

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* apply sqrt division first

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* use divided hadamards

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* fix typo

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* add random option

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* use random seeds, rename matrix multiply

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* add deterministic generation to random matrix

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* update docstrings

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* update docstrings

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* make seed optional

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

---------

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
diff --git a/src/compressed_tensors/transform/__init__.py b/src/compressed_tensors/transform/__init__.py
@@ -18,3 +18,8 @@
 from .transform_args import *
 from .transform_scheme import *
 from .transform_config import *
+
+from .factory.base import *
+from .factory.hadamard import *
+from .factory.matrix_multiply import *
+from .factory.random_hadamard import *
diff --git a/src/compressed_tensors/transform/factory/__init__.py b/src/compressed_tensors/transform/factory/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/src/compressed_tensors/transform/factory/base.py b/src/compressed_tensors/transform/factory/base.py
@@ -0,0 +1,164 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from typing import Optional
+
+import torch
+import torch.nn.utils.parametrize as P
+from compressed_tensors.quantization.lifecycle import is_target  # TODO: move to utils
+from compressed_tensors.registry.registry import RegistryMixin, T
+from compressed_tensors.transform import (
+    TransformArgs,
+    TransformLocation,
+    TransformScheme,
+)
+from compressed_tensors.utils import (
+    align_module_device,
+    has_offloaded_params,
+    patch_attr,
+    register_offload_module,
+    update_offload_parameter,
+)
+from torch import Tensor
+from torch.nn import Module, Parameter
+
+
+__all__ = ["TransformFactory", "TransformBase"]
+
+
+class TransformFactory(RegistryMixin, ABC):
+    """
+    Abstract base for factories which build transforms and attach them to the
+    modules of a model
+
+    :param name: name associated with transform scheme
+    :param scheme: transform scheme which defines how transforms should be created
+    :param seed: optional seed for the generator used to randomize transform
+        weights. `manual_seed` is only called when a seed is provided
+    """
+
+    def __init__(self, name: str, scheme: TransformScheme, seed: Optional[int] = None):
+        generator = torch.Generator()
+        if seed is not None:
+            generator.manual_seed(seed)
+
+        self.name = name
+        self.scheme = scheme
+        self.generator = generator
+
+    @classmethod
+    def from_scheme(cls: type[T], scheme: TransformScheme, **kwargs) -> T:
+        """
+        Look up the factory class registered under `scheme.type` and instantiate it
+
+        :param scheme: defines how transforms should be created
+        :param kwargs: TransformFactory constructor arguments
+        :return: subclass of `TransformFactory` corresponding to the scheme type
+        """
+        factory_cls = cls.get_value_from_registry(name=scheme.type)
+        return factory_cls(scheme=scheme, **kwargs)
+
+    @abstractmethod
+    def create_transform(self, module: Module, args: TransformArgs) -> "TransformBase":
+        """
+        Build the transform for a single module. Subclasses decide how the
+        transform is constructed and may cache weights so that memory is shared
+
+        :param module: parent module that transform will be applied to
+        :param args: defines how the transform will be applied to the module
+        :return: instance of TransformBase
+        """
+        raise NotImplementedError()
+
+    def apply_to_model(self, model: Module):
+        """
+        Apply every entry of `scheme.apply` to each module of the model that it
+        targets
+
+        :param model: module to apply transforms to
+        """
+        for transform_args in self.scheme.apply:
+            # the module list is materialized before any transform is attached,
+            # since `_apply_to_module` registers new submodules
+            for module_name, submodule in list(model.named_modules()):
+                matched = is_target(
+                    module_name,
+                    submodule,
+                    transform_args.targets,
+                    transform_args.ignore,
+                )
+                if matched:
+                    self._apply_to_module(submodule, transform_args)
+
+    def _apply_to_module(self, module: Module, args: TransformArgs):
+        """
+        Create a transform, attach it to the module as a submodule, and wire it
+        in according to `args.location`: as a prepended forward pre-hook for
+        inputs, as a forward hook for outputs, or eagerly applied to the weight
+
+        :param module: target module to apply transforms to
+        :param args: defines how the transform will be applied to the target module
+        :raises ValueError: if the scheme requires grad and the module is offloaded
+        :raises NotImplementedError: if the location is not one handled here
+        """
+        location = args.location
+        submodule_name = f"{self.name}_{location}"
+        transform = self.create_transform(module, args)
+
+        # attach the transform as a submodule. Even in the `weight` cases, this
+        # attachment is needed in order to support saving in the frozen state
+        register_offload_module(module, submodule_name, transform)
+
+        # inputs: transform the first positional argument before the forward pass
+        if location == TransformLocation.INPUT:
+
+            def transform_input(_module, hook_args):
+                return transform(hook_args[0])
+
+            module.register_forward_pre_hook(transform_input, prepend=True)
+            return
+
+        # weights: the transformation is applied eagerly
+        if location in (
+            TransformLocation.WEIGHT_INPUT,
+            TransformLocation.WEIGHT_OUTPUT,
+        ):
+            assert isinstance(module, torch.nn.Linear)
+            assert module.bias is None
+
+            with torch.no_grad(), align_module_device(module):
+                transformed = transform(module.weight)
+                update_offload_parameter(module, "weight", transformed)
+
+            if not self.scheme.requires_grad:
+                return
+
+            # for training, the weight changes with every forward pass
+            # so we can leverage parametrization to propagate the gradient
+            if has_offloaded_params(module):
+                raise ValueError("Offloaded training is not supported")
+            P.register_parametrization(module, "weight", transform)
+            return
+
+        # outputs: transform whatever the forward pass returned
+        if location == TransformLocation.OUTPUT:
+
+            def transform_output(_module, _input, output):
+                return transform(output)
+
+            module.register_forward_hook(transform_output)
+            return
+
+        # other locations such as q_attn and k_attn have not been implemented
+        raise NotImplementedError()
+
+
+class TransformBase(Module, ABC):
+    """
+    Module which applies a transform in the manner described by its TransformArgs
+    """
+
+    args: TransformArgs
+    weight: Parameter
+
+    @abstractmethod
+    def forward(self, value: Tensor) -> Tensor:
+        raise NotImplementedError()
+
+    def right_inverse(self, value: Tensor) -> Tensor:
+        # run `forward` while `args.inverse` is patched to its negation
+        # NOTE(review): presumably `patch_attr` restores the flag on exit - confirm
+        flipped = not self.args.inverse
+        with patch_attr(self.args, "inverse", flipped):
+            result = self.forward(value)
+        return result
+
+    def __repr__(self):
+        cls_name = self.__class__.__name__
+        return f"{cls_name}(inverse={self.args.inverse})"
diff --git a/src/compressed_tensors/transform/factory/hadamard.py b/src/compressed_tensors/transform/factory/hadamard.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional
+
+import torch
+from compressed_tensors.transform import TransformArgs, TransformScheme
+from compressed_tensors.transform.factory.base import TransformBase, TransformFactory
+from compressed_tensors.transform.utils.hadamard import deterministic_hadamard_matrix
+from compressed_tensors.transform.utils.utils import (
+    apply_transform_weight,
+    get_matrix_size,
+)
+from compressed_tensors.utils import get_offloaded_device
+from compressed_tensors.utils.helpers import ParameterizedDefaultDict
+from torch import Tensor, device, dtype
+from torch.nn import Linear, Module, Parameter
+
+
+@TransformFactory.register("hadamard")
+class HadamardFactory(TransformFactory):
+    """
+    Factory which builds hadamard transforms and applies them to a model
+
+    :param name: name associated with transform scheme
+    :param scheme: transform scheme which defines how transforms should be created
+    :param seed: random seed used to transform weight randomization
+    """
+
+    def __init__(self, name: str, scheme: TransformScheme, seed: Optional[int] = None):
+        super().__init__(name, scheme, seed)
+        # weights are looked up by (size, dtype, device) and built on demand
+        self.weights = ParameterizedDefaultDict(self._create_weight)
+
+    def create_transform(self, module: Module, args: TransformArgs):
+        """
+        Build a HadamardTransform for a linear module. The weight is fetched from
+        `self.weights`, keyed by matrix size, weight dtype, and offloaded device,
+        so transforms sharing that key share a weight
+
+        :param module: parent module that transform will be applied to
+        :param args: defines how the transform will be applied to the module
+        """
+        assert isinstance(module, Linear)
+        cache_key = (
+            get_matrix_size(module, args.location),
+            module.weight.dtype,
+            get_offloaded_device(module),
+        )
+        return HadamardTransform(self.weights[cache_key], args)
+
+    def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
+        # build the matrix first, then cast and move it in a single `to` call
+        hadamard = deterministic_hadamard_matrix(size).to(dtype=dtype, device=device)
+        return Parameter(hadamard, requires_grad=self.scheme.requires_grad)
+
+
+class HadamardTransform(TransformBase):
+    """
+    Transform which applies `weight`, or its transpose when `args.inverse` is set
+    """
+
+    def __init__(self, weight: Parameter, args: TransformArgs):
+        super().__init__()
+        self.weight = weight
+        self.args = args
+
+    def forward(self, value: Tensor) -> Tensor:
+        # the inverse direction uses the transposed weight
+        matrix = self.weight.T if self.args.inverse else self.weight
+        return apply_transform_weight(matrix, value, self.args.location)
diff --git a/src/compressed_tensors/transform/factory/matrix_multiply.py b/src/compressed_tensors/transform/factory/matrix_multiply.py
@@ -0,0 +1,90 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional
+
+import torch
+from compressed_tensors.transform import TransformArgs, TransformScheme
+from compressed_tensors.transform.factory.base import TransformBase, TransformFactory
+from compressed_tensors.transform.utils.utils import (
+    apply_transform_weight,
+    get_matrix_size,
+)
+from compressed_tensors.utils import get_offloaded_device
+from compressed_tensors.utils.helpers import ParameterizedDefaultDict
+from torch import Tensor, device, dtype
+from torch.nn import Linear, Module, Parameter
+
+
+@TransformFactory.register("random-matrix")
+class RandomMatrixFactory(TransformFactory):
+    """
+    Factory used to apply random matrix transforms to a model
+
+    :param name: name associated with transform scheme
+    :param scheme: transform scheme which defines how transforms should be created
+    :param seed: random seed used to transform weight randomization
+    """
+
+    def __init__(self, name: str, scheme: TransformScheme, seed: Optional[int] = None):
+        super().__init__(name, scheme, seed)
+        # weights are keyed by (size, dtype, device); inverses are keyed by the
+        # weight parameter they were computed from
+        self.weights = ParameterizedDefaultDict(self._create_weight)
+        self.inverses = ParameterizedDefaultDict(self._create_inverse)
+
+    def create_transform(self, module: Module, args: TransformArgs):
+        """
+        Create a RandomMatrixTransform for applying to a module. Transforms with the
+        same size, dtype, and device are cached. When `args.inverse` is set, the
+        transform is given the inverse of the cached weight instead
+
+        :param module: parent module that transform will be applied to
+        :param args: defines how the transform will be applied to the module
+        :return: transform holding either the weight or its inverse
+        """
+        assert isinstance(module, Linear)
+        size = get_matrix_size(module, args.location)
+        dtype = module.weight.dtype
+        device = get_offloaded_device(module)
+
+        weight = self.weights[size, dtype, device]
+        if args.inverse:
+            weight = self.inverses[weight]
+
+        return RandomMatrixTransform(weight, args)
+
+    def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
+        """
+        Sample a (size, size) matrix with `torch.rand` using the factory generator
+
+        NOTE: the sampled matrix is not checked for invertibility
+
+        :param size: number of rows and columns of the matrix
+        :param dtype: dtype of the created weight
+        :param device: device the created weight is placed on
+        :return: parameter whose `requires_grad` follows the scheme
+        """
+        # sample on the generator's own device and move afterwards: `torch.rand`
+        # raises if the generator device differs from the requested output device
+        data = torch.rand(
+            (size, size),
+            generator=self.generator,
+            dtype=dtype,
+            device=self.generator.device,
+        )
+        data = data.to(device=device)
+        return Parameter(data, requires_grad=self.scheme.requires_grad)
+
+    def _create_inverse(self, weight: Parameter) -> Parameter:
+        """
+        Invert a weight for use by inverse transforms
+
+        :param weight: weight parameter to invert
+        :return: inverted weight, created with `requires_grad=False`
+        """
+        data = high_precision_invert(weight.data)
+        return Parameter(data, requires_grad=False)
+
+
+class RandomMatrixTransform(TransformBase):
+    """
+    Transform which applies the matrix held in `weight`
+
+    :param weight: matrix to apply. RandomMatrixFactory passes the inverted matrix
+        when `args.inverse` is set, so `forward` never inverts
+    :param args: defines how the transform will be applied to the module
+    """
+
+    def __init__(self, weight: Parameter, args: TransformArgs):
+        super().__init__()
+        self.weight = weight  # is an inverse if args.inverse
+        self.args = args
+
+    def forward(self, value: Tensor) -> Tensor:
+        return apply_transform_weight(self.weight, value, self.args.location)
+
+    def right_inverse(self, value: Tensor) -> Tensor:
+        # overrides the base implementation: the held weight is inverted
+        # numerically rather than toggling `args.inverse`
+        inverse = high_precision_invert(self.weight)
+        return apply_transform_weight(inverse, value, self.args.location)
+
+
+def high_precision_invert(weight: Tensor) -> Tensor:
+    """
+    Invert a matrix in float32 and cast the result back to the input dtype
+
+    :param weight: matrix to invert
+    :return: inverse of `weight`, with the same dtype as `weight`
+    """
+    original_dtype = weight.dtype
+    inverse = torch.linalg.inv(weight.to(torch.float32))
+    return inverse.to(original_dtype)
diff --git a/src/compressed_tensors/transform/factory/random_hadamard.py b/src/compressed_tensors/transform/factory/random_hadamard.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from compressed_tensors.transform import HadamardFactory, TransformFactory
+from compressed_tensors.transform.utils.hadamard import random_hadamard_matrix
+from torch import device, dtype
+from torch.nn import Parameter
+
+
+@TransformFactory.register("random-hadamard")
+class RandomHadamardFactory(HadamardFactory):
+    """
+    Factory which builds random hadamard transforms and applies them to a model.
+    Only weight creation differs from HadamardFactory
+
+    :param name: name associated with transform scheme
+    :param scheme: transform scheme which defines how transforms should be created
+    :param seed: random seed used to transform weight randomization
+    """
+
+    def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
+        # the factory generator is passed through to the matrix construction
+        matrix = random_hadamard_matrix(size, self.generator)
+        return Parameter(
+            matrix.to(dtype=dtype, device=device),
+            requires_grad=self.scheme.requires_grad,
+        )
diff --git a/tests/test_transform/factory/test_correctness.py b/tests/test_transform/factory/test_correctness.py
diff --git a/tests/test_transform/factory/test_memory.py b/tests/test_transform/factory/test_memory.py