
Commit d330744

[Transform] Hadamard and Matrix Transform Utils (#330)
* add utils and tests
* standardize random hadamard
* apply sqrt division first
* fix typo
* add random option
* update docstrings
* update docstrings
* update docstring

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent d7ce8ec commit d330744

7 files changed: +461 additions, -3 deletions

src/compressed_tensors/transform/transform_args.py

Lines changed: 18 additions & 2 deletions
@@ -13,15 +13,31 @@
 # limitations under the License.

 from enum import Enum
-from typing import Any, List
+from typing import List

 from pydantic import BaseModel, Field, field_validator


-__all__ = ["TransformArgs"]
+__all__ = ["TransformArgs", "TransformLocation"]


 class TransformLocation(str, Enum):
+    """
+    Enum representing which parameters/activations a transform weight should be applied
+    to on a given module.
+
+    | ------------------------------------------------------------------------------------------------------- |  # noqa: E501
+    | Name            | Runtime | Values       | Locations Where Inverse Could Be Applied                      |  # noqa: E501
+    | --------------- | ------- | ------------ | ------------------------------------------------------------- |  # noqa: E501
+    | `INPUT`         | online  | activations  | `prev.WEIGHT_OUTPUT`, `prev.OUTPUT`, `this.WEIGHT_INPUT`      |  # noqa: E501
+    | `WEIGHT_INPUT`  | offline | weight       | `prev.WEIGHT_OUTPUT`, `prev.OUTPUT`, `this.INPUT`             |  # noqa: E501
+    | `WEIGHT_OUTPUT` | offline | weight       | `this.OUTPUT`, `next.INPUT`, `next.WEIGHT_INPUT`              |  # noqa: E501
+    | `OUTPUT`        | online  | activations  | `this.WEIGHT_OUTPUT`, `next.INPUT`, `next.WEIGHT_INPUT`       |  # noqa: E501
+    | `K_CACHE`       | online  | key_values   | `q_proj.Q_ATTN`                                               |  # noqa: E501
+    | `Q_ATTN`        | online  | query_values | `k_proj.K_CACHE`                                              |  # noqa: E501
+    | ------------------------------------------------------------------------------------------------------- |  # noqa: E501
+    """
+
     INPUT = "input"
     WEIGHT_INPUT = "weight_input"
     WEIGHT_OUTPUT = "weight_output"
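Usage note (not part of the diff): because `TransformLocation` subclasses `str`, members compare equal to their plain-string values, which keeps serialized configs interchangeable with enum members. A minimal sketch, assuming the package is installed and the enum is importable from `compressed_tensors.transform` (as the matrix utilities later in this commit do):

import compressed_tensors
from compressed_tensors.transform import TransformLocation

# str-valued enum members compare equal to their serialized form
assert TransformLocation.WEIGHT_INPUT == "weight_input"
assert TransformLocation("weight_output") is TransformLocation.WEIGHT_OUTPUT

# per the docstring table: an offline WEIGHT_INPUT transform can be inverted
# online at this module's INPUT, or fused into the previous module's weight/output
print(TransformLocation.WEIGHT_INPUT.value)  # -> "weight_input"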
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Lines changed: 161 additions & 0 deletions
@@ -0,0 +1,161 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
from typing import Optional, Tuple

import numpy
import torch


__all__ = ["random_hadamard_matrix", "deterministic_hadamard_matrix"]


# adapted from:
# https://github.com/scipy/scipy/blob/v1.15.2/scipy/linalg/_special_matrices.py
def deterministic_hadamard_matrix(size: int) -> torch.Tensor:
    """
    Construct an n-by-n Hadamard matrix, using Sylvester's construction.
    `n` must be a power of 2.

    :param size: order of the matrix, must be a power of 2
    :return: hadamard matrix of size `size`
    """
    if size <= 0:
        raise ValueError("Cannot construct deterministic hadamard of size <= 0")

    log2 = int(math.log(size, 2))
    if size != 2**log2:
        raise ValueError("Cannot construct deterministic hadamard of size != 2^n")

    H = numpy.array([[1]], dtype=int)

    # Sylvester's construction
    for i in range(0, log2):
        H = numpy.vstack((numpy.hstack((H, H)), numpy.hstack((H, -H))))

    return torch.from_numpy(H / math.sqrt(size))


# adapted from:
# https://github.com/facebookresearch/SpinQuant/blob/main/utils/hadamard_utils.py

# TODO: the following library exists for online rotations and should be considered
# in the future:
# https://github.com/Dao-AILab/fast-hadamard-transform/tree/master


def random_hadamard_matrix(
    size: int, gen: Optional[torch.Generator] = None
) -> torch.Tensor:
    """
    Produces a randomly generated Hadamard matrix.
    See https://cornell-relaxml.github.io/quip-sharp/ ,
    Section "Randomized Hadamard Transformation"

    :param size: The dimension of the hadamard matrix
    :param gen: Optional generator for random values
    :return: randomly generated hadamard matrix
    """
    # Benefits: support other shapes / non powers of 2, support randomization
    Q = torch.randint(low=0, high=2, size=(size,), generator=gen, dtype=torch.float64)
    Q = Q * 2 - 1
    Q = torch.diag(Q)
    return _matmul_hadU(Q) / math.sqrt(size)


def _get_hadK(n: int, transpose: bool = False) -> Tuple[torch.Tensor, int]:
    # NOTE: we can easily extend the list of supported shapes/sizes
    # by adding to these methods
    hadK, K = None, None
    if n % 20 == 0:
        assert _is_pow2(n // 20)
        K = 20
        hadK = _get_had20().T if transpose else _get_had20()
    elif n % 12 == 0:
        assert _is_pow2(n // 12)
        K = 12
        hadK = _get_had12().T if transpose else _get_had12()
    else:
        assert _is_pow2(n)
        K = 1

    return hadK, K


def _matmul_hadU(X, transpose=False) -> torch.Tensor:
    n = X.shape[-1]
    # Check if we have the determined hadamard matrix
    hadK, K = _get_hadK(n, transpose)
    # Reshape diag matrix with randomized -1/+1
    input = X.clone().view(-1, n, 1)
    output = input.clone()

    # for cases when hadK is not predetermined, determine hadamard matrix
    while input.shape[1] > K:
        input = input.view(input.shape[0], input.shape[1] // 2, 2, input.shape[2])
        output = output.view(input.shape)
        output[:, :, 0, :] = input[:, :, 0, :] + input[:, :, 1, :]
        output[:, :, 1, :] = input[:, :, 0, :] - input[:, :, 1, :]
        output = output.view(input.shape[0], input.shape[1], -1)
        (input, output) = (output, input)
    del output

    # K == 1 when hadK is None; this happens when the size dim (n)
    # is not compatible with any of the maintained hadamard matrices

    if K > 1:
        # Do not explicitly repeat - OOM
        # input = torch.bmm(
        #     hadK.repeat(len(input), 1, 1).to(input.device).to(input.dtype), input)
        # Use bcast instead

        # for cases when hadK is pre-determined
        input = hadK.view(1, K, K).to(input) @ input

    # normalize
    return input.view(X.shape)


def _is_pow2(n: int) -> bool:
    return (n & (n - 1) == 0) and (n > 0)


def _reshape_bits(packed_bits: numpy.ndarray, original_size: int) -> numpy.ndarray:
    had_unpacked = numpy.unpackbits(packed_bits)
    had_unpacked = [1 if x == 1 else -1 for x in had_unpacked]
    had_unpacked = numpy.array(had_unpacked).reshape((original_size, original_size))
    return had_unpacked


# http://www.neilsloane.com/hadamard/index.html
def _get_had12() -> torch.Tensor:
    # fmt: off
    had_12 = numpy.array([128, 13, 29, 232, 235, 71, 218,
                          62, 209, 246, 139, 180, 157, 168, 237, 199, 106, 59], dtype=numpy.uint8)
    # fmt: on
    # TODO: just unpack during apply
    had_12_unpacked = _reshape_bits(had_12, original_size=12)
    return torch.tensor(had_12_unpacked)


def _get_had20() -> torch.Tensor:
    # fmt: off
    had_20 = numpy.array([128, 0, 13, 133, 121, 236, 43, 203, 97, 94, 155, 10, 252,
                          216, 87, 230, 194, 191, 54, 21, 249, 176, 171, 205, 133, 222, 108, 42, 243,
                          97, 215, 155, 10, 188, 216, 149, 230, 200, 175, 54, 133, 121, 188, 43,
                          205, 225, 94, 107, 10, 243], dtype=numpy.uint8)
    # fmt: on
    # TODO: just unpack during apply
    had_20_unpacked = _reshape_bits(had_20, original_size=20)
    return torch.tensor(had_20_unpacked)
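A short usage sketch for the two public constructors above. Both return matrices already scaled by 1/sqrt(size), so `H @ H.T` should be close to the identity. The import path is an assumption, since the diff view omits this new file's name:

import torch

# NOTE: assumed module path; only the file contents appear in the diff view
from compressed_tensors.transform.utils.hadamard import (
    deterministic_hadamard_matrix,
    random_hadamard_matrix,
)

# Sylvester construction: size must be a power of 2
H = deterministic_hadamard_matrix(64)
assert torch.allclose(H @ H.T, torch.eye(64, dtype=H.dtype), atol=1e-8)

# Randomized construction: also supports sizes of the form (12 or 20) * 2^n,
# with an optional torch.Generator for reproducibility
gen = torch.Generator().manual_seed(0)
R = random_hadamard_matrix(768, gen=gen)  # 768 = 12 * 64
assert torch.allclose(R @ R.T, torch.eye(768, dtype=R.dtype), atol=1e-8)
assert torch.allclose(R, random_hadamard_matrix(768, gen=torch.Generator().manual_seed(0)))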
Lines changed: 91 additions & 0 deletions
@@ -0,0 +1,91 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from compressed_tensors.transform import TransformLocation


__all__ = ["get_matrix_size", "apply_transform_weight"]


def get_matrix_size(module: torch.nn.Module, location: TransformLocation) -> int:
    """
    Determine the size of a matrix given its location on the module

    :param module: module that matrix will be applied to
    :param location: location on module
    :return: size of matrix
    """
    assert isinstance(module, torch.nn.Linear)
    if location in ("input", TransformLocation.WEIGHT_INPUT):
        return module.in_features
    else:
        return module.out_features


def apply_transform_weight(
    weight: torch.Tensor,
    value: torch.Tensor,
    location: TransformLocation,
) -> torch.Tensor:
    """
    Using the transform location, determine how to apply the transform weight to the
    given value. For more info on input and output transforms, see `TransformLocation`

    The following explains how weights should be applied to values according to location

    let  x  be input activation
         W  be weight,
         yh, xh, Wh be transformed output, input, weight

    note that
        y  = (x W.T)  // torch.nn.Linear

    Choose values for yh, xh, and Wh which incorporate matrix transforms

    let  V, Vi  be transform matrices on input side
         U, Ui  be transform matrices on output side

    pick  xh = (x V)
          Wh = (U.T W Vi.T)
          yh = (y U)

    The following shows that `yh = (xh) (Wh).T` for the chosen values of yh, xh, and Wh

    (xh) (Wh).T = (x V) (U.T W Vi.T).T
                = (x V) (Vi W.T U)  // transpose matrix product identity
                = (x W.T) U
                = y U
                = yh

    :param weight: transform weight to apply
    :param value: value to apply weight to
    :param location: determines how weight should be applied
    :return: value after transform weight has been applied
    """

    if location == TransformLocation.INPUT:
        return value @ weight

    elif location == TransformLocation.WEIGHT_INPUT:
        return value @ weight.T

    elif location == TransformLocation.WEIGHT_OUTPUT:
        return weight.T @ value

    elif location == TransformLocation.OUTPUT:
        return value @ weight

    else:
        raise NotImplementedError(f"{location} has not been implemented yet")
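As a sanity check of the docstring derivation, the sketch below fuses Hadamard rotations into a Linear weight offline and applies the matching online rotations to the activations and output; the layer's function is preserved. The import paths for the new utility modules are assumptions, since the diff view omits the new files' names:

import torch

from compressed_tensors.transform import TransformLocation
# NOTE: assumed module paths; only the file contents appear in the diff view
from compressed_tensors.transform.utils.hadamard import deterministic_hadamard_matrix
from compressed_tensors.transform.utils.matrix import apply_transform_weight, get_matrix_size

torch.manual_seed(0)
linear = torch.nn.Linear(64, 32, bias=False, dtype=torch.float64)
x = torch.randn(8, 64, dtype=torch.float64)
y = linear(x)  # y = x W.T

# input-side (V) and output-side (U) rotations; normalized Sylvester Hadamards
# are symmetric and orthogonal, so each serves as its own inverse
V = deterministic_hadamard_matrix(get_matrix_size(linear, TransformLocation.WEIGHT_INPUT))   # 64
U = deterministic_hadamard_matrix(get_matrix_size(linear, TransformLocation.WEIGHT_OUTPUT))  # 32

xh = apply_transform_weight(V, x, TransformLocation.INPUT)                     # x V
Wh = apply_transform_weight(V, linear.weight, TransformLocation.WEIGHT_INPUT)  # W V.T
Wh = apply_transform_weight(U, Wh, TransformLocation.WEIGHT_OUTPUT)            # U.T W V.T
yh = apply_transform_weight(U, y, TransformLocation.OUTPUT)                    # y U

# matches the docstring derivation: (xh) (Wh).T == yh
assert torch.allclose(xh @ Wh.T, yh, atol=1e-8)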

src/compressed_tensors/utils/helpers.py

Lines changed: 53 additions & 0 deletions
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import contextlib
 import warnings
 from functools import wraps
 from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
@@ -38,6 +39,8 @@
     "shard_tensor",
     "pack_bitmasks",
     "unpack_bitmasks",
+    "patch_attr",
+    "ParameterizedDefaultDict",
 ]

 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -328,3 +331,53 @@ def unpack_bitmasks(
     )

     return unpacked_bitmasks_torch
+
+
+@contextlib.contextmanager
+def patch_attr(base: object, attr: str, value: Any):
+    """
+    Patch the value of an object attribute. Original value is restored upon exit
+
+    :param base: object which has the attribute to patch
+    :param attr: name of the attribute to patch
+    :param value: used to replace original value
+
+    Usage:
+    >>> from types import SimpleNamespace
+    >>> obj = SimpleNamespace()
+    >>> with patch_attr(obj, "attribute", "value"):
+    ...     assert obj.attribute == "value"
+    >>> assert not hasattr(obj, "attribute")
+    """
+    _sentinel = object()
+    original_value = getattr(base, attr, _sentinel)
+
+    setattr(base, attr, value)
+    try:
+        yield
+    finally:
+        if original_value is not _sentinel:
+            setattr(base, attr, original_value)
+        else:
+            delattr(base, attr)
+
+
+class ParameterizedDefaultDict(dict):
+    """
+    Similar to `collections.DefaultDict`, but upon fetching a key which is missing,
+    the key is passed as arguments to the `default_factory`
+
+    :param default_factory: function which takes a key as input and returns the
+        corresponding default value
+    """
+
+    def __init__(self, default_factory: Callable[[Any], Any]):
+        self.default_factory = default_factory
+
+    def __missing__(self, key):
+        if isinstance(key, tuple):
+            value = self.default_factory(*key)
+        else:
+            value = self.default_factory(key)
+        self[key] = value
+        return value
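Quick illustrations of the two helpers added above (a minimal sketch; the example class and factory functions are arbitrary):

from compressed_tensors.utils.helpers import ParameterizedDefaultDict, patch_attr

# ParameterizedDefaultDict forwards the missing key (or tuple of keys) to the
# factory, which is convenient for caching values built from their key
squares = ParameterizedDefaultDict(lambda n: n * n)
assert squares[12] == 144          # built on first access, then cached
assert 12 in squares

sums = ParameterizedDefaultDict(lambda a, b: a + b)
assert sums[(3, 4)] == 7           # tuple keys are unpacked into factory arguments

# patch_attr temporarily overrides an attribute and restores it on exit
class Config:
    precision = "fp16"

cfg = Config()
with patch_attr(cfg, "precision", "fp64"):
    assert cfg.precision == "fp64"
assert cfg.precision == "fp16"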
