
Commit 48653ec

Merge remote-tracking branch 'origin' into kylesayrs/transform_permutations

2 parents: 75b9307 + 98a0cd7
File tree: 15 files changed, +510 −258 lines


.github/actions/test/action.yml

Lines changed: 17 additions & 0 deletions

```diff
@@ -22,6 +22,23 @@ runs:
         name: compressed
         extra: "[dev,accelerate]"
 
+    - name: clean up
+      run: |
+        echo "cleaning up disk space..."
+        find . -type f -name '*.whl' -exec rm -rf {} \;
+        python -m pip cache purge
+        sudo rm -rf /usr/local/.ghcup
+        sudo rm -rf /opt/hostedtoolcache/CodeQL
+        sudo rm -rf /usr/local/lib/android/sdk/ndk
+        sudo rm -rf /usr/share/dotnet
+        sudo rm -rf /opt/ghc
+        sudo rm -rf /usr/local/share/boost
+        if [[ "$(cat /etc/issue)" =~ Ubuntu ]]; then
+          sudo apt-get clean
+        fi
+        df -h
+      shell: bash
+
     - name: test
       id: test
       run: |
```
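The added step reclaims several gigabytes by purging preinstalled toolchains before the tests run, and the closing `df -h` prints the result. For a quick programmatic equivalent of that final check, a minimal sketch (the `free_gb` helper is hypothetical, not part of this repo):

```python
import shutil

def free_gb(path: str = "/") -> float:
    """Free disk space at `path` in GiB, roughly what `df -h` reports."""
    return shutil.disk_usage(path).free / 2**30

print(f"free space on /: {free_gb():.1f} GiB")
```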

.github/workflows/report.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -120,7 +120,7 @@ jobs:
         shell: bash
 
       - name: report to reportportal
-        uses: neuralmagic/nm-actions/actions/reportportal_submit_execution_results@v1.15.0
+        uses: neuralmagic/nm-actions/actions/reportportal_submit_execution_results@v1.22.0
         with:
           droute_username: ${{ secrets.DROUTE_USERNAME }}
           droute_password: ${{ secrets.DROUTE_PASSWORD }}
```

.github/workflows/test.yml

Lines changed: 29 additions & 17 deletions

```diff
@@ -22,7 +22,9 @@ on:
     whl:
       description: "whl to test (variable appears late binding so unusable outside 'download artifact')"
       type: string
-      required: true
+    run_id:
+      description: run id of the BUILD job that generated the assets
+      type: string
 
   # makes workflow manually callable
   workflow_dispatch:
@@ -44,9 +46,11 @@ on:
       type: string
       required: true
     whl:
-      description: "whl to test (variable appears late binding so unusable outside 'download artifact')"
+      description: "whl to test (provide either whl or run_id)"
+      type: string
+    run_id:
+      description: run id of the BUILD job that generated the assets
       type: string
-      required: true
 
 jobs:
 
@@ -87,11 +91,33 @@ jobs:
 
       - name: download whl
        id: download
+       if: ${{ inputs.whl != '' }}
        uses: actions/download-artifact@v4
        with:
          name: ${{ inputs.whl }}
          path: ${{ inputs.whl }}
 
+      # GCP
+      - name: 'Authenticate to Google Cloud'
+        id: auth
+        uses: google-github-actions/auth@v2.1.3
+        with:
+          project_id: ${{ secrets.GCP_PROJECT }}
+          workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
+          service_account: ${{ secrets.GCP_GHA_SA }}
+
+      - name: 'Set up Cloud SDK'
+        uses: 'google-github-actions/setup-gcloud@v2'
+        with:
+          version: '>= 473.0.0'
+
+      - name: download assets
+        if: ${{ inputs.run_id != '' }}
+        uses: neuralmagic/nm-actions/actions/gcp-download-assets@v1.1.0
+        with:
+          bucket_source: ${{ secrets.GCP_BUILD_ML_ASSETS2 }}
+          run_id: ${{ inputs.run_id }}
+
      - name: run tests
        id: test
        uses: ./.github/actions/test/
@@ -109,20 +135,6 @@ jobs:
          whl: ${{ inputs.whl }}
          test_status: ${{ steps.test.outputs.status }}
 
-      # GCP
-      - name: 'Authenticate to Google Cloud'
-        id: auth
-        uses: google-github-actions/auth@v2.1.3
-        with:
-          project_id: ${{ secrets.GCP_PROJECT }}
-          workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
-          service_account: ${{ secrets.GCP_GHA_SA }}
-
-      - name: 'Set up Cloud SDK'
-        uses: 'google-github-actions/setup-gcloud@v2'
-        with:
-          version: '>= 473.0.0'
-
      - name: copy results to GCP
        run: |
          gcloud storage cp test-results/report.xml ${{ secrets.GCP_BUILD_ML_ASSETS2 }}/${{ github.run_id }}/test-results/report-${{ inputs.test_label }}.xml
```

.github/workflows/trigger-all.yml

Lines changed: 2 additions & 2 deletions

```diff
@@ -32,8 +32,8 @@ jobs:
       wf_category: ${{ inputs.wf_category || 'NIGHTLY' }}
       gitref: ${{ inputs.gitref || 'main' }}
       push_to_pypi: ${{ (github.event.schedule == '30 0 * * *') || inputs.push_to_pypi || false }}
-      test_configs: '[{"python":"3.11.4","label":"ubuntu-22.04","timeout":"40"},
-                     {"python":"3.10.12","label":"ubuntu-24.04","timeout":"40"},
+      test_configs: '[{"python":"3.11.4","label":"ubuntu-24.04","timeout":"40"},
+                     {"python":"3.10.12","label":"ubuntu-22.04","timeout":"40"},
                      {"python":"3.9.17","label":"k8s-h100-solo","timeout":"40"},
                      {"python":"3.12.6","label":"k8s-a100-duo","timeout":"40"}]'
```

setup.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -113,5 +113,6 @@ def _setup_extras() -> Dict:
     extras_require=_setup_extras(),
     install_requires=_setup_install_requires(),
     package_dir={"": "src"},
+    package_data={"": ["transform/utils/hadamards.safetensors"]},
     packages=_setup_packages(),
 )
```
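This `package_data` entry ships the known Hadamard matrices inside the wheel so the lookup in `src/compressed_tensors/transform/utils/hadamard.py` (see the diff further down) can resolve them relative to the installed module. A minimal sketch of reading the bundled file with `safetensors`, assuming its keys are matrix sizes stored as strings (as the lookup code in this commit implies):

```python
from pathlib import Path

import torch
from safetensors import safe_open

# mirrors REPO_PATH in the utils diff below; adjust the path for a source checkout
repo_path = Path("src/compressed_tensors/transform/utils/hadamards.safetensors")

with safe_open(str(repo_path), framework="pt", device="cpu") as file:
    sizes = sorted(int(key) for key in file.keys())  # available base matrix sizes
    H = file.get_tensor(str(sizes[0])).to(dtype=torch.float32)
    print(f"base sizes: {sizes}; smallest matrix shape: {tuple(H.shape)}")
```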

src/compressed_tensors/transform/factory/hadamard.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -61,7 +61,7 @@ def create_transform(self, module: Module, args: TransformArgs):
         return HadamardTransform(weight, perm, args)
 
     def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
-        data = deterministic_hadamard_matrix(size)
+        data = deterministic_hadamard_matrix(size, dtype, device)
         data = data.to(dtype=dtype, device=device)
         return Parameter(data, requires_grad=self.scheme.requires_grad)
```
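With this change the factory builds the weight directly in the target `dtype` and `device` instead of constructing a float64 matrix first, so the following `data.to(...)` is effectively a no-op. A quick sanity check of the new signature, a sketch using the module path shown later in this commit:

```python
import torch
from compressed_tensors.transform.utils.hadamard import deterministic_hadamard_matrix

# 4x4 Hadamard constructed directly in the requested dtype and device
H = deterministic_hadamard_matrix(4, torch.float32, torch.device("cpu"))

# the result is normalized by 1/sqrt(size), so it is orthonormal
assert torch.allclose(H @ H.T, torch.eye(4))
```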

src/compressed_tensors/transform/factory/random_hadamard.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -29,6 +29,6 @@ class RandomHadamardFactory(HadamardFactory):
     """
 
     def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
-        data = random_hadamard_matrix(size, self.generator)
+        data = random_hadamard_matrix(size, dtype, device, self.generator)
         data = data.to(dtype=dtype, device=device)
         return Parameter(data, requires_grad=self.scheme.requires_grad)
```
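`random_hadamard_matrix` gains the same `dtype`/`device` arguments, with the seeded generator moved last. A hedged usage sketch (it assumes the bundled `hadamards.safetensors` contains a base matrix whose size divides the requested one, which holds for power-of-two sizes per the lookup logic below):

```python
import torch
from compressed_tensors.transform.utils.hadamard import random_hadamard_matrix

gen = torch.Generator().manual_seed(0)  # sampling happens on cpu, per the diff
H = random_hadamard_matrix(2048, torch.float32, torch.device("cpu"), gen)

# the random sign flip preserves orthonormality: H @ H.T stays close to identity
assert torch.allclose(H @ H.T, torch.eye(2048), atol=1e-4)
```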

src/compressed_tensors/transform/utils/hadamard.py

Lines changed: 91 additions & 92 deletions

```diff
@@ -13,149 +13,148 @@
 # limitations under the License.
 
 import math
-from typing import Optional, Tuple
+from pathlib import Path
+from typing import Optional
 
-import numpy
 import torch
+from safetensors import safe_open
 
 
-__all__ = ["random_hadamard_matrix", "deterministic_hadamard_matrix"]
+REPO_PATH = Path(__file__).parent / "hadamards.safetensors"
 
-# adapted from:
-# https://github.com/scipy/scipy/blob/v1.15.2/scipy/linalg/_special_matrices.py
-def deterministic_hadamard_matrix(size: int) -> torch.Tensor:
+
+__all__ = ["random_hadamard_matrix", "deterministic_hadamard_matrix", "is_pow2"]
+
+
+# note that hadamard matrix multiplication can be accelerated using a library such as
+# https://github.com/Dao-AILab/fast-hadamard-transform/tree/master
+
+
+def deterministic_hadamard_matrix(
+    size: int,
+    dtype: torch.dtype = torch.bfloat16,
+    device: torch.device = torch.device("cpu"),
+) -> torch.Tensor:
     """
     Construct an n-by-n Hadamard matrix, using Sylvester's construction.
     `n` must be a power of 2.
 
+    Adapted from https://github.com/scipy/scipy/blob/v1.15.2/scipy/linalg/_special_matrices.py  # noqa: E501
+
     :param size: order of the matrix, must be a power of 2
+    :param dtype: data type of matrix
+    :param device: device to construct matrix on
     :return: hadamard matrix of size `size`
     """
     if size <= 0:
         raise ValueError("Cannot construct deterministic hadamard of size <= 0")
 
-    log2 = int(math.log(size, 2))
+    log2 = int(math.log2(size))
     if size != 2**log2:
         raise ValueError("Cannot construct deterministic hadamard of size != 2^n")
 
-    H = numpy.array([[1]], dtype=int)
+    H = torch.tensor([[1]], dtype=dtype, device=device)
 
     # Sylvester's construction
-    for i in range(0, log2):
-        H = numpy.vstack((numpy.hstack((H, H)), numpy.hstack((H, -H))))
-
-    return torch.from_numpy(H / math.sqrt(size))
+    for _ in range(log2):
+        H = torch.vstack((torch.hstack((H, H)), torch.hstack((H, -H))))
 
-
-# adapted from:
-# https://github.com/facebookresearch/SpinQuant/blob/main/utils/hadamard_utils.py
-
-# TODO: the following library exists for online rotations and should be considered
-# in the future:
-# https://github.com/Dao-AILab/fast-hadamard-transform/tree/master
+    return H / math.sqrt(size)
 
 
 def random_hadamard_matrix(
-    size: int, gen: Optional[torch.Generator] = None
+    size: int,
+    dtype: torch.dtype = torch.bfloat16,
+    device: torch.device = torch.device("cpu"),
+    gen: Optional[torch.Generator] = None,
 ) -> torch.Tensor:
     """
-    Produces a randomly generated Hadamard matrix.
-    See https://cornell-relaxml.github.io/quip-sharp/ ,
-    Section "Randomized Hadamard Transformation"
+    Produces a randomly generated Hadamard matrix. Differs from
+    `deterministic_hadamard_matrix` in that this function supports non-powers of 2
+    and randomization using a seeded generator
+
+    Adapted from https://github.com/facebookresearch/SpinQuant/blob/main/utils/hadamard_utils.py  # noqa: E501
+    Known matrices were retrieved from N. J. A. Sloane's Library of Hadamard Matrices http://www.neilsloane.com/hadamard/  # noqa: E501
 
     :param size: The dimension of the hadamard matrix
+    :param dtype: data type of matrix
+    :param device: device to construct matrix on
     :param gen: Optional generator for random values
     :return: randomly generated hadamard matrix
     """
-    # Benefits: support other shapes / non powers of 2, support randomization
-    Q = torch.randint(low=0, high=2, size=(size,), generator=gen, dtype=torch.float64)
+    Q = torch.randint(low=0, high=2, size=(size,), generator=gen, dtype=dtype)  # cpu
+    Q = Q.to(device=device)
     Q = Q * 2 - 1
     Q = torch.diag(Q)
     return _matmul_hadU(Q) / math.sqrt(size)
 
 
-def _get_hadK(n: int, transpose: bool = False) -> Tuple[torch.Tensor, int]:
-    # NOTE: we can easily extend the list of supported shapes/sizes
-    # by adding to these methods
-    hadK, K = None, None
-    if n % 20 == 0:
-        assert _is_pow2(n // 20)
-        K = 20
-        hadK = _get_had20().T if transpose else _get_had20()
-    elif n % 12 == 0:
-        assert _is_pow2(n // 12)
-        K = 12
-        hadK = _get_had12().T if transpose else _get_had12()
-    else:
-        assert _is_pow2(n)
-        K = 1
+def is_pow2(n: int) -> bool:
+    """
+    Check if a number is a power of 2
 
-    return hadK, K
+    :param n: number to check
+    :return: True iff `n` is a power of 2
+    """
+    return n > 0 and (n & (n - 1) == 0)
+
+
+def _fetch_hadamard_divisor(
+    n: int,
+    dtype: torch.dtype,
+    device: torch.device = torch.device("cpu"),
+    file_path: str = REPO_PATH,
+) -> Optional[torch.Tensor]:
+    """
+    Fetch a known hadamard matrix from the given file path. The returned matrix will
+    be of size `k` such that `n / k` is a power of two. Return None if no such
+    matrix exists.
 
+    Note: This function reopens the safetensors file every time it is called.
+    This is technically inefficient, but a very small runtime cost and simpler
+    than forcing callers to manage the file open context
+
+    :param n: size of known hadamard matrix
+    :return: a known hadamard matrix of size `n` if one exists, else None
+    """
+    with safe_open(file_path, framework="pt", device=str(device)) as file:
+        divisors = sorted((int(key) for key in file.keys()), reverse=True)
+        for divisor in divisors:
+            if n % divisor == 0 and is_pow2(n // divisor):
+                return file.get_tensor(str(divisor)).to(dtype=dtype)
+
+    return None
+
+
+def _matmul_hadU(X: torch.Tensor) -> torch.Tensor:
+    size = X.size(0)
+    dtype = X.dtype
+    device = X.device
 
-def _matmul_hadU(X, transpose=False) -> torch.Tensor:
-    n = X.shape[-1]
     # Check if we have the determined hadamard matrix
-    hadK, K = _get_hadK(n, transpose)
+    hadK = _fetch_hadamard_divisor(size, dtype, device=device)
+    if hadK is None:
+        raise ValueError(f"Cannot construct random hadamard matrix of size {size}")
+    K = hadK.size(0)
+
     # Reshape diag matrix with randomized -1/+1
-    input = X.clone().view(-1, n, 1)
+    input = X.clone().view(-1, size, 1)
     output = input.clone()
-
-    # for cases when hadK is not predetermined, determine hadamard matrix
     while input.shape[1] > K:
         input = input.view(input.shape[0], input.shape[1] // 2, 2, input.shape[2])
         output = output.view(input.shape)
         output[:, :, 0, :] = input[:, :, 0, :] + input[:, :, 1, :]
         output[:, :, 1, :] = input[:, :, 0, :] - input[:, :, 1, :]
         output = output.view(input.shape[0], input.shape[1], -1)
         (input, output) = (output, input)
+    assert input.shape[1] == K
     del output
 
-    # K == 1 when hadK is None; this happens when the size dim (n)
-    # is not comaptible with any of the maintained hadamard matrices
-
-    if K > 1:
-        # Do not explicitly repeat - OOM
-        # input = torch.bmm(
-        #     hadK.repeat(len(input), 1, 1).to(input.device).to(input.dtype), input)
-        # Use bcast instead
-
-        # for cases when hadK is pre-determined
-        input = hadK.view(1, K, K).to(input) @ input
+    # Do not explicitly repeat - OOM
+    # input = torch.bmm(
+    #     hadK.repeat(len(input), 1, 1).to(input.device).to(input.dtype), input)
+    # Use bcast instead
+    input = hadK.view(1, K, K).to(input) @ input
 
     # normalize
     return input.view(X.shape)
-
-
-def _is_pow2(n: int) -> bool:
-    return (n & (n - 1) == 0) and (n > 0)
-
-
-def _reshape_bits(packed_bits: numpy.ndarray, original_size: int) -> numpy.ndarray:
-    had_unpacked = numpy.unpackbits(packed_bits)
-    had_unpacked = [1 if x == 1 else -1 for x in had_unpacked]
-    had_unpacked = numpy.array(had_unpacked).reshape((original_size, original_size))
-    return had_unpacked
-
-
-# http://www.neilsloane.com/hadamard/index.html
-def _get_had12() -> torch.Tensor:
-    # fmt: off
-    had_12 = numpy.array([128, 13, 29, 232, 235, 71, 218,
-        62, 209, 246, 139, 180, 157, 168, 237, 199, 106, 59], dtype=numpy.uint8)
-    # fmt: on
-    # TODO: just unpack during apply
-    had_12_unpacked = _reshape_bits(had_12, original_size=12)
-    return torch.tensor(had_12_unpacked)
-
-
-def _get_had20() -> torch.Tensor:
-    # fmt: off
-    had_20 = numpy.array([128, 0, 13, 133, 121, 236, 43, 203, 97, 94, 155, 10, 252,
-        216, 87, 230, 194, 191, 54, 21, 249, 176, 171, 205, 133, 222, 108, 42, 243,
-        97, 215, 155, 10, 188, 216, 149, 230, 200, 175, 54, 133, 121, 188, 43,
-        205, 225, 94, 107, 10, 243], dtype=numpy.uint8)
-    # fmt: on
-    # TODO: just unpack during apply
-    had_20_unpacked = _reshape_bits(had_20, original_size=20)
-    return torch.tensor(had_20_unpacked)
```
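The while-loop in `_matmul_hadU` above is a fast Walsh-Hadamard butterfly: each pass takes pairwise sums and differences, halving the working dimension until only the known `K×K` base matrix remains to be applied, which gives O(n log n) work instead of an O(n²) dense matmul (hence the pointer to fast-hadamard-transform for further acceleration). A standalone sketch of that butterfly for power-of-two lengths, illustrative only and not part of the repo:

```python
import torch

def fwht(x: torch.Tensor) -> torch.Tensor:
    """Unnormalized fast Walsh-Hadamard transform, equivalent to H_n @ x."""
    n = x.numel()
    assert n > 0 and n & (n - 1) == 0, "length must be a power of 2"
    x = x.clone()
    h = 1
    while h < n:
        # pair up blocks of width h and take sums/differences in place
        x = x.view(-1, 2 * h)
        a, b = x[:, :h].clone(), x[:, h:].clone()
        x[:, :h], x[:, h:] = a + b, a - b
        h *= 2
    return x.view(n)

# check against an explicit Sylvester-constructed H_8
x = torch.randn(8)
H = torch.tensor([[1.0]])
for _ in range(3):
    H = torch.vstack((torch.hstack((H, H)), torch.hstack((H, -H))))
assert torch.allclose(fwht(x), H @ x, atol=1e-5)
```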
src/compressed_tensors/transform/utils/hadamards.safetensors

Binary file not shown.
