Merge remote-tracking branch 'origin' into kylesayrs/transform_apply

kylesayrs · kylesayrs · commit 853ffcf24a93 · 2025-07-09T13:33:14.000-04:00
diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml
@@ -7,6 +7,10 @@ inputs:
   suitename:
     description: "test suite name"
     required: true
+  code_coverage:
+    description: whether to collect code coverage metrics during test run
+    type: boolean
+    default: false
 outputs:
   status:
     description: "final status from test"
@@ -44,9 +48,37 @@ runs:
       run: |
           source ${{ inputs.venv }}/bin/activate
           rm -rf src
+
+          if [[ "${ENABLE_COVERAGE}" == "true" ]]; then
+            echo "::group::Installing code coverage requirements via pip"
+            pip install bashlex https://github.com/neuralmagic/pytest-nm-releng/archive/v0.4.0.tar.gz
+            pip install coverage pytest-cov
+
+            # Adding Code coverage to the tests
+            nmre-generate-coverage-flags --package "compressed_tensors" --output-file ".coverage_flags.sh"
+            source .coverage_flags.sh
+            echo "::endgroup::"
+          fi
+
+          echo "::group::running tests"
+          echo "PYTEST_ADDOPTS set to: ${PYTEST_ADDOPTS}"
+
           SUCCESS=0
           pytest tests --junitxml=test-results/report.xml -o junit_suite_name="${{ inputs.suitename }}" || SUCCESS=$?
           echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"
+          echo "::endgroup::"
+
+          if [[ "${ENABLE_COVERAGE}" == "true" ]]; then
+            echo "::group::consolidating coverage reports"
+            mkdir -p coverage-results
+            mv .coverage coverage-results/ || echo ".coverage file not found"
+            mv coverage-html coverage-results/ || echo "coverage-html folder not found"
+            mv coverage.json coverage-results/ || echo "coverage.json file not found"
+            echo "::endgroup::"
+          fi
+          
           deactivate
           exit ${SUCCESS}
       shell: bash
+      env:
+        ENABLE_COVERAGE: ${{ inputs.code_coverage || false }}
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -25,6 +25,10 @@ on:
       run_id:
         description: run id of the BUILD job that generated the assets
         type: string
+      code_coverage:
+        description: whether to collect code coverage metrics during test run
+        type: boolean
+        default: false
 
   # makes workflow manually callable
   workflow_dispatch:
@@ -51,6 +55,10 @@ on:
       run_id:
         description: run id of the BUILD job that generated the assets
         type: string
+      code_coverage:
+        description: whether to collect code coverage metrics during test run
+        type: boolean
+        default: false
 
 jobs:
 
@@ -124,6 +132,7 @@ jobs:
               with:
                   venv: ${{ steps.create_venv.outputs.penv }}
                   suitename: test-${{ inputs.python }}-${{ inputs.test_label }}
+                  code_coverage: ${{ inputs.code_coverage }}
 
             - name: summary
               uses: neuralmagic/nm-actions/actions/summary-test@v1.13.0
@@ -146,3 +155,11 @@ jobs:
                   name: report-${{ inputs.test_label }}.xml
                   path: test-results/report.xml
                   retention-days: 5
+
+            - name: upload coverage report
+              uses: actions/upload-artifact@v4
+              if: (success() || failure()) && inputs.code_coverage
+              with:
+                  name: coverage-results
+                  path: coverage-results/*
+                  retention-days: 5
diff --git a/src/compressed_tensors/compressors/model_compressors/model_compressor.py b/src/compressed_tensors/compressors/model_compressors/model_compressor.py
@@ -42,10 +42,7 @@
     load_pretrained_quantization_parameters,
 )
 from compressed_tensors.quantization.lifecycle import expand_target_names
-from compressed_tensors.quantization.utils import (
-    is_module_quantized,
-    iter_named_leaf_modules,
-)
+from compressed_tensors.quantization.utils import is_module_quantized
 from compressed_tensors.utils import (
     align_module_device,
     delete_offload_parameter,
@@ -747,7 +744,7 @@ def map_module_to_scheme(model: Module) -> Dict[str, QuantizationScheme]:
     """
     return {
         fix_fsdp_module_name(name): module.quantization_scheme
-        for name, module in iter_named_leaf_modules(model)
+        for name, module in model.named_modules()
         if is_module_quantized(module)
     }
 
diff --git a/src/compressed_tensors/quantization/lifecycle/apply.py b/src/compressed_tensors/quantization/lifecycle/apply.py
@@ -38,8 +38,6 @@
     KV_CACHE_TARGETS,
     infer_quantization_status,
     is_kv_cache_quant_scheme,
-    iter_named_leaf_modules,
-    iter_named_quantizable_modules,
 )
 from compressed_tensors.utils.helpers import fix_fsdp_module_name, replace_module
 from compressed_tensors.utils.offload import update_parameter_data
@@ -87,7 +85,7 @@ def load_pretrained_quantization_parameters(
     model_path = get_safetensors_folder(model_name_or_path)
     mapping = get_quantization_parameter_to_path_mapping(model_path)
 
-    for name, submodule in iter_named_leaf_modules(model):
+    for name, submodule in model.named_modules():
         if not is_module_quantized(submodule):
             continue
         if submodule.quantization_scheme.input_activations is not None:
@@ -152,7 +150,7 @@ def apply_quantization_config(
     # list of submodules to ignore
     ignored_submodules = defaultdict(list)
     # mark appropriate layers for quantization by setting their quantization schemes
-    for name, submodule in model.named_modules():  # child modules and attention modules
+    for name, submodule in model.named_modules():
         # potentially fix module name to remove FSDP wrapper prefix
         name = fix_fsdp_module_name(name)
         if matches := find_name_or_class_matches(name, submodule, config.ignore):
@@ -283,7 +281,7 @@ def expand_target_names(
     """
     return {
         name
-        for name, module in iter_named_leaf_modules(model)
+        for name, module in model.named_modules()
         if is_target(name, module, targets, ignore)
     }
 
@@ -324,6 +322,11 @@ def find_name_or_class_matches(
         2. matches on regex patterns
         3. matches on module names
     """
+    from compressed_tensors import InternalModule
+
+    if isinstance(module, InternalModule):
+        return []
+
     targets = sorted(targets, key=lambda x: ("re:" in x, x))
     if isinstance(targets, Iterable):
         matches = _find_matches(name, targets) + _find_matches(
diff --git a/src/compressed_tensors/quantization/quant_config.py b/src/compressed_tensors/quantization/quant_config.py
@@ -22,9 +22,7 @@
     preset_name_to_scheme,
 )
 from compressed_tensors.quantization.utils import (
-    calculate_compression_ratio,
     is_module_quantized,
-    iter_named_quantizable_modules,
     module_type,
     parse_out_kv_cache_args,
 )
@@ -177,9 +175,7 @@ def from_pretrained(
         quantization_status = None
         ignore = {}
         quantization_type_names = set()
-        for name, submodule in iter_named_quantizable_modules(
-            model, include_children=True, include_attn=True
-        ):
+        for name, submodule in model.named_modules():
             layer_type = module_type(submodule)
             if not is_module_quantized(submodule):
                 if layer_type not in ignore:
diff --git a/src/compressed_tensors/quantization/utils/helpers.py b/src/compressed_tensors/quantization/utils/helpers.py
@@ -26,6 +26,7 @@
     QuantizationType,
 )
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
+from compressed_tensors.utils import deprecated
 from torch import FloatTensor, IntTensor, Tensor
 from torch.nn import Module
 from tqdm import tqdm
@@ -36,7 +37,6 @@
     "is_module_quantized",
     "is_model_quantized",
     "module_type",
-    "calculate_compression_ratio",
     "get_torch_bit_depth",
     "can_quantize",
     "parse_out_kv_cache_args",
@@ -276,12 +276,7 @@ def is_model_quantized(model: Module) -> bool:
     :param model: pytorch model
     :return: True if model is quantized, False otherwise
     """
-
-    for _, submodule in iter_named_leaf_modules(model):
-        if is_module_quantized(submodule):
-            return True
-
-    return False
+    return any(is_module_quantized(submodule) for submodule in model.modules())
 
 
 def module_type(module: Module) -> str:
@@ -294,6 +289,11 @@ def module_type(module: Module) -> str:
     return type(module).__name__
 
 
+@deprecated(
+    message="This function will be removed in a future release. "
+    "Please use `model.named_modules()` and filter by "
+    "compressed_tensors.InternalModule if neceessary"
+)
 def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None, None]:
     """
     Yields modules that do not have any submodules except observers. The observers
@@ -320,6 +320,11 @@ def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None
                 yield name, submodule
 
 
+@deprecated(
+    message="This function will be removed in a future release. "
+    "Please use `model.named_modules()` and filter by "
+    "compressed_tensors.InternalModule if neceessary"
+)
 def iter_named_quantizable_modules(
     model: Module,
     include_children: bool = True,
@@ -330,7 +335,6 @@ def iter_named_quantizable_modules(
     Yield name and submodule of
     - leaf modules, set by include_children
     - attention modyles, set by include_attn
-
     :param model: model to get leaf modules of
     :param include_children: flag to get the leaf modules
     :param inlcude_attn: flag to get the attention modules
@@ -397,34 +401,6 @@ def can_quantize(value: torch.Tensor, quant_args: "QuantizationArgs") -> bool:
     return bit_depth > quant_args.num_bits
 
 
-def calculate_compression_ratio(model: Module) -> float:
-    """
-    Calculates the quantization compression ratio of a pytorch model, based on the
-    number of bits needed to represent the total weights in compressed form. Does not
-    take into account activation quantizatons.
-
-    :param model: pytorch module to calculate compression ratio for
-    :return: compression ratio of the whole model
-    """
-    total_compressed = 0.0
-    total_uncompressed = 0.0
-    for name, submodule in tqdm(
-        iter_named_leaf_modules(model),
-        desc="Calculating quantization compression ratio",
-    ):
-        for parameter in model.parameters():
-            uncompressed_bits = get_torch_bit_depth(parameter)
-            compressed_bits = uncompressed_bits
-            if is_module_quantized(submodule) and submodule.quantization_scheme.weights:
-                compressed_bits = submodule.quantization_scheme.weights.num_bits
-
-            num_weights = parameter.numel()
-            total_compressed += compressed_bits * num_weights
-            total_uncompressed += uncompressed_bits * num_weights
-
-    return total_uncompressed / total_compressed
-
-
 def is_kv_cache_quant_scheme(scheme: QuantizationScheme) -> bool:
     """
     Check whether the QuantizationScheme targets the kv cache.
diff --git a/src/compressed_tensors/transform/factory/base.py b/src/compressed_tensors/transform/factory/base.py
@@ -17,6 +17,7 @@
 
 import torch
 import torch.nn.utils.parametrize as P
+from compressed_tensors import InternalModule
 from compressed_tensors.quantization.lifecycle import is_target  # TODO: move to utils
 from compressed_tensors.registry.registry import RegistryMixin, T
 from compressed_tensors.transform import (
@@ -146,7 +147,7 @@ def output_hook(_, _input, output):
             raise NotImplementedError()
 
 
-class TransformBase(Module, ABC):
+class TransformBase(InternalModule, ABC):
     """
     Represents the application of a transform accord to TransformArgs
     """
diff --git a/src/compressed_tensors/utils/__init__.py b/src/compressed_tensors/utils/__init__.py
@@ -14,6 +14,7 @@
 # flake8: noqa
 
 from .helpers import *
+from .internal import *
 from .offload import *
 from .permutations_24 import *
 from .permute import *
diff --git a/src/compressed_tensors/utils/internal.py b/src/compressed_tensors/utils/internal.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+
+
+__all__ = ["InternalModule"]
+
+
+class InternalModule(torch.nn.Module):
+    """
+    Abstract base class for modules which are not a part of the the model definition.
+    `torch.nn.Module`s which inherit from this class will not be targeted by configs
+
+    This is typically used to skip apply configs to `Observers` and `Transforms`
+    """
+
+    pass
diff --git a/tests/test_quantization/lifecycle/test_apply.py b/tests/test_quantization/lifecycle/test_apply.py
@@ -31,7 +31,6 @@
     expand_target_names,
     is_target,
 )
-from compressed_tensors.quantization.utils import iter_named_leaf_modules
 from tests.testing_utils import requires_accelerate
 from transformers import AutoModelForCausalLM
 
@@ -98,7 +97,7 @@ def test_target_prioritization(mock_frozen):
     apply_quantization_config(model, config)
     mock_frozen(model)
 
-    for name, module in iter_named_leaf_modules(model):
+    for name, module in model.named_modules():
         if name == "model.layers.0.mlp.down_proj":
             assert module.quantization_scheme.weights.num_bits == 2
         elif re.match(".*down_proj", name):