Skip to content

Commit e850a7c

Browse files
authored
Merge branch 'main' into provide_moe_calibration_mode
2 parents f8c0b98 + b457898 commit e850a7c

File tree

23 files changed

+164
-148
lines changed

23 files changed

+164
-148
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
name: prepare code coverage
2+
description: installs code coverage dependencies and exports an updated 'PYTEST_ADDOPTS' env var
3+
4+
runs:
5+
using: composite
6+
steps:
7+
- run: |-
8+
# install dependencies
9+
pip3 install coverage pytest-cov https://github.com/neuralmagic/pytest-nm-releng/archive/v0.4.0.tar.gz
10+
11+
# generate and source flags
12+
FLAGS_FILE="coverage_flags.sh"
13+
nmre-generate-coverage-flags --package "llmcompressor" --output-file "$FLAGS_FILE"
14+
source "$FLAGS_FILE"
15+
rm "$FLAGS_FILE"
16+
17+
# export defined/updated 'PYTEST_ADDOPTS' env var
18+
echo "PYTEST_ADDOPTS=$PYTEST_ADDOPTS" | tee -a "$GITHUB_ENV"
19+
shell: bash

.github/workflows/test-check-transformers.yaml

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
11
name: Test Checks (Transformers)
22
on:
33
pull_request:
4-
branches: main
4+
branches: [ main ]
55
types: [ labeled, synchronize ]
66
push:
7-
branches: main
7+
branches: [ main ]
8+
workflow_dispatch:
9+
inputs:
10+
code_coverage:
11+
description: if enabled, code coverage metrics will be collected during the test run
12+
type: boolean
13+
default: false
814

915
env:
1016
CADENCE: "commit"
@@ -72,6 +78,9 @@ jobs:
7278
BUILD_TYPE=nightly pip3 install .
7379
- name: "Clean compressed-tensors directory"
7480
run: rm -r compressed-tensors/
81+
- name: "⚙️ Prepare code coverage"
82+
if: inputs.code_coverage
83+
uses: ./.github/actions/prepare-code-coverage
7584
- name: "🔬 Running transformers tests"
7685
if: (success() || failure()) && steps.install.outcome == 'success'
7786
run: |
@@ -104,3 +113,13 @@ jobs:
104113
if: (success() || failure()) && steps.install.outcome == 'success'
105114
run: |
106115
pytest -v tests/llmcompressor/transformers/kv_cache
116+
- name: "Upload coverage report"
117+
if: (success() || failure()) && inputs.code_coverage
118+
uses: actions/upload-artifact@v4
119+
with:
120+
name: transformers-tests-coverage-results
121+
path: |
122+
.coverage
123+
coverage-html
124+
coverage.json
125+
retention-days: 5

.github/workflows/test-check.yaml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ on:
44
branches:
55
- main
66
push:
7+
workflow_dispatch:
8+
inputs:
9+
code_coverage:
10+
description: if enabled, code coverage metrics will be collected during the test run
11+
type: boolean
12+
default: false
713

814
env:
915
CADENCE: "commit"
@@ -36,8 +42,21 @@ jobs:
3642
BUILD_TYPE=nightly pip3 install .
3743
- name: "Clean compressed-tensors directory"
3844
run: rm -r compressed-tensors/
45+
- name: "⚙️ Prepare code coverage"
46+
if: inputs.code_coverage
47+
uses: ./.github/actions/prepare-code-coverage
3948
- name: "🔬 Running base tests"
4049
run: make test
50+
- name: "Upload coverage report"
51+
if: (success() || failure()) && inputs.code_coverage
52+
uses: actions/upload-artifact@v4
53+
with:
54+
name: base-tests-coverage-results
55+
path: |
56+
.coverage
57+
coverage-html
58+
coverage.json
59+
retention-days: 5
4160

4261
pytorch-tests:
4362
runs-on: ubuntu-22.04
@@ -65,9 +84,23 @@ jobs:
6584
BUILD_TYPE=nightly pip3 install .
6685
- name: "Clean compressed-tensors directory"
6786
run: rm -r compressed-tensors/
87+
- name: "⚙️ Prepare code coverage"
88+
if: inputs.code_coverage
89+
uses: ./.github/actions/prepare-code-coverage
6890
- name: "🔬 Running pytorch tests"
6991
run: |
7092
pytest -v tests/llmcompressor/pytorch
93+
- name: "Upload coverage report"
94+
if: (success() || failure()) && inputs.code_coverage
95+
uses: actions/upload-artifact@v4
96+
with:
97+
name: pytorch-tests-coverage-results
98+
path: |
99+
.coverage
100+
coverage-html
101+
coverage.json
102+
retention-days: 5
103+
71104

72105
compat-pytorch-1_9-pytorch-tests:
73106
runs-on: ubuntu-22.04
@@ -95,6 +128,19 @@ jobs:
95128
BUILD_TYPE=nightly pip3 install .
96129
- name: "Clean compressed-tensors directory"
97130
run: rm -r compressed-tensors/
131+
- name: "⚙️ Prepare code coverage"
132+
if: inputs.code_coverage
133+
uses: ./.github/actions/prepare-code-coverage
98134
- name: "🔬 Running pytorch tests"
99135
run: |
100136
pytest -v tests/llmcompressor/pytorch
137+
- name: "Upload coverage report"
138+
if: (success() || failure()) && inputs.code_coverage
139+
uses: actions/upload-artifact@v4
140+
with:
141+
name: compat-pytorch-tests-coverage-results
142+
path: |
143+
.coverage
144+
coverage-html
145+
coverage.json
146+
retention-days: 5

examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# NOTE: Fine tuning can require more steps than are shown in this example
2+
# See the Axolotl integration blog post for best fine tuning practices
3+
# https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open
4+
15
from pathlib import Path
26

37
import torch
@@ -74,6 +78,7 @@
7478
)
7579

7680
# Sparse finetune
81+
# This step can be supplanted by fine tuning via integrated FT libraries such as Axolotl
7782
train(
7883
model=(output_path / "sparsity_stage"),
7984
**oneshot_kwargs,

examples/trl_mixin/ex_trl_constant.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# NOTE: Fine tuning can require more steps than are shown in this example
2+
# See the Axolotl integration blog post for best fine tuning practices
3+
# https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open
4+
15
from datasets import load_dataset
26
from sft_trainer import SFTTrainer
37
from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -46,6 +50,7 @@ def formatting_prompts_func(example):
4650
)
4751
model_args = ModelArguments(model=model)
4852

53+
# This step can be supplanted by fine tuning via integrated FT libraries such as Axolotl
4954
trainer = SFTTrainer(
5055
model=model,
5156
processing_class=tokenizer,

examples/trl_mixin/ex_trl_distillation.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# NOTE: Fine tuning can require more steps than are shown in this example
2+
# See the Axolotl integration blog post for best fine tuning practices
3+
# https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open
4+
15
from sft_trainer import SFTTrainer
26
from transformers import AutoModelForCausalLM, AutoTokenizer
37

@@ -60,6 +64,7 @@
6064
)
6165
model_args = ModelArguments(model=model, distill_teacher=teacher)
6266

67+
# This step can be supplanted by fine tuning via integrated FT libraries such as Axolotl
6368
trainer = SFTTrainer(
6469
model=model,
6570
teacher=teacher,

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ def localversion_func(version: ScmVersion) -> str:
146146
"torchvision",
147147
"librosa",
148148
"soundfile",
149+
"torchcodec",
149150
# linting, formatting, and type checking
150151
"black~=24.4.2",
151152
"isort~=5.13.2",

src/llmcompressor/args/dataset_arguments.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -172,15 +172,6 @@ class DatasetArguments(CustomDatasetArguments):
172172
),
173173
},
174174
)
175-
trust_remote_code_data: bool = field(
176-
default=False,
177-
metadata={
178-
"help": "Whether or not to allow for datasets defined on the Hub using "
179-
"a dataset script. This option should only be set to True for "
180-
"repositories you trust and in which you have read the code, as it "
181-
"will execute code present on the Hub on your local machine."
182-
},
183-
)
184175
# --- pipeline arguments --- #
185176
pipeline: Optional[str] = field(
186177
default="independent",

src/llmcompressor/entrypoints/oneshot.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,6 @@ def oneshot(
231231
overwrite_cache: bool = False,
232232
preprocessing_num_workers: Optional[int] = None,
233233
min_tokens_per_module: Optional[float] = None,
234-
trust_remote_code_data: bool = False,
235234
calibrate_moe_context: bool = False,
236235
# Miscellaneous arguments
237236
output_dir: Optional[str] = None,
@@ -294,8 +293,6 @@ def oneshot(
294293
preprocessing.
295294
:param min_tokens_per_module: Minimum percentage of tokens per
296295
module, relevant for MoE models.
297-
:param trust_remote_code_data: Whether to allow for datasets defined on the Hub
298-
using a dataset script.
299296
300297
# Miscellaneous arguments
301298
:param output_dir: Path to save the output model after calibration.

src/llmcompressor/observers/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Any, Iterable, Optional, Tuple, Union
33

44
import torch
5+
from compressed_tensors import InternalModule
56
from compressed_tensors.quantization.quant_args import (
67
FP8_E4M3_DATA,
78
QuantizationArgs,
@@ -12,12 +13,11 @@
1213
from compressed_tensors.utils import safe_permute
1314
from loguru import logger
1415
from torch import FloatTensor, IntTensor, Tensor
15-
from torch.nn import Module
1616

1717
__all__ = ["Observer"]
1818

1919

20-
class Observer(Module, RegistryMixin):
20+
class Observer(InternalModule, RegistryMixin):
2121
"""
2222
Base Observer class to be subclassed for specific implementation.
2323
Subclasses should override `calculate_qparams` to return a scale, zero_point

0 commit comments

Comments
 (0)